add

2025-08-10 17:57:37 +03:00 · 2019-06-13 14:03:00 +03:00
parent 53b74e6db4
commit 7612db75b1
496 changed files with 202963 additions and 24 deletions
--- a/libraries/FastLED-3.2.0/platforms/arm/common/m0clockless.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/common/m0clockless.h
@@ -0,0 +1,318 @@
+#ifndef __INC_M0_CLOCKLESS_H
+#define __INC_M0_CLOCKLESS_H
+
+struct M0ClocklessData { // Working data that the showLedData asm addresses through fixed byte offsets from its base pointer
+  uint8_t d[3];    // offsets 0-2: dither bytes, loaded by loaddither7 and rewritten by adjdither7
+  uint8_t e[3];    // offsets 3-5: the "e" bytes; adjdither7 stores d = e - d
+  uint8_t adj;     // offset 6: byte added to the led pointer by incleds3
+  uint8_t pad;     // offset 7: not referenced by the asm; presumably keeps s[] word aligned
+  uint32_t s[3];   // offsets 8-19: scale factors, loaded as 32-bit words by scale4
+};
+
+
+// Bit-bang num_leds pixels (three bytes each) out of a single pin using the
+// Cortex-M0/M0+ assembler macros defined in the first asm statement below.
+//
+// Template parameters:
+//   HI_OFFSET, LO_OFFSET - immediate byte offsets from _port; storing _bitmask
+//                          at _port+HI_OFFSET / _port+LO_OFFSET is what the
+//                          code uses to drive the line high / low.
+//   T1, T2, T3           - cycle counts handed to mod_delay for the three
+//                          phases of every bit.
+//   RGB_ORDER            - not named directly in this function; presumably
+//                          consumed by the RO() macro used in M0_ASM_ARGS
+//                          (defined elsewhere).
+//   WAIT_TIME            - not named directly in this function; presumably
+//                          consumed by SEI_CHK/CLI_CHK (defined elsewhere).
+// Parameters:
+//   _port, _bitmask - register base address and pin mask used for the stores
+//   _leds           - first byte of the pixel data
+//   num_leds        - number of pixels to emit
+//   pData           - dither/scale working data; modified while running
+// Returns num_leds (0 when there was nothing to send).
+template<int HI_OFFSET, int LO_OFFSET, int T1, int T2, int T3, EOrder RGB_ORDER, int WAIT_TIME>int
+showLedData(volatile uint32_t *_port, uint32_t _bitmask, const uint8_t *_leds, uint32_t num_leds, struct M0ClocklessData *pData) {
+  // An empty strip must never enter the output loops: both of them decrement
+  // the counter before testing it, so a start value of 0 would wrap around to
+  // 0xFFFFFFFF and keep clocking out memory far past the end of _leds.
+  if(num_leds == 0) { return num_leds; }
+
+  // Lo register variables
+  register uint32_t scratch=0;
+  register struct M0ClocklessData *base = pData;
+  register volatile uint32_t *port = _port;
+  register uint32_t d=0;
+  register uint32_t counter=num_leds;
+  register uint32_t bn=0;
+  register uint32_t b=0;
+  register uint32_t bitmask = _bitmask;
+
+  // high register variable
+  register const uint8_t *leds = _leds;
+#if (FASTLED_SCALE8_FIXED == 1)
+  pData->s[0]++;
+  pData->s[1]++;
+  pData->s[2]++;
+#endif
+  asm __volatile__ (
+    ///////////////////////////////////////////////////////////////////////////
+    //
+    // asm macro definitions - used to assemble the clockless output
+    //
+    // The .ifnotdef guard makes sure the macros are only defined once per
+    // assembly file, however many times this template is instantiated.
+    ".ifnotdef fl_delay_def;"
+#ifdef FASTLED_ARM_M0_PLUS
+    "  .set fl_is_m0p, 1;"
+    "  .macro m0pad;"
+    "    nop;"
+    "  .endm;"
+#else
+    "  .set fl_is_m0p, 0;"
+    "  .macro m0pad;"
+    "  .endm;"
+#endif
+    "  .set fl_delay_def, 1;"
+    "  .set fl_delay_mod, 4;"
+    "  .if fl_is_m0p == 1;"
+    "    .set fl_delay_mod, 3;"
+    "  .endif;"
+    // burn dtime cycles: the remainder as nops, the rest in a countdown loop
+    "  .macro fl_delay dtime, reg=r0;"
+    "    .if (\\dtime > 0);"
+    "      .set dcycle, (\\dtime / fl_delay_mod);"
+    "      .set dwork, (dcycle * fl_delay_mod);"
+    "      .set drem, (\\dtime - dwork);"
+    "      .rept (drem);"
+    "        nop;"
+    "      .endr;"
+    "      .if dcycle > 0;"
+    "        mov \\reg, #dcycle;"
+    "        delayloop_\\@:;"
+    "        sub \\reg, #1;"
+    "        bne delayloop_\\@;"
+    "        .if fl_is_m0p == 0;"
+    "          nop;"
+    "        .endif;"
+    "      .endif;"
+    "    .endif;"
+    "  .endm;"
+
+    // delay for dtime minus the cycles (b1+b2) already spent on other work
+    "  .macro mod_delay dtime,b1,b2,reg;"
+    "    .set adj, (\\b1 + \\b2);"
+    "    .if adj < \\dtime;"
+    "      .set dtime2, (\\dtime - adj);"
+    "      fl_delay dtime2, \\reg;"
+    "    .endif;"
+    "  .endm;"
+
+    // check the bit and drop the line low if it isn't set
+    "  .macro qlo4 b,bitmask,port,loff ;"
+    "    lsl \\b, #1 ;"
+    "    bcs skip_\\@ ;"
+    "    str \\bitmask, [\\port, \\loff] ;"
+    "    skip_\\@: ;"
+    "    m0pad;"
+    "  .endm ;"
+
+    // set the pin hi or low (determined by the offset passed in )
+    "  .macro qset2 bitmask,port,loff;"
+    "    str \\bitmask, [\\port, \\loff];"
+    "    m0pad;"
+    "  .endm;"
+
+    // Load up the next led byte to work with, put it in bn
+    "  .macro loadleds3 leds, bn, rled, scratch;"
+    "    mov \\scratch, \\leds;"
+    "    ldrb \\bn, [\\scratch, \\rled];"
+    "  .endm;"
+
+    // check whether or not we should dither
+    "  .macro loaddither7 bn,d,base,rdither;"
+    "    ldrb \\d, [\\base, \\rdither];"
+    "    lsl \\d, #24;"  //; shift high for the qadd w/bn
+    "    lsl \\bn, #24;" //; shift high for the qadd w/d
+    "    bne chkskip_\\@;" //; if bn==0, clear d;"
+    "    eor \\d, \\d;" //; clear d;"
+    "    m0pad;"
+    "    chkskip_\\@:;"
+    "  .endm;"
+
+    // Do the qadd8 for dithering -- there's two versions of this.  The m0 version
+    // takes advantage of the 3 cycle branch to do two things after the branch,
+    // while keeping timing constant.  The m0+, however, branches in 2 cycles, so
+    // we have to work around that a bit more.  This is one of the few times
+    // where the m0 will actually be _more_ efficient than the m0+
+    "  .macro dither5 bn,d;"
+    "  .syntax unified;"
+    "    .if fl_is_m0p == 0;"
+    "      adds \\bn, \\d;"         // do the add
+    "      bcc dither5_1_\\@;"
+    "      mvns \\bn, \\bn;"        // set the low 24bits to 1's
+    "      lsls \\bn, \\bn, #24;"   // move low 8 bits to the high bits
+    "      dither5_1_\\@:;"
+    "      nop;"                    // nop to keep timing in line
+    "    .else;"
+    "      adds \\bn, \\d;"         // do the add"
+    "      bcc dither5_2_\\@;"
+    "      mvns \\bn, \\bn;"        // set the low 24bits to 1's
+    "      dither5_2_\\@:;"
+    "      bcc dither5_3_\\@;"
+    "      lsls \\bn, \\bn, #24;"   // move low 8 bits to the high bits
+    "      dither5_3_\\@:;"
+    "    .endif;"
+    "  .syntax divided;"
+    "  .endm;"
+
+    // Do our scaling
+    "  .macro scale4 bn, base, scale, scratch;"
+    "    ldr \\scratch, [\\base, \\scale];"
+    "    lsr \\bn, \\bn, #24;"                  // bring bn back down to its low 8 bits
+    "    mul \\bn, \\scratch;"                  // do the multiply
+    "  .endm;"
+
+    // swap bn into b
+    "  .macro swapbbn1 b,bn;"
+    "    lsl \\b, \\bn, #16;"  // put the 8 bits we want for output high
+    "  .endm;"
+
+    // adjust the dithering value for the next time around (load e from memory
+    // to do the math)
+    "  .macro adjdither7 base,d,rled,eoffset,scratch;"
+    "    ldrb \\d, [\\base, \\rled];"
+    "    ldrb \\scratch,[\\base,\\eoffset];"          // load e
+    "    .syntax unified;"
+    "    subs \\d, \\scratch, \\d;"                   // d=e-d
+    "    .syntax divided;"
+    "    strb \\d, [\\base, \\rled];"                 // save d
+    "  .endm;"
+
+    // increment the led pointer (base+6 has what we're incrementing by)
+    "  .macro incleds3   leds, base, scratch;"
+    "    ldrb \\scratch, [\\base, #6];"               // load increment
+    "    add \\leds, \\leds, \\scratch;"              // update leds pointer
+    "  .endm;"
+
+    // compare and loop
+    "  .macro cmploop5 counter,label;"
+    "    .syntax unified;"
+    "    subs \\counter, #1;"
+    "    .syntax divided;"
+    "    beq done_\\@;"
+    "    m0pad;"
+    "    b \\label;"
+    "    done_\\@:;"
+    "  .endm;"
+
+    " .endif;"
+  );
+
+// Operand lists shared by every output asm statement below.
+// NOTE(review): no "cc" or "memory" clobbers are listed although the asm sets
+// the flags and stores through %[base] and %[port] - confirm this is intended.
+#define M0_ASM_ARGS     :             \
+      [leds] "+h" (leds),             \
+      [counter] "+l" (counter),       \
+      [scratch] "+l" (scratch),       \
+      [d] "+l" (d),                   \
+      [bn] "+l" (bn),                 \
+      [b] "+l" (b)                    \
+    :                                 \
+      [port] "l" (port),              \
+      [base] "l" (base),              \
+      [bitmask] "l" (bitmask),        \
+      [hi_off] "I" (HI_OFFSET),       \
+      [lo_off] "I" (LO_OFFSET),       \
+      [led0] "I" (RO(0)),             \
+      [led1] "I" (RO(1)),             \
+      [led2] "I" (RO(2)),             \
+      [e0] "I" (3+RO(0)),             \
+      [e1] "I" (3+RO(1)),             \
+      [e2] "I" (3+RO(2)),             \
+      [scale0] "I" (4*(2+RO(0))),         \
+      [scale1] "I" (4*(2+RO(1))),         \
+      [scale2] "I" (4*(2+RO(2))),         \
+      [T1] "I" (T1),                  \
+      [T2] "I" (T2),                  \
+      [T3] "I" (T3)                   \
+    :
+
+    /////////////////////////////////////////////////////////////////////////
+    // now for some convenience macros to make building our lines a bit cleaner
+    // (the digit in each name matches the cycle count passed to D2 beside it)
+#define LOOP            "  loop_%=:"
+#define HI2             "  qset2 %[bitmask], %[port], %[hi_off];"
+#define D1              "  mod_delay %c[T1],2,0,%[scratch];"
+#define QLO4            "  qlo4 %[b],%[bitmask],%[port], %[lo_off];"
+#define LOADLEDS3(X)    "  loadleds3 %[leds], %[bn], %[led" #X "] ,%[scratch];"
+#define D2(ADJ)         "  mod_delay %c[T2],4," #ADJ ",%[scratch];"
+#define LO2             "  qset2 %[bitmask], %[port], %[lo_off];"
+#define D3(ADJ)         "  mod_delay %c[T3],2," #ADJ ",%[scratch];"
+#define LOADDITHER7(X)  "  loaddither7 %[bn], %[d], %[base], %[led" #X "];"
+#define DITHER5         "  dither5 %[bn], %[d];"
+#define SCALE4(X)       "  scale4 %[bn], %[base], %[scale" #X "], %[scratch];"
+#define SWAPBBN1        "  swapbbn1 %[b], %[bn];"
+#define ADJDITHER7(X)   "  adjdither7 %[base],%[d],%[led" #X "],%[e" #X "],%[scratch];"
+#define INCLEDS3        "  incleds3 %[leds],%[base],%[scratch];"
+#define CMPLOOP5        "  cmploop5 %[counter], loop_%=;"
+#define NOTHING         ""
+
+#if !(defined(SEI_CHK) && (FASTLED_ALLOW_INTERRUPTS == 1))
+    // We're not allowing interrupts - run the entire loop in asm to keep things
+    // as tight as possible.  In an ideal world, we should be pushing out ws281x
+    // leds (or other 3-wire leds) with zero gaps between pixels.
+    asm __volatile__ (
+      // pre-load byte 0
+    LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
+
+    // loop over writing out the data
+    LOOP
+      // Write out byte 0, prepping byte 1
+      HI2 D1 QLO4 NOTHING         D2(0) LO2 D3(0)
+      HI2 D1 QLO4 LOADLEDS3(1)    D2(3) LO2 D3(0)
+      HI2 D1 QLO4 LOADDITHER7(1)  D2(7) LO2 D3(0)
+      HI2 D1 QLO4 DITHER5         D2(5) LO2 D3(0)
+      HI2 D1 QLO4 SCALE4(1)       D2(4) LO2 D3(0)
+      HI2 D1 QLO4 ADJDITHER7(1)   D2(7) LO2 D3(0)
+      HI2 D1 QLO4 NOTHING         D2(0) LO2 D3(0)
+      HI2 D1 QLO4 SWAPBBN1        D2(1) LO2 D3(0)
+
+      // Write out byte 1, prepping byte 2
+      HI2 D1 QLO4 NOTHING         D2(0) LO2 D3(0)
+      HI2 D1 QLO4 LOADLEDS3(2)    D2(3) LO2 D3(0)
+      HI2 D1 QLO4 LOADDITHER7(2)  D2(7) LO2 D3(0)
+      HI2 D1 QLO4 DITHER5         D2(5) LO2 D3(0)
+      HI2 D1 QLO4 SCALE4(2)       D2(4) LO2 D3(0)
+      HI2 D1 QLO4 ADJDITHER7(2)   D2(7) LO2 D3(0)
+      HI2 D1 QLO4 INCLEDS3        D2(3) LO2 D3(0)
+      HI2 D1 QLO4 SWAPBBN1        D2(1) LO2 D3(0)
+
+      // Write out byte 2, prepping byte 0
+      HI2 D1 QLO4 NOTHING         D2(0) LO2 D3(0)
+      HI2 D1 QLO4 LOADLEDS3(0)    D2(3) LO2 D3(0)
+      HI2 D1 QLO4 LOADDITHER7(0)  D2(7) LO2 D3(0)
+      HI2 D1 QLO4 DITHER5         D2(5) LO2 D3(0)
+      HI2 D1 QLO4 SCALE4(0)       D2(4) LO2 D3(0)
+      HI2 D1 QLO4 ADJDITHER7(0)   D2(7) LO2 D3(0)
+      HI2 D1 QLO4 NOTHING         D2(0) LO2 D3(0)
+      HI2 D1 QLO4 SWAPBBN1        D2(1) LO2 D3(5) CMPLOOP5
+
+      M0_ASM_ARGS
+    );
+#else
+    // We're allowing interrupts - track the loop outside the asm code, to allow
+    // inserting the interrupt overrun checks.
+    asm __volatile__ (
+      // pre-load byte 0
+      LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
+      M0_ASM_ARGS);
+
+    do {
+      asm __volatile__ (
+      // Write out byte 0, prepping byte 1
+      HI2 D1 QLO4 NOTHING         D2(0) LO2 D3(0)
+      HI2 D1 QLO4 LOADLEDS3(1)    D2(3) LO2 D3(0)
+      HI2 D1 QLO4 LOADDITHER7(1)  D2(7) LO2 D3(0)
+      HI2 D1 QLO4 DITHER5         D2(5) LO2 D3(0)
+      HI2 D1 QLO4 SCALE4(1)       D2(4) LO2 D3(0)
+      HI2 D1 QLO4 ADJDITHER7(1)   D2(7) LO2 D3(0)
+      HI2 D1 QLO4 NOTHING         D2(0) LO2 D3(0)
+      HI2 D1 QLO4 SWAPBBN1        D2(1) LO2 D3(0)
+
+      // Write out byte 1, prepping byte 2
+      HI2 D1 QLO4 NOTHING         D2(0) LO2 D3(0)
+      HI2 D1 QLO4 LOADLEDS3(2)    D2(3) LO2 D3(0)
+      HI2 D1 QLO4 LOADDITHER7(2)  D2(7) LO2 D3(0)
+      HI2 D1 QLO4 DITHER5         D2(5) LO2 D3(0)
+      HI2 D1 QLO4 SCALE4(2)       D2(4) LO2 D3(0)
+      HI2 D1 QLO4 ADJDITHER7(2)   D2(7) LO2 D3(0)
+      HI2 D1 QLO4 NOTHING         D2(0) LO2 D3(0)
+      HI2 D1 QLO4 SWAPBBN1        D2(1) LO2 D3(0)
+
+      // Write out byte 2, prepping byte 0
+      HI2 D1 QLO4 INCLEDS3        D2(3) LO2 D3(0)
+      HI2 D1 QLO4 LOADLEDS3(0)    D2(3) LO2 D3(0)
+      HI2 D1 QLO4 LOADDITHER7(0)  D2(7) LO2 D3(0)
+      HI2 D1 QLO4 DITHER5         D2(5) LO2 D3(0)
+      HI2 D1 QLO4 SCALE4(0)       D2(4) LO2 D3(0)
+      HI2 D1 QLO4 ADJDITHER7(0)   D2(7) LO2 D3(0)
+      HI2 D1 QLO4 NOTHING         D2(0) LO2 D3(0)
+      HI2 D1 QLO4 SWAPBBN1        D2(1) LO2 D3(5)
+
+      M0_ASM_ARGS
+      );
+      SEI_CHK; INNER_SEI; --counter; CLI_CHK;
+    } while(counter);
+#endif
+    return num_leds;
+}
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/d21/clockless_arm_d21.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/d21/clockless_arm_d21.h
@@ -0,0 +1,61 @@
+#ifndef __INC_CLOCKLESS_ARM_D21
+#define __INC_CLOCKLESS_ARM_D21
+
+#include "../common/m0clockless.h"
+FASTLED_NAMESPACE_BEGIN
+#define FASTLED_HAS_CLOCKLESS 1
+
+template <uint8_t DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50> // XTRA0 and FLIP are not referenced in this class body
+class ClocklessController : public CPixelLEDController<RGB_ORDER> { // single-pin clockless controller that sends its data through showLedData()
+  typedef typename FastPinBB<DATA_PIN>::port_ptr_t data_ptr_t;
+  typedef typename FastPinBB<DATA_PIN>::port_t data_t;
+
+  data_t mPinMask; // cached in init(); not read anywhere else in this class body
+  data_ptr_t mPort; // cached in init(); not read anywhere else in this class body
+  CMinWait<WAIT_TIME> mWait; // wait()ed on before and mark()ed after every frame in showPixels()
+public:
+  virtual void init() { // switch the data pin to output and cache its mask and port
+    FastPinBB<DATA_PIN>::setOutput();
+    mPinMask = FastPinBB<DATA_PIN>::mask();
+    mPort = FastPinBB<DATA_PIN>::port();
+  }
+
+	virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+  virtual void showPixels(PixelController<RGB_ORDER> & pixels) { // send one frame with interrupts disabled; one retry if the first attempt returns 0
+    mWait.wait();
+    cli();
+    if(!showRGBInternal(pixels)) {
+      sei(); delayMicroseconds(WAIT_TIME); cli(); // re-enable interrupts for WAIT_TIME microseconds before the retry
+      showRGBInternal(pixels);
+    }
+    sei();
+    mWait.mark();
+  }
+
+  // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+  // gcc will use register Y for the this pointer.
+  static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) { // pixels is taken by value, so the caller's controller is left untouched for a retry
+    struct M0ClocklessData data; // copy of the dither (d, e), scale (s) and advance (adj) values laid out for the asm
+    data.d[0] = pixels.d[0];
+    data.d[1] = pixels.d[1];
+    data.d[2] = pixels.d[2];
+    data.s[0] = pixels.mScale[0];
+    data.s[1] = pixels.mScale[1];
+    data.s[2] = pixels.mScale[2];
+    data.e[0] = pixels.e[0];
+    data.e[1] = pixels.e[1];
+    data.e[2] = pixels.e[2];
+    data.adj = pixels.mAdvance;
+
+    typename FastPin<DATA_PIN>::port_ptr_t portBase = FastPin<DATA_PIN>::port(); // note: FastPin here, FastPinBB in init()
+    return showLedData<8,4,T1,T2,T3,RGB_ORDER, WAIT_TIME>(portBase, FastPin<DATA_PIN>::mask(), pixels.mData, pixels.mLen, &data); // 8 and 4 are the byte offsets from portBase used for the "high" and "low" stores
+  }
+
+
+};
+
+FASTLED_NAMESPACE_END
+
+
+#endif // __INC_CLOCKLESS_ARM_D21
--- a/libraries/FastLED-3.2.0/platforms/arm/d21/fastled_arm_d21.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/d21/fastled_arm_d21.h
@@ -0,0 +1,7 @@
+#ifndef __INC_FASTLED_ARM_D21_H
+#define __INC_FASTLED_ARM_D21_H
+
+#include "fastpin_arm_d21.h"
+#include "clockless_arm_d21.h"
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/d21/fastpin_arm_d21.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/d21/fastpin_arm_d21.h
@@ -0,0 +1,176 @@
+#ifndef __INC_FASTPIN_ARM_SAM_H
+#define __INC_FASTPIN_ARM_SAM_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_FORCE_SOFTWARE_PINS)
+#warning "Software pin support forced, pin access will be slightly slower."
+#define NO_HARDWARE_PIN_SUPPORT
+#undef HAS_HARDWARE_PIN_SUPPORT
+
+#else
+
+/// Template definition for ARM pins, providing direct access to the GPIO registers of one PORT group (PORT->Group[_GRP]).  Note that this
+/// uses the full port GPIO registers.  In theory, in some way, bit-band register access -should- be faster, however I have found
+/// that something about the way gcc does register allocation results in the bit-band code being slower.  It will need more fine tuning.
+/// The registers used below are data output (OUT), set output (OUTSET), clear output (OUTCLR) and toggle output (OUTTGL).
+
+template<uint8_t PIN, uint8_t _BIT, uint32_t _MASK, int _GRP> class _ARMPIN { // PIN: number passed to pinMode(); _MASK: value written to the registers; _GRP: PORT group index; _BIT: only used in the disabled block below
+public:
+  typedef volatile uint32_t * port_ptr_t;
+  typedef uint32_t port_t;
+
+  #if 0 // disabled register-level setOutput()/setInput(); never compiled
+  inline static void setOutput() {
+    if(_BIT<8) {
+      _CRL::r() = (_CRL::r() & (0xF << (_BIT*4)) | (0x1 << (_BIT*4));
+    } else {
+      _CRH::r() = (_CRH::r() & (0xF << ((_BIT-8)*4))) | (0x1 << ((_BIT-8)*4));
+    }
+  }
+  inline static void setInput() { /* TODO */ } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+  #endif
+
+  inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+  inline static void setInput() { pinMode(PIN, INPUT); } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+
+  inline static void hi() __attribute__ ((always_inline)) { PORT->Group[_GRP].OUTSET.reg = _MASK; } // write the mask to OUTSET
+  inline static void lo() __attribute__ ((always_inline)) { PORT->Group[_GRP].OUTCLR.reg = _MASK; } // write the mask to OUTCLR
+  // inline static void lo() __attribute__ ((always_inline)) { PORT->Group[_GRP].BSRR = (_MASK<<16); }
+  inline static void set(register port_t val) __attribute__ ((always_inline)) { PORT->Group[_GRP].OUT.reg = val; } // overwrite the whole OUT register
+
+  inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); } // two toggles in a row
+
+  inline static void toggle() __attribute__ ((always_inline)) { PORT->Group[_GRP].OUTTGL.reg = _MASK; } // write the mask to OUTTGL
+
+  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); } // port argument is ignored
+  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); } // port argument is ignored
+  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; } // plain store through the supplied pointer
+
+  inline static port_t hival() __attribute__ ((always_inline)) { return PORT->Group[_GRP].OUT.reg | _MASK; } // current OUT value with this pin's bit set
+  inline static port_t loval() __attribute__ ((always_inline)) { return PORT->Group[_GRP].OUT.reg & ~_MASK; } // current OUT value with this pin's bit cleared
+  inline static port_ptr_t port() __attribute__ ((always_inline)) { return &PORT->Group[_GRP].OUT.reg; } // address of OUT
+  inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &PORT->Group[_GRP].OUTSET.reg; } // address of OUTSET
+  inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &PORT->Group[_GRP].OUTCLR.reg; } // address of OUTCLR
+  inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+};
+
+// Helper macros that name/generate a struct __gen_struct_<T> whose static r() returns T as a
+// volatile PortGroup pointer.  None of them is used in the part of this header shown here.
+#define _R(T) struct __gen_struct_ ## T
+#define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline volatile PortGroup * r() { return T; } };
+
+#define _IO32(L) _RD32(GPIO ## L)
+
+// Specialise FastPin<PIN> for the pin at bit BIT of PORT group L.  The mask is built from an
+// unsigned constant so that BIT == 31 (used by some boards below) never shifts into the sign
+// bit of a signed int before being converted to the uint32_t template argument.
+#define _DEFPIN_ARM(PIN, L, BIT) template<> class FastPin<PIN> : public _ARMPIN<PIN, BIT, (1ul << (BIT)), L> {};
+
+// Actual pin definitions, one table per board: _DEFPIN_ARM(pin number, PORT group, bit within the group)
+#if defined(ARDUINO_SAMD_CIRCUITPLAYGROUND_EXPRESS)
+
+#define MAX_PIN 17
+_DEFPIN_ARM( 8,1,23);
+_DEFPIN_ARM( 0,1, 9); _DEFPIN_ARM( 1,1, 8); _DEFPIN_ARM( 2,1, 2); _DEFPIN_ARM( 3,1, 3);
+_DEFPIN_ARM( 6,0, 5); _DEFPIN_ARM( 9,0, 6); _DEFPIN_ARM(10,0, 7); _DEFPIN_ARM(12,0, 2);
+_DEFPIN_ARM(A6,1, 9); _DEFPIN_ARM(A7,1, 8); _DEFPIN_ARM(A5,1, 2); _DEFPIN_ARM(A4,1, 3); // the A* rows repeat the group/bit pairs of the numeric rows above
+_DEFPIN_ARM(A1,0, 5); _DEFPIN_ARM(A2,0, 6); _DEFPIN_ARM(A3,0, 7); _DEFPIN_ARM(A0,0, 2);
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(ARDUINO_SAMD_ZERO)
+
+#define MAX_PIN 42
+_DEFPIN_ARM( 0,0,10); _DEFPIN_ARM( 1,0,11); _DEFPIN_ARM( 2,0, 8); _DEFPIN_ARM( 3,0, 9);
+_DEFPIN_ARM( 4,0,14); _DEFPIN_ARM( 5,0,15); _DEFPIN_ARM( 6,0,20); _DEFPIN_ARM( 7,0,21);
+_DEFPIN_ARM( 8,0, 6); _DEFPIN_ARM( 9,0, 7); _DEFPIN_ARM(10,0,18); _DEFPIN_ARM(11,0,16);
+_DEFPIN_ARM(12,0,19); _DEFPIN_ARM(13,0,17); _DEFPIN_ARM(14,0, 2); _DEFPIN_ARM(15,1, 8);
+_DEFPIN_ARM(16,1, 9); _DEFPIN_ARM(17,0, 4); _DEFPIN_ARM(18,0, 5); _DEFPIN_ARM(19,1, 2);
+_DEFPIN_ARM(20,0,22); _DEFPIN_ARM(21,0,23); _DEFPIN_ARM(22,0,12); _DEFPIN_ARM(23,1,11);
+_DEFPIN_ARM(24,1,10); _DEFPIN_ARM(25,1, 3); _DEFPIN_ARM(26,0,27); _DEFPIN_ARM(27,0,28);
+_DEFPIN_ARM(28,0,24); _DEFPIN_ARM(29,0,25); _DEFPIN_ARM(30,1,22); _DEFPIN_ARM(31,1,23);
+_DEFPIN_ARM(32,0,22); _DEFPIN_ARM(33,0,23); _DEFPIN_ARM(34,0,19); _DEFPIN_ARM(35,0,16);
+_DEFPIN_ARM(36,0,18); _DEFPIN_ARM(37,0,17); _DEFPIN_ARM(38,0,13); _DEFPIN_ARM(39,0,21);
+_DEFPIN_ARM(40,0, 6); _DEFPIN_ARM(41,0, 7); _DEFPIN_ARM(42,0, 3);
+
+#define SPI_DATA 24
+#define SPI_CLOCK 23
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(ARDUINO_SODAQ_AUTONOMO)
+
+#define MAX_PIN 56
+_DEFPIN_ARM( 0,0, 9); _DEFPIN_ARM( 1,0,10); _DEFPIN_ARM( 2,0,11); _DEFPIN_ARM( 3,1,10);
+_DEFPIN_ARM( 4,1,11); _DEFPIN_ARM( 5,1,12); _DEFPIN_ARM( 6,1,13); _DEFPIN_ARM( 7,1,14);
+_DEFPIN_ARM( 8,1,15); _DEFPIN_ARM( 9,0,14); _DEFPIN_ARM(10,0,15); _DEFPIN_ARM(11,0,16);
+_DEFPIN_ARM(12,0,17); _DEFPIN_ARM(13,0,18); _DEFPIN_ARM(14,0,19); _DEFPIN_ARM(15,1,16);
+_DEFPIN_ARM(16,0, 8); _DEFPIN_ARM(17,0,28); _DEFPIN_ARM(18,1,17); _DEFPIN_ARM(19,0, 2);
+_DEFPIN_ARM(20,0, 6); _DEFPIN_ARM(21,0, 5); _DEFPIN_ARM(22,0, 4); _DEFPIN_ARM(23,1, 9);
+_DEFPIN_ARM(24,1, 8); _DEFPIN_ARM(25,1, 7); _DEFPIN_ARM(26,1, 6); _DEFPIN_ARM(27,1, 5);
+_DEFPIN_ARM(28,1, 4); _DEFPIN_ARM(29,0, 7); _DEFPIN_ARM(30,1, 3); _DEFPIN_ARM(31,1, 2);
+_DEFPIN_ARM(32,1, 1); _DEFPIN_ARM(33,1, 0); _DEFPIN_ARM(34,0, 3); _DEFPIN_ARM(35,0, 3); // NOTE(review): pins 34 and 35 both map to group 0, bit 3 - confirm
+_DEFPIN_ARM(36,1,30); _DEFPIN_ARM(37,1,31); _DEFPIN_ARM(38,1,22); _DEFPIN_ARM(39,1,23);
+_DEFPIN_ARM(40,0,12); _DEFPIN_ARM(41,0,13); _DEFPIN_ARM(42,0,22); _DEFPIN_ARM(43,0,23);
+_DEFPIN_ARM(44,0,20); _DEFPIN_ARM(45,0,21); _DEFPIN_ARM(46,0,27); _DEFPIN_ARM(47,0,24);
+_DEFPIN_ARM(48,0,25); _DEFPIN_ARM(49,1,13); _DEFPIN_ARM(50,1,14); _DEFPIN_ARM(51,0,17);
+_DEFPIN_ARM(52,0,18); _DEFPIN_ARM(53,1,12); _DEFPIN_ARM(54,1,13); _DEFPIN_ARM(55,1,14);
+_DEFPIN_ARM(56,1,15);
+
+#define SPI_DATA 44
+#define SPI_CLOCK 45
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(ARDUINO_SAMD_WINO)
+
+#define MAX_PIN 22 // NOTE(review): pins 0-23 are defined below, so 22 looks low - confirm
+_DEFPIN_ARM(  0, 0, 23); _DEFPIN_ARM(  1, 0, 22); _DEFPIN_ARM(  2, 0, 16); _DEFPIN_ARM(  3, 0, 17);
+_DEFPIN_ARM(  4, 0, 18); _DEFPIN_ARM(  5, 0, 19); _DEFPIN_ARM(  6, 0, 24); _DEFPIN_ARM(  7, 0, 25);
+_DEFPIN_ARM(  8, 0, 27); _DEFPIN_ARM(  9, 0, 28); _DEFPIN_ARM( 10, 0, 30); _DEFPIN_ARM( 11, 0, 31);
+_DEFPIN_ARM( 12, 0, 15); _DEFPIN_ARM( 13, 0, 14); _DEFPIN_ARM( 14, 0,  2); _DEFPIN_ARM( 15, 0,  3);
+_DEFPIN_ARM( 16, 0,  4); _DEFPIN_ARM( 17, 0,  5); _DEFPIN_ARM( 18, 0,  6); _DEFPIN_ARM( 19, 0,  7);
+_DEFPIN_ARM( 20, 0,  8); _DEFPIN_ARM( 21, 0,  9); _DEFPIN_ARM( 22, 0, 10); _DEFPIN_ARM( 23, 0, 11);
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(ARDUINO_SAMD_MKR1000)
+
+#define MAX_PIN 22 // NOTE(review): the highest pin defined below is 21 - confirm
+_DEFPIN_ARM(  0, 0, 22); _DEFPIN_ARM(  1, 0, 23); _DEFPIN_ARM(  2, 0, 10); _DEFPIN_ARM(  3, 0, 11);
+_DEFPIN_ARM(  4, 1, 10); _DEFPIN_ARM(  5, 1, 11); _DEFPIN_ARM(  6, 0, 20); _DEFPIN_ARM(  7, 0, 21);
+_DEFPIN_ARM(  8, 0, 16); _DEFPIN_ARM(  9, 0, 17); _DEFPIN_ARM( 10, 0, 19); _DEFPIN_ARM( 11, 0,  8);
+_DEFPIN_ARM( 12, 0,  9); _DEFPIN_ARM( 13, 1, 23); _DEFPIN_ARM( 14, 1, 22); _DEFPIN_ARM( 15, 0,  2);
+_DEFPIN_ARM( 16, 1,  2); _DEFPIN_ARM( 17, 1,  3); _DEFPIN_ARM( 18, 0,  4); _DEFPIN_ARM( 19, 0,  5);
+_DEFPIN_ARM( 20, 0,  6); _DEFPIN_ARM( 21, 0,  7);
+
+#define SPI_DATA 8
+#define SPI_CLOCK 9
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(ARDUINO_GEMMA_M0)
+
+#define MAX_PIN 4
+_DEFPIN_ARM( 0, 0, 4); _DEFPIN_ARM( 1, 0, 2); _DEFPIN_ARM( 2, 0, 5); 
+_DEFPIN_ARM( 3, 0, 0); _DEFPIN_ARM( 4, 0, 1);
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(ADAFRUIT_TRINKET_M0)
+
+#define MAX_PIN 5 // NOTE(review): the highest pin defined below is 4, and the Gemma M0 table uses MAX_PIN 4 for pins 0-4 - confirm
+_DEFPIN_ARM( 0, 0, 8); _DEFPIN_ARM( 1, 0, 2); _DEFPIN_ARM( 2, 0, 9);
+_DEFPIN_ARM( 3, 0, 7); _DEFPIN_ARM( 4, 0, 6);
+
+#define SPI_DATA  4
+#define SPI_CLOCK 3
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#endif
+
+
+
+#endif // FASTLED_FORCE_SOFTWARE_PINS
+
+FASTLED_NAMESPACE_END
+
+
+#endif // __INC_FASTPIN_ARM_SAM_H
--- a/libraries/FastLED-3.2.0/platforms/arm/d21/led_sysdefs_arm_d21.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/d21/led_sysdefs_arm_d21.h
@@ -0,0 +1,26 @@
+#ifndef __INC_LED_SYSDEFS_ARM_D21_H
+#define __INC_LED_SYSDEFS_ARM_D21_H
+
+
+#define FASTLED_ARM
+#define FASTLED_ARM_M0_PLUS // makes m0clockless.h emit the m0pad nop and use a delay-loop divisor of 3 instead of 4
+
+#ifndef INTERRUPT_THRESHOLD
+#define INTERRUPT_THRESHOLD 1
+#endif
+
+// Default to NOT allowing interrupts; define FASTLED_ALLOW_INTERRUPTS as 1 beforehand to override
+#ifndef FASTLED_ALLOW_INTERRUPTS
+#define FASTLED_ALLOW_INTERRUPTS 0
+#endif
+
+#if FASTLED_ALLOW_INTERRUPTS == 1
+#define FASTLED_ACCURATE_CLOCK
+#endif
+
+// reusing/abusing cli/sei defs for due
+// NOTE(review): both expansions already end in ';', so "cli();" yields an extra empty statement - confirm callers never rely on that
+#define cli()  __disable_irq();
+#define sei() __enable_irq();
+
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/k20/clockless_arm_k20.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k20/clockless_arm_k20.h
@@ -0,0 +1,124 @@
+#ifndef __INC_CLOCKLESS_ARM_K20_H
+#define __INC_CLOCKLESS_ARM_K20_H
+
+FASTLED_NAMESPACE_BEGIN
+
+// Definition for a single channel clockless controller for the k20 family of chips, like that used in the teensy 3.0/3.1
+// See clockless.h for detailed info on how the template parameters are used.
+#if defined(FASTLED_TEENSY3)
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50> // FLIP is not referenced in this class body
+class ClocklessController : public CPixelLEDController<RGB_ORDER> { // single-pin clockless controller timed against the ARM_DWT_CYCCNT cycle counter
+	typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPin<DATA_PIN>::port_t data_t;
+
+	data_t mPinMask; // cached in init(); not read anywhere else in this class body
+	data_ptr_t mPort; // cached in init(); not read anywhere else in this class body
+	CMinWait<WAIT_TIME> mWait; // wait()ed on before and mark()ed after every frame in showPixels()
+public:
+	virtual void init() { // switch the data pin to output and cache its mask and port
+		FastPin<DATA_PIN>::setOutput();
+		mPinMask = FastPin<DATA_PIN>::mask();
+		mPort = FastPin<DATA_PIN>::port();
+	}
+
+	virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+protected:
+
+	virtual void showPixels(PixelController<RGB_ORDER> & pixels) { // send one frame; one retry if showRGBInternal() returns 0
+    mWait.wait();
+		if(!showRGBInternal(pixels)) {
+      sei(); delayMicroseconds(WAIT_TIME); cli(); // pause for WAIT_TIME microseconds with interrupts enabled before the retry
+      showRGBInternal(pixels);
+    }
+    mWait.mark();
+  }
+
+	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t & b)  { // emit BITS bits of b, most significant bit first
+		for(register uint32_t i = BITS-1; i > 0; i--) { // first BITS-1 bits; the last one is handled after the loop, without the shift
+			while(ARM_DWT_CYCCNT < next_mark); // spin until the previous bit period is over
+			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3); // end of this bit period
+			FastPin<DATA_PIN>::fastset(port, hi);
+			if(b&0x80) {
+				while((next_mark - ARM_DWT_CYCCNT) > (T3+(2*(F_CPU/24000000)))); // 1 bit: stay high until about T3 cycles remain
+				FastPin<DATA_PIN>::fastset(port, lo);
+			} else {
+				while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+(2*(F_CPU/24000000)))); // 0 bit: stay high until about T2+T3 cycles remain
+				FastPin<DATA_PIN>::fastset(port, lo);
+			}
+			b <<= 1;
+		}
+
+		while(ARM_DWT_CYCCNT < next_mark);
+		next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+		FastPin<DATA_PIN>::fastset(port, hi);
+
+		if(b&0x80) {
+			while((next_mark - ARM_DWT_CYCCNT) > (T3+(2*(F_CPU/24000000))));
+			FastPin<DATA_PIN>::fastset(port, lo);
+		} else {
+			while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+(2*(F_CPU/24000000))));
+			FastPin<DATA_PIN>::fastset(port, lo);
+		}
+	}
+
+	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+	// gcc will use register Y for the this pointer.
+	static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) { // returns 0 when a frame is abandoned, otherwise the cycle counter value at the end
+	    // Get access to the clock
+		ARM_DEMCR    |= ARM_DEMCR_TRCENA;
+		ARM_DWT_CTRL |= ARM_DWT_CTRL_CYCCNTENA;
+		ARM_DWT_CYCCNT = 0; // restart the cycle counter so the "<" comparisons against next_mark start from zero
+
+		register data_ptr_t port = FastPin<DATA_PIN>::port();
+		register data_t hi = *port | FastPin<DATA_PIN>::mask();; // port value with the data pin set
+		register data_t lo = *port & ~FastPin<DATA_PIN>::mask();; // port value with the data pin cleared
+		*port = lo;
+
+		// Setup the pixel controller and load/scale the first byte
+		pixels.preStepFirstByteDithering();
+		register uint8_t b = pixels.loadAndScale0();
+
+		cli();
+		uint32_t next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+
+		while(pixels.has(1)) {
+			pixels.stepDithering();
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			cli();
+			// if interrupts took longer than (WAIT_TIME - INTERRUPT_THRESHOLD) µs, punt on the current frame
+			if(ARM_DWT_CYCCNT > next_mark) {
+				if((ARM_DWT_CYCCNT-next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return 0; }
+			}
+
+			hi = *port | FastPin<DATA_PIN>::mask(); // re-read the port: it may have changed while interrupts were enabled
+			lo = *port & ~FastPin<DATA_PIN>::mask();
+			#endif
+			// Write first byte, read next byte
+			writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+			b = pixels.loadAndScale1();
+
+			// Write second byte, read 3rd byte
+			writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+			b = pixels.loadAndScale2();
+
+			// Write third byte, read 1st byte of next pixel
+			writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+			b = pixels.advanceAndLoadAndScale0();
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			sei();
+			#endif
+		};
+
+		sei();
+		return ARM_DWT_CYCCNT;
+	}
+};
+#endif
+
+FASTLED_NAMESPACE_END
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/k20/clockless_block_arm_k20.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k20/clockless_block_arm_k20.h
@@ -0,0 +1,330 @@
+#ifndef __INC_BLOCK_CLOCKLESS_ARM_K20_H
+#define __INC_BLOCK_CLOCKLESS_ARM_K20_H
+
+// Definition for a single channel clockless controller for the k20 family of chips, like that used in the teensy 3.0/3.1
+// See clockless.h for detailed info on how the template parameters are used.
+#if defined(FASTLED_TEENSY3)
+#define FASTLED_HAS_BLOCKLESS 1
+
+#define PORTC_FIRST_PIN 15
+#define PORTD_FIRST_PIN 2
+#define HAS_PORTDC 1
+
+#define PORT_MASK (((1<<LANES)-1) & ((FIRST_PIN==2) ? 0xFF : 0xFFF))
+
+#define MIN(X,Y) (((X)<(Y)) ? (X):(Y))
+#define USED_LANES ((FIRST_PIN==2) ? MIN(LANES,8) : MIN(LANES,12))
+
+#include <kinetis.h>
+
+FASTLED_NAMESPACE_BEGIN
+
+template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 40>
+class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, PORT_MASK> {
+	typedef typename FastPin<FIRST_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPin<FIRST_PIN>::port_t data_t;
+
+	data_t mPinMask;
+	data_ptr_t mPort;
+	CMinWait<WAIT_TIME> mWait;
+public:
+	// Size reported for this controller: the base controller's size multiplied by the lane count.
+	virtual int size() { return CLEDController::size() * LANES; }
+
+	// Push one frame out: wait out the minimum gap since the previous frame, stream the pixels,
+	// then record when this frame finished. When FASTLED_ALLOW_INTERRUPTS == 0 the millisecond
+	// counter is advanced by the time the frame took.
+	virtual void showPixels(PixelController<RGB_ORDER, LANES, PORT_MASK> & pixels) {
+		mWait.wait();
+
+		uint32_t cyclesUsed = showRGBInternal(pixels);
+		#if FASTLED_ALLOW_INTERRUPTS == 0
+		// Credit the elapsed frame time to the millisecond counter
+		long elapsedMicros = CLKS_TO_MICROS(cyclesUsed);
+		MS_COUNTER += (1 + (elapsedMicros / 1000));
+		#endif
+
+		mWait.mark();
+	}
+
+	// Configure one output pin per used lane and cache the first pin's mask and port.
+	// Lane N's pin is enabled whenever at least N lanes are in use; the pin numbers are listed
+	// from the highest lane down to lane 1.
+	virtual void init() {
+		const int lanes = USED_LANES;
+
+		if(FIRST_PIN == PORTC_FIRST_PIN) { // PORTC
+			if(lanes >= 12) { FastPin<30>::setOutput(); }
+			if(lanes >= 11) { FastPin<29>::setOutput(); }
+			if(lanes >= 10) { FastPin<27>::setOutput(); }
+			if(lanes >= 9) { FastPin<28>::setOutput(); }
+			if(lanes >= 8) { FastPin<12>::setOutput(); }
+			if(lanes >= 7) { FastPin<11>::setOutput(); }
+			if(lanes >= 6) { FastPin<13>::setOutput(); }
+			if(lanes >= 5) { FastPin<10>::setOutput(); }
+			if(lanes >= 4) { FastPin<9>::setOutput(); }
+			if(lanes >= 3) { FastPin<23>::setOutput(); }
+			if(lanes >= 2) { FastPin<22>::setOutput(); }
+			if(lanes >= 1) { FastPin<15>::setOutput(); }
+		} else if(FIRST_PIN == PORTD_FIRST_PIN) { // PORTD
+			if(lanes >= 8) { FastPin<5>::setOutput(); }
+			if(lanes >= 7) { FastPin<21>::setOutput(); }
+			if(lanes >= 6) { FastPin<20>::setOutput(); }
+			if(lanes >= 5) { FastPin<6>::setOutput(); }
+			if(lanes >= 4) { FastPin<8>::setOutput(); }
+			if(lanes >= 3) { FastPin<7>::setOutput(); }
+			if(lanes >= 2) { FastPin<14>::setOutput(); }
+			if(lanes >= 1) { FastPin<2>::setOutput(); }
+		}
+
+		mPinMask = FastPin<FIRST_PIN>::mask();
+		mPort = FastPin<FIRST_PIN>::port();
+	}
+
+	// Maximum refresh rate reported for this controller (presumably frames per second - confirm against callers).
+	virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+	// Per-lane byte buffer, viewable as bytes, 16-bit words or 32-bit words.
+	// It must hold 16 bytes: with more than 8 lanes writeBits() transposes with a stride of two
+	// (writing bytes 0..15) and then reads shorts[0..7], one 16-bit word per bit position.
+	typedef union {
+		uint8_t bytes[16];
+		uint16_t shorts[8];
+		uint32_t raw[4];
+	} Lines;
+
+	// Emit one byte (eight bit periods) on every lane at once, and meanwhile load the next byte for each
+	// lane into `b`. PX selects which channel's dither/scale values are used for the loads. BITS is not
+	// referenced in the body. `next_mark` is the cycle-counter deadline for the start of the next bit and
+	// is advanced once per bit period.
+	template<int BITS,int PX> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register Lines & b, PixelController<RGB_ORDER, LANES, PORT_MASK> &pixels) { // , register uint32_t & b2)  {
+		// Transpose the per-lane bytes into b2, which is then read one entry per bit period
+		register Lines b2;
+		if(USED_LANES>8) {
+			transpose8<1,2>(b.bytes,b2.bytes);
+			transpose8<1,2>(b.bytes+8,b2.bytes+1);
+		} else {
+			transpose8x1(b.bytes,b2.bytes);
+		}
+		register uint8_t d = pixels.template getd<PX>(pixels);
+		register uint8_t scale = pixels.template getscale<PX>(pixels);
+
+		// First group of bit periods: each one also loads the next byte for two lanes
+		for(register uint32_t i = 0; i < (USED_LANES/2); i++) {
+			// Wait for the start of this bit period, then raise every lane
+			while(ARM_DWT_CYCCNT < next_mark);
+			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
+			*FastPin<FIRST_PIN>::sport() = PORT_MASK;
+
+			// Once only about T2+T3 remain, drop the lanes whose current bit is 0
+			while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+(2*(F_CPU/24000000))));
+			if(USED_LANES>8) {
+				*FastPin<FIRST_PIN>::cport() = ((~b2.shorts[i]) & PORT_MASK);
+			} else {
+				*FastPin<FIRST_PIN>::cport() = ((~b2.bytes[7-i]) & PORT_MASK);
+			}
+
+			// Once only T3 remains, drop every lane
+			while((next_mark - ARM_DWT_CYCCNT) > (T3));
+			*FastPin<FIRST_PIN>::cport() = PORT_MASK;
+
+			b.bytes[i] = pixels.template loadAndScale<PX>(pixels,i,d,scale);
+			b.bytes[i+(USED_LANES/2)] = pixels.template loadAndScale<PX>(pixels,i+(USED_LANES/2),d,scale);
+		}
+
+		// if folks use an odd number of lanes, get the last byte's value here
+		if(USED_LANES & 0x01) {
+			b.bytes[USED_LANES-1] = pixels.template loadAndScale<PX>(pixels,USED_LANES-1,d,scale);
+		}
+
+		// Remaining bit periods: same waveform, no loads left to do
+		for(register uint32_t i = USED_LANES/2; i < 8; i++) {
+			while(ARM_DWT_CYCCNT < next_mark);
+			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
+			*FastPin<FIRST_PIN>::sport() = PORT_MASK;
+			while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+(2*(F_CPU/24000000))));
+			if(USED_LANES>8) {
+				*FastPin<FIRST_PIN>::cport() = ((~b2.shorts[i]) & PORT_MASK);
+			} else {
+				// b2.bytes[0] = 0;
+				*FastPin<FIRST_PIN>::cport() = ((~b2.bytes[7-i]) & PORT_MASK);
+			}
+
+			while((next_mark - ARM_DWT_CYCCNT) > (T3));
+			*FastPin<FIRST_PIN>::cport() = PORT_MASK;
+
+		}
+	}
+
+
+
+	// Streams every pixel of `allpixels` out on all lanes in parallel, pacing the output against the
+	// DWT cycle counter. Returns the cycle counter value at exit (also when the frame is abandoned
+	// because an interrupt ran too long). Interrupts are always enabled again on return.
+	//
+	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+	// gcc will use register Y for the this pointer.
+	static uint32_t showRGBInternal(PixelController<RGB_ORDER, LANES, PORT_MASK> &allpixels) {
+		// Get access to the cycle counter and restart it from zero for this frame
+		ARM_DEMCR    |= ARM_DEMCR_TRCENA;
+		ARM_DWT_CTRL |= ARM_DWT_CTRL_CYCCNTENA;
+		ARM_DWT_CYCCNT = 0;
+
+		// Setup the pixel controller and load/scale the first byte
+		allpixels.preStepFirstByteDithering();
+		register Lines b0;
+
+		// NOTE(review): the pre-step is issued a second time here, whereas the single-lane k20
+		// controller issues it once - confirm whether the repeat is intentional.
+		allpixels.preStepFirstByteDithering();
+		for(int i = 0; i < USED_LANES; i++) {
+			b0.bytes[i] = allpixels.loadAndScale0(i);
+		}
+
+		cli();
+		// Cycle-counter deadline handed to writeBits(), which advances it as bits go out
+		uint32_t next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+
+		while(allpixels.has(1)) {
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			cli();
+			// if interrupts pushed us more than (WAIT_TIME-5)µs past the mark, punt on the current frame
+			if(ARM_DWT_CYCCNT > next_mark) {
+				if((ARM_DWT_CYCCNT-next_mark) > ((WAIT_TIME-5)*CLKS_PER_US)) { sei(); return ARM_DWT_CYCCNT; }
+			}
+			#endif
+			allpixels.stepDithering();
+
+			// Write first byte, read next byte
+			writeBits<8+XTRA0,1>(next_mark, b0, allpixels);
+
+			// Write second byte, read 3rd byte
+			writeBits<8+XTRA0,2>(next_mark, b0, allpixels);
+			allpixels.advanceData();
+
+			// Write third byte
+			writeBits<8+XTRA0,0>(next_mark, b0, allpixels);
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			// Open a window between pixels in which pending interrupts can run
+			sei();
+			#endif
+		};
+
+		// Re-enable interrupts before returning. Without this, a build with
+		// FASTLED_ALLOW_INTERRUPTS == 0 (or a frame with no pixels) would return with
+		// interrupts still disabled by the cli() above.
+		sei();
+
+		return ARM_DWT_CYCCNT;
+	}
+};
+
+#define PMASK ((1<<(LANES))-1)
+#define PMASK_HI (PMASK>>8 & 0xFF)
+#define PMASK_LO (PMASK & 0xFF)
+
+template <uint8_t LANES, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class SixteenWayInlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, PMASK> {
+	typedef typename FastPin<PORTC_FIRST_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPin<PORTC_FIRST_PIN>::port_t data_t;
+
+	data_t mPinMask;
+	data_ptr_t mPort;
+	CMinWait<WAIT_TIME> mWait;
+public:
+	// Configure one output pin per lane. Lane N's pin is enabled whenever at least N lanes are in use;
+	// lanes 9-16 and lanes 1-8 are listed as two groups, each from the highest lane down.
+	virtual void init() {
+		static_assert(LANES <= 16, "Maximum of 16 lanes for Teensy parallel controllers!");
+
+		if(LANES >= 16) { FastPin<12>::setOutput(); }
+		if(LANES >= 15) { FastPin<11>::setOutput(); }
+		if(LANES >= 14) { FastPin<13>::setOutput(); }
+		if(LANES >= 13) { FastPin<10>::setOutput(); }
+		if(LANES >= 12) { FastPin<9>::setOutput(); }
+		if(LANES >= 11) { FastPin<23>::setOutput(); }
+		if(LANES >= 10) { FastPin<22>::setOutput(); }
+		if(LANES >= 9)  { FastPin<15>::setOutput(); }
+
+		if(LANES >= 8)  { FastPin<5>::setOutput(); }
+		if(LANES >= 7)  { FastPin<21>::setOutput(); }
+		if(LANES >= 6)  { FastPin<20>::setOutput(); }
+		if(LANES >= 5)  { FastPin<6>::setOutput(); }
+		if(LANES >= 4)  { FastPin<8>::setOutput(); }
+		if(LANES >= 3)  { FastPin<7>::setOutput(); }
+		if(LANES >= 2)  { FastPin<14>::setOutput(); }
+		if(LANES >= 1)  { FastPin<2>::setOutput(); }
+	}
+
+	// Push one frame out: wait out the minimum gap since the previous frame, stream the pixels,
+	// then record when this frame finished. When FASTLED_ALLOW_INTERRUPTS == 0 the millisecond
+	// counter is advanced by the time the frame took.
+	virtual void showPixels(PixelController<RGB_ORDER, LANES, PMASK> & pixels) {
+		mWait.wait();
+
+		uint32_t cyclesUsed = showRGBInternal(pixels);
+		#if FASTLED_ALLOW_INTERRUPTS == 0
+		// Credit the elapsed frame time to the millisecond counter
+		long elapsedMicros = CLKS_TO_MICROS(cyclesUsed);
+		MS_COUNTER += (1 + (elapsedMicros / 1000));
+		#endif
+
+		mWait.mark();
+	}
+
+	// Per-lane byte buffer (one byte for each of up to 16 lanes), also viewable as 16-bit or 32-bit words.
+	typedef union {
+		uint8_t bytes[16];
+		uint16_t shorts[8];
+		uint32_t raw[4];
+	} Lines;
+
+	// Emit one byte (eight bit periods) on every lane at once - lanes 1-8 through the PORTD_FIRST_PIN
+	// port and lanes 9-16 through the PORTC_FIRST_PIN port - and meanwhile load the next byte for each
+	// lane into `b`. PX selects which channel's dither/scale values are used for the loads. BITS is not
+	// referenced in the body. `next_mark` is the cycle-counter deadline for the start of the next bit.
+	//
+	// All eight bit periods are always emitted; only the per-lane loads are limited to existing lanes.
+	// (Bounding the loop itself by LANES would send fewer than eight bits per byte when LANES < 8.)
+	template<int BITS,int PX> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register Lines & b, PixelController<RGB_ORDER,LANES, PMASK> &pixels) { // , register uint32_t & b2)  {
+		// Transpose each group of eight lane bytes into b2, which is then read one entry per bit period
+		register Lines b2;
+		transpose8x1(b.bytes,b2.bytes);
+		transpose8x1(b.bytes+8,b2.bytes+8);
+		register uint8_t d = pixels.template getd<PX>(pixels);
+		register uint8_t scale = pixels.template getscale<PX>(pixels);
+
+		for(register uint32_t i = 0; i < 8; i++) {
+			// Wait for the start of this bit period, then raise every lane
+			while(ARM_DWT_CYCCNT < next_mark);
+			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
+			*FastPin<PORTD_FIRST_PIN>::sport() = PMASK_LO;
+			*FastPin<PORTC_FIRST_PIN>::sport() = PMASK_HI;
+
+			// Once only about T2+T3 remain, drop the lanes whose current bit is 0
+			while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+6));
+			*FastPin<PORTD_FIRST_PIN>::cport() = ((~b2.bytes[7-i]) & PMASK_LO);
+			*FastPin<PORTC_FIRST_PIN>::cport() = ((~b2.bytes[15-i]) & PMASK_HI);
+
+			// Once only T3 remains, drop every lane
+			while((next_mark - ARM_DWT_CYCCNT) > (T3));
+			*FastPin<PORTD_FIRST_PIN>::cport() = PMASK_LO;
+			*FastPin<PORTC_FIRST_PIN>::cport() = PMASK_HI;
+
+			// Load the next byte for lane i (and lane i+8) if that lane exists. The LANES>=8 test is a
+			// compile-time constant, so builds with eight or more lanes keep an unconditional load.
+			if(LANES>=8 || (i < LANES)) {
+				b.bytes[i] = pixels.template loadAndScale<PX>(pixels,i,d,scale);
+			}
+			if(LANES==16 || (LANES>8 && ((i+8) < LANES))) {
+				b.bytes[i+8] = pixels.template loadAndScale<PX>(pixels,i+8,d,scale);
+			}
+		}
+	}
+
+
+
+	// Streams every pixel of `allpixels` out on all lanes in parallel, pacing the output against the
+	// DWT cycle counter, and returns the cycle counter value at exit. The per-pixel interrupt window is
+	// compiled out here (`#if 0`), so interrupts stay disabled for the whole frame.
+	//
+	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+	// gcc will use register Y for the this pointer.
+		static uint32_t showRGBInternal(PixelController<RGB_ORDER,LANES, PMASK> &allpixels) {
+		// Get access to the cycle counter and restart it from zero for this frame
+		ARM_DEMCR    |= ARM_DEMCR_TRCENA;
+		ARM_DWT_CTRL |= ARM_DWT_CTRL_CYCCNTENA;
+		ARM_DWT_CYCCNT = 0;
+
+		// Setup the pixel controller and load/scale the first byte
+		allpixels.preStepFirstByteDithering();
+		register Lines b0;
+
+		// NOTE(review): the pre-step is issued a second time here, whereas the single-lane k20
+		// controller issues it once - confirm whether the repeat is intentional.
+		allpixels.preStepFirstByteDithering();
+		for(int i = 0; i < LANES; i++) {
+			b0.bytes[i] = allpixels.loadAndScale0(i);
+		}
+
+		cli();
+		// Cycle-counter deadline handed to writeBits(), which advances it as bits go out
+		uint32_t next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+
+		while(allpixels.has(1)) {
+			allpixels.stepDithering();
+			#if 0 && (FASTLED_ALLOW_INTERRUPTS == 1)
+			cli();
+			// if interrupts pushed us more than (WAIT_TIME-INTERRUPT_THRESHOLD)µs past the mark, punt on the current frame
+			if(ARM_DWT_CYCCNT > next_mark) {
+				if((ARM_DWT_CYCCNT-next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return ARM_DWT_CYCCNT; }
+			}
+			#endif
+
+			// Write first byte, read next byte
+			writeBits<8+XTRA0,1>(next_mark, b0, allpixels);
+
+			// Write second byte, read 3rd byte
+			writeBits<8+XTRA0,2>(next_mark, b0, allpixels);
+			allpixels.advanceData();
+
+			// Write third byte
+			writeBits<8+XTRA0,0>(next_mark, b0, allpixels);
+
+			#if 0 && (FASTLED_ALLOW_INTERRUPTS == 1)
+			sei();
+			#endif
+		};
+		// Always leave with interrupts enabled
+		sei();
+
+		return ARM_DWT_CYCCNT;
+	}
+};
+
+FASTLED_NAMESPACE_END
+
+#endif
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/k20/fastled_arm_k20.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k20/fastled_arm_k20.h
@@ -0,0 +1,13 @@
+#ifndef __INC_FASTLED_ARM_K20_H
+#define __INC_FASTLED_ARM_K20_H
+
+// Include the k20 headers
+#include "fastpin_arm_k20.h"
+#include "fastspi_arm_k20.h"
+#include "octows2811_controller.h"
+#include "ws2812serial_controller.h"
+#include "smartmatrix_t3.h"
+#include "clockless_arm_k20.h"
+#include "clockless_block_arm_k20.h"
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/k20/fastpin_arm_k20.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k20/fastpin_arm_k20.h
@@ -0,0 +1,120 @@
+#ifndef __FASTPIN_ARM_K20_H
+#define __FASTPIN_ARM_K20_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_FORCE_SOFTWARE_PINS)
+#warning "Software pin support forced, pin access will be sloightly slower."
+#define NO_HARDWARE_PIN_SUPPORT
+#undef HAS_HARDWARE_PIN_SUPPORT
+
+#else
+
+
+/// Template definition for teensy 3.0 style ARM pins, providing direct access to the various GPIO registers.  Note that this
+/// uses the full port GPIO registers.  In theory, in some way, bit-band register access -should- be faster, however I have found
+/// that something about the way gcc does register allocation results in the bit-band code being slower.  It will need more fine tuning.
+/// The registers are data output, set output, clear output, toggle output, input, and direction.
+/// PIN is the board pin number and _MASK the pin's bit mask within its port; each register type exposes the register through r().
+template<uint8_t PIN, uint32_t _MASK, typename _PDOR, typename _PSOR, typename _PCOR, typename _PTOR, typename _PDIR, typename _PDDR> class _ARMPIN {
+public:
+	typedef volatile uint32_t * port_ptr_t;
+	typedef uint32_t port_t;
+
+	// Pin direction is delegated to pinMode()
+	inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+	inline static void setInput() { pinMode(PIN, INPUT); } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+
+	// hi()/lo() write this pin's mask to the set / clear register; set() writes a whole value to the data output register
+	inline static void hi() __attribute__ ((always_inline)) { _PSOR::r() = _MASK; }
+	inline static void lo() __attribute__ ((always_inline)) { _PCOR::r() = _MASK; }
+	inline static void set(register port_t val) __attribute__ ((always_inline)) { _PDOR::r() = val; }
+
+	// Two toggles in a row
+	inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+
+	inline static void toggle() __attribute__ ((always_inline)) { _PTOR::r() = _MASK; }
+
+	// Port-pointer variants: hi/lo ignore the pointer, fastset writes `val` through it
+	inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+	inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+	inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+
+	// Current data output register value with this pin's bit set / cleared
+	inline static port_t hival() __attribute__ ((always_inline)) { return _PDOR::r() | _MASK; }
+	inline static port_t loval() __attribute__ ((always_inline)) { return _PDOR::r() & ~_MASK; }
+	// Addresses of the data output, set and clear registers, and this pin's mask
+	inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_PDOR::r(); }
+	inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &_PSOR::r(); }
+	inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &_PCOR::r(); }
+	inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+};
+
+/// Template definition for teensy 3.0 style ARM pins using bit banding, providing direct access to the various GPIO registers.  GCC
+/// does a poor job of optimizing around these accesses so they are not being used just yet.
+/// Every access goes through rx<_BIT>() of the register type, i.e. the bit-band alias word for this pin's bit, so the
+/// values written and returned here are 0 or 1 rather than port-wide masks.
+template<uint8_t PIN, int _BIT, typename _PDOR, typename _PSOR, typename _PCOR, typename _PTOR, typename _PDIR, typename _PDDR> class _ARMPIN_BITBAND {
+public:
+	typedef volatile uint32_t * port_ptr_t;
+	typedef uint32_t port_t;
+
+	// Pin direction is delegated to pinMode()
+	inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+	inline static void setInput() { pinMode(PIN, INPUT); } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+
+	// hi()/lo()/set() all write to the data output register's alias word for this bit
+	inline static void hi() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = 1; }
+	inline static void lo() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = 0; }
+	inline static void set(register port_t val) __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = val; }
+
+	// Two toggles in a row
+	inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+
+	inline static void toggle() __attribute__ ((always_inline)) { *_PTOR::template rx<_BIT>() = 1; }
+
+	// Port-pointer variants: the pointer argument is ignored by all three
+	inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi();  }
+	inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+	inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = val; }
+
+	// With a per-bit alias the "high" value, "low" value and mask are simply 1, 0 and 1
+	inline static port_t hival() __attribute__ ((always_inline)) { return 1; }
+	inline static port_t loval() __attribute__ ((always_inline)) { return 0; }
+	inline static port_ptr_t port() __attribute__ ((always_inline)) { return _PDOR::template rx<_BIT>(); }
+	inline static port_t mask() __attribute__ ((always_inline)) { return 1; }
+};
+
+// Macros for k20 pin access/definition
+#define GPIO_BITBAND_ADDR(reg, bit) (((uint32_t)&(reg) - 0x40000000) * 32 + (bit) * 4 + 0x42000000)
+#define GPIO_BITBAND_PTR(reg, bit) ((uint32_t *)GPIO_BITBAND_ADDR((reg), (bit)))
+
+#define _R(T) struct __gen_struct_ ## T
+#define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; } \
+	template<int BIT> static __attribute__((always_inline)) inline ptr_reg32_t rx() { return GPIO_BITBAND_PTR(T, BIT); } };
+#define _IO32(L) _RD32(GPIO ## L ## _PDOR); _RD32(GPIO ## L ## _PSOR); _RD32(GPIO ## L ## _PCOR); _RD32(GPIO ## L ## _PTOR); _RD32(GPIO ## L ## _PDIR); _RD32(GPIO ## L ## _PDDR);
+
+#define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
+																			_R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {}; \
+									template<> class FastPinBB<PIN> : public _ARMPIN_BITBAND<PIN, BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
+ 																			_R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {};
+
+// Actual pin definitions
+#if defined(FASTLED_TEENSY3) && defined(CORE_TEENSY)
+
+_IO32(A); _IO32(B); _IO32(C); _IO32(D); _IO32(E);
+
+#define MAX_PIN 33
+_DEFPIN_ARM(0, 16, B); _DEFPIN_ARM(1, 17, B); _DEFPIN_ARM(2, 0, D); _DEFPIN_ARM(3, 12, A);
+_DEFPIN_ARM(4, 13, A); _DEFPIN_ARM(5, 7, D); _DEFPIN_ARM(6, 4, D); _DEFPIN_ARM(7, 2, D);
+_DEFPIN_ARM(8, 3, D); _DEFPIN_ARM(9, 3, C); _DEFPIN_ARM(10, 4, C); _DEFPIN_ARM(11, 6, C);
+_DEFPIN_ARM(12, 7, C); _DEFPIN_ARM(13, 5, C); _DEFPIN_ARM(14, 1, D); _DEFPIN_ARM(15, 0, C);
+_DEFPIN_ARM(16, 0, B); _DEFPIN_ARM(17, 1, B); _DEFPIN_ARM(18, 3, B); _DEFPIN_ARM(19, 2, B);
+_DEFPIN_ARM(20, 5, D); _DEFPIN_ARM(21, 6, D); _DEFPIN_ARM(22, 1, C); _DEFPIN_ARM(23, 2, C);
+_DEFPIN_ARM(24, 5, A); _DEFPIN_ARM(25, 19, B); _DEFPIN_ARM(26, 1, E); _DEFPIN_ARM(27, 9, C);
+_DEFPIN_ARM(28, 8, C); _DEFPIN_ARM(29, 10, C); _DEFPIN_ARM(30, 11, C); _DEFPIN_ARM(31, 0, E);
+_DEFPIN_ARM(32, 18, B); _DEFPIN_ARM(33, 4, A);
+
+#define SPI_DATA 11
+#define SPI_CLOCK 13
+#define SPI1            (*(SPI_t *)0x4002D000)
+
+#define SPI2_DATA 7
+#define SPI2_CLOCK 14
+
+#define FASTLED_TEENSY3
+#define ARM_HARDWARE_SPI
+#define HAS_HARDWARE_PIN_SUPPORT
+#endif
+
+#endif // FASTLED_FORCE_SOFTWARE_PINS
+
+FASTLED_NAMESPACE_END
+
+#endif // __FASTPIN_ARM_K20_H
--- a/libraries/FastLED-3.2.0/platforms/arm/k20/fastspi_arm_k20.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k20/fastspi_arm_k20.h
@@ -0,0 +1,466 @@
+#ifndef __INC_FASTSPI_ARM_H
+#define __INC_FASTSPI_ARM_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_TEENSY3) && defined(CORE_TEENSY)
+
+// Version 1.20 renamed SPI_t to KINETISK_SPI_t
+#if TEENSYDUINO >= 120
+#define SPI_t KINETISK_SPI_t
+#endif
+
+#ifndef KINETISK_SPI0
+#define KINETISK_SPI0 SPI0
+#endif
+
+#ifndef SPI_PUSHR_CONT
+#define SPI_PUSHR_CONT SPIX.PUSHR_CONT
+#define SPI_PUSHR_CTAS(X) SPIX.PUSHR_CTAS(X)
+#define SPI_PUSHR_EOQ SPIX.PUSHR_EOQ
+#define SPI_PUSHR_CTCNT SPIX.PUSHR_CTCNT
+#define SPI_PUSHR_PCS(X) SPIX.PUSHR_PCS(X)
+#endif
+
+// Compile-time helper: BitWork<VAL, BIT>::highestBit() yields the index of the highest bit set in VAL,
+// searching downwards from bit position BIT.  The search bottoms out at bit 0, so the result is 0 both
+// when bit 0 is the only bit set and when no bit at or below BIT is set at all.
+template<int VAL, int BIT> class BitWork {
+public:
+	static int highestBit() __attribute__((always_inline)) {
+		if(VAL & (1 << BIT)) { return BIT; }
+		return BitWork<VAL, BIT-1>::highestBit();
+	}
+};
+// Recursion terminator for bit 0
+template<int VAL> class BitWork<VAL, 0> {
+public:
+	static int highestBit() __attribute__((always_inline)) { return 0; }
+};
+
+#define MAX(A, B) (( (A) > (B) ) ? (A) : (B))
+
+#define USE_CONT 0
+// intra-frame backup data
+struct SPIState {
+	uint32_t _ctar0,_ctar1;
+	uint32_t pins[4];
+};
+
+// extern SPIState gState;
+
+
+// Templated function to translate a clock divider value into the prescalar, scalar, and clock doubling setting for the world.
+// VAL is the requested divider; the three results are written to the reference parameters.  A set of
+// dividers is handled by explicit table entries; everything else goes through the search in `default`.
+template <int VAL> void getScalars(uint32_t & preScalar, uint32_t & scalar, uint32_t & dbl) {
+	switch(VAL) {
+		// Handle the dbl clock cases
+		case 0: case 1:
+		case 2: preScalar = 0; scalar = 0; dbl = 1; break;
+		case 3: preScalar = 1; scalar = 0; dbl = 1; break;
+		case 5: preScalar = 2; scalar = 0; dbl = 1; break;
+		case 7: preScalar = 3; scalar = 0; dbl = 1; break;
+
+		// Handle the scalar value 6 cases (since it's not a power of two, it won't get caught
+		// below)
+		case 9: preScalar = 1; scalar = 2; dbl = 1; break;
+		case 18: case 19: preScalar = 1; scalar = 2; dbl = 0; break;
+
+		case 15: preScalar = 2; scalar = 2; dbl = 1; break;
+		case 30: case 31: preScalar = 2; scalar = 2; dbl = 0; break;
+
+		case 21: case 22: case 23: preScalar = 3; scalar = 2; dbl = 1; break;
+		case 42: case 43: case 44: case 45: case 46: case 47: preScalar = 3; scalar = 2; dbl = 0; break;
+		default: {
+			// For each candidate factor (2, 3, 5, 7): the highest set bit of VAL/factor
+			int p2 = BitWork<VAL/2, 15>::highestBit();
+			int p3 = BitWork<VAL/3, 15>::highestBit();
+			int p5 = BitWork<VAL/5, 15>::highestBit();
+			int p7 = BitWork<VAL/7, 15>::highestBit();
+
+			// Weight of each candidate: factor * 2^p (0 for 3/5/7 when VAL is smaller than the factor)
+			int w2 = 2 * (1 << p2);
+			int w3 = (VAL/3) > 0 ? 3 * (1 << p3) : 0;
+			int w5 = (VAL/5) > 0 ? 5 * (1 << p5) : 0;
+			int w7 = (VAL/7) > 0 ? 7 * (1 << p7) : 0;
+
+			int maxval = MAX(MAX(w2, w3), MAX(w5, w7));
+
+			// Pick the candidate with the largest weight; ties go to the smaller factor
+			if(w2 == maxval) { preScalar = 0; scalar = p2; }
+			else if(w3 == maxval) { preScalar = 1; scalar = p3; }
+			else if(w5 == maxval) { preScalar = 2; scalar = p5; }
+			else if(w7 == maxval) { preScalar = 3; scalar = p7; }
+
+			// A scalar of 0 turns clock doubling on; scalars 1 and 2 are shifted down by one
+			// (presumably to match the hardware's scaler encoding - confirm against the reference manual)
+			dbl = 0;
+			if(scalar == 0) { dbl = 1; }
+			else if(scalar < 3) { scalar--; }
+		}
+	}
+	return;
+}
+
+#define SPIX (*(SPI_t*)pSPIX)
+
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
+class ARMHardwareSPIOutput {
+	Selectable *m_pSelect;
+	SPIState gState;
+
+	// Borrowed from the teensy3 SPSR emulation code -- note, enabling pin 7 disables pin 11 (and vice versa),
+	// and likewise enabling pin 14 disables pin 13 (and vice versa).
+	// The selected data/clock pin gets MUX(2); its alternate pin is set to MUX(1).  Data or clock pins
+	// other than 7/11 and 13/14 are left untouched.
+	inline void enable_pins(void) __attribute__((always_inline)) {
+		//serial_print("enable_pins\n");
+		switch(_DATA_PIN) {
+			case 7:
+				CORE_PIN7_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
+				CORE_PIN11_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+				break;
+			case 11:
+				CORE_PIN11_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
+				CORE_PIN7_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+				break;
+		}
+
+		switch(_CLOCK_PIN) {
+			case 13:
+				CORE_PIN13_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
+				CORE_PIN14_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+				break;
+			case 14:
+				CORE_PIN14_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
+				CORE_PIN13_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+				break;
+		}
+	}
+
+	// Borrowed from the teensy3 SPSR emulation code.  We disable the pins that we're using, and restore the state on the pins that we aren't using.
+	// The pin we used goes back to MUX(1); the alternate pin gets the configuration that save_spi_state()
+	// recorded in gState.pins (index 0..3 = pins 7, 11, 13, 14).
+	inline void disable_pins(void) __attribute__((always_inline)) {
+		switch(_DATA_PIN) {
+			case 7: CORE_PIN7_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); CORE_PIN11_CONFIG = gState.pins[1]; break;
+			case 11: CORE_PIN11_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); CORE_PIN7_CONFIG = gState.pins[0]; break;
+		}
+
+		switch(_CLOCK_PIN) {
+			case 13: CORE_PIN13_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); CORE_PIN14_CONFIG = gState.pins[3]; break;
+			case 14: CORE_PIN14_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); CORE_PIN13_CONFIG = gState.pins[2]; break;
+		}
+	}
+
+	// Load new values into CTAR0 and CTAR1.  Nothing is written when both already match.  If the module
+	// is not disabled (MDIS clear) it is disabled and halted for the duration of the update and MCR is
+	// then put back exactly as it was found.
+	static inline void update_ctars(uint32_t ctar0, uint32_t ctar1) __attribute__((always_inline)) {
+		if(SPIX.CTAR0 == ctar0 && SPIX.CTAR1 == ctar1) return;
+
+		const uint32_t savedMcr = SPIX.MCR;
+		const bool moduleEnabled = !(savedMcr & SPI_MCR_MDIS);
+
+		if(moduleEnabled) { SPIX.MCR = savedMcr | SPI_MCR_MDIS | SPI_MCR_HALT; }
+		SPIX.CTAR0 = ctar0;
+		SPIX.CTAR1 = ctar1;
+		if(moduleEnabled) { SPIX.MCR = savedMcr; }
+	}
+
+	// Load a new value into CTAR0 only; same disable/halt-and-restore handling as update_ctars().
+	static inline void update_ctar0(uint32_t ctar) __attribute__((always_inline)) {
+		if (SPIX.CTAR0 == ctar) return;
+
+		const uint32_t savedMcr = SPIX.MCR;
+		const bool moduleEnabled = !(savedMcr & SPI_MCR_MDIS);
+
+		if (moduleEnabled) { SPIX.MCR = savedMcr | SPI_MCR_MDIS | SPI_MCR_HALT; }
+		SPIX.CTAR0 = ctar;
+		if (moduleEnabled) { SPIX.MCR = savedMcr; }
+	}
+
+	// Load a new value into CTAR1 only; same disable/halt-and-restore handling as update_ctars().
+	static inline void update_ctar1(uint32_t ctar) __attribute__((always_inline)) {
+		if (SPIX.CTAR1 == ctar) return;
+
+		const uint32_t savedMcr = SPIX.MCR;
+		const bool moduleEnabled = !(savedMcr & SPI_MCR_MDIS);
+
+		if (moduleEnabled) { SPIX.MCR = savedMcr | SPI_MCR_MDIS | SPI_MCR_HALT; }
+		SPIX.CTAR1 = ctar;
+		if (moduleEnabled) { SPIX.MCR = savedMcr; }
+	}
+
+	// Program CTAR0 (frame size field 7, i.e. 8-bit frames) and CTAR1 (frame size field 15, i.e. 16-bit
+	// frames) with the clock settings that getScalars() derives from _SPI_CLOCK_DIVIDER.
+	void setSPIRate() {
+		uint32_t prescaler = 0;
+		uint32_t baud = 0;
+		uint32_t doubleRate = 0;
+		getScalars<_SPI_CLOCK_DIVIDER>(prescaler, baud, doubleRate);
+
+		// Bits common to both registers; the CSSCK field is given the same value as BR
+		uint32_t shared = SPI_CTAR_PBR(prescaler) | SPI_CTAR_BR(baud) | SPI_CTAR_CSSCK(baud);
+		#if USE_CONT == 1
+		shared |= SPI_CTAR_CPHA | SPI_CTAR_CPOL;
+		#endif
+		if(doubleRate) { shared |= SPI_CTAR_DBR; }
+
+		update_ctars(SPI_CTAR_FMSZ(7) | shared, SPI_CTAR_FMSZ(15) | shared);
+	}
+
+	// Snapshot the current CTAR0/CTAR1 values and the configuration of pins 7, 11, 13 and 14 into
+	// gState so that release() can put them back.
+	void inline save_spi_state() __attribute__ ((always_inline)) {
+		// save ctar data
+		gState._ctar0 = SPIX.CTAR0;
+		gState._ctar1 = SPIX.CTAR1;
+
+		// save data for the not-us pins
+		gState.pins[0] = CORE_PIN7_CONFIG;
+		gState.pins[1] = CORE_PIN11_CONFIG;
+		gState.pins[2] = CORE_PIN13_CONFIG;
+		gState.pins[3] = CORE_PIN14_CONFIG;
+	}
+
+	// Put back the CTAR0/CTAR1 values recorded by save_spi_state().  Pin configuration is not restored
+	// here; disable_pins() takes care of that.
+	void inline restore_spi_state() __attribute__ ((always_inline)) {
+		// restore ctar data
+		update_ctars(gState._ctar0,gState._ctar1);
+
+		// restore data for the not-us pins (not necessary because disable_pins will do this)
+		// CORE_PIN7_CONFIG = gState.pins[0];
+		// CORE_PIN11_CONFIG = gState.pins[1];
+		// CORE_PIN13_CONFIG = gState.pins[2];
+		// CORE_PIN14_CONFIG = gState.pins[3];
+	}
+
+
+public:
+	// Construct without a chip-select object (none is driven), or with one supplied by the caller.
+	ARMHardwareSPIOutput() : m_pSelect(NULL) {}
+	ARMHardwareSPIOutput(Selectable *pSelect) : m_pSelect(pSelect) {}
+	// Replace the chip-select object used by select()/release(); NULL means none.
+	void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
+
+
+	// One-time setup: make the data and clock pins outputs, turn on the clock gate for whichever SPI
+	// module pSPIX refers to (if it is not on already), and put the module in master mode with the
+	// MDIS and HALT bits cleared.
+	void init() {
+		// set the pins to output
+		FastPin<_DATA_PIN>::setOutput();
+		FastPin<_CLOCK_PIN>::setOutput();
+
+		// Enable the SPI0 or SPI1 clock gate; when we are the one enabling it, also give CTAR0 an initial value
+		uint32_t sim6 = SIM_SCGC6;
+		if((SPI_t*)pSPIX == &KINETISK_SPI0) {
+			if (!(sim6 & SIM_SCGC6_SPI0)) {
+				//serial_print("init1\n");
+				SIM_SCGC6 = sim6 | SIM_SCGC6_SPI0;
+				SPIX.CTAR0 = SPI_CTAR_FMSZ(7) | SPI_CTAR_PBR(1) | SPI_CTAR_BR(1);
+			}
+		} else if((SPI_t*)pSPIX == &SPI1) {
+			if (!(sim6 & SIM_SCGC6_SPI1)) {
+				//serial_print("init1\n");
+				SIM_SCGC6 = sim6 | SIM_SCGC6_SPI1;
+				SPIX.CTAR0 = SPI_CTAR_FMSZ(7) | SPI_CTAR_PBR(1) | SPI_CTAR_BR(1);
+			}
+		}
+
+		// Configure SPI as the master and enable
+		SPIX.MCR |= SPI_MCR_MSTR; // | SPI_MCR_CONT_SCKE);
+		SPIX.MCR &= ~(SPI_MCR_MDIS | SPI_MCR_HALT);
+
+		// pin/spi configuration happens on select
+	}
+
+	// Block until everything queued has gone out: first spin until bits 12-15 of SR read zero
+	// (presumably the transmit FIFO fill count - confirm against the reference manual), then wait for
+	// the TCF flag and clear TCF and EOQF.
+	static void waitFully() __attribute__((always_inline)) {
+		// Wait for the last byte to get shifted into the register
+		bool empty = false;
+
+		do {
+			// Each poll runs with interrupts off; they are re-enabled between polls
+			cli();
+			if ((SPIX.SR & 0xF000) > 0) {
+				// reset the TCF flag
+				SPIX.SR |= SPI_SR_TCF;
+			} else {
+				empty = true;
+			}
+			sei();
+		} while (!empty);
+
+		// wait for the TCF flag to get set
+		while (!(SPIX.SR & SPI_SR_TCF));
+		SPIX.SR |= (SPI_SR_TCF | SPI_SR_EOQF);
+	}
+
+	// Polling helpers on the status register.  needwait() reports whether SR bit 0x4000 is set and
+	// wait() spins while it is; wait1() spins while the SR bits 12-15 field is 0x2000 or more.
+	// (Presumably these bits are the transmit FIFO fill count - confirm against the reference manual.)
+	static bool needwait() __attribute__((always_inline)) { return (SPIX.SR & 0x4000); }
+	static void wait() __attribute__((always_inline)) { while( (SPIX.SR & 0x4000) );  }
+	static void wait1() __attribute__((always_inline)) { while( (SPIX.SR & 0xF000) >= 0x2000);  }
+
+	enum ECont { CONT, NOCONT };
+	enum EWait { PRE, POST, NONE };
+	enum ELast { NOTLAST, LAST };
+
+	#if USE_CONT == 1
+	#define CM CONT
+	#else
+	#define CM NOCONT
+	#endif
+	#define WM PRE
+
+	// Compile-time configured writer.  CONT_STATE adds the CONT flag to each pushed frame, WAIT_STATE
+	// chooses whether wait() runs before (PRE), after (POST) or not at all (NONE), and LAST_STATE adds
+	// the EOQ flag.  Each push and the following TCF clear run with interrupts disabled.
+	template<ECont CONT_STATE, EWait WAIT_STATE, ELast LAST_STATE> class Write {
+	public:
+		// Push one 16-bit frame using CTAR1
+		static void writeWord(uint16_t w) __attribute__((always_inline)) {
+			if(WAIT_STATE == PRE) { wait(); }
+			cli();
+			SPIX.PUSHR = ((LAST_STATE == LAST) ? SPI_PUSHR_EOQ : 0) |
+			((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) |
+			SPI_PUSHR_CTAS(1) | (w & 0xFFFF);
+			SPIX.SR |= SPI_SR_TCF;
+			sei();
+			if(WAIT_STATE == POST) { wait(); }
+		}
+
+		// Push one 8-bit frame using CTAR0
+		static void writeByte(uint8_t b) __attribute__((always_inline)) {
+			if(WAIT_STATE == PRE) { wait(); }
+			cli();
+			SPIX.PUSHR = ((LAST_STATE == LAST) ? SPI_PUSHR_EOQ : 0) |
+			((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) |
+			SPI_PUSHR_CTAS(0) | (b & 0xFF);
+			SPIX.SR |= SPI_SR_TCF;
+			sei();
+			if(WAIT_STATE == POST) { wait(); }
+		}
+	};
+
+	// Fixed-configuration write helpers.  Naming: Word = 16-bit frame via CTAR1, Byte = 8-bit frame via
+	// CTAR0; Cont = push with the CONT flag; no suffix = wait() before the push, PostWait = wait() after
+	// it, NoWait = no wait at all.  Every push and TCF clear runs with interrupts disabled.
+	static void writeWord(uint16_t w) __attribute__((always_inline)) { wait(); cli(); SPIX.PUSHR = SPI_PUSHR_CTAS(1) | (w & 0xFFFF); SPIX.SR |= SPI_SR_TCF; sei(); }
+	static void writeWordNoWait(uint16_t w) __attribute__((always_inline)) { cli(); SPIX.PUSHR = SPI_PUSHR_CTAS(1) | (w & 0xFFFF); SPIX.SR |= SPI_SR_TCF; sei(); }
+
+	static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); cli(); SPIX.PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF; sei(); }
+	static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { cli(); SPIX.PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF);SPIX.SR |= SPI_SR_TCF; sei(); wait(); }
+	static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { cli(); SPIX.PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF; sei(); }
+
+	static void writeWordCont(uint16_t w) __attribute__((always_inline)) { wait(); cli(); SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(1) | (w & 0xFFFF); SPIX.SR |= SPI_SR_TCF; sei(); }
+	static void writeWordContNoWait(uint16_t w) __attribute__((always_inline)) { cli(); SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(1) | (w & 0xFFFF); SPIX.SR |= SPI_SR_TCF;  sei();}
+
+	static void writeByteCont(uint8_t b) __attribute__((always_inline)) { wait(); cli(); SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF;  sei(); }
+	static void writeByteContPostWait(uint8_t b) __attribute__((always_inline)) { cli(); SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF;  sei(); wait(); }
+	static void writeByteContNoWait(uint8_t b) __attribute__((always_inline)) { cli(); SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF; sei(); }
+
+	// Send a single bit: bit number BIT of `b`, as a 1 or a 0.  CTAR1 is temporarily switched to a
+	// one-bit frame size for the transfer and restored afterwards.
+	// not the most efficient mechanism in the world - but should be enough for sm16716 and friends
+	template <uint8_t BIT> inline static void writeBit(uint8_t b) {
+		uint32_t ctar1_save = SPIX.CTAR1;
+
+		// Clear out the FMSZ bits, reset them for 1 bit transferred for the start bit
+		uint32_t ctar1 = (ctar1_save & (~SPI_CTAR_FMSZ(15))) | SPI_CTAR_FMSZ(0);
+		update_ctar1(ctar1);
+
+		writeWord( (b & (1 << BIT)) != 0);
+
+		update_ctar1(ctar1_save);
+	}
+
+	// Begin a transaction: snapshot the SPI/pin state, assert the chip-select object (if any), then
+	// apply our clock settings and route our pins to the SPI module.
+	void inline select() __attribute__((always_inline)) {
+		save_spi_state();
+		if(m_pSelect != NULL) { m_pSelect->select(); }
+		setSPIRate();
+		enable_pins();
+	}
+
+	// End a transaction: hand the pins back, release the chip-select object (if any), and restore the
+	// CTAR values recorded by select().
+	void inline release() __attribute__((always_inline)) {
+		disable_pins();
+		if(m_pSelect != NULL) { m_pSelect->release(); }
+		restore_spi_state();
+	}
+
+	// Write `value` out `len` times without any select()/release() or final wait around it.
+	static void writeBytesValueRaw(uint8_t value, int len) {
+		while(len--) { Write<CM, WM, NOTLAST>::writeByte(value); }
+	}
+
+	// Write `value` out `len` times as one complete transaction (select, write, wait for completion, release).
+	void writeBytesValue(uint8_t value, int len) {
+		select();
+		while(len--) {
+			writeByte(value);
+		}
+		waitFully();
+		release();
+	}
+
+	// Write a block of `len` uint8_ts out as one complete transaction.  Each byte is passed through
+	// D::adjust() before being sent, and D::postBlock(len) runs once after the last byte.
+	template <class D> void writeBytes(register uint8_t *data, int len) {
+		uint8_t *end = data + len;
+		select();
+		// could be optimized to write 16bit words out instead of 8bit bytes
+		while(data != end) {
+			writeByte(D::adjust(*data++));
+		}
+		D::postBlock(len);
+		waitFully();
+		release();
+	}
+
+	// Convenience overload using DATA_NOP as the adjuster
+	void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+	// Write every pixel in `pixels` out, three bytes per pixel, as one complete transaction.
+	// FLAGS selects the framing: without FLAG_START_BIT the bytes are packed into 16-bit frames wherever
+	// possible; with it, the first byte of each pixel is sent as a 9-bit frame whose top bit is a 1.
+	// Each byte is passed through D::adjust() and D::postBlock() runs once after the data.
+	template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
+		select();
+		int len = pixels.mLen;
+
+		// Setup the pixel controller
+		if((FLAGS & FLAG_START_BIT) == 0) {
+			// The two halves of each word are loaded in separate statements.  Combining both loads in a
+			// single `a << 8 | b` expression leaves their evaluation order unspecified, which matters
+			// for the middle word: stepAdvanceAndLoadAndScale0() moves on to the next pixel and must
+			// not run before loadAndScale2() has read the current one.
+			uint16_t hiByte;
+			uint16_t loByte;
+
+			//If no start bit, write out as many 16-bit blocks as we can (two pixels = three words per pass)
+			while(pixels.has(2)) {
+				// Load and write out the first two bytes
+				if(WM == NONE) { wait1(); }
+				hiByte = D::adjust(pixels.loadAndScale0());
+				loByte = D::adjust(pixels.loadAndScale1());
+				Write<CM, WM, NOTLAST>::writeWord(hiByte << 8 | loByte);
+
+				// Load and write out the next two bytes (step dithering, advance data in between since we
+				// cross pixels here)
+				hiByte = D::adjust(pixels.loadAndScale2());
+				loByte = D::adjust(pixels.stepAdvanceAndLoadAndScale0());
+				Write<CM, WM, NOTLAST>::writeWord(hiByte << 8 | loByte);
+
+				// Load and write out the next two bytes
+				hiByte = D::adjust(pixels.loadAndScale1());
+				loByte = D::adjust(pixels.loadAndScale2());
+				Write<CM, WM, NOTLAST>::writeWord(hiByte << 8 | loByte);
+				pixels.stepDithering();
+				pixels.advanceData();
+			}
+
+			if(pixels.has(1)) {
+				if(WM == NONE) { wait1(); }
+				// write out the rest as alternating 16/8-bit blocks (likely to be just one)
+				hiByte = D::adjust(pixels.loadAndScale0());
+				loByte = D::adjust(pixels.loadAndScale1());
+				Write<CM, WM, NOTLAST>::writeWord(hiByte << 8 | loByte);
+				Write<CM, WM, NOTLAST>::writeByte(D::adjust(pixels.loadAndScale2()));
+			}
+
+			D::postBlock(len);
+			waitFully();
+		} else {
+			uint32_t ctar1_save = SPIX.CTAR1;
+
+			// Clear out the FMSZ bits, reset them for 9 bits transferred for the start bit
+			uint32_t ctar1 = (ctar1_save & (~SPI_CTAR_FMSZ(15))) | SPI_CTAR_FMSZ(8);
+			update_ctar1(ctar1);
+
+			while(pixels.has(1)) {
+				// 0x100 is the start bit sitting above the first data byte
+				writeWord( 0x100 | D::adjust(pixels.loadAndScale0()));
+				writeByte(D::adjust(pixels.loadAndScale1()));
+				writeByte(D::adjust(pixels.loadAndScale2()));
+				pixels.advanceData();
+				pixels.stepDithering();
+			}
+			D::postBlock(len);
+			waitFully();
+
+			// restore ctar1
+			update_ctar1(ctar1_save);
+		}
+		release();
+	}
+};
+#endif
+
+FASTLED_NAMESPACE_END
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/k20/led_sysdefs_arm_k20.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k20/led_sysdefs_arm_k20.h
@@ -0,0 +1,46 @@
+#ifndef __INC_LED_SYSDEFS_ARM_K20_H
+#define __INC_LED_SYSDEFS_ARM_K20_H
+
+#define FASTLED_TEENSY3 // platform tag tested by the k20/k66 platform headers
+#define FASTLED_ARM
+
+#ifndef INTERRUPT_THRESHOLD
+#define INTERRUPT_THRESHOLD 1 // overridable; the clockless controllers subtract it from WAIT_TIME when deciding to abandon a frame
+#endif
+
+// Default to allowing interrupts (define FASTLED_ALLOW_INTERRUPTS before this header to override)
+#ifndef FASTLED_ALLOW_INTERRUPTS
+#define FASTLED_ALLOW_INTERRUPTS 1
+#endif
+
+#if FASTLED_ALLOW_INTERRUPTS == 1
+#define FASTLED_ACCURATE_CLOCK
+#endif
+
+#if (F_CPU == 96000000)
+#define CLK_DBL 1
+#endif
+
+// Get some system include files
+#include <avr/io.h>
+#include <avr/interrupt.h> // for cli/sei definitions
+
+// Define the register types
+#if defined(ARDUINO) // && ARDUINO < 150
+typedef volatile       uint8_t RoReg; /**< Read only 8-bit register type (declared here as plain volatile uint8_t, not const) */
+typedef volatile       uint8_t RwReg; /**< Read-Write 8-bit register type (volatile uint8_t) */
+#endif
+
+extern volatile uint32_t systick_millis_count;
+#  define MS_COUNTER systick_millis_count
+
+
+// Default to using PROGMEM, since TEENSY3 provides it
+// even though all it does is ignore it.  Just being
+// conservative here in case TEENSY3 changes.
+#ifndef FASTLED_USE_PROGMEM
+#define FASTLED_USE_PROGMEM 1
+#endif
+
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/k20/octows2811_controller.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k20/octows2811_controller.h
@@ -0,0 +1,66 @@
+#ifndef __INC_OCTOWS2811_CONTROLLER_H
+#define __INC_OCTOWS2811_CONTROLLER_H
+
+#ifdef USE_OCTOWS2811
+
+// #include "OctoWS2811.h"
+
+FASTLED_NAMESPACE_BEGIN
+
+/// Adapter that renders an 8-lane PixelController into the buffers of an OctoWS2811 object and
+/// lets that object push the frame out.
+///
+/// The OctoWS2811 object and its two buffers are created lazily on the first showPixels() call,
+/// sized from the number of pixels seen in that call (nLeds * 8 lanes * 3 bytes each).
+/// If either buffer cannot be allocated nothing is created and showPixels() becomes a no-op
+/// (allocation is retried on the next call).
+template<EOrder RGB_ORDER = GRB, uint8_t CHIP = WS2811_800kHz>
+class COctoWS2811Controller : public CPixelLEDController<RGB_ORDER, 8, 0xFF> {
+  OctoWS2811  *pocto;
+  uint8_t *drawbuffer,*framebuffer;
+
+  /// Allocate the buffers and the OctoWS2811 object once.
+  /// @param nLeds number of pixels per lane used to size both buffers
+  void _init(int nLeds) {
+    if(pocto == NULL) {
+      drawbuffer = (uint8_t*)malloc(nLeds * 8 * 3);
+      framebuffer = (uint8_t*)malloc(nLeds * 8 * 3);
+
+      // Never hand a NULL buffer to OctoWS2811 or write pixels through it:
+      // release whatever was obtained and leave the controller uninitialised.
+      if(drawbuffer == NULL || framebuffer == NULL) {
+        free(drawbuffer);
+        free(framebuffer);
+        drawbuffer = NULL;
+        framebuffer = NULL;
+        return;
+      }
+
+      // byte ordering is handled in show by the pixel controller
+      int config = WS2811_RGB;
+      config |= CHIP;
+
+      pocto = new OctoWS2811(nLeds, framebuffer, drawbuffer, config);
+
+      pocto->begin();
+    }
+  }
+public:
+  COctoWS2811Controller() : pocto(NULL), drawbuffer(NULL), framebuffer(NULL) {}
+
+
+  virtual void init() { /* do nothing yet */ }
+
+  /// Scratch storage for one colour byte from each of the 8 lanes.
+  typedef union {
+    uint8_t bytes[8];
+    uint32_t raw[2];
+  } Lines;
+
+  /// Fill the draw buffer from `pixels` and trigger output.
+  /// For each pixel position, the three colour bytes of all 8 lanes are gathered and
+  /// bit-transposed with transpose8x1_MSB, producing 8 output bytes per colour byte.
+  virtual void showPixels(PixelController<RGB_ORDER, 8, 0xFF> & pixels) {
+    _init(pixels.size());
+    if(pocto == NULL) {
+      // allocation failed; there is no buffer to render into
+      return;
+    }
+
+    uint8_t *pData = drawbuffer;
+    while(pixels.has(1)) {
+      Lines b;
+
+      for(int i = 0; i < 8; i++) { b.bytes[i] = pixels.loadAndScale0(i); }
+      transpose8x1_MSB(b.bytes,pData); pData += 8;
+      for(int i = 0; i < 8; i++) { b.bytes[i] = pixels.loadAndScale1(i); }
+      transpose8x1_MSB(b.bytes,pData); pData += 8;
+      for(int i = 0; i < 8; i++) { b.bytes[i] = pixels.loadAndScale2(i); }
+      transpose8x1_MSB(b.bytes,pData); pData += 8;
+      pixels.stepDithering();
+      pixels.advanceData();
+    }
+
+    pocto->show();
+  }
+
+};
+
+FASTLED_NAMESPACE_END
+
+#endif
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/k20/smartmatrix_t3.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k20/smartmatrix_t3.h
@@ -0,0 +1,55 @@
+#ifndef __INC_SMARTMATRIX_T3_H
+#define __INC_SMARTMATRIX_T3_H
+
+#ifdef SmartMatrix_h
+#include <SmartMatrix.h>
+
+FASTLED_NAMESPACE_BEGIN
+
+extern SmartMatrix *pSmartMatrix;
+
+// note - SmartMatrix.h must be included before this header for this code to be enabled
+// (the whole section is gated on SmartMatrix_h being defined).
+//
+// Controller that copies scaled/dithered pixel data into a SmartMatrix back buffer and swaps buffers.
+// The class is not a template, so the colour order is fixed to RGB, which matches the
+// red/green/blue fields written in showPixels().
+class CSmartMatrixController : public CPixelLEDController<RGB> {
+  SmartMatrix matrix;
+
+public:
+  // initialize the LED controller
+  virtual void init() {
+      // Initialize 32x32 LED Matrix
+    matrix.begin();
+    matrix.setBrightness(255);
+    matrix.setColorCorrection(ccNone);
+
+    // Clear screen
+    clearLeds(0);
+    matrix.swapBuffers();
+    pSmartMatrix = &matrix;
+  }
+
+  // Write every pixel into the matrix back buffer, then swap buffers.
+  virtual void showPixels(PixelController<RGB> & pixels) {
+    // Declared outside the if/else so it is still in scope for the copy loop below.
+    rgb24 *md;
+    if(SMART_MATRIX_CAN_TRIPLE_BUFFER) {
+      md = matrix.getRealBackBuffer();
+    } else {
+      md = matrix.backBuffer();
+    }
+    while(pixels.has(1)) {
+      md->red = pixels.loadAndScale0();
+      md->green = pixels.loadAndScale1();
+      md->blue = pixels.loadAndScale2();
+      md++;
+      pixels.advanceData();
+      pixels.stepDithering();
+    }
+    matrix.swapBuffers();
+    if(SMART_MATRIX_CAN_TRIPLE_BUFFER && pixels.advanceBy() > 0) {
+      matrix.setBackBuffer(pixels.mData);
+    }
+  }
+
+};
+
+FASTLED_NAMESPACE_END
+
+#endif
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/k20/ws2812serial_controller.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k20/ws2812serial_controller.h
@@ -0,0 +1,46 @@
+#ifndef __INC_WS2812SERIAL_CONTROLLER_H
+#define __INC_WS2812SERIAL_CONTROLLER_H
+
+#ifdef USE_WS2812SERIAL
+
+FASTLED_NAMESPACE_BEGIN
+
+/// Adapter that renders pixel data into the draw buffer of a WS2812Serial object and lets
+/// that object send the frame on DATA_PIN.
+///
+/// The WS2812Serial object and its buffers (3 bytes per LED for drawing, 12 bytes per LED for
+/// the frame buffer) are created lazily on the first showPixels() call. If either allocation
+/// fails nothing is created and showPixels() becomes a no-op (allocation is retried next call).
+template<int DATA_PIN, EOrder RGB_ORDER>
+class CWS2812SerialController : public CPixelLEDController<RGB_ORDER, 8, 0xFF> {
+  WS2812Serial *pserial;
+  uint8_t *drawbuffer,*framebuffer;
+
+  /// Allocate the buffers and the WS2812Serial object once.
+  /// @param nLeds number of LEDs used to size both buffers
+  void _init(int nLeds) {
+    if (pserial == NULL) {
+      drawbuffer = (uint8_t*)malloc(nLeds * 3);
+      framebuffer = (uint8_t*)malloc(nLeds * 12);
+
+      // Never hand a NULL buffer to WS2812Serial or write pixels through it.
+      if (drawbuffer == NULL || framebuffer == NULL) {
+        free(drawbuffer);
+        free(framebuffer);
+        drawbuffer = NULL;
+        framebuffer = NULL;
+        return;
+      }
+
+      pserial = new WS2812Serial(nLeds, framebuffer, drawbuffer, DATA_PIN, WS2812_RGB);
+      pserial->begin();
+    }
+  }
+public:
+  CWS2812SerialController() : pserial(NULL), drawbuffer(NULL), framebuffer(NULL) {}
+
+  virtual void init() { /* do nothing yet */ }
+
+  /// Copy the three scaled colour bytes of every pixel into the draw buffer, then trigger output.
+  virtual void showPixels(PixelController<RGB_ORDER, 8, 0xFF> & pixels) {
+    _init(pixels.size());
+    if (pserial == NULL) {
+      // allocation failed; there is no buffer to render into
+      return;
+    }
+
+    uint8_t *p = drawbuffer;
+
+    while(pixels.has(1)) {
+      *p++ = pixels.loadAndScale0();
+      *p++ = pixels.loadAndScale1();
+      *p++ = pixels.loadAndScale2();
+      pixels.stepDithering();
+      pixels.advanceData();
+    }
+    pserial->show();
+  }
+
+};
+
+FASTLED_NAMESPACE_END
+
+#endif // USE_WS2812SERIAL
+#endif // __INC_WS2812SERIAL_CONTROLLER_H
--- a/libraries/FastLED-3.2.0/platforms/arm/k66/clockless_arm_k66.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k66/clockless_arm_k66.h
@@ -0,0 +1,124 @@
+#ifndef __INC_CLOCKLESS_ARM_K66_H
+#define __INC_CLOCKLESS_ARM_K66_H
+
+FASTLED_NAMESPACE_BEGIN
+
+// Definition for a single channel clockless controller for the k66 family of chips, like that used in the teensy 3.6
+// T1/T2/T3 are used as CPU cycle counts here (they are added to ARM_DWT_CYCCNT). See clockless.h for detailed info on the template parameters.
+#if defined(FASTLED_TEENSY3)
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class ClocklessController : public CPixelLEDController<RGB_ORDER> {
+	typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPin<DATA_PIN>::port_t data_t;
+
+	data_t mPinMask;
+	data_ptr_t mPort;
+	CMinWait<WAIT_TIME> mWait; // enforces the minimum gap between frames (wait() before output, mark() after)
+public:
+	virtual void init() {
+		FastPin<DATA_PIN>::setOutput();
+		mPinMask = FastPin<DATA_PIN>::mask();
+		mPort = FastPin<DATA_PIN>::port();
+	}
+
+	virtual uint16_t getMaxRefreshRate() const { return 400; } // fixed cap; presumably frames per second - confirm against CLEDController
+
+protected:
+
+	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+    mWait.wait();
+		if(!showRGBInternal(pixels)) { // 0 means the frame was abandoned part-way through
+      sei(); delayMicroseconds(WAIT_TIME); cli(); // pause WAIT_TIME us with interrupts enabled, then retry once
+      showRGBInternal(pixels); // pixels was passed by value above, so the retry starts from the first pixel
+    }
+    mWait.mark();
+  }
+
+	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t & b)  {
+		for(register uint32_t i = BITS-1; i > 0; i--) { // first BITS-1 bits; the final bit is emitted after the loop
+			while(ARM_DWT_CYCCNT < next_mark); // wait for the start of this bit period
+			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3); // the next bit starts one full period later
+			FastPin<DATA_PIN>::fastset(port, hi);
+			if(b&0x80) { // MSB first; a 1 bit holds the line high until about T3 cycles remain
+				while((next_mark - ARM_DWT_CYCCNT) > (T3+(2*(F_CPU/24000000))));
+				FastPin<DATA_PIN>::fastset(port, lo);
+			} else { // a 0 bit drops the line once about T2+T3 cycles remain
+				while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+(2*(F_CPU/24000000))));
+				FastPin<DATA_PIN>::fastset(port, lo);
+			}
+			b <<= 1; // move the next bit into the MSB position
+		}
+
+		while(ARM_DWT_CYCCNT < next_mark); // last bit: same sequence as above, without the trailing shift
+		next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+		FastPin<DATA_PIN>::fastset(port, hi);
+
+		if(b&0x80) {
+			while((next_mark - ARM_DWT_CYCCNT) > (T3+(2*(F_CPU/24000000))));
+			FastPin<DATA_PIN>::fastset(port, lo);
+		} else {
+			while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+(2*(F_CPU/24000000))));
+			FastPin<DATA_PIN>::fastset(port, lo);
+		}
+	}
+
+	// Bit-bangs one frame. Takes the PixelController by value. Returns 0 if the frame was abandoned because output was held off
+	// for too long (only when FASTLED_ALLOW_INTERRUPTS == 1), otherwise the cycle counter value at the end of the frame.
+	static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+	    // Enable the DWT cycle counter and restart it from zero
+		ARM_DEMCR    |= ARM_DEMCR_TRCENA;
+		ARM_DWT_CTRL |= ARM_DWT_CTRL_CYCCNTENA;
+		ARM_DWT_CYCCNT = 0;
+
+		register data_ptr_t port = FastPin<DATA_PIN>::port();
+		register data_t hi = *port | FastPin<DATA_PIN>::mask();; // port value with the data pin set
+		register data_t lo = *port & ~FastPin<DATA_PIN>::mask();; // port value with the data pin cleared
+		*port = lo; // start with the data line low
+
+		// Setup the pixel controller and load/scale the first byte
+		pixels.preStepFirstByteDithering();
+		register uint8_t b = pixels.loadAndScale0();
+
+		cli();
+		uint32_t next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+
+		while(pixels.has(1)) {
+			pixels.stepDithering();
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			cli();
+			// if we are more than (WAIT_TIME - INTERRUPT_THRESHOLD) us past the scheduled mark, punt on the current frame
+			if(ARM_DWT_CYCCNT > next_mark) {
+				if((ARM_DWT_CYCCNT-next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return 0; }
+			}
+
+			hi = *port | FastPin<DATA_PIN>::mask(); // re-read the port; presumably other pins may have changed while interrupts were enabled
+			lo = *port & ~FastPin<DATA_PIN>::mask();
+			#endif
+			// Write first byte, load the second byte
+			writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+			b = pixels.loadAndScale1();
+
+			// Write second byte, load the third byte
+			writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+			b = pixels.loadAndScale2();
+
+			// Write third byte, advance and load the first byte of the next pixel
+			writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+			b = pixels.advanceAndLoadAndScale0();
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			sei();
+			#endif
+		};
+
+		sei();
+		return ARM_DWT_CYCCNT;
+	}
+};
+#endif
+
+FASTLED_NAMESPACE_END
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/k66/clockless_block_arm_k66.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k66/clockless_block_arm_k66.h
@@ -0,0 +1,344 @@
+#ifndef __INC_BLOCK_CLOCKLESS_ARM_K66_H
+#define __INC_BLOCK_CLOCKLESS_ARM_K66_H
+
+// Definitions for the parallel-output (multi-lane) clockless controllers for the k66 family of chips, like that used in the teensy 3.6
+// See clockless.h for detailed info on how the template parameters are used.
+#if defined(FASTLED_TEENSY3)
+#define FASTLED_HAS_BLOCKLESS 1
+
+#define PORTB_FIRST_PIN 0 // FIRST_PIN value that selects the port B lane set
+#define PORTC_FIRST_PIN 15 // FIRST_PIN value that selects the port C lane set
+#define PORTD_FIRST_PIN 2 // FIRST_PIN value that selects the port D lane set
+#define HAS_PORTDC 1
+
+#define LANE_MASK (((1<<LANES)-1) & ((FIRST_PIN==2) ? 0xFF : 0xFFF)) // one bit per lane; capped to 8 bits for FIRST_PIN==2, 12 bits otherwise
+#define PORT_SHIFT(P) ((P) << ((FIRST_PIN==0) ? 16 : 0)) // for FIRST_PIN==0 the lanes start at port bit 16
+#define PORT_MASK PORT_SHIFT(LANE_MASK)
+
+#define MIN(X,Y) (((X)<(Y)) ? (X):(Y)) // NOTE: evaluates its arguments more than once
+#define USED_LANES ((FIRST_PIN!=15) ? MIN(LANES,8) : MIN(LANES,12)) // lanes actually driven: at most 12 for FIRST_PIN==15, at most 8 otherwise
+
+#include <kinetis.h>
+
+FASTLED_NAMESPACE_BEGIN
+
+/// Parallel ("block") clockless controller for the k66: drives up to USED_LANES strips at once from
+/// consecutive bits of one GPIO port, selected by FIRST_PIN (PORTB_FIRST_PIN, PORTC_FIRST_PIN or
+/// PORTD_FIRST_PIN).
+///
+/// T1/T2/T3 are used as CPU cycle counts (they are added to ARM_DWT_CYCCNT). XTRA0 and FLIP are
+/// accepted for signature compatibility only: writeBits() always clocks out 8 bits per byte.
+template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 40>
+class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, LANE_MASK> {
+	typedef typename FastPin<FIRST_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPin<FIRST_PIN>::port_t data_t;
+
+	data_t mPinMask;
+	data_ptr_t mPort;
+	CMinWait<WAIT_TIME> mWait;
+public:
+	virtual int size() { return CLEDController::size() * LANES; }
+
+	/// Output one frame, honouring the minimum inter-frame gap tracked by mWait.
+	virtual void showPixels(PixelController<RGB_ORDER, LANES, LANE_MASK> & pixels) {
+		mWait.wait();
+		uint32_t clocks = showRGBInternal(pixels);
+		#if FASTLED_ALLOW_INTERRUPTS == 0
+		// Adjust the timer
+		long microsTaken = CLKS_TO_MICROS(clocks);
+		MS_COUNTER += (1 + (microsTaken / 1000));
+		#endif
+
+		mWait.mark();
+	}
+
+	/// Configure every pin backing a used lane as an output.
+	/// Each switch deliberately falls through so that lane N also configures lanes N-1..1.
+	virtual void init() {
+		if(FIRST_PIN == PORTC_FIRST_PIN) { // PORTC
+			switch(USED_LANES) {
+				// Lanes 9-12 are port C bits 8-11, which are pins 35-38 in this platform's pin table
+				// (pins 27-30 belong to ports A and B here and are not written by writeBits()).
+				case 12: FastPin<38>::setOutput();
+				case 11: FastPin<37>::setOutput();
+				case 10: FastPin<36>::setOutput();
+				case 9: FastPin<35>::setOutput();
+				case 8: FastPin<12>::setOutput();
+				case 7: FastPin<11>::setOutput();
+				case 6: FastPin<13>::setOutput();
+				case 5: FastPin<10>::setOutput();
+				case 4: FastPin<9>::setOutput();
+				case 3: FastPin<23>::setOutput();
+				case 2: FastPin<22>::setOutput();
+				case 1: FastPin<15>::setOutput();
+			}
+		} else if(FIRST_PIN == PORTD_FIRST_PIN) { // PORTD
+			switch(USED_LANES) {
+				case 8: FastPin<5>::setOutput();
+				case 7: FastPin<21>::setOutput();
+				case 6: FastPin<20>::setOutput();
+				case 5: FastPin<6>::setOutput();
+				case 4: FastPin<8>::setOutput();
+				case 3: FastPin<7>::setOutput();
+				case 2: FastPin<14>::setOutput();
+				case 1: FastPin<2>::setOutput();
+			}
+		} else if (FIRST_PIN == PORTB_FIRST_PIN) { // PORTB
+			switch (USED_LANES) {
+				case 8: FastPin<45>::setOutput();
+				case 7: FastPin<44>::setOutput();
+				case 6: FastPin<46>::setOutput();
+				case 5: FastPin<43>::setOutput();
+				case 4: FastPin<30>::setOutput();
+				case 3: FastPin<29>::setOutput();
+				case 2: FastPin<1>::setOutput();
+				case 1: FastPin<0>::setOutput();
+			}
+		}
+		mPinMask = FastPin<FIRST_PIN>::mask();
+		mPort = FastPin<FIRST_PIN>::port();
+	}
+
+	virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+	/// One byte per lane on input to writeBits(), one port word per bit after transposition.
+	/// Sized for 16 lanes: in the >8-lane path writeBits() hands two 8-byte groups to transpose8
+	/// (b.bytes and b.bytes+8), interleaves the results into b2 (b2.bytes and b2.bytes+1, stride 2)
+	/// and then reads shorts[0..7].
+	typedef union {
+		uint8_t bytes[16];
+		uint16_t shorts[8];
+		uint32_t raw[4];
+	} Lines;
+
+	/// Clock out the byte currently held in `b` for every lane (8 bit periods) and, interleaved with
+	/// the first USED_LANES/2 bit periods, refill `b` with colour byte PX for every lane.
+	template<int BITS,int PX> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register Lines & b, PixelController<RGB_ORDER, LANES, LANE_MASK> &pixels) { // , register uint32_t & b2)  {
+		register Lines b2;
+		if(USED_LANES>8) {
+			transpose8<1,2>(b.bytes,b2.bytes);
+			transpose8<1,2>(b.bytes+8,b2.bytes+1);
+		} else {
+			transpose8x1(b.bytes,b2.bytes);
+		}
+		register uint8_t d = pixels.template getd<PX>(pixels);
+		register uint8_t scale = pixels.template getscale<PX>(pixels);
+
+		for(register uint32_t i = 0; i < (USED_LANES/2); i++) {
+			while(ARM_DWT_CYCCNT < next_mark);
+			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
+			*FastPin<FIRST_PIN>::sport() = PORT_MASK;
+
+			while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+(2*(F_CPU/24000000))));
+			if(USED_LANES>8) {
+				*FastPin<FIRST_PIN>::cport() = ((~b2.shorts[i]) & PORT_MASK);
+			} else {
+				*FastPin<FIRST_PIN>::cport() = (PORT_SHIFT(~b2.bytes[7-i]) & PORT_MASK);
+			}
+
+			while((next_mark - ARM_DWT_CYCCNT) > (T3));
+			*FastPin<FIRST_PIN>::cport() = PORT_MASK;
+
+			// Use the idle tail of the bit period to load two lanes' worth of the next byte
+			b.bytes[i] = pixels.template loadAndScale<PX>(pixels,i,d,scale);
+			b.bytes[i+(USED_LANES/2)] = pixels.template loadAndScale<PX>(pixels,i+(USED_LANES/2),d,scale);
+		}
+
+		// if folks use an odd number of lanes, get the last byte's value here
+		if(USED_LANES & 0x01) {
+			b.bytes[USED_LANES-1] = pixels.template loadAndScale<PX>(pixels,USED_LANES-1,d,scale);
+		}
+
+		// Remaining bit periods: output only, nothing left to load
+		for(register uint32_t i = USED_LANES/2; i < 8; i++) {
+			while(ARM_DWT_CYCCNT < next_mark);
+			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
+			*FastPin<FIRST_PIN>::sport() = PORT_MASK;
+			while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+(2*(F_CPU/24000000))));
+			if(USED_LANES>8) {
+				*FastPin<FIRST_PIN>::cport() = ((~b2.shorts[i]) & PORT_MASK);
+			} else {
+				// b2.bytes[0] = 0;
+				*FastPin<FIRST_PIN>::cport() = (PORT_SHIFT(~b2.bytes[7-i]) & PORT_MASK);
+			}
+
+			while((next_mark - ARM_DWT_CYCCNT) > (T3));
+			*FastPin<FIRST_PIN>::cport() = PORT_MASK;
+
+		}
+	}
+
+
+
+	/// Bit-bang one frame on all lanes.
+	/// @return the DWT cycle counter value when output stopped (the counter is reset to 0 on entry)
+	/// Interrupts are always re-enabled before returning.
+	static uint32_t showRGBInternal(PixelController<RGB_ORDER, LANES, LANE_MASK> &allpixels) {
+		// Enable the DWT cycle counter and restart it from zero
+		ARM_DEMCR    |= ARM_DEMCR_TRCENA;
+		ARM_DWT_CTRL |= ARM_DWT_CTRL_CYCCNTENA;
+		ARM_DWT_CYCCNT = 0;
+
+		// Setup the pixel controller and load/scale the first byte.
+		// The pre-step must run exactly once: a second call undoes the first.
+		allpixels.preStepFirstByteDithering();
+		register Lines b0;
+
+		for(int i = 0; i < USED_LANES; i++) {
+			b0.bytes[i] = allpixels.loadAndScale0(i);
+		}
+		// Lanes that are never loaded still pass through the transpose; give them a defined value
+		// (their bits are masked off by PORT_MASK on output).
+		for(int i = USED_LANES; i < 16; i++) {
+			b0.bytes[i] = 0;
+		}
+
+		cli();
+		uint32_t next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+
+		while(allpixels.has(1)) {
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			cli();
+			// if we are more than (WAIT_TIME - 5) us past the scheduled mark, punt on the current frame
+			if(ARM_DWT_CYCCNT > next_mark) {
+				if((ARM_DWT_CYCCNT-next_mark) > ((WAIT_TIME-5)*CLKS_PER_US)) { sei(); return ARM_DWT_CYCCNT; }
+			}
+			#endif
+			allpixels.stepDithering();
+
+			// Write first byte, read next byte
+			writeBits<8+XTRA0,1>(next_mark, b0, allpixels);
+
+			// Write second byte, read 3rd byte
+			writeBits<8+XTRA0,2>(next_mark, b0, allpixels);
+			allpixels.advanceData();
+
+			// Write third byte
+			writeBits<8+XTRA0,0>(next_mark, b0, allpixels);
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			sei();
+			#endif
+		};
+
+		// Balance the cli() issued before the loop; without this, interrupts stay disabled after the
+		// frame whenever FASTLED_ALLOW_INTERRUPTS != 1 or the loop body never ran.
+		sei();
+		return ARM_DWT_CYCCNT;
+	}
+};
+
+// Lane masks for the 16-way controller: one bit per lane, split into the low byte (written to
+// the PORTD_FIRST_PIN port) and the high byte (written to the PORTC_FIRST_PIN port).
+#define PMASK ((1<<(LANES))-1)
+#define PMASK_HI ((PMASK>>8) & 0xFF)
+#define PMASK_LO (PMASK & 0xFF)
+
+/// Parallel clockless controller that drives up to 16 lanes: lanes 1-8 on the low 8 bits of the
+/// port used by PORTD_FIRST_PIN and lanes 9-16 on the low 8 bits of the port used by PORTC_FIRST_PIN.
+///
+/// T1/T2/T3 are used as CPU cycle counts (they are added to ARM_DWT_CYCCNT). XTRA0 and FLIP are
+/// accepted for signature compatibility only: writeBits() always clocks out 8 bits per byte.
+/// Interrupts stay disabled for the whole frame (the per-pixel re-enable is compiled out).
+template <uint8_t LANES, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class SixteenWayInlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, PMASK> {
+	typedef typename FastPin<PORTC_FIRST_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPin<PORTC_FIRST_PIN>::port_t data_t;
+
+	data_t mPinMask;
+	data_ptr_t mPort;
+	CMinWait<WAIT_TIME> mWait;
+public:
+	/// Configure every pin backing a used lane as an output.
+	/// The switch deliberately falls through so that lane N also configures lanes N-1..1.
+	virtual void init() {
+		static_assert(LANES <= 16, "Maximum of 16 lanes for Teensy parallel controllers!");
+		// FastPin<30>::setOutput();
+		// FastPin<29>::setOutput();
+		// FastPin<27>::setOutput();
+		// FastPin<28>::setOutput();
+		switch(LANES) {
+			case 16: FastPin<12>::setOutput();
+			case 15: FastPin<11>::setOutput();
+			case 14: FastPin<13>::setOutput();
+			case 13: FastPin<10>::setOutput();
+			case 12: FastPin<9>::setOutput();
+			case 11: FastPin<23>::setOutput();
+			case 10: FastPin<22>::setOutput();
+			case 9:  FastPin<15>::setOutput();
+
+			case 8:  FastPin<5>::setOutput();
+			case 7:  FastPin<21>::setOutput();
+			case 6:  FastPin<20>::setOutput();
+			case 5:  FastPin<6>::setOutput();
+			case 4:  FastPin<8>::setOutput();
+			case 3:  FastPin<7>::setOutput();
+			case 2:  FastPin<14>::setOutput();
+			case 1:  FastPin<2>::setOutput();
+		}
+	}
+
+	/// Output one frame, honouring the minimum inter-frame gap tracked by mWait.
+	virtual void showPixels(PixelController<RGB_ORDER, LANES, PMASK> & pixels) {
+		mWait.wait();
+		uint32_t clocks = showRGBInternal(pixels);
+		#if FASTLED_ALLOW_INTERRUPTS == 0
+		// Adjust the timer
+		long microsTaken = CLKS_TO_MICROS(clocks);
+		MS_COUNTER += (1 + (microsTaken / 1000));
+		#endif
+
+		mWait.mark();
+	}
+
+	/// One byte per lane on input to writeBits(); after transposition, bytes[0..7] hold the
+	/// per-bit port values for lanes 1-8 and bytes[8..15] those for lanes 9-16.
+	typedef union {
+		uint8_t bytes[16];
+		uint16_t shorts[8];
+		uint32_t raw[4];
+	} Lines;
+
+	/// Clock out the byte currently held in `b` for every lane (always 8 bit periods) and, in the
+	/// idle tail of each bit period, refill `b` with colour byte PX for the lanes that exist.
+	template<int BITS,int PX> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register Lines & b, PixelController<RGB_ORDER,LANES, PMASK> &pixels) { // , register uint32_t & b2)  {
+		register Lines b2;
+		transpose8x1(b.bytes,b2.bytes);
+		transpose8x1(b.bytes+8,b2.bytes+8);
+		register uint8_t d = pixels.template getd<PX>(pixels);
+		register uint8_t scale = pixels.template getscale<PX>(pixels);
+
+		// Every byte is 8 bits long regardless of the lane count, so the bit loop must not be
+		// bounded by LANES; only the per-lane loads below depend on it.
+		for(register uint32_t i = 0; i < 8; i++) {
+			while(ARM_DWT_CYCCNT < next_mark);
+			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
+			*FastPin<PORTD_FIRST_PIN>::sport() = PMASK_LO;
+			*FastPin<PORTC_FIRST_PIN>::sport() = PMASK_HI;
+
+			while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+6));
+			*FastPin<PORTD_FIRST_PIN>::cport() = ((~b2.bytes[7-i]) & PMASK_LO);
+			*FastPin<PORTC_FIRST_PIN>::cport() = ((~b2.bytes[15-i]) & PMASK_HI);
+
+			while((next_mark - ARM_DWT_CYCCNT) > (T3));
+			*FastPin<PORTD_FIRST_PIN>::cport() = PMASK_LO;
+			*FastPin<PORTC_FIRST_PIN>::cport() = PMASK_HI;
+
+			// LANES is a compile-time constant, so for LANES >= 8 the first test folds away
+			if(LANES>=8 || (i < LANES)) {
+				b.bytes[i] = pixels.template loadAndScale<PX>(pixels,i,d,scale);
+			}
+			if(LANES==16 || (LANES>8 && ((i+8) < LANES))) {
+				b.bytes[i+8] = pixels.template loadAndScale<PX>(pixels,i+8,d,scale);
+			}
+		}
+	}
+
+
+
+	/// Bit-bang one frame on all lanes with interrupts disabled throughout.
+	/// @return the DWT cycle counter value at the end of the frame (the counter is reset to 0 on entry)
+	static uint32_t showRGBInternal(PixelController<RGB_ORDER,LANES, PMASK> &allpixels) {
+		// Enable the DWT cycle counter and restart it from zero
+		ARM_DEMCR    |= ARM_DEMCR_TRCENA;
+		ARM_DWT_CTRL |= ARM_DWT_CTRL_CYCCNTENA;
+		ARM_DWT_CYCCNT = 0;
+
+		// Setup the pixel controller and load/scale the first byte.
+		// The pre-step must run exactly once: a second call undoes the first.
+		allpixels.preStepFirstByteDithering();
+		register Lines b0;
+
+		for(int i = 0; i < LANES; i++) {
+			b0.bytes[i] = allpixels.loadAndScale0(i);
+		}
+		// Lanes that are never loaded still pass through the transpose; give them a defined value
+		// (their bits are masked off by PMASK_LO/PMASK_HI on output).
+		for(int i = LANES; i < 16; i++) {
+			b0.bytes[i] = 0;
+		}
+
+		cli();
+		uint32_t next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+
+		while(allpixels.has(1)) {
+			allpixels.stepDithering();
+			#if 0 && (FASTLED_ALLOW_INTERRUPTS == 1)
+			cli();
+			// if we are more than (WAIT_TIME - INTERRUPT_THRESHOLD) us past the scheduled mark, punt on the current frame
+			if(ARM_DWT_CYCCNT > next_mark) {
+				if((ARM_DWT_CYCCNT-next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return ARM_DWT_CYCCNT; }
+			}
+			#endif
+
+			// Write first byte, read next byte
+			writeBits<8+XTRA0,1>(next_mark, b0, allpixels);
+
+			// Write second byte, read 3rd byte
+			writeBits<8+XTRA0,2>(next_mark, b0, allpixels);
+			allpixels.advanceData();
+
+			// Write third byte
+			writeBits<8+XTRA0,0>(next_mark, b0, allpixels);
+
+			#if 0 && (FASTLED_ALLOW_INTERRUPTS == 1)
+			sei();
+			#endif
+		};
+		sei();
+
+		return ARM_DWT_CYCCNT;
+	}
+};
+
+FASTLED_NAMESPACE_END
+
+#endif
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/k66/fastled_arm_k66.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k66/fastled_arm_k66.h
@@ -0,0 +1,14 @@
+#ifndef __INC_FASTLED_ARM_K66_H
+#define __INC_FASTLED_ARM_K66_H
+
+// Include the k66 headers
+#include "fastpin_arm_k66.h"
+#include "fastspi_arm_k66.h"
+#include "../k20/octows2811_controller.h"
+#include "../k20/ws2812serial_controller.h"
+#include "../k20/smartmatrix_t3.h"
+#include "clockless_arm_k66.h"
+#include "clockless_block_arm_k66.h"
+
+#endif
+
--- a/libraries/FastLED-3.2.0/platforms/arm/k66/fastpin_arm_k66.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k66/fastpin_arm_k66.h
@@ -0,0 +1,128 @@
+#ifndef __FASTPIN_ARM_K66_H
+#define __FASTPIN_ARM_K66_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_FORCE_SOFTWARE_PINS)
+#warning "Software pin support forced, pin access will be slightly slower."
+#define NO_HARDWARE_PIN_SUPPORT
+#undef HAS_HARDWARE_PIN_SUPPORT
+
+#else
+
+
+/// Template definition for teensy 3.x style ARM pins, providing direct access to the various GPIO registers.  Note that this
+/// uses the full port GPIO registers.  In theory, in some way, bit-band register access -should- be faster, however I have found
+/// that something about the way gcc does register allocation results in the bit-band code being slower.  It will need more fine tuning.
+/// The register type parameters are, in order: data output, set output, clear output, toggle output, input, and direction. _MASK is the pin's bit within the port.
+template<uint8_t PIN, uint32_t _MASK, typename _PDOR, typename _PSOR, typename _PCOR, typename _PTOR, typename _PDIR, typename _PDDR> class _ARMPIN {
+public:
+	typedef volatile uint32_t * port_ptr_t;
+	typedef uint32_t port_t;
+
+	inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+	inline static void setInput() { pinMode(PIN, INPUT); } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+
+	inline static void hi() __attribute__ ((always_inline)) { _PSOR::r() = _MASK; } // writes the pin's mask to the set-output register
+	inline static void lo() __attribute__ ((always_inline)) { _PCOR::r() = _MASK; } // writes the pin's mask to the clear-output register
+	inline static void set(register port_t val) __attribute__ ((always_inline)) { _PDOR::r() = val; } // writes val to the whole data-output register, not just this pin
+
+	inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); } // two toggles: the pin ends in its starting state
+
+	inline static void toggle() __attribute__ ((always_inline)) { _PTOR::r() = _MASK; } // writes the pin's mask to the toggle-output register
+
+	inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); } // port argument is ignored
+	inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); } // port argument is ignored
+	inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; } // stores val through the caller-supplied register pointer
+
+	inline static port_t hival() __attribute__ ((always_inline)) { return _PDOR::r() | _MASK; } // current data-output value with this pin's bit set
+	inline static port_t loval() __attribute__ ((always_inline)) { return _PDOR::r() & ~_MASK; } // current data-output value with this pin's bit cleared
+	inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_PDOR::r(); } // address of the data-output register
+	inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &_PSOR::r(); } // address of the set-output register
+	inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &_PCOR::r(); } // address of the clear-output register
+	inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+};
+
+/// Template definition for teensy 3.x style ARM pins using bit banding: each accessor goes through the bit-band alias word of bit _BIT
+/// (obtained from rx<_BIT>()) instead of the full port register. GCC does a poor job of optimizing around these accesses so they are not being used just yet.
+template<uint8_t PIN, int _BIT, typename _PDOR, typename _PSOR, typename _PCOR, typename _PTOR, typename _PDIR, typename _PDDR> class _ARMPIN_BITBAND {
+public:
+	typedef volatile uint32_t * port_ptr_t;
+	typedef uint32_t port_t;
+
+	inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+	inline static void setInput() { pinMode(PIN, INPUT); } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+
+	inline static void hi() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = 1; } // writes 1 to the pin's data-output alias word
+	inline static void lo() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = 0; } // writes 0 to the pin's data-output alias word
+	inline static void set(register port_t val) __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = val; } // val is written to this pin's alias word only; callers pass hival()/loval()
+
+	inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); } // two toggles: the pin ends in its starting state
+
+	inline static void toggle() __attribute__ ((always_inline)) { *_PTOR::template rx<_BIT>() = 1; } // writes 1 to the pin's toggle-output alias word
+
+	inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi();  } // port argument is ignored
+	inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); } // port argument is ignored
+	inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = val; } // port argument is ignored; always writes this pin's alias word
+
+	inline static port_t hival() __attribute__ ((always_inline)) { return 1; } // value to write for "high"
+	inline static port_t loval() __attribute__ ((always_inline)) { return 0; } // value to write for "low"
+	inline static port_ptr_t port() __attribute__ ((always_inline)) { return _PDOR::template rx<_BIT>(); } // address of the pin's data-output alias word
+	inline static port_t mask() __attribute__ ((always_inline)) { return 1; }
+};
+
+// Macros for Kinetis (k20-style, reused here for k66) pin access/definition
+#define GPIO_BITBAND_ADDR(reg, bit) (((uint32_t)&(reg) - 0x40000000) * 32 + (bit) * 4 + 0x42000000) // alias address: 32 bytes per register byte plus 4 bytes per bit, rebased from 0x40000000 to 0x42000000
+#define GPIO_BITBAND_PTR(reg, bit) ((uint32_t *)GPIO_BITBAND_ADDR((reg), (bit)))
+
+#define _R(T) struct __gen_struct_ ## T // names the accessor struct that _RD32(T) defines
+#define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; } \
+	template<int BIT> static __attribute__((always_inline)) inline ptr_reg32_t rx() { return GPIO_BITBAND_PTR(T, BIT); } };
+#define _IO32(L) _RD32(GPIO ## L ## _PDOR); _RD32(GPIO ## L ## _PSOR); _RD32(GPIO ## L ## _PCOR); _RD32(GPIO ## L ## _PTOR); _RD32(GPIO ## L ## _PDIR); _RD32(GPIO ## L ## _PDDR);
+
+#define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
+																			_R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {}; \
+									template<> class FastPinBB<PIN> : public _ARMPIN_BITBAND<PIN, BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
+ 																			_R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {};
+
+// Actual pin definitions: each _DEFPIN_ARM(pin, bit, port) entry maps an Arduino pin number to a bit of GPIO port A-E
+#if defined(FASTLED_TEENSY3) && defined(CORE_TEENSY)
+
+_IO32(A); _IO32(B); _IO32(C); _IO32(D); _IO32(E);
+
+#define MAX_PIN 63 // highest pin number defined in the table below
+_DEFPIN_ARM( 0, 16, B); _DEFPIN_ARM( 1, 17, B); _DEFPIN_ARM( 2,  0, D); _DEFPIN_ARM( 3, 12, A);
+_DEFPIN_ARM( 4, 13, A); _DEFPIN_ARM( 5,  7, D); _DEFPIN_ARM( 6,  4, D); _DEFPIN_ARM( 7,  2, D);
+_DEFPIN_ARM( 8,  3, D); _DEFPIN_ARM( 9,  3, C); _DEFPIN_ARM(10,  4, C); _DEFPIN_ARM(11,  6, C);
+_DEFPIN_ARM(12,  7, C); _DEFPIN_ARM(13,  5, C); _DEFPIN_ARM(14,  1, D); _DEFPIN_ARM(15,  0, C);
+_DEFPIN_ARM(16,  0, B); _DEFPIN_ARM(17,  1, B); _DEFPIN_ARM(18,  3, B); _DEFPIN_ARM(19,  2, B);
+_DEFPIN_ARM(20,  5, D); _DEFPIN_ARM(21,  6, D); _DEFPIN_ARM(22,  1, C); _DEFPIN_ARM(23,  2, C);
+_DEFPIN_ARM(24, 26, E); _DEFPIN_ARM(25,  5, A); _DEFPIN_ARM(26, 14, A); _DEFPIN_ARM(27, 15, A);
+_DEFPIN_ARM(28, 16, A); _DEFPIN_ARM(29, 18, B); _DEFPIN_ARM(30, 19, B); _DEFPIN_ARM(31, 10, B);
+_DEFPIN_ARM(32, 11, B); _DEFPIN_ARM(33, 24, E); _DEFPIN_ARM(34, 25, E); _DEFPIN_ARM(35,  8, C);
+_DEFPIN_ARM(36,  9, C); _DEFPIN_ARM(37, 10, C); _DEFPIN_ARM(38, 11, C); _DEFPIN_ARM(39, 17, A);
+_DEFPIN_ARM(40, 28, A); _DEFPIN_ARM(41, 29, A); _DEFPIN_ARM(42, 26, A); _DEFPIN_ARM(43, 20, B);
+_DEFPIN_ARM(44, 22, B); _DEFPIN_ARM(45, 23, B); _DEFPIN_ARM(46, 21, B); _DEFPIN_ARM(47,  8, D);
+_DEFPIN_ARM(48,  9, D); _DEFPIN_ARM(49,  4, B); _DEFPIN_ARM(50,  5, B); _DEFPIN_ARM(51, 14, D);
+_DEFPIN_ARM(52, 13, D); _DEFPIN_ARM(53, 12, D); _DEFPIN_ARM(54, 15, D); _DEFPIN_ARM(55, 11, D);
+_DEFPIN_ARM(56, 10, E); _DEFPIN_ARM(57, 11, E); _DEFPIN_ARM(58,  0, E); _DEFPIN_ARM(59,  1, E);
+_DEFPIN_ARM(60,  2, E); _DEFPIN_ARM(61,  3, E); _DEFPIN_ARM(62,  4, E); _DEFPIN_ARM(63,  5, E);
+
+
+
+#define SPI_DATA 11 // pin number; presumably the default hardware SPI data-out pin - confirm against the SPI code
+#define SPI_CLOCK 13 // pin number; presumably the default hardware SPI clock pin - confirm
+
+#define SPI2_DATA 7 // pin number; presumably the data-out pin of the alternate SPI pin set - confirm
+#define SPI2_CLOCK 14 // pin number; presumably the clock pin of the alternate SPI pin set - confirm
+
+#define FASTLED_TEENSY3
+#define ARM_HARDWARE_SPI
+#define HAS_HARDWARE_PIN_SUPPORT
+#endif
+
+#endif // FASTLED_FORCE_SOFTWARE_PINS
+
+FASTLED_NAMESPACE_END
+
+#endif // __FASTPIN_ARM_K66_H
--- a/libraries/FastLED-3.2.0/platforms/arm/k66/fastspi_arm_k66.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k66/fastspi_arm_k66.h
@@ -0,0 +1,470 @@
+#ifndef __INC_FASTSPI_ARM_H
+#define __INC_FASTSPI_ARM_H
+
+//
+// copied from k20 code
+// changed SPI1 define to KINETISK_SPI1
+// TODO: add third alternative MOSI pin (28) and CLOCK pin (27)
+// TODO: add alternative pins for SPI1
+// TODO: add SPI2 output
+//
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_TEENSY3) && defined(CORE_TEENSY)
+
+// Version 1.20 renamed SPI_t to KINETISK_SPI_t
+#if TEENSYDUINO >= 120
+#define SPI_t KINETISK_SPI_t
+#endif
+
+#ifndef KINETISK_SPI0
+#define KINETISK_SPI0 SPI0
+#endif
+
+#ifndef SPI_PUSHR_CONT
+#define SPI_PUSHR_CONT SPIX.PUSHR_CONT
+#define SPI_PUSHR_CTAS(X) SPIX.PUSHR_CTAS(X)
+#define SPI_PUSHR_EOQ SPIX.PUSHR_EOQ
+#define SPI_PUSHR_CTCNT SPIX.PUSHR_CTCNT
+#define SPI_PUSHR_PCS(X) SPIX.PUSHR_PCS(X)
+#endif
+
+// Compile-time helper: highestBit() yields the index of the highest set bit of VAL, scanning downward
+// starting at bit BIT.  When no bit at or below BIT is set the answer is 0, which is also the answer when
+// bit 0 is the highest set bit -- callers cannot tell those two cases apart.
+template<int VAL, int BIT> class BitWork {
+public:
+	static int highestBit() __attribute__((always_inline)) {
+		// Bit BIT set -> done; otherwise hand off to the next lower bit.
+		if ((VAL & (1 << BIT)) != 0) { return BIT; }
+		return BitWork<VAL, BIT-1>::highestBit();
+	}
+};
+
+// Recursion terminator: once the scan reaches bit 0 the result is 0 whether or not that bit is set.
+template<int VAL> class BitWork<VAL, 0> {
+public:
+	static int highestBit() __attribute__((always_inline)) { return 0; }
+};
+
+#define MAX(A, B) (( (A) > (B) ) ? (A) : (B))
+
+#define USE_CONT 0
+// intra-frame backup data: filled in by save_spi_state() on select and read back on release
+struct SPIState {
+	// copies of SPIX.CTAR0 / SPIX.CTAR1 taken before this output reprograms them
+	uint32_t _ctar0,_ctar1;
+	// copies of CORE_PIN7_CONFIG, CORE_PIN11_CONFIG, CORE_PIN13_CONFIG, CORE_PIN14_CONFIG (indices 0..3)
+	uint32_t pins[4];
+};
+
+// extern SPIState gState;
+
+
+// Templated function to translate a clock divider value into the prescalar, scalar, and clock doubling setting for the world.
+//
+// VAL is the requested divider, fixed at compile time.  Results are returned through the three reference
+// parameters; setSPIRate() in this file feeds them to SPI_CTAR_PBR(), SPI_CTAR_BR() and uses a non-zero
+// dbl to OR in SPI_CTAR_DBR.
+// NOTE(review): what each prescaler/scaler index means in hardware (presumably prescaler 0..3 = /2,/3,/5,/7)
+// is defined by the Kinetis DSPI CTAR register, not by anything visible here -- confirm there.
+template <int VAL> void getScalars(uint32_t & preScalar, uint32_t & scalar, uint32_t & dbl) {
+	switch(VAL) {
+		// Handle the dbl clock cases
+		case 0: case 1:
+		case 2: preScalar = 0; scalar = 0; dbl = 1; break;
+		case 3: preScalar = 1; scalar = 0; dbl = 1; break;
+		case 5: preScalar = 2; scalar = 0; dbl = 1; break;
+		case 7: preScalar = 3; scalar = 0; dbl = 1; break;
+
+		// Handle the scalar value 6 cases (since it's not a power of two, it won't get caught
+		// below)
+		case 9: preScalar = 1; scalar = 2; dbl = 1; break;
+		case 18: case 19: preScalar = 1; scalar = 2; dbl = 0; break;
+
+		case 15: preScalar = 2; scalar = 2; dbl = 1; break;
+		case 30: case 31: preScalar = 2; scalar = 2; dbl = 0; break;
+
+		case 21: case 22: case 23: preScalar = 3; scalar = 2; dbl = 1; break;
+		case 42: case 43: case 44: case 45: case 46: case 47: preScalar = 3; scalar = 2; dbl = 0; break;
+		default: {
+			// pN = index of the highest set bit of VAL/N (0 when VAL/N is 0 or 1)
+			int p2 = BitWork<VAL/2, 15>::highestBit();
+			int p3 = BitWork<VAL/3, 15>::highestBit();
+			int p5 = BitWork<VAL/5, 15>::highestBit();
+			int p7 = BitWork<VAL/7, 15>::highestBit();
+
+			// wN = N * 2^pN, the candidate divider built on base N; the 3/5/7 candidates are zeroed
+			// when VAL is smaller than the base itself
+			int w2 = 2 * (1 << p2);
+			int w3 = (VAL/3) > 0 ? 3 * (1 << p3) : 0;
+			int w5 = (VAL/5) > 0 ? 5 * (1 << p5) : 0;
+			int w7 = (VAL/7) > 0 ? 7 * (1 << p7) : 0;
+
+			// keep the largest candidate
+			int maxval = MAX(MAX(w2, w3), MAX(w5, w7));
+
+			// on a tie the lowest base wins because it is tested first
+			if(w2 == maxval) { preScalar = 0; scalar = p2; }
+			else if(w3 == maxval) { preScalar = 1; scalar = p3; }
+			else if(w5 == maxval) { preScalar = 2; scalar = p5; }
+			else if(w7 == maxval) { preScalar = 3; scalar = p7; }
+
+			// A scalar of 0 switches clock doubling on; scalars 1 and 2 are lowered by one; 3 and up
+			// are used unchanged.
+			// NOTE(review): presumably this adjusts for the non-power-of-two entry (6) in the hardware
+			// scaler table mentioned above -- confirm against the CTAR BR encoding.
+			dbl = 0;
+			if(scalar == 0) { dbl = 1; }
+			else if(scalar < 3) { scalar--; }
+		}
+	}
+	return;
+}
+
+#define SPIX (*(SPI_t*)pSPIX)
+
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
+class ARMHardwareSPIOutput {
+	Selectable *m_pSelect;
+	SPIState gState;
+
+	// Borrowed from the teensy3 SPSR emulation code -- note, enabling pin 7 disables pin 11 (and vice versa),
+	// and likewise enabling pin 14 disables pin 13 (and vice versa).
+	//
+	// The selected pin is written with PORT_PCR_MUX(2) and its partner with PORT_PCR_MUX(1); presumably
+	// mux 2 is the SPI function and mux 1 plain GPIO -- confirm against the K66 pin mux table.
+	// Only data pins 7/11 and clock pins 13/14 are handled; for any other _DATA_PIN/_CLOCK_PIN the
+	// corresponding switch matches nothing and no pin register is written.
+	inline void enable_pins(void) __attribute__((always_inline)) {
+		//serial_print("enable_pins\n");
+		switch(_DATA_PIN) {
+			case 7:
+				CORE_PIN7_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
+				CORE_PIN11_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+				break;
+			case 11:
+				CORE_PIN11_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
+				CORE_PIN7_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+				break;
+		}
+
+		switch(_CLOCK_PIN) {
+			case 13:
+				CORE_PIN13_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
+				CORE_PIN14_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+				break;
+			case 14:
+				CORE_PIN14_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
+				CORE_PIN13_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+				break;
+		}
+	}
+
+	// Borrowed from the teensy3 SPSR emulation code.  We disable the pins that we're using, and restore the state on the pins that we aren't using.
+	// "Disable" here means writing the pin we drove back to PORT_PCR_MUX(1); the partner pin gets the
+	// configuration word that save_spi_state() stored in gState.pins[] (0 = pin 7, 1 = pin 11, 2 = pin 13,
+	// 3 = pin 14).  Pins other than 7/11 and 13/14 are not touched.
+	inline void disable_pins(void) __attribute__((always_inline)) {
+		switch(_DATA_PIN) {
+			case 7: CORE_PIN7_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); CORE_PIN11_CONFIG = gState.pins[1]; break;
+			case 11: CORE_PIN11_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); CORE_PIN7_CONFIG = gState.pins[0]; break;
+		}
+
+		switch(_CLOCK_PIN) {
+			case 13: CORE_PIN13_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); CORE_PIN14_CONFIG = gState.pins[3]; break;
+			case 14: CORE_PIN14_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); CORE_PIN13_CONFIG = gState.pins[2]; break;
+		}
+	}
+
+	// Write new values to both CTAR0 and CTAR1.
+	// Does nothing when both registers already hold the requested values.  If the module is currently
+	// disabled (MDIS set in MCR) the registers are written directly; otherwise MDIS and HALT are set for
+	// the duration of the two writes and the previous MCR value is put back afterwards.
+	static inline void update_ctars(uint32_t ctar0, uint32_t ctar1) __attribute__((always_inline)) {
+		if(SPIX.CTAR0 == ctar0 && SPIX.CTAR1 == ctar1) return;
+		uint32_t mcr = SPIX.MCR;
+		if(mcr & SPI_MCR_MDIS) {
+			SPIX.CTAR0 = ctar0;
+			SPIX.CTAR1 = ctar1;
+		} else {
+			// stop the module while the attributes change, then restore the saved MCR
+			SPIX.MCR = mcr | SPI_MCR_MDIS | SPI_MCR_HALT;
+			SPIX.CTAR0 = ctar0;
+			SPIX.CTAR1 = ctar1;
+			SPIX.MCR = mcr;
+		}
+	}
+
+	// Write a new value to CTAR0 only; same skip-if-unchanged and MDIS/HALT bracketing as update_ctars().
+	static inline void update_ctar0(uint32_t ctar) __attribute__((always_inline)) {
+		if (SPIX.CTAR0 == ctar) return;
+		uint32_t mcr = SPIX.MCR;
+		if (mcr & SPI_MCR_MDIS) {
+			// module already disabled: write straight through
+			SPIX.CTAR0 = ctar;
+		} else {
+			SPIX.MCR = mcr | SPI_MCR_MDIS | SPI_MCR_HALT;
+			SPIX.CTAR0 = ctar;
+
+			// put MCR back exactly as it was read
+			SPIX.MCR = mcr;
+		}
+	}
+
+	// Write a new value to CTAR1 only; same skip-if-unchanged and MDIS/HALT bracketing as update_ctars().
+	// writeBit() and the start-bit path of writePixels() use this to change the CTAR1 frame size temporarily.
+	static inline void update_ctar1(uint32_t ctar) __attribute__((always_inline)) {
+		if (SPIX.CTAR1 == ctar) return;
+		uint32_t mcr = SPIX.MCR;
+		if (mcr & SPI_MCR_MDIS) {
+			// module already disabled: write straight through
+			SPIX.CTAR1 = ctar;
+		} else {
+			SPIX.MCR = mcr | SPI_MCR_MDIS | SPI_MCR_HALT;
+			SPIX.CTAR1 = ctar;
+			// put MCR back exactly as it was read
+			SPIX.MCR = mcr;
+
+		}
+	}
+
+	// Program both transfer-attribute registers for this output's clock divider.
+	// CTAR0 is built with FMSZ(7) and CTAR1 with FMSZ(15); both share the prescaler/scaler values that
+	// getScalars<_SPI_CLOCK_DIVIDER>() produces, and CSSCK is given the same value as BR.  The result is
+	// applied through update_ctars(), which skips the write when nothing changed.
+	void setSPIRate() {
+		// Configure CTAR0, defaulting to 8 bits and CTAR1, defaulting to 16 bits
+		uint32_t _PBR = 0;
+		uint32_t _BR = 0;
+		uint32_t _CSSCK = 0;
+		uint32_t _DBR = 0;
+
+		// Earlier hand-written divider table, superseded by getScalars() below; kept for reference.
+		// if(_SPI_CLOCK_DIVIDER >= 256) 		{ _PBR = 0; _BR = _CSSCK = 7; _DBR = 0; } // osc/256
+		// else if(_SPI_CLOCK_DIVIDER >= 128) 	{ _PBR = 0; _BR = _CSSCK = 6; _DBR = 0; } // osc/128
+		// else if(_SPI_CLOCK_DIVIDER >= 64) 	{ _PBR = 0; _BR = _CSSCK = 5; _DBR = 0; } // osc/64
+		// else if(_SPI_CLOCK_DIVIDER >= 32) 	{ _PBR = 0; _BR = _CSSCK = 4; _DBR = 0; } // osc/32
+		// else if(_SPI_CLOCK_DIVIDER >= 16) 	{ _PBR = 0; _BR = _CSSCK = 3; _DBR = 0; } // osc/16
+		// else if(_SPI_CLOCK_DIVIDER >= 8) 	{ _PBR = 0; _BR = _CSSCK = 1; _DBR = 0; } // osc/8
+		// else if(_SPI_CLOCK_DIVIDER >= 7) 	{ _PBR = 3; _BR = _CSSCK = 0; _DBR = 1; } // osc/7
+		// else if(_SPI_CLOCK_DIVIDER >= 5) 	{ _PBR = 2; _BR = _CSSCK = 0; _DBR = 1; } // osc/5
+		// else if(_SPI_CLOCK_DIVIDER >= 4) 	{ _PBR = 0; _BR = _CSSCK = 0; _DBR = 0; } // osc/4
+		// else if(_SPI_CLOCK_DIVIDER >= 3) 	{ _PBR = 1; _BR = _CSSCK = 0; _DBR = 1; } // osc/3
+		// else                                { _PBR = 0; _BR = _CSSCK = 0; _DBR = 1; } // osc/2
+
+		getScalars<_SPI_CLOCK_DIVIDER>(_PBR, _BR, _DBR);
+		_CSSCK = _BR;
+
+		uint32_t ctar0 = SPI_CTAR_FMSZ(7) | SPI_CTAR_PBR(_PBR) | SPI_CTAR_BR(_BR) | SPI_CTAR_CSSCK(_CSSCK);
+		uint32_t ctar1 = SPI_CTAR_FMSZ(15) | SPI_CTAR_PBR(_PBR) | SPI_CTAR_BR(_BR) | SPI_CTAR_CSSCK(_CSSCK);
+
+		// USE_CONT is defined as 0 in this file, so the CPHA/CPOL bits are normally left clear
+		#if USE_CONT == 1
+		ctar0 |= SPI_CTAR_CPHA | SPI_CTAR_CPOL;
+		ctar1 |= SPI_CTAR_CPHA | SPI_CTAR_CPOL;
+		#endif
+
+		if(_DBR) {
+			ctar0 |= SPI_CTAR_DBR;
+			ctar1 |= SPI_CTAR_DBR;
+		}
+
+		update_ctars(ctar0,ctar1);
+	}
+
+	void inline save_spi_state() __attribute__ ((always_inline)) {
+		// save ctar data
+		gState._ctar0 = SPIX.CTAR0;
+		gState._ctar1 = SPIX.CTAR1;
+
+		// save data for the not-us pins
+		gState.pins[0] = CORE_PIN7_CONFIG;
+		gState.pins[1] = CORE_PIN11_CONFIG;
+		gState.pins[2] = CORE_PIN13_CONFIG;
+		gState.pins[3] = CORE_PIN14_CONFIG;
+	}
+
+	void inline restore_spi_state() __attribute__ ((always_inline)) {
+		// restore ctar data
+		update_ctars(gState._ctar0,gState._ctar1);
+
+		// restore data for the not-us pins (not necessary because disable_pins will do this)
+		// CORE_PIN7_CONFIG = gState.pins[0];
+		// CORE_PIN11_CONFIG = gState.pins[1];
+		// CORE_PIN13_CONFIG = gState.pins[2];
+		// CORE_PIN14_CONFIG = gState.pins[3];
+	}
+
+
+public:
+	ARMHardwareSPIOutput() { m_pSelect = NULL; }
+	ARMHardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
+	void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
+
+
+	// One-time setup: make the data/clock pins outputs, switch on the SIM_SCGC6 clock bit for whichever
+	// SPI module pSPIX points at, and put that module in master mode with MDIS and HALT cleared.
+	void init() {
+		// set the pins to output
+		FastPin<_DATA_PIN>::setOutput();
+		FastPin<_CLOCK_PIN>::setOutput();
+
+		// Enable the clock for SPI0 or SPI1, whichever this instance targets.  The default CTAR0 is only
+		// written when the clock bit was previously off; a module that was already clocked keeps its CTAR0.
+		// Any other pSPIX value matches neither branch and no clock bit is changed.
+		uint32_t sim6 = SIM_SCGC6;
+		if((SPI_t*)pSPIX == &KINETISK_SPI0) {
+			if (!(sim6 & SIM_SCGC6_SPI0)) {
+				//serial_print("init1\n");
+				SIM_SCGC6 = sim6 | SIM_SCGC6_SPI0;
+				SPIX.CTAR0 = SPI_CTAR_FMSZ(7) | SPI_CTAR_PBR(1) | SPI_CTAR_BR(1);
+			}
+		} else if((SPI_t*)pSPIX == &KINETISK_SPI1) {
+			if (!(sim6 & SIM_SCGC6_SPI1)) {
+				//serial_print("init1\n");
+				SIM_SCGC6 = sim6 | SIM_SCGC6_SPI1;
+				SPIX.CTAR0 = SPI_CTAR_FMSZ(7) | SPI_CTAR_PBR(1) | SPI_CTAR_BR(1);
+			}
+		}
+
+		// Configure SPI as the master and enable
+		SPIX.MCR |= SPI_MCR_MSTR; // | SPI_MCR_CONT_SCKE);
+		SPIX.MCR &= ~(SPI_MCR_MDIS | SPI_MCR_HALT);
+
+		// pin/spi configuration happens on select
+	}
+
+	// Block until everything queued for transmission has gone out, then clear the TCF and EOQF flags.
+	// NOTE(review): (SR & 0xF000) is presumably the TX FIFO fill counter and TCF the transfer-complete
+	// flag -- confirm against the DSPI status register description.
+	// Each poll runs between cli() and sei(), so interrupts are enabled on return even if the caller had
+	// them disabled on entry.
+	static void waitFully() __attribute__((always_inline)) {
+		// Wait for the last byte to get shifted into the register
+		bool empty = false;
+
+		do {
+			cli();
+			if ((SPIX.SR & 0xF000) > 0) {
+				// reset the TCF flag
+				SPIX.SR |= SPI_SR_TCF;
+			} else {
+				empty = true;
+			}
+			sei();
+		} while (!empty);
+
+		// wait for the TCF flag to get set
+		while (!(SPIX.SR & SPI_SR_TCF));
+		SPIX.SR |= (SPI_SR_TCF | SPI_SR_EOQF);
+	}
+
+	static bool needwait() __attribute__((always_inline)) { return (SPIX.SR & 0x4000); }
+	static void wait() __attribute__((always_inline)) { while( (SPIX.SR & 0x4000) );  }
+	static void wait1() __attribute__((always_inline)) { while( (SPIX.SR & 0xF000) >= 0x2000);  }
+
+	enum ECont { CONT, NOCONT };
+	enum EWait { PRE, POST, NONE };
+	enum ELast { NOTLAST, LAST };
+
+	#if USE_CONT == 1
+	#define CM CONT
+	#else
+	#define CM NOCONT
+	#endif
+	#define WM PRE
+
+	template<ECont CONT_STATE, EWait WAIT_STATE, ELast LAST_STATE> class Write {
+	public:
+		static void writeWord(uint16_t w) __attribute__((always_inline)) {
+			if(WAIT_STATE == PRE) { wait(); }
+			SPIX.PUSHR = ((LAST_STATE == LAST) ? SPI_PUSHR_EOQ : 0) |
+			((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) |
+			SPI_PUSHR_CTAS(1) | (w & 0xFFFF);
+			SPIX.SR |= SPI_SR_TCF;
+			if(WAIT_STATE == POST) { wait(); }
+		}
+
+		static void writeByte(uint8_t b) __attribute__((always_inline)) {
+			if(WAIT_STATE == PRE) { wait(); }
+			SPIX.PUSHR = ((LAST_STATE == LAST) ? SPI_PUSHR_EOQ : 0) |
+			((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) |
+			SPI_PUSHR_CTAS(0) | (b & 0xFF);
+			SPIX.SR |= SPI_SR_TCF;
+			if(WAIT_STATE == POST) { wait(); }
+		}
+	};
+
+	static void writeWord(uint16_t w) __attribute__((always_inline)) { wait(); SPIX.PUSHR = SPI_PUSHR_CTAS(1) | (w & 0xFFFF); SPIX.SR |= SPI_SR_TCF;}
+	static void writeWordNoWait(uint16_t w) __attribute__((always_inline)) { SPIX.PUSHR = SPI_PUSHR_CTAS(1) | (w & 0xFFFF); SPIX.SR |= SPI_SR_TCF;}
+
+	static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); SPIX.PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF;}
+	static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { SPIX.PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF);SPIX.SR |= SPI_SR_TCF; wait(); }
+	static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { SPIX.PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF;}
+
+	static void writeWordCont(uint16_t w) __attribute__((always_inline)) { wait(); SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(1) | (w & 0xFFFF); SPIX.SR |= SPI_SR_TCF;}
+	static void writeWordContNoWait(uint16_t w) __attribute__((always_inline)) { SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(1) | (w & 0xFFFF); SPIX.SR |= SPI_SR_TCF;}
+
+	static void writeByteCont(uint8_t b) __attribute__((always_inline)) { wait(); SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF;}
+	static void writeByteContPostWait(uint8_t b) __attribute__((always_inline)) { SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF;wait(); }
+	static void writeByteContNoWait(uint8_t b) __attribute__((always_inline)) { SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF;}
+
+	// Send a single bit: bit number BIT of b, as a 1 or a 0.
+	// not the most efficient mechanism in the world - but should be enough for sm16716 and friends
+	template <uint8_t BIT> inline static void writeBit(uint8_t b) {
+		uint32_t ctar1_save = SPIX.CTAR1;
+
+		// Clear out the FMSZ bits, reset them for 1 bit transferred for the start bit
+		uint32_t ctar1 = (ctar1_save & (~SPI_CTAR_FMSZ(15))) | SPI_CTAR_FMSZ(0);
+		update_ctar1(ctar1);
+
+		// writeWord() pushes with CTAS(1), i.e. using the CTAR1 settings just installed
+		writeWord( (b & (1 << BIT)) != 0);
+
+		// put the original CTAR1 (frame size included) back
+		update_ctar1(ctar1_save);
+	}
+
+	void inline select() __attribute__((always_inline)) {
+		save_spi_state();
+		if(m_pSelect != NULL) { m_pSelect->select(); }
+		setSPIRate();
+		enable_pins();
+	}
+
+	void inline release() __attribute__((always_inline)) {
+		disable_pins();
+		if(m_pSelect != NULL) { m_pSelect->release(); }
+		restore_spi_state();
+	}
+
+	static void writeBytesValueRaw(uint8_t value, int len) {
+		while(len--) { Write<CM, WM, NOTLAST>::writeByte(value); }
+	}
+
+	void writeBytesValue(uint8_t value, int len) {
+		select();
+		while(len--) {
+			writeByte(value);
+		}
+		waitFully();
+		release();
+	}
+
+	// Write a block of n uint8_ts out
+	template <class D> void writeBytes(register uint8_t *data, int len) {
+		uint8_t *end = data + len;
+		select();
+		// could be optimized to write 16bit words out instead of 8bit bytes
+		while(data != end) {
+			writeByte(D::adjust(*data++));
+		}
+		D::postBlock(len);
+		waitFully();
+		release();
+	}
+
+	void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+	// Write out every pixel held by the PixelController, three scaled/dithered bytes per pixel, wrapped in
+	// select()/release().  Each byte goes through D::adjust() before it is sent and D::postBlock(len) is
+	// called once all pixels are queued.  FLAGS selects between the packed 16-bit path and the
+	// FLAG_START_BIT path, which prefixes each pixel with an extra 1 bit.
+	template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
+		select();
+		int len = pixels.mLen;
+
+		// Setup the pixel controller
+		if((FLAGS & FLAG_START_BIT) == 0) {
+			// No start bit needed, so write out as many 16-bit blocks as we can: two pixels (six bytes)
+			// go out as three 16-bit words per loop iteration
+			while(pixels.has(2)) {
+				// Load and write out the first two bytes
+				if(WM == NONE) { wait1(); }
+				Write<CM, WM, NOTLAST>::writeWord(D::adjust(pixels.loadAndScale0()) << 8 | D::adjust(pixels.loadAndScale1()));
+
+				// Load and write out the next two bytes (step dithering, advance data in between since we
+				// cross pixels here)
+				Write<CM, WM, NOTLAST>::writeWord(D::adjust(pixels.loadAndScale2()) << 8 | D::adjust(pixels.stepAdvanceAndLoadAndScale0()));
+
+				// Load and write out the next two bytes
+				Write<CM, WM, NOTLAST>::writeWord(D::adjust(pixels.loadAndScale1()) << 8 | D::adjust(pixels.loadAndScale2()));
+				pixels.stepDithering();
+				pixels.advanceData();
+			}
+
+			if(pixels.has(1)) {
+				if(WM == NONE) { wait1(); }
+				// write out the rest as alternating 16/8-bit blocks (likely to be just one)
+				Write<CM, WM, NOTLAST>::writeWord(D::adjust(pixels.loadAndScale0()) << 8 | D::adjust(pixels.loadAndScale1()));
+				Write<CM, WM, NOTLAST>::writeByte(D::adjust(pixels.loadAndScale2()));
+			}
+
+			D::postBlock(len);
+			waitFully();
+		} else if(FLAGS & FLAG_START_BIT) {
+			uint32_t ctar1_save = SPIX.CTAR1;
+
+			// Clear out the FMSZ bits, reset them for 9 bits transferred for the start bit
+			uint32_t ctar1 = (ctar1_save & (~SPI_CTAR_FMSZ(15))) | SPI_CTAR_FMSZ(8);
+			update_ctar1(ctar1);
+
+			while(pixels.has(1)) {
+				// first channel goes out as a 9-bit word with the extra top bit (0x100) set
+				writeWord( 0x100 | D::adjust(pixels.loadAndScale0()));
+				writeByte(D::adjust(pixels.loadAndScale1()));
+				writeByte(D::adjust(pixels.loadAndScale2()));
+				pixels.advanceData();
+				pixels.stepDithering();
+			}
+			D::postBlock(len);
+			waitFully();
+
+			// restore ctar1
+			update_ctar1(ctar1_save);
+		}
+		release();
+	}
+};
+#endif
+
+FASTLED_NAMESPACE_END
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/k66/led_sysdefs_arm_k66.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/k66/led_sysdefs_arm_k66.h
@@ -0,0 +1,46 @@
+#ifndef __INC_LED_SYSDEFS_ARM_K66_H
+#define __INC_LED_SYSDEFS_ARM_K66_H
+
+#define FASTLED_TEENSY3
+#define FASTLED_ARM
+
+#ifndef INTERRUPT_THRESHOLD
+#define INTERRUPT_THRESHOLD 1
+#endif
+
+// Default to allowing interrupts
+#ifndef FASTLED_ALLOW_INTERRUPTS
+#define FASTLED_ALLOW_INTERRUPTS 1
+#endif
+
+#if FASTLED_ALLOW_INTERRUPTS == 1
+#define FASTLED_ACCURATE_CLOCK
+#endif
+
+#if (F_CPU == 192000000)
+#define CLK_DBL 1
+#endif
+
+// Get some system include files
+#include <avr/io.h>
+#include <avr/interrupt.h> // for cli/sei definitions
+
+// Define the register types
+#if defined(ARDUINO) // && ARDUINO < 150
+typedef volatile       uint8_t RoReg; /**< 8-bit register type used for read-only registers (declared volatile, not const) */
+typedef volatile       uint8_t RwReg; /**< 8-bit register type used for read-write registers */
+#endif
+
+extern volatile uint32_t systick_millis_count;
+#  define MS_COUNTER systick_millis_count
+
+
+// Default to using PROGMEM, since TEENSY3 provides it
+// even though all it does is ignore it.  Just being
+// conservative here in case TEENSY3 changes.
+#ifndef FASTLED_USE_PROGMEM
+#define FASTLED_USE_PROGMEM 1
+#endif
+
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/kl26/clockless_arm_kl26.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/kl26/clockless_arm_kl26.h
@@ -0,0 +1,65 @@
+#ifndef __INC_CLOCKLESS_ARM_KL26
+#define __INC_CLOCKLESS_ARM_KL26
+
+#include "../common/m0clockless.h"
+FASTLED_NAMESPACE_BEGIN
+#define FASTLED_HAS_CLOCKLESS 1
+
+/// Clockless (single data wire) LED controller for the KL26, driving the strip through showLedData()
+/// from ../common/m0clockless.h.  T1/T2/T3 are the bit-timing parameters handed straight to showLedData();
+/// WAIT_TIME is the minimum gap enforced between frames through CMinWait and the delay used before a retry.
+/// XTRA0 and FLIP are accepted but not referenced anywhere in this class.
+template <uint8_t DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class ClocklessController : public CPixelLEDController<RGB_ORDER> {
+  typedef typename FastPinBB<DATA_PIN>::port_ptr_t data_ptr_t;
+  typedef typename FastPinBB<DATA_PIN>::port_t data_t;
+
+  // Cached in init(); showRGBInternal() is static and queries FastPin<DATA_PIN> directly instead.
+  data_t mPinMask;
+  data_ptr_t mPort;
+  CMinWait<WAIT_TIME> mWait;
+public:
+  /// Configure DATA_PIN as an output and cache its mask and port pointer.
+  virtual void init() {
+    FastPinBB<DATA_PIN>::setOutput();
+    mPinMask = FastPinBB<DATA_PIN>::mask();
+    mPort = FastPinBB<DATA_PIN>::port();
+  }
+
+  virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+  /// Push one frame out with interrupts disabled, retrying once if the first attempt reports 0.
+  virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+    mWait.wait();
+    cli();
+    uint32_t clocks = showRGBInternal(pixels);
+    if(!clocks) {
+      // A zero result presumably means the frame was abandoned part-way -- confirm in m0clockless.h.
+      // Re-enable interrupts for WAIT_TIME microseconds, then try once more.  showRGBInternal() takes
+      // the controller by value, so the retry starts again from the same pixel state.
+      sei(); delayMicroseconds(WAIT_TIME); cli();
+      clocks = showRGBInternal(pixels);
+    }
+    // Convert the returned count into microseconds and add the whole milliseconds to MS_COUNTER,
+    // presumably to make up for timer ticks missed while interrupts were off.
+    long microsTaken = CLKS_TO_MICROS(clocks * ((T1 + T2 + T3) * 24));
+    MS_COUNTER += (microsTaken / 1000);
+    sei();
+    mWait.mark();
+  }
+
+  /// Copy the dither, error, scale and advance values out of the pixel controller into an
+  /// M0ClocklessData and hand everything to showLedData().  Returns whatever showLedData() returns.
+  /// NOTE(review): the previous comment here justified `static` with AVR register allocation (register Y);
+  /// that reasoning does not describe this ARM target and looks carried over from the AVR driver.
+  static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+    struct M0ClocklessData data;
+    data.d[0] = pixels.d[0];
+    data.d[1] = pixels.d[1];
+    data.d[2] = pixels.d[2];
+    data.s[0] = pixels.mScale[0];
+    data.s[1] = pixels.mScale[1];
+    data.s[2] = pixels.mScale[2];
+    data.e[0] = pixels.e[0];
+    data.e[1] = pixels.e[1];
+    data.e[2] = pixels.e[2];
+    data.adj = pixels.mAdvance;
+
+    // 4 and 8 are showLedData's HI_OFFSET / LO_OFFSET template arguments; presumably the byte offsets
+    // of the set and clear registers from the port base -- confirm against the FGPIO register layout.
+    typename FastPin<DATA_PIN>::port_ptr_t portBase = FastPin<DATA_PIN>::port();
+    return showLedData<4,8,T1,T2,T3,RGB_ORDER, WAIT_TIME>(portBase, FastPin<DATA_PIN>::mask(), pixels.mData, pixels.mLen, &data);
+    // return 0; // 0x00FFFFFF - _VAL;
+  }
+
+
+};
+
+FASTLED_NAMESPACE_END
+
+
+#endif // __INC_CLOCKLESS_ARM_KL26
--- a/libraries/FastLED-3.2.0/platforms/arm/kl26/fastled_arm_kl26.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/kl26/fastled_arm_kl26.h
@@ -0,0 +1,10 @@
+#ifndef __INC_FASTLED_ARM_KL26_H
+#define __INC_FASTLED_ARM_KL26_H
+
+// Include the kl26 headers (the WS2812Serial controller is shared with the k20 port)
+#include "fastpin_arm_kl26.h"
+#include "fastspi_arm_kl26.h"
+#include "clockless_arm_kl26.h"
+#include "../k20/ws2812serial_controller.h"
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/kl26/fastpin_arm_kl26.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/kl26/fastpin_arm_kl26.h
@@ -0,0 +1,88 @@
+#ifndef __FASTPIN_ARM_KL26_H
+#define __FASTPIN_ARM_KL26_H
+
+FASTLED_NAMESPACE_BEGIN
+
+// When the sketch forces software pins, skip every hardware pin definition below and tell the user.
+#if defined(FASTLED_FORCE_SOFTWARE_PINS)
+#warning "Software pin support forced, pin access will be slightly slower."
+#define NO_HARDWARE_PIN_SUPPORT
+#undef HAS_HARDWARE_PIN_SUPPORT
+
+#else
+
+
+/// Template definition for teensy LC style ARM pins, providing direct access to the various GPIO registers.  Note that this
+/// uses the full port GPIO registers.  In theory, in some way, bit-band register access -should- be faster, however I have found
+/// that something about the way gcc does register allocation results in the bit-band code being slower.  It will need more fine tuning.
+/// The registers are data output, set output, clear output, toggle output, input, and direction.
+/// Each register type parameter supplies a static r() accessor; _MASK is the pin's bit within those registers.
+template<uint8_t PIN, uint32_t _MASK, typename _PDOR, typename _PSOR, typename _PCOR, typename _PTOR, typename _PDIR, typename _PDDR> class _ARMPIN {
+public:
+  typedef volatile uint32_t * port_ptr_t;
+  typedef uint32_t port_t;
+
+  // Direction changes go through pinMode() rather than writing _PDDR directly
+  inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+  inline static void setInput() { pinMode(PIN, INPUT); } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+
+  // Drive the pin through the set / clear registers; set() overwrites the whole data-output register
+  inline static void hi() __attribute__ ((always_inline)) { _PSOR::r() = _MASK; }
+  inline static void lo() __attribute__ ((always_inline)) { _PCOR::r() = _MASK; }
+  inline static void set(register port_t val) __attribute__ ((always_inline)) { _PDOR::r() = val; }
+
+  // Two toggles back to back: the pin ends in the state it started in
+  inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+
+  inline static void toggle() __attribute__ ((always_inline)) { _PTOR::r() = _MASK; }
+
+  // The port argument of hi()/lo() is ignored; these forward to the register-based versions above.
+  // fastset() is the only overload that writes through the supplied pointer.
+  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+
+  // hival()/loval(): current data-output register value with this pin's bit forced on / off
+  inline static port_t hival() __attribute__ ((always_inline)) { return _PDOR::r() | _MASK; }
+  inline static port_t loval() __attribute__ ((always_inline)) { return _PDOR::r() & ~_MASK; }
+  // Addresses of the data-output, set-output and clear-output registers
+  inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_PDOR::r(); }
+  inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &_PSOR::r(); }
+  inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &_PCOR::r(); }
+  inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+};
+
+// Macros for kl26 pin access/definition
+#define GPIO_BITBAND_ADDR(reg, bit) (((uint32_t)&(reg) - 0x40000000) * 32 + (bit) * 4 + 0x42000000)
+#define GPIO_BITBAND_PTR(reg, bit) ((uint32_t *)GPIO_BITBAND_ADDR((reg), (bit)))
+
+#define _R(T) struct __gen_struct_ ## T
+#define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; } \
+template<int BIT> static __attribute__((always_inline)) inline ptr_reg32_t rx() { return GPIO_BITBAND_PTR(T, BIT); } };
+#define _IO32(L) _RD32(FGPIO ## L ## _PDOR); _RD32(FGPIO ## L ## _PSOR); _RD32(FGPIO ## L ## _PCOR); _RD32(GPIO ## L ## _PTOR); _RD32(FGPIO ## L ## _PDIR); _RD32(FGPIO ## L ## _PDDR);
+
+#define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(FGPIO ## L ## _PDOR), _R(FGPIO ## L ## _PSOR), _R(FGPIO ## L ## _PCOR), \
+_R(GPIO ## L ## _PTOR), _R(FGPIO ## L ## _PDIR), _R(FGPIO ## L ## _PDDR)> {}; \
+/* template<> class FastPinBB<PIN> : public _ARMPIN_BITBAND<PIN, BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
+_R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {}; */
+
+// Actual pin definitions
+#if defined(FASTLED_TEENSYLC) && defined(CORE_TEENSY)
+
+_IO32(A); _IO32(B); _IO32(C); _IO32(D); _IO32(E);
+
+#define MAX_PIN 26
+_DEFPIN_ARM(0, 16, B); _DEFPIN_ARM(1, 17, B); _DEFPIN_ARM(2, 0, D); _DEFPIN_ARM(3, 1, A);
+_DEFPIN_ARM(4, 2, A); _DEFPIN_ARM(5, 7, D); _DEFPIN_ARM(6, 4, D); _DEFPIN_ARM(7, 2, D);
+_DEFPIN_ARM(8, 3, D); _DEFPIN_ARM(9, 3, C); _DEFPIN_ARM(10, 4, C); _DEFPIN_ARM(11, 6, C);
+_DEFPIN_ARM(12, 7, C); _DEFPIN_ARM(13, 5, C); _DEFPIN_ARM(14, 1, D); _DEFPIN_ARM(15, 0, C);
+_DEFPIN_ARM(16, 0, B); _DEFPIN_ARM(17, 1, B); _DEFPIN_ARM(18, 3, B); _DEFPIN_ARM(19, 2, B);
+_DEFPIN_ARM(20, 5, D); _DEFPIN_ARM(21, 6, D); _DEFPIN_ARM(22, 1, C); _DEFPIN_ARM(23, 2, C);
+_DEFPIN_ARM(24, 20, E); _DEFPIN_ARM(25, 21, E); _DEFPIN_ARM(26, 30, E);
+
+#define SPI_DATA 11
+#define SPI_CLOCK 13
+// #define SPI1            (*(SPI_t *)0x4002D000)
+
+#define SPI2_DATA 0
+#define SPI2_CLOCK 20
+
+#define HAS_HARDWARE_PIN_SUPPORT
+#endif
+
+#endif // FASTLED_FORCE_SOFTWARE_PINS
+
+FASTLED_NAMESPACE_END
+
+#endif // __FASTPIN_ARM_KL26_H
--- a/libraries/FastLED-3.2.0/platforms/arm/kl26/fastspi_arm_kl26.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/kl26/fastspi_arm_kl26.h
@@ -0,0 +1,252 @@
+// Include guard.  The tested and the defined macro must be spelled identically (upper-case _H) or the
+// guard never takes effect and a second inclusion redefines everything in this header.
+#ifndef __INC_FASTSPI_ARM_KL26_H
+#define __INC_FASTSPI_ARM_KL26_H
+
+FASTLED_NAMESPACE_BEGIN
+
+// Translate a compile-time clock divider VAL into the sppr/spr pair that setSPIRate() loads into
+// SPIX.BR through SPI_BR_SPPR()/SPI_BR_SPR().
+//
+// Every threshold below equals (sppr + 1) * 2^(spr + 1) for the pair it selects (presumably the KL26
+// baud-rate divisor formula -- confirm against the reference manual).  The comparisons are strict, so a
+// VAL exactly equal to a threshold falls through to the next smaller entry, and any VAL <= 4 ends up
+// with sppr = 0, spr = 0.
+//
+// Only one (sppr, spr) pair is listed per threshold: in an else-if chain a second test of the same
+// threshold can never be reached, so alternative factorizations of the same divisor are not repeated.
+template <int VAL> void getScalars(uint8_t & sppr, uint8_t & spr) {
+  if(VAL > 4096) { sppr=7; spr=8; }
+  else if(VAL > 3584) { sppr=6; spr=8; }
+  else if(VAL > 3072) { sppr=5; spr=8; }
+  else if(VAL > 2560) { sppr=4; spr=8; }
+  else if(VAL > 2048) { sppr=7; spr=7; }
+  else if(VAL > 1792) { sppr=6; spr=7; }
+  else if(VAL > 1536) { sppr=5; spr=7; }
+  else if(VAL > 1280) { sppr=4; spr=7; }
+  else if(VAL > 1024) { sppr=7; spr=6; }
+  else if(VAL > 896) { sppr=6; spr=6; }
+  else if(VAL > 768) { sppr=5; spr=6; }
+  else if(VAL > 640) { sppr=4; spr=6; }
+  else if(VAL > 512) { sppr=7; spr=5; }
+  else if(VAL > 448) { sppr=6; spr=5; }
+  else if(VAL > 384) { sppr=5; spr=5; }
+  else if(VAL > 320) { sppr=4; spr=5; }
+  else if(VAL > 256) { sppr=7; spr=4; }
+  else if(VAL > 224) { sppr=6; spr=4; }
+  else if(VAL > 192) { sppr=5; spr=4; }
+  else if(VAL > 160) { sppr=4; spr=4; }
+  else if(VAL > 128) { sppr=7; spr=3; }
+  else if(VAL > 112) { sppr=6; spr=3; }
+  else if(VAL > 96) { sppr=5; spr=3; }
+  else if(VAL > 80) { sppr=4; spr=3; }
+  else if(VAL > 64) { sppr=7; spr=2; }
+  else if(VAL > 56) { sppr=6; spr=2; }
+  else if(VAL > 48) { sppr=5; spr=2; }
+  else if(VAL > 40) { sppr=4; spr=2; }
+  else if(VAL > 32) { sppr=7; spr=1; }
+  else if(VAL > 28) { sppr=6; spr=1; }
+  else if(VAL > 24) { sppr=5; spr=1; }
+  else if(VAL > 20) { sppr=4; spr=1; }
+  else if(VAL > 16) { sppr=7; spr=0; }
+  else if(VAL > 14) { sppr=6; spr=0; }
+  else if(VAL > 12) { sppr=5; spr=0; }
+  else if(VAL > 10) { sppr=4; spr=0; }
+  else if(VAL > 8) { sppr=3; spr=0; }
+  else if(VAL > 6) { sppr=2; spr=0; }
+  else if(VAL > 4) { sppr=1; spr=0; }
+  else /* if(VAL > 2) */ { sppr=0; spr=0; }
+}
+
+
+#define SPIX (*(KINETISL_SPI_t*)pSPIX)
+#define ARM_HARDWARE_SPI
+
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
+class ARMHardwareSPIOutput {
+  Selectable *m_pSelect;
+
+  static inline void enable_pins(void) __attribute__((always_inline)) {
+    switch(_DATA_PIN) {
+      case 0: CORE_PIN0_CONFIG =  PORT_PCR_MUX(2); break;
+      case 1: CORE_PIN1_CONFIG =  PORT_PCR_MUX(5); break;
+      case 7: CORE_PIN7_CONFIG =  PORT_PCR_MUX(2); break;
+      case 8: CORE_PIN8_CONFIG =  PORT_PCR_MUX(5); break;
+      case 11: CORE_PIN11_CONFIG =  PORT_PCR_MUX(2); break;
+      case 12: CORE_PIN12_CONFIG =  PORT_PCR_MUX(5); break;
+      case 21: CORE_PIN21_CONFIG =  PORT_PCR_MUX(2); break;
+    }
+
+    switch(_CLOCK_PIN) {
+      case 13: CORE_PIN13_CONFIG =  PORT_PCR_MUX(2); break;
+      case 14: CORE_PIN14_CONFIG =  PORT_PCR_MUX(2); break;
+      case 20: CORE_PIN20_CONFIG =  PORT_PCR_MUX(2); break;
+    }
+  }
+
+  static inline void disable_pins(void) __attribute((always_inline)) {
+    switch(_DATA_PIN) {
+      case 0: CORE_PIN0_CONFIG = PORT_PCR_SRE | PORT_PCR_MUX(1); break;
+      case 1: CORE_PIN1_CONFIG = PORT_PCR_SRE | PORT_PCR_MUX(1); break;
+      case 7: CORE_PIN7_CONFIG = PORT_PCR_SRE | PORT_PCR_MUX(1); break;
+      case 8: CORE_PIN8_CONFIG = PORT_PCR_SRE | PORT_PCR_MUX(1); break;
+      case 11: CORE_PIN11_CONFIG = PORT_PCR_SRE | PORT_PCR_MUX(1); break;
+      case 12: CORE_PIN12_CONFIG = PORT_PCR_SRE | PORT_PCR_MUX(1); break;
+      case 21: CORE_PIN21_CONFIG = PORT_PCR_SRE | PORT_PCR_MUX(1); break;
+    }
+
+    switch(_CLOCK_PIN) {
+      case 13: CORE_PIN13_CONFIG = PORT_PCR_SRE | PORT_PCR_MUX(1); break;
+      case 14: CORE_PIN14_CONFIG = PORT_PCR_SRE | PORT_PCR_MUX(1); break;
+      case 20: CORE_PIN20_CONFIG = PORT_PCR_SRE | PORT_PCR_MUX(1); break;
+    }
+  }
+
+  // Load the baud-rate register from _SPI_CLOCK_DIVIDER (via getScalars), clear C2, and make sure the
+  // SPE bit is set in C1.
+  void setSPIRate() {
+    uint8_t sppr, spr;
+    getScalars<_SPI_CLOCK_DIVIDER>(sppr, spr);
+
+    // Set the speed
+    SPIX.BR = SPI_BR_SPPR(sppr) | SPI_BR_SPR(spr);
+
+    // Also, force 8 bit transfers (don't want to juggle 8/16 since that flushes the world)
+    SPIX.C2 = 0;
+    SPIX.C1 |= SPI_C1_SPE;
+  }
+
+public:
+  ARMHardwareSPIOutput() { m_pSelect = NULL; }
+  ARMHardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
+
+  // set the object representing the selectable
+  void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
+
+  // initialize the SPI subsystem: pins to output, module clock on, master mode with a default rate
+  void init() {
+    FastPin<_DATA_PIN>::setOutput();
+    FastPin<_CLOCK_PIN>::setOutput();
+
+    // Enable the SPI clocks.  pSPIX is a template constant compared against two fixed addresses
+    // (0x40076000 for the SPI0 clock bit, 0x40077000 for SPI1); the bit is only written if it was clear.
+    uint32_t sim4 = SIM_SCGC4;
+    if ((pSPIX == 0x40076000) && !(sim4 & SIM_SCGC4_SPI0)) {
+      SIM_SCGC4 = sim4 | SIM_SCGC4_SPI0;
+    }
+
+    if ( (pSPIX == 0x40077000) && !(sim4 & SIM_SCGC4_SPI1)) {
+      SIM_SCGC4 = sim4 | SIM_SCGC4_SPI1;
+    }
+
+    // Master mode, module enabled, C2 cleared, and a placeholder rate; setSPIRate() installs the real
+    // divider each time the output is selected.
+    SPIX.C1 = SPI_C1_MSTR | SPI_C1_SPE;
+    SPIX.C2 = 0;
+    SPIX.BR = SPI_BR_SPPR(1) | SPI_BR_SPR(0);
+  }
+
+  // latch the CS select
+  void inline select() __attribute__((always_inline)) {
+    if(m_pSelect != NULL) { m_pSelect->select(); }
+    setSPIRate();
+    enable_pins();
+  }
+
+
+  // release the CS select
+  void inline release() __attribute__((always_inline)) {
+    disable_pins();
+    if(m_pSelect != NULL) { m_pSelect->release(); }
+  }
+
+  // Wait for the world to be clear
+  static void wait() __attribute__((always_inline)) { while(!(SPIX.S & SPI_S_SPTEF));  }
+
+  // wait until all queued up data has been written
+  void waitFully() { wait(); }
+
+  // not the most efficient mechanism in the world - but should be enough for sm16716 and friends
+  template <uint8_t BIT> inline static void writeBit(uint8_t b) { /* TODO */ }
+
+  // write a byte out via SPI (returns immediately on writing register)
+  static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); SPIX.DL = b; }
+  // write a word out via SPI (returns immediately on writing register)
+  static void writeWord(uint16_t w) __attribute__((always_inline)) { writeByte(w>>8); writeByte(w & 0xFF); }
+
+  // A raw set of writing byte values, assumes setup/init/waiting done elsewhere (static for use by adjustment classes)
+  static void writeBytesValueRaw(uint8_t value, int len) {
+    while(len--) { writeByte(value); }
+  }
+
+  // A full cycle of writing a value for len bytes, including select, release, and waiting
+  void writeBytesValue(uint8_t value, int len) {
+    // NOTE(review): select() calls setSPIRate() itself, so this call looks redundant -- confirm nothing
+    // depends on the rate being programmed before chip select is asserted.
+    setSPIRate();
+    select();
+    while(len--) {
+      writeByte(value);
+    }
+    waitFully();
+    release();
+  }
+
+  // A full cycle of writing a raw block of data out, including select, release, and waiting.
+  // Every byte is passed through D::adjust() before it is sent; D::postBlock(len) runs after the last byte.
+  template <class D> void writeBytes(register uint8_t *data, int len) {
+    // NOTE(review): select() calls setSPIRate() itself, so this call looks redundant -- confirm nothing
+    // depends on the rate being programmed before chip select is asserted.
+    setSPIRate();
+    uint8_t *end = data + len;
+    select();
+    // could be optimized to write 16bit words out instead of 8bit bytes
+    while(data != end) {
+      writeByte(D::adjust(*data++));
+    }
+    D::postBlock(len);
+    waitFully();
+    release();
+  }
+
+  void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+
+  // Write every pixel of the controller out as three scaled bytes (through D::adjust), wrapped in
+  // select()/release(), then call D::postBlock(len).
+  // NOTE(review): unlike writeBytes()/writeBytesValue() there is no waitFully() before release(), so the
+  // pins may be handed back while the last byte is still pending -- confirm whether that is intended.
+  template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
+    int len = pixels.mLen;
+
+    select();
+    while(pixels.has(1)) {
+      if(FLAGS & FLAG_START_BIT) {
+        // writeBit() is an empty TODO stub in this class, so no start bit is actually emitted here
+        writeBit<0>(1);
+        writeByte(D::adjust(pixels.loadAndScale0()));
+        writeByte(D::adjust(pixels.loadAndScale1()));
+        writeByte(D::adjust(pixels.loadAndScale2()));
+      } else {
+        writeByte(D::adjust(pixels.loadAndScale0()));
+        writeByte(D::adjust(pixels.loadAndScale1()));
+        writeByte(D::adjust(pixels.loadAndScale2()));
+      }
+
+      pixels.advanceData();
+      pixels.stepDithering();
+    }
+    D::postBlock(len);
+    release();
+  }
+
+};
+
+FASTLED_NAMESPACE_END
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/kl26/led_sysdefs_arm_kl26.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/kl26/led_sysdefs_arm_kl26.h
@@ -0,0 +1,47 @@
+#ifndef __INC_LED_SYSDEFS_ARM_KL26_H
+#define __INC_LED_SYSDEFS_ARM_KL26_H
+// Platform configuration for the KL26 (Teensy LC) build: feature flags, interrupt defaults, and register types.
+#define FASTLED_TEENSYLC
+#define FASTLED_ARM
+#define FASTLED_ARM_M0_PLUS // m0clockless.h tests this to select its M0+ settings
+// INTERRUPT_THRESHOLD may be overridden by defining it before this header is included.
+#ifndef INTERRUPT_THRESHOLD
+#define INTERRUPT_THRESHOLD 1
+#endif
+
+#define FASTLED_SPI_BYTE_ONLY
+
+// Default to allowing interrupts
+#ifndef FASTLED_ALLOW_INTERRUPTS
+#define FASTLED_ALLOW_INTERRUPTS 1
+#endif
+
+#if FASTLED_ALLOW_INTERRUPTS == 1
+#define FASTLED_ACCURATE_CLOCK
+#endif
+// CLK_DBL is only defined when F_CPU is exactly 96 MHz.
+#if (F_CPU == 96000000)
+#define CLK_DBL 1
+#endif
+
+// Get some system include files
+#include <avr/io.h>
+#include <avr/interrupt.h> // for cli/sei definitions
+
+// Define the register types
+#if defined(ARDUINO) // && ARDUINO < 150
+typedef volatile       uint8_t RoReg; /**< 8-bit register type; declared volatile uint8_t (not const-qualified) */
+typedef volatile       uint8_t RwReg; /**< Read-Write 8-bit register (volatile uint8_t) */
+#endif
+// MS_COUNTER aliases the millisecond counter, which is defined outside this header.
+extern volatile uint32_t systick_millis_count;
+#  define MS_COUNTER systick_millis_count
+
+// Default to using PROGMEM since TEENSYLC provides it
+// even though all it does is ignore it.  Just being
+// conservative here in case TEENSYLC changes.
+#ifndef FASTLED_USE_PROGMEM
+#define FASTLED_USE_PROGMEM 1
+#endif
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/nrf51/clockless_arm_nrf51.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/nrf51/clockless_arm_nrf51.h
@@ -0,0 +1,83 @@
+#ifndef __INC_CLOCKLESS_ARM_NRF51
+#define __INC_CLOCKLESS_ARM_NRF51
+
+#if defined(NRF51)
+
+#include <nrf51_bitfields.h>
+#define FASTLED_HAS_CLOCKLESS 1
+
+#if (FASTLED_ALLOW_INTERRUPTS==1) // interrupt-friendly build: LED_TIMER compare 0 is used to detect that WAIT_TIME microseconds have elapsed
+#define SEI_CHK LED_TIMER->CC[0] = (WAIT_TIME * (F_CPU/1000000)); LED_TIMER->TASKS_CLEAR; LED_TIMER->EVENTS_COMPARE[0] = 0; // NOTE(review): TASKS_CLEAR is only read here, never written - confirm whether "= 1" was intended
+#define CLI_CHK cli(); if(LED_TIMER->EVENTS_COMPARE[0]) { LED_TIMER->TASKS_STOP = 1; return 0; } // expands to a return 0 from the enclosing function once the compare event has fired
+#define INNER_SEI sei();
+#else // interrupts not allowed: the check macros expand to nothing
+#define SEI_CHK
+#define CLI_CHK
+#define INNER_SEI delaycycles<1>();
+#endif // FASTLED_ALLOW_INTERRUPTS==1
+
+
+#include "../common/m0clockless.h"
+// Single-pin clockless LED controller for the nRF51.  The bit-level output is delegated to showLedData() from
+// m0clockless.h; this class prepares the per-frame data block and LED_TIMER around that call.
+template <uint8_t DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 75>
+class ClocklessController : public CPixelLEDController<RGB_ORDER> {
+  typedef typename FastPinBB<DATA_PIN>::port_ptr_t data_ptr_t;
+  typedef typename FastPinBB<DATA_PIN>::port_t data_t;
+
+  data_t mPinMask;
+  data_ptr_t mPort;
+  CMinWait<WAIT_TIME> mWait;
+public:
+  // Make DATA_PIN an output and cache its mask and port pointer.
+  virtual void init() {
+    FastPinBB<DATA_PIN>::setOutput();
+    mPinMask = FastPinBB<DATA_PIN>::mask();
+    mPort = FastPinBB<DATA_PIN>::port();
+  }
+
+  virtual uint16_t getMaxRefreshRate() const {
+    return 400;
+  }
+
+  // Push one frame out with interrupts disabled.  If the first attempt reports failure (returns 0),
+  // interrupts are briefly re-enabled for WAIT_TIME microseconds and the frame is sent once more.
+  virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+    mWait.wait();
+    cli();
+    const bool firstTryOk = (showRGBInternal(pixels) != 0);
+    if(!firstTryOk) {
+      sei();
+      delayMicroseconds(WAIT_TIME);
+      cli();
+      showRGBInternal(pixels);
+    }
+    sei();
+    mWait.mark();
+  }
+
+  // Static worker: copies the controller's per-channel values into an M0ClocklessData block, starts
+  // LED_TIMER, runs showLedData() and hands back its result.  The controller is taken by value.
+  static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+    struct M0ClocklessData data;
+
+    // channel 0
+    data.d[0] = pixels.d[0];
+    data.e[0] = pixels.e[0];
+    data.s[0] = pixels.mScale[0];
+    // channel 1
+    data.d[1] = pixels.d[1];
+    data.e[1] = pixels.e[1];
+    data.s[1] = pixels.mScale[1];
+    // channel 2
+    data.d[2] = pixels.d[2];
+    data.e[2] = pixels.e[2];
+    data.s[2] = pixels.mScale[2];
+
+    data.adj = pixels.mAdvance;
+
+    typename FastPin<DATA_PIN>::port_ptr_t portBase = FastPin<DATA_PIN>::port();
+
+    // timer mode w/prescaler of 0, 16-bit counter, auto-clear on compare 0
+    LED_TIMER->MODE = TIMER_MODE_MODE_Timer;
+    LED_TIMER->PRESCALER = 0;
+    LED_TIMER->EVENTS_COMPARE[0] = 0;
+    LED_TIMER->BITMODE = TIMER_BITMODE_BITMODE_16Bit;
+    LED_TIMER->SHORTS = TIMER_SHORTS_COMPARE0_CLEAR_Msk;
+    LED_TIMER->TASKS_START = 1;
+
+    int outcome = showLedData<4,8,T1,T2,T3,RGB_ORDER,WAIT_TIME>(portBase, FastPin<DATA_PIN>::mask(), pixels.mData, pixels.mLen, &data);
+
+    LED_TIMER->TASKS_STOP = 1;
+    return outcome;
+  }
+};
+
+
+#endif // NRF51
+#endif // __INC_CLOCKLESS_ARM_NRF51
--- a/libraries/FastLED-3.2.0/platforms/arm/nrf51/fastled_arm_nrf51.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/nrf51/fastled_arm_nrf51.h
@@ -0,0 +1,9 @@
+#ifndef __INC_FASTLED_ARM_NRF51_H
+#define __INC_FASTLED_ARM_NRF51_H
+
+// Include the nrf51 headers
+#include "fastpin_arm_nrf51.h"
+#include "fastspi_arm_nrf51.h"
+#include "clockless_arm_nrf51.h"
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/nrf51/fastpin_arm_nrf51.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/nrf51/fastpin_arm_nrf51.h
@@ -0,0 +1,119 @@
+#ifndef __FASTPIN_ARM_NRF51_H
+#define __FASTPIN_ARM_NRF51_H
+
+#if defined(NRF51)
+/// Template definition for nRF51 ARM pins, providing direct access to the various GPIO registers.  Note that this
+/// uses the full port GPIO registers.  In theory, in some way, bit-band register access -should- be faster, however I have found
+/// that something about the way gcc does register allocation results in the bit-band code being slower.  It will need more fine tuning.
+/// The registers are direction set, direction clear, output set, output clear, and data output
+#if 0
+template<uint8_t PIN, uint32_t _MASK, typename _DIRSET, typename _DIRCLR, typename _OUTSET, typename _OUTCLR, typename _OUT> class _ARMPIN { // compiled out by the enclosing #if 0; each register is supplied as a type exposing a static r()
+public:
+  typedef volatile uint32_t * port_ptr_t;
+  typedef uint32_t port_t;
+  // Direction: write this pin's mask to the direction-set / direction-clear register.
+  inline static void setOutput() { _DIRSET::r() = _MASK; }
+  inline static void setInput() { _DIRCLR::r() = _MASK; }
+  // Level: write this pin's mask to the output-set / output-clear register; set() overwrites the whole output register.
+  inline static void hi() __attribute__ ((always_inline)) { _OUTSET::r() = _MASK; }
+  inline static void lo() __attribute__ ((always_inline)) { _OUTCLR::r() = _MASK; }
+  inline static void set(register port_t val) __attribute__ ((always_inline)) { _OUT::r() = val; }
+  // strobe() is two back-to-back toggles.
+  inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+  // toggle() is a read-modify-write (XOR) of the full output register.
+  inline static void toggle() __attribute__ ((always_inline)) { _OUT::r() ^= _MASK; }
+  // The port argument of hi(port)/lo(port) is ignored; fastset() writes val through the supplied pointer.
+  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+  // hival()/loval(): current output register value with this pin's bit forced on / off (nothing is written).
+  inline static port_t hival() __attribute__ ((always_inline)) { return _OUT::r() | _MASK; }
+  inline static port_t loval() __attribute__ ((always_inline)) { return _OUT::r() & ~_MASK; }
+  inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_OUT::r(); }
+  inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+};
+
+#define ADDR(X) *(volatile uint32_t*)X
+#define NR_GPIO_ADDR(base,offset) (*(volatile uint32_t *))((uint32_t)(base + offset))
+#define NR_DIRSET ADDR(0x50000518UL) // NR_GPIO_ADDR(NRF_GPIO_BASE, 0x518)
+#define NR_DIRCLR ADDR(0x5000051CUL) // NR_GPIO_ADDR(NRF_GPIO_BASE, 0x51C)
+#define NR_OUTSET ADDR(0x50000508UL) // NR_GPIO_ADDR(NRF_GPIO_BASE, 0x508)
+#define NR_OUTCLR ADDR(0x5000050CUL) // NR_GPIO_ADDR(NRF_GPIO_BASE, 0x50C)
+#define NR_OUT ADDR(0x50000504UL) // NR_GPIO_ADDR(NRF_GPIO_BASE, 0x504)
+// _RD32_NRF(T) declares a struct whose static r() returns T.  NOTE(review): this whole section sits inside #if 0 and is never compiled.
+#define _RD32_NRF(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; }};
+// One accessor struct per GPIO register macro above.
+_RD32_NRF(NR_DIRSET);
+_RD32_NRF(NR_DIRCLR);
+_RD32_NRF(NR_OUTSET);
+_RD32_NRF(NR_OUTCLR);
+_RD32_NRF(NR_OUT);
+// FastPin<PIN> specialization built on the register-type flavour of _ARMPIN (disabled variant).
+#define _DEFPIN_ARM(PIN) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << PIN, \
+  _R(NR_DIRSET), _R(NR_DIRCLR), _R(NR_OUTSET), _R(NR_OUTCLR), _R(NR_OUT)> {};
+#else
+
+typedef struct {                                    /*!< GPIO Structure                                                        */
+  // __I  uint32_t  RESERVED0[321];  (left out: FL_NRF_GPIO_BASE already includes this 321-word / 0x504-byte offset, so OUT is the first member)
+  __IO uint32_t  OUT;                               /*!< Write GPIO port.                                                      */
+  __IO uint32_t  OUTSET;                            /*!< Set individual bits in GPIO port.                                     */
+  __IO uint32_t  OUTCLR;                            /*!< Clear individual bits in GPIO port.                                   */
+  __I  uint32_t  IN;                                /*!< Read GPIO port.                                                       */
+  __IO uint32_t  DIR;                               /*!< Direction of GPIO pins.                                               */
+  __IO uint32_t  DIRSET;                            /*!< DIR set register.                                                     */
+  __IO uint32_t  DIRCLR;                            /*!< DIR clear register.                                                   */
+  __I  uint32_t  RESERVED1[120];
+  __IO uint32_t  PIN_CNF[32];                       /*!< Configuration of GPIO pins.                                           */
+} FL_NRF_GPIO_Type;
+// FL_NRF_GPIO overlays the structure above on the address of the OUT register (0x50000504).
+#define FL_NRF_GPIO_BASE                   0x50000504UL
+#define FL_NRF_GPIO                        ((FL_NRF_GPIO_Type           *) FL_NRF_GPIO_BASE)
+
+// Pin accessor built on the FL_NRF_GPIO register overlay.  _MASK is the pin's bit within the 32-bit port.
+template<uint8_t PIN, uint32_t _MASK> class _ARMPIN {
+public:
+  typedef volatile uint32_t * port_ptr_t;
+  typedef uint32_t port_t;
+
+  // Direction changes are made by writing the pin mask to DIRSET / DIRCLR.
+  inline static void setOutput() {
+    FL_NRF_GPIO->DIRSET = _MASK;
+  }
+  inline static void setInput() {
+    FL_NRF_GPIO->DIRCLR = _MASK;
+  }
+
+  // Level changes are made by writing the pin mask to OUTSET / OUTCLR.
+  inline static void hi() __attribute__ ((always_inline)) {
+    FL_NRF_GPIO->OUTSET = _MASK;
+  }
+  inline static void lo() __attribute__ ((always_inline)) {
+    FL_NRF_GPIO->OUTCLR = _MASK;
+  }
+
+  // Overwrite the whole OUT register with val.
+  inline static void set(register port_t val) __attribute__ ((always_inline)) {
+    FL_NRF_GPIO->OUT = val;
+  }
+
+  // Two consecutive toggles.
+  inline static void strobe() __attribute__ ((always_inline)) {
+    toggle();
+    toggle();
+  }
+
+  // Read-modify-write of OUT that flips this pin's bit.
+  inline static void toggle() __attribute__ ((always_inline)) {
+    FL_NRF_GPIO->OUT ^= _MASK;
+  }
+
+  // Pointer-taking variants: hi/lo ignore the pointer, fastset stores val through it.
+  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) {
+    hi();
+  }
+  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) {
+    lo();
+  }
+  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) {
+    *port = val;
+  }
+
+  // Current OUT value with this pin's bit forced on / off; neither call writes to the port.
+  inline static port_t hival() __attribute__ ((always_inline)) {
+    const port_t current = FL_NRF_GPIO->OUT;
+    return current | _MASK;
+  }
+  inline static port_t loval() __attribute__ ((always_inline)) {
+    const port_t current = FL_NRF_GPIO->OUT;
+    return current & ~_MASK;
+  }
+
+  inline static port_ptr_t port() __attribute__ ((always_inline)) {
+    return &FL_NRF_GPIO->OUT;
+  }
+  inline static port_t mask() __attribute__ ((always_inline)) {
+    return _MASK;
+  }
+
+  // True when this pin's bit is set in the IN register.
+  inline static bool isset() __attribute__ ((always_inline)) {
+    const port_t sampled = FL_NRF_GPIO->IN;
+    return (sampled & _MASK) != 0;
+  }
+};
+
+
+// Declares the FastPin<PIN> specialization for one pin.  The mask is built from an unsigned 32-bit one so
+// that PIN 31 does not shift a signed int into its sign bit before being used as the uint32_t template argument.
+#define _DEFPIN_ARM(PIN) template<> class FastPin<PIN> : public _ARMPIN<PIN, ((uint32_t)1 << (PIN))> {};
+#endif
+
+// Actual pin definitions: one FastPin<N> specialization for every pin from 0 through MAX_PIN
+#define MAX_PIN 31
+_DEFPIN_ARM(0); _DEFPIN_ARM(1); _DEFPIN_ARM(2); _DEFPIN_ARM(3);
+_DEFPIN_ARM(4); _DEFPIN_ARM(5); _DEFPIN_ARM(6); _DEFPIN_ARM(7);
+_DEFPIN_ARM(8); _DEFPIN_ARM(9); _DEFPIN_ARM(10); _DEFPIN_ARM(11);
+_DEFPIN_ARM(12); _DEFPIN_ARM(13); _DEFPIN_ARM(14); _DEFPIN_ARM(15);
+_DEFPIN_ARM(16); _DEFPIN_ARM(17); _DEFPIN_ARM(18); _DEFPIN_ARM(19);
+_DEFPIN_ARM(20); _DEFPIN_ARM(21); _DEFPIN_ARM(22); _DEFPIN_ARM(23);
+_DEFPIN_ARM(24); _DEFPIN_ARM(25); _DEFPIN_ARM(26); _DEFPIN_ARM(27);
+_DEFPIN_ARM(28); _DEFPIN_ARM(29); _DEFPIN_ARM(30); _DEFPIN_ARM(31);
+// Flag that this header supplies hardware pin definitions.
+#define HAS_HARDWARE_PIN_SUPPORT
+
+#endif
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/nrf51/fastspi_arm_nrf51.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/nrf51/fastspi_arm_nrf51.h
@@ -0,0 +1,150 @@
+#ifndef __INC_FASTSPI_NRF_H
+#define __INC_FASTSPI_NRF_H
+
+#ifdef NRF51
+
+#ifndef FASTLED_FORCE_SOFTWARE_SPI
+#define FASTLED_ALL_PINS_HARDWARE_SPI
+
+// Hardware SPI output for the nRF51, driving the NRF_SPI0 peripheral.  It provides the set of SPI methods that are
+// needed/used by the various SPI chipset implementations (C++ doesn't support the idea of interfaces, so the contract is
+// implicit - it's possible this could be done with virtual classes, need to decide if i want that overhead)
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+class NRF51SPIOutput {
+
+  // Snapshot of the NRF_SPI0 registers that init() overwrites, taken in select() and written back in release().
+  struct saveData {
+    uint32_t sck;
+    uint32_t mosi;
+    uint32_t miso;
+    uint32_t freq;
+    uint32_t enable;
+  } mSavedData;
+
+  // Capture the current pin selection, frequency and enable state of NRF_SPI0.
+  void saveSPIData() {
+    mSavedData.sck = NRF_SPI0->PSELSCK;
+    mSavedData.mosi = NRF_SPI0->PSELMOSI;
+    mSavedData.miso = NRF_SPI0->PSELMISO;
+    mSavedData.freq = NRF_SPI0->FREQUENCY;
+    mSavedData.enable = NRF_SPI0->ENABLE;
+  }
+
+  // Write every value captured by saveSPIData() back to NRF_SPI0, including the enable state.
+  void restoreSPIData() {
+    NRF_SPI0->PSELSCK = mSavedData.sck;
+    NRF_SPI0->PSELMOSI = mSavedData.mosi;
+    NRF_SPI0->PSELMISO = mSavedData.miso;
+    NRF_SPI0->FREQUENCY = mSavedData.freq;
+    // ENABLE goes last so the peripheral ends up in the state it had before select().
+    NRF_SPI0->ENABLE = mSavedData.enable;
+  }
+
+public:
+  // Both constructors only switch the data and clock pins to outputs; pSelect is not stored.
+  NRF51SPIOutput() { FastPin<_DATA_PIN>::setOutput(); FastPin<_CLOCK_PIN>::setOutput(); }
+  NRF51SPIOutput(Selectable *pSelect) {  FastPin<_DATA_PIN>::setOutput(); FastPin<_CLOCK_PIN>::setOutput();  }
+
+  // set the object representing the selectable
+  void setSelect(Selectable *pSelect) { /* TODO */ }
+
+  // initialize the SPI subsystem: route SCK/MOSI to the template pins, leave MISO unassigned (0xFFFFFFFF),
+  // program FREQUENCY, enable the peripheral and clear the READY event
+  void init() {
+    FastPin<_DATA_PIN>::setOutput();
+    FastPin<_CLOCK_PIN>::setOutput();
+    NRF_SPI0->PSELSCK = _CLOCK_PIN;
+    NRF_SPI0->PSELMOSI = _DATA_PIN;
+    NRF_SPI0->PSELMISO = 0xFFFFFFFF;
+    NRF_SPI0->FREQUENCY = 0x80000000;
+    NRF_SPI0->ENABLE = 1;
+    NRF_SPI0->EVENTS_READY = 0;
+  }
+
+  // latch the CS select: remember the current SPI0 configuration, then take the peripheral over
+  void select() { saveSPIData(); init(); }
+
+  // release the CS select: hand SPI0 back in the configuration captured by select()
+  // NOTE(review): shouldWait() always returns false, so nothing here waits for an in-flight byte before the
+  // saved state (including ENABLE) is written back - confirm on hardware that the last byte is not cut short.
+  void release() { shouldWait(); restoreSPIData(); }
+
+  // Always reports false; the wait argument is ignored (see the TODO below).
+  static bool shouldWait(bool wait = false) __attribute__((always_inline)) {
+    // static bool sWait=false;
+    // bool oldWait = sWait;
+    // sWait = wait;
+    // never going to bother with waiting since we're always running the spi clock at max speed on the rfduino
+    // TODO: When we set clock rate, implement/fix waiting properly, otherwise the world hangs up
+    return false;
+  }
+
+  // wait until all queued up data has been written (the poll on EVENTS_READY only runs when shouldWait() says so)
+  static void waitFully() __attribute__((always_inline)){ if(shouldWait()) { while(NRF_SPI0->EVENTS_READY==0); } NRF_SPI0->INTENCLR; }
+  static void wait() __attribute__((always_inline)){ if(shouldWait()) { while(NRF_SPI0->EVENTS_READY==0); } NRF_SPI0->INTENCLR; }
+
+  // write a byte out via SPI (returns immediately on writing register)
+  static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); NRF_SPI0->TXD = b; NRF_SPI0->INTENCLR; shouldWait(true); }
+
+  // write a word out via SPI (returns immediately on writing register); high byte first
+  static void writeWord(uint16_t w) __attribute__((always_inline)){ writeByte(w>>8); writeByte(w & 0xFF);  }
+
+  // A raw set of writing byte values, assumes setup/init/waiting done elsewhere (static for use by adjustment classes)
+  static void writeBytesValueRaw(uint8_t value, int len) { while(len--) { writeByte(value);  } }
+
+  // A full cycle of writing a value for len bytes, including select, release, and waiting
+  void writeBytesValue(uint8_t value, int len) {
+    select();
+    while(len--) {
+      writeByte(value);
+    }
+    waitFully();
+    release();
+  }
+
+  // A full cycle of writing a raw block of data out, including select, release, and waiting.
+  // Each byte passes through D::adjust(); D::postBlock(len) runs once after the last byte.
+  template<class D> void writeBytes(uint8_t *data, int len) {
+    uint8_t *end = data + len;
+    select();
+    while(data != end) {
+      writeByte(D::adjust(*data++));
+    }
+    D::postBlock(len);
+    waitFully();
+    release();
+  }
+
+  // Block write using DATA_NOP as the adjustment class.
+  void writeBytes(uint8_t *data, int len) {
+    writeBytes<DATA_NOP>(data, len);
+  }
+
+  // write a single bit out, which bit from the passed in byte is determined by template parameter.
+  // The SPI peripheral is disabled while the data pin is set and the clock pin is toggled twice by hand.
+  template <uint8_t BIT> inline static void writeBit(uint8_t b) {
+    waitFully();
+    NRF_SPI0->ENABLE = 0;
+    if(b & 1<<BIT) {
+      FastPin<_DATA_PIN>::hi();
+    } else {
+      FastPin<_DATA_PIN>::lo();
+    }
+    FastPin<_CLOCK_PIN>::toggle();
+    FastPin<_CLOCK_PIN>::toggle();
+    NRF_SPI0->ENABLE = 1;
+  }
+
+  // Stream every pixel in the controller: an optional start bit (FLAG_START_BIT), then the three scaled
+  // bytes of the pixel, each passed through D::adjust().
+  template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
+    select();
+    int len = pixels.mLen;
+    while(pixels.has(1)) {
+      if(FLAGS & FLAG_START_BIT) {
+        writeBit<0>(1);
+      }
+      writeByte(D::adjust(pixels.loadAndScale0()));
+      writeByte(D::adjust(pixels.loadAndScale1()));
+      writeByte(D::adjust(pixels.loadAndScale2()));
+
+      pixels.advanceData();
+      pixels.stepDithering();
+    }
+    D::postBlock(len);
+    waitFully();
+    release();
+  }
+
+};
+
+#endif
+#endif
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/nrf51/led_sysdefs_arm_nrf51.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/nrf51/led_sysdefs_arm_nrf51.h
@@ -0,0 +1,46 @@
+#ifndef __LED_SYSDEFS_ARM_NRF51
+#define __LED_SYSDEFS_ARM_NRF51
+// Platform configuration for the nRF51 build: feature flags, default clock, register types and interrupt macros.
+#ifndef NRF51
+#define NRF51
+#endif
+// LED_TIMER names the hardware timer used by the clockless controller.
+#define LED_TIMER NRF_TIMER1
+#define FASTLED_NO_PINMAP
+#define FASTLED_HAS_CLOCKLESS
+
+#define FASTLED_SPI_BYTE_ONLY
+
+#define FASTLED_ARM
+#define FASTLED_ARM_M0
+// F_CPU defaults to 16 MHz unless the build already defines it.
+#ifndef F_CPU
+#define F_CPU 16000000
+#endif
+
+#include <stdint.h>
+#include <nrf51.h>
+#include <core_cm0.h>
+// Register and compatibility typedefs; both register types are 32 bits wide here.
+typedef volatile uint32_t RoReg;
+typedef volatile uint32_t RwReg;
+typedef uint32_t prog_uint32_t;
+typedef uint8_t boolean;
+// PROGMEM expands to nothing in this configuration.
+#define PROGMEM
+#define NO_PROGMEM
+#define NEED_CXX_BITS
+
+// Default to NOT using PROGMEM here
+#ifndef FASTLED_USE_PROGMEM
+#define FASTLED_USE_PROGMEM 0
+#endif
+
+#ifndef FASTLED_ALLOW_INTERRUPTS
+#define FASTLED_ALLOW_INTERRUPTS 1
+#endif
+// cli()/sei() expand to __disable_irq()/__enable_irq(); note that both expansions already end in ';'.
+#define cli()  __disable_irq();
+#define sei() __enable_irq();
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/sam/clockless_arm_sam.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/sam/clockless_arm_sam.h
@@ -0,0 +1,120 @@
+#ifndef __INC_CLOCKLESS_ARM_SAM_H
+#define __INC_CLOCKLESS_ARM_SAM_H
+
+FASTLED_NAMESPACE_BEGIN
+
+// Definition for a single channel clockless controller for the sam family of arm chips, like that used in the due and rfduino
+// See clockless.h for detailed info on how the template parameters are used.
+
+#if defined(__SAM3X8E__)
+
+
+#define TADJUST 0 // adjustment added to each of T1, T2 and T3 inside TOTAL (currently none)
+#define TOTAL ( (T1+TADJUST) + (T2+TADJUST) + (T3+TADJUST) ) // ticks in one full bit; relies on T1/T2/T3 being in scope where it is expanded
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+template <uint8_t DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class ClocklessController : public CPixelLEDController<RGB_ORDER> { // single-pin clockless driver; bit timing comes from polling DUE_TIMER_VAL
+	typedef typename FastPinBB<DATA_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPinBB<DATA_PIN>::port_t data_t;
+	// mPinMask and mPort are filled in by init(); the static send path below fetches the port itself.
+	data_t mPinMask;
+	data_ptr_t mPort;
+	CMinWait<WAIT_TIME> mWait;
+public:
+	virtual void init() {
+		FastPinBB<DATA_PIN>::setOutput();
+		mPinMask = FastPinBB<DATA_PIN>::mask();
+		mPort = FastPinBB<DATA_PIN>::port();
+	}
+
+	virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+protected:
+	// Sends one frame; a zero return from showRGBInternal triggers one retry after WAIT_TIME microseconds.
+	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+		mWait.wait();
+		if(!showRGBInternal(pixels)) {
+      sei(); delayMicroseconds(WAIT_TIME); cli(); // NOTE(review): showPixels never calls sei() after this cli(); interrupts are only re-enabled inside showRGBInternal's pixel loop
+      showRGBInternal(pixels);
+    }
+		mWait.mark();
+	}
+	// Clocks out BITS bits of b, most-significant bit first, one timer-paced period of TOTAL ticks per bit.
+	template<int BITS>  __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register uint8_t & b) {
+		// Make sure we don't slot into a wrapping spot, this will delay up to 12.5µs for WS2812
+		// bool bShift=0;
+		// while(VAL < (TOTAL*10)) { bShift=true; }
+		// if(bShift) { next_mark = (VAL-TOTAL); };
+
+		for(register uint32_t i = BITS; i > 0; i--) {
+			// wait to start the bit, then set the pin high
+			while(DUE_TIMER_VAL < next_mark);
+			next_mark = (DUE_TIMER_VAL+TOTAL);
+			*port = 1; // presumably a bit-band alias, so writing 1/0 drives only DATA_PIN - TODO confirm against FastPinBB
+
+			// how long we want to wait next depends on whether or not our bit is set to 1 or 0
+			if(b&0x80) {
+				// we're a 1, wait until there's less than T3 clocks left
+				while((next_mark - DUE_TIMER_VAL) > (T3));
+			} else {
+				// we're a 0, wait until there's less than (T2+T3+slop) clocks left in this bit
+				while((next_mark - DUE_TIMER_VAL) > (T2+T3+6+TADJUST+TADJUST));
+			}
+			*port=0;
+			b <<= 1; // move the next bit into the 0x80 position
+		}
+	}
+	// FORCE_REFERENCE: empty asm statement with var as an "r" input operand.
+#define FORCE_REFERENCE(var)  asm volatile( "" : : "r" (var) )
+	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+	// gcc will use register Y for the this pointer.
+	static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+		// Setup and start the clock
+		TC_Configure(DUE_TIMER,DUE_TIMER_CHANNEL,TC_CMR_TCCLKS_TIMER_CLOCK1);
+		pmc_enable_periph_clk(DUE_TIMER_ID);
+		TC_Start(DUE_TIMER,DUE_TIMER_CHANNEL);
+
+		register data_ptr_t port asm("r7") = FastPinBB<DATA_PIN>::port(); FORCE_REFERENCE(port);
+		*port = 0;
+
+		// Setup the pixel controller and load/scale the first byte
+		pixels.preStepFirstByteDithering();
+		uint8_t b = pixels.loadAndScale0();
+		// First deadline: one full bit period (TOTAL ticks) from the current timer value.
+		uint32_t next_mark = (DUE_TIMER_VAL + (TOTAL));
+		while(pixels.has(1)) {
+			pixels.stepDithering();
+
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			cli();
+			if(DUE_TIMER_VAL > next_mark) { // the timer already ran past the deadline while interrupts were enabled
+				if((DUE_TIMER_VAL - next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); TC_Stop(DUE_TIMER,DUE_TIMER_CHANNEL); return 0; } // too late: abort with 0 so showPixels retries
+			}
+			#endif
+			// Three bytes per pixel; the next byte is loaded between the writes.
+			writeBits<8+XTRA0>(next_mark, port, b);
+
+			b = pixels.loadAndScale1();
+			writeBits<8+XTRA0>(next_mark, port,b);
+
+			b = pixels.loadAndScale2();
+			writeBits<8+XTRA0>(next_mark, port,b);
+
+			b = pixels.advanceAndLoadAndScale0();
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			sei();
+			#endif
+		};
+
+		TC_Stop(DUE_TIMER,DUE_TIMER_CHANNEL);
+		return DUE_TIMER_VAL; // timer value read after the stop; showPixels treats any non-zero value as success
+	}
+};
+
+#endif
+
+FASTLED_NAMESPACE_END
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/sam/clockless_block_arm_sam.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/sam/clockless_block_arm_sam.h
@@ -0,0 +1,184 @@
+ #ifndef __INC_BLOCK_CLOCKLESS_H
+#define __INC_BLOCK_CLOCKLESS_H
+
+FASTLED_NAMESPACE_BEGIN
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Base template for clockless controllers.  These controllers have 3 control points in their cycle for each bit.  The first point
+// is where the line is raised hi.  The second point is where the line is dropped low for a zero.  The third point is where the
+// line is dropped low for a one.  T1, T2, and T3 correspond to the timings for those three in clock cycles.
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__SAM3X8E__)
+#define PORT_MASK (((1<<LANES)-1) & ((FIRST_PIN==2) ? 0xFF : 0xFF)) // low LANES bits set; both arms of the ?: are 0xFF, so FIRST_PIN has no effect on the value
+
+#define FASTLED_HAS_BLOCKLESS 1
+// First-pin numbers that init() compares FIRST_PIN against to pick a port's pin list.
+#define PORTD_FIRST_PIN 25
+#define PORTA_FIRST_PIN 69
+#define PORTB_FIRST_PIN 90
+// Eight bytes that can also be accessed as two 32-bit words.
+typedef union {
+  uint8_t bytes[8];
+  uint32_t raw[2];
+} Lines;
+// Bit-period bookkeeping in timer ticks; all of these rely on T1/T2/T3 being in scope where they are expanded.
+#define TADJUST 0
+#define TOTAL ( (T1+TADJUST) + (T2+TADJUST) + (T3+TADJUST) )
+#define T1_MARK (TOTAL - (T1+TADJUST))
+#define T2_MARK (T1_MARK - (T2+TADJUST))
+template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, PORT_MASK> { // drives up to 8 lanes at once through one port's set/clear registers
+	typedef typename FastPin<FIRST_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPin<FIRST_PIN>::port_t data_t;
+
+	data_t mPinMask;
+	data_ptr_t mPort;
+	CMinWait<WAIT_TIME> mWait;
+public:
+	virtual int size() { return CLEDController::size() * LANES; } // base-class size multiplied by the lane count
+	virtual void init() {
+    static_assert(LANES <= 8, "Maximum of 8 lanes for Due parallel controllers!");
+    if(FIRST_PIN == PORTA_FIRST_PIN) {
+      switch(LANES) { // cases fall through on purpose: entering at LANES configures that pin and every pin listed below it
+        case 8: FastPin<31>::setOutput();
+        case 7: FastPin<58>::setOutput();
+        case 6: FastPin<100>::setOutput();
+        case 5: FastPin<59>::setOutput();
+        case 4: FastPin<60>::setOutput();
+        case 3: FastPin<61>::setOutput();
+        case 2: FastPin<68>::setOutput();
+        case 1: FastPin<69>::setOutput();
+      }
+    } else if(FIRST_PIN == PORTD_FIRST_PIN) {
+      switch(LANES) { // same intentional fall-through
+        case 8: FastPin<11>::setOutput();
+        case 7: FastPin<29>::setOutput();
+        case 6: FastPin<15>::setOutput();
+        case 5: FastPin<14>::setOutput();
+        case 4: FastPin<28>::setOutput();
+        case 3: FastPin<27>::setOutput();
+        case 2: FastPin<26>::setOutput();
+        case 1: FastPin<25>::setOutput();
+      }
+    } else if(FIRST_PIN == PORTB_FIRST_PIN) {
+      switch(LANES) { // same intentional fall-through
+        case 8: FastPin<97>::setOutput();
+        case 7: FastPin<96>::setOutput();
+        case 6: FastPin<95>::setOutput();
+        case 5: FastPin<94>::setOutput();
+        case 4: FastPin<93>::setOutput();
+        case 3: FastPin<92>::setOutput();
+        case 2: FastPin<91>::setOutput();
+        case 1: FastPin<90>::setOutput();
+      }
+    }
+    mPinMask = FastPin<FIRST_PIN>::mask();
+    mPort = FastPin<FIRST_PIN>::port();
+	}
+
+	virtual uint16_t getMaxRefreshRate() const { return 400; }
+	// Sends one frame. The return value of showRGBInternal is ignored, so an aborted frame is not retried here.
+  virtual void showPixels(PixelController<RGB_ORDER, LANES, PORT_MASK> & pixels) {
+    mWait.wait();
+    showRGBInternal(pixels);
+    sei();
+    mWait.mark();
+  }
+	// Writes every pixel of every lane, three bytes per pixel, and returns the timer value read at exit.
+	static uint32_t showRGBInternal(PixelController<RGB_ORDER, LANES, PORT_MASK> &allpixels) {
+		// Serial.println("Entering show");
+
+    int nLeds = allpixels.mLen;
+
+    // Setup the pixel controller and load/scale the first byte
+		Lines b0,b1,b2;
+
+    allpixels.preStepFirstByteDithering();
+		for(uint8_t i = 0; i < LANES; i++) {
+			b0.bytes[i] = allpixels.loadAndScale0(i);
+		}
+
+		// Setup and start the clock
+    TC_Configure(DUE_TIMER,DUE_TIMER_CHANNEL,TC_CMR_TCCLKS_TIMER_CLOCK1);
+    pmc_enable_periph_clk(DUE_TIMER_ID);
+    TC_Start(DUE_TIMER,DUE_TIMER_CHANNEL);
+
+    #if (FASTLED_ALLOW_INTERRUPTS == 1)
+    cli();
+    #endif
+		uint32_t next_mark = (DUE_TIMER_VAL + (TOTAL));
+		while(nLeds--) {
+      allpixels.stepDithering();
+      #if (FASTLED_ALLOW_INTERRUPTS == 1)
+      cli();
+      if(DUE_TIMER_VAL > next_mark) { // the timer already ran past the deadline while interrupts were enabled
+        if((DUE_TIMER_VAL - next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) {
+          sei(); TC_Stop(DUE_TIMER,DUE_TIMER_CHANNEL); return DUE_TIMER_VAL; // abort the frame; NOTE(review): the single-pin SAM controller returns 0 here instead
+        }
+      }
+      #endif
+
+			// Write first byte, read next byte
+			writeBits<8+XTRA0,1>(next_mark, b0, b1, allpixels);
+
+			// Write second byte, read 3rd byte
+			writeBits<8+XTRA0,2>(next_mark, b1, b2, allpixels);
+
+      allpixels.advanceData();
+			// Write third byte
+			writeBits<8+XTRA0,0>(next_mark, b2, b0, allpixels);
+
+      #if (FASTLED_ALLOW_INTERRUPTS == 1)
+      sei();
+      #endif
+		}
+		// NOTE(review): TC_Stop is only called on the abort path above, not on this normal exit.
+		return DUE_TIMER_VAL;
+	}
+	// Outputs one byte per lane in parallel (8 bit periods) while loading the next byte for each lane into b3. BITS is not referenced in the body.
+  template<int BITS,int PX> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register Lines & b, Lines & b3, PixelController<RGB_ORDER,LANES, PORT_MASK> &pixels) { // , register uint32_t & b2)  {
+    Lines b2;
+    transpose8x1(b.bytes,b2.bytes); // presumably turns the per-lane bytes into one byte per bit position - TODO confirm against transpose8x1
+
+    register uint8_t d = pixels.template getd<PX>(pixels);
+    register uint8_t scale = pixels.template getscale<PX>(pixels);
+    // First min(LANES, 8) bit periods: besides emitting the bit, each iteration loads lane i's next byte.
+    for(uint32_t i = 0; (i < LANES) && (i<8); i++) {
+      while(DUE_TIMER_VAL < next_mark);
+      next_mark = (DUE_TIMER_VAL+TOTAL);
+      // Start of the bit: raise every lane.
+      *FastPin<FIRST_PIN>::sport() = PORT_MASK;
+      // Drop the lanes whose bit in b2.bytes[7-i] is 0.
+      while((next_mark - DUE_TIMER_VAL) > (T2+T3+6));
+      *FastPin<FIRST_PIN>::cport() = (~b2.bytes[7-i]) & PORT_MASK;
+      // Drop all lanes once no more than T3 ticks remain.
+      while((next_mark - (DUE_TIMER_VAL)) > T3);
+      *FastPin<FIRST_PIN>::cport() = PORT_MASK;
+
+      b3.bytes[i] = pixels.template loadAndScale<PX>(pixels,i,d,scale);
+    }
+    // Remaining bit periods (i from LANES up to 7): same waveform, nothing left to load.
+    for(uint32_t i = LANES; i < 8; i++) {
+      while(DUE_TIMER_VAL < next_mark);
+      next_mark = (DUE_TIMER_VAL+TOTAL);
+      *FastPin<FIRST_PIN>::sport() = PORT_MASK;
+
+      while((next_mark - DUE_TIMER_VAL) > (T2+T3+6));
+      *FastPin<FIRST_PIN>::cport() = (~b2.bytes[7-i]) & PORT_MASK;
+
+      while((next_mark - DUE_TIMER_VAL) > T3);
+      *FastPin<FIRST_PIN>::cport() = PORT_MASK;
+    }
+  }
+
+
+};
+
+#endif
+
+FASTLED_NAMESPACE_END
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/sam/fastled_arm_sam.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/sam/fastled_arm_sam.h
@@ -0,0 +1,10 @@
+#ifndef __INC_FASTLED_ARM_SAM_H
+#define __INC_FASTLED_ARM_SAM_H
+
+// Include the sam headers
+#include "fastpin_arm_sam.h"
+#include "fastspi_arm_sam.h"
+#include "clockless_arm_sam.h"
+#include "clockless_block_arm_sam.h"
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/sam/fastpin_arm_sam.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/sam/fastpin_arm_sam.h
@@ -0,0 +1,137 @@
+#ifndef __INC_FASTPIN_ARM_SAM_H
+#define __INC_FASTPIN_ARM_SAM_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_FORCE_SOFTWARE_PINS)
+#warning "Software pin support forced, pin access will be sloightly slower."
+#define NO_HARDWARE_PIN_SUPPORT
+#undef HAS_HARDWARE_PIN_SUPPORT
+
+#else
+
+
+/// Template definition for arduino due style ARM pins, providing direct access to the various GPIO registers.  Note that this
+/// uses the full port GPIO registers.  In theory, in some way, bit-band register access -should- be faster, however I have found
+/// that something about the way gcc does register allocation results in the bit-band code being slower.  It will need more fine tuning.
+/// The registers are data register, set output register, clear output register, set data direction register
+template<uint8_t PIN, uint32_t _MASK, typename _PDOR, typename _PSOR, typename _PCOR, typename _PDDR> class _DUEPIN {
+public:
+	typedef volatile uint32_t * port_ptr_t;
+	typedef uint32_t port_t;
+
+	// Pin direction is delegated to pinMode(); _PDDR is not written by this class.
+	inline static void setOutput() {
+		pinMode(PIN, OUTPUT);
+	}
+	inline static void setInput() {
+		pinMode(PIN, INPUT);
+	}
+
+	// Drive this pin high / low by writing its mask to the set / clear output register.
+	__attribute__ ((always_inline)) inline static void hi() {
+		_PSOR::r() = _MASK;
+	}
+	__attribute__ ((always_inline)) inline static void lo() {
+		_PCOR::r() = _MASK;
+	}
+
+	// Overwrite the whole data register with val.
+	__attribute__ ((always_inline)) inline static void set(register port_t val) {
+		_PDOR::r() = val;
+	}
+
+	// Flip this pin's bit in the data register.
+	__attribute__ ((always_inline)) inline static void toggle() {
+		_PDOR::r() ^= _MASK;
+	}
+
+	// Two toggles back to back: a short pulse that ends in the starting state.
+	__attribute__ ((always_inline)) inline static void strobe() {
+		toggle();
+		toggle();
+	}
+
+	// Pointer-taking variants; hi/lo ignore the pointer and use the set/clear registers.
+	__attribute__ ((always_inline)) inline static void hi(register port_ptr_t port) {
+		hi();
+	}
+	__attribute__ ((always_inline)) inline static void lo(register port_ptr_t port) {
+		lo();
+	}
+	__attribute__ ((always_inline)) inline static void fastset(register port_ptr_t port, register port_t val) {
+		*port = val;
+	}
+
+	// Current data register contents with this pin's bit forced on / off.
+	__attribute__ ((always_inline)) inline static port_t hival() {
+		return _PDOR::r() | _MASK;
+	}
+	__attribute__ ((always_inline)) inline static port_t loval() {
+		return _PDOR::r() & ~_MASK;
+	}
+
+	// Addresses of the data, set-output and clear-output registers, and this pin's mask.
+	__attribute__ ((always_inline)) inline static port_ptr_t port() {
+		return &_PDOR::r();
+	}
+	__attribute__ ((always_inline)) inline static port_ptr_t sport() {
+		return &_PSOR::r();
+	}
+	__attribute__ ((always_inline)) inline static port_ptr_t cport() {
+		return &_PCOR::r();
+	}
+	__attribute__ ((always_inline)) inline static port_t mask() {
+		return _MASK;
+	}
+};
+
+
+/// Template definition for DUE  style ARM pins using bit banding, providing direct access to the various GPIO registers.  GCC
+/// does a poor job of optimizing around these accesses so they are not being used just yet.
+template<uint8_t PIN, uint32_t _BIT, typename _PDOR, typename _PSOR, typename _PCOR, typename _PDDR> class _DUEPIN_BITBAND {
+public:
+	typedef volatile uint32_t * port_ptr_t;
+	typedef uint32_t port_t;
+
+	// Pin direction is delegated to pinMode(); _PDDR is not written by this class.
+	inline static void setOutput() {
+		pinMode(PIN, OUTPUT);
+	}
+	inline static void setInput() {
+		pinMode(PIN, INPUT);
+	}
+
+	// Every write goes through the per-bit alias word returned by _PDOR::rx<_BIT>().
+	__attribute__ ((always_inline)) inline static void hi() {
+		*_PDOR::template rx<_BIT>() = 1;
+	}
+	__attribute__ ((always_inline)) inline static void lo() {
+		*_PDOR::template rx<_BIT>() = 0;
+	}
+	__attribute__ ((always_inline)) inline static void set(register port_t val) {
+		*_PDOR::template rx<_BIT>() = val;
+	}
+
+	// Flip the alias word's low bit.
+	__attribute__ ((always_inline)) inline static void toggle() {
+		*_PDOR::template rx<_BIT>() ^= 1;
+	}
+
+	// Two toggles back to back: a short pulse that ends in the starting state.
+	__attribute__ ((always_inline)) inline static void strobe() {
+		toggle();
+		toggle();
+	}
+
+	// Pointer-taking variants; hi/lo ignore the pointer.
+	__attribute__ ((always_inline)) inline static void hi(register port_ptr_t port) {
+		hi();
+	}
+	__attribute__ ((always_inline)) inline static void lo(register port_ptr_t port) {
+		lo();
+	}
+	__attribute__ ((always_inline)) inline static void fastset(register port_ptr_t port, register port_t val) {
+		*port = val;
+	}
+
+	// With one alias word per bit, the "high" value is 1, the "low" value is 0 and the mask is 1.
+	__attribute__ ((always_inline)) inline static port_t hival() {
+		return 1;
+	}
+	__attribute__ ((always_inline)) inline static port_t loval() {
+		return 0;
+	}
+	__attribute__ ((always_inline)) inline static port_ptr_t port() {
+		return _PDOR::template rx<_BIT>();
+	}
+	__attribute__ ((always_inline)) inline static port_t mask() {
+		return 1;
+	}
+};
+
+// Bit-band alias address for bit `bit` of register `reg`: 32 alias bytes per register byte
+// (4 per bit), offset from 0x40000000 and rebased to 0x42000000.
+#define GPIO_BITBAND_ADDR(reg, bit) (((uint32_t)&(reg) - 0x40000000) * 32 + (bit) * 4 + 0x42000000)
+// The same address as a pointer.  Note the result is a plain (non-volatile) uint32_t *.
+#define GPIO_BITBAND_PTR(reg, bit) ((uint32_t *)GPIO_BITBAND_ADDR((reg), (bit)))
+
+// _R(T) names the wrapper struct for register T; _RD32(T) defines it with
+//   r()       - returns T itself (as reg32_t, declared elsewhere)
+//   rx<BIT>() - returns the bit-band alias pointer for bit BIT of T
+// T is pasted with ## so the register macro name, not its expansion, forms the struct name.
+#define _R(T) struct __gen_struct_ ## T
+#define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; } \
+	template<int BIT> static __attribute__((always_inline)) inline ptr_reg32_t rx() { return GPIO_BITBAND_PTR(T, BIT); } };
+// Define the ODSR / SODR / CODR / OER wrappers for PIO controller L.
+#define DUE_IO32(L) _RD32(REG_PIO ## L ## _ODSR); _RD32(REG_PIO ## L ## _SODR); _RD32(REG_PIO ## L ## _CODR); _RD32(REG_PIO ## L ## _OER);
+
+// Specialize FastPin<PIN> (full-port access) and FastPinBB<PIN> (bit-band access) for the pin
+// that is bit BIT of PIO controller L.
+//  - The mask is built from an unsigned 1 so that BIT == 31 does not shift into the sign bit
+//    of a signed int before being converted to the uint32_t template argument.
+//  - The direction-register argument names the REG_PIOx_OER wrapper that DUE_IO32(L) defines,
+//    matching the other three register arguments.
+#define _DEFPIN_DUE(PIN, BIT, L) \
+	template<> class FastPin<PIN> : public _DUEPIN<PIN, (1u << (BIT)), _R(REG_PIO ## L ## _ODSR), _R(REG_PIO ## L ## _SODR), _R(REG_PIO ## L ## _CODR), _R(REG_PIO ## L ## _OER)> {}; \
+	template<> class FastPinBB<PIN> : public _DUEPIN_BITBAND<PIN, BIT, _R(REG_PIO ## L ## _ODSR), _R(REG_PIO ## L ## _SODR), _R(REG_PIO ## L ## _CODR), _R(REG_PIO ## L ## _OER)> {};
+
+#if defined(__SAM3X8E__)
+
+// Register wrappers for PIO controllers A-D; every pin definition below refers to one of these.
+DUE_IO32(A);
+DUE_IO32(B);
+DUE_IO32(C);
+DUE_IO32(D);
+
+// Each entry is _DEFPIN_DUE(arduino pin, bit within the port, port letter).
+// NOTE(review): MAX_PIN is 78 although pins 90-113 are also defined further down -- confirm this is intended.
+#define MAX_PIN 78
+_DEFPIN_DUE(0, 8, A); _DEFPIN_DUE(1, 9, A); _DEFPIN_DUE(2, 25, B); _DEFPIN_DUE(3, 28, C);
+_DEFPIN_DUE(4, 26, C); _DEFPIN_DUE(5, 25, C); _DEFPIN_DUE(6, 24, C); _DEFPIN_DUE(7, 23, C);
+_DEFPIN_DUE(8, 22, C); _DEFPIN_DUE(9, 21, C); _DEFPIN_DUE(10, 29, C); _DEFPIN_DUE(11, 7, D);
+_DEFPIN_DUE(12, 8, D); _DEFPIN_DUE(13, 27, B); _DEFPIN_DUE(14, 4, D); _DEFPIN_DUE(15, 5, D);
+_DEFPIN_DUE(16, 13, A); _DEFPIN_DUE(17, 12, A); _DEFPIN_DUE(18, 11, A); _DEFPIN_DUE(19, 10, A);
+_DEFPIN_DUE(20, 12, B); _DEFPIN_DUE(21, 13, B); _DEFPIN_DUE(22, 26, B); _DEFPIN_DUE(23, 14, A);
+_DEFPIN_DUE(24, 15, A); _DEFPIN_DUE(25, 0, D); _DEFPIN_DUE(26, 1, D); _DEFPIN_DUE(27, 2, D);
+_DEFPIN_DUE(28, 3, D); _DEFPIN_DUE(29, 6, D); _DEFPIN_DUE(30, 9, D); _DEFPIN_DUE(31, 7, A);
+_DEFPIN_DUE(32, 10, D); _DEFPIN_DUE(33, 1, C); _DEFPIN_DUE(34, 2, C); _DEFPIN_DUE(35, 3, C);
+_DEFPIN_DUE(36, 4, C); _DEFPIN_DUE(37, 5, C); _DEFPIN_DUE(38, 6, C); _DEFPIN_DUE(39, 7, C);
+_DEFPIN_DUE(40, 8, C); _DEFPIN_DUE(41, 9, C); _DEFPIN_DUE(42, 19, A); _DEFPIN_DUE(43, 20, A);
+_DEFPIN_DUE(44, 19, C); _DEFPIN_DUE(45, 18, C); _DEFPIN_DUE(46, 17, C); _DEFPIN_DUE(47, 16, C);
+_DEFPIN_DUE(48, 15, C); _DEFPIN_DUE(49, 14, C); _DEFPIN_DUE(50, 13, C); _DEFPIN_DUE(51, 12, C);
+_DEFPIN_DUE(52, 21, B); _DEFPIN_DUE(53, 14, B); _DEFPIN_DUE(54, 16, A); _DEFPIN_DUE(55, 24, A);
+_DEFPIN_DUE(56, 23, A); _DEFPIN_DUE(57, 22, A); _DEFPIN_DUE(58, 6, A); _DEFPIN_DUE(59, 4, A);
+_DEFPIN_DUE(60, 3, A); _DEFPIN_DUE(61, 2, A); _DEFPIN_DUE(62, 17, B); _DEFPIN_DUE(63, 18, B);
+_DEFPIN_DUE(64, 19, B); _DEFPIN_DUE(65, 20, B); _DEFPIN_DUE(66, 15, B); _DEFPIN_DUE(67, 16, B);
+_DEFPIN_DUE(68, 1, A); _DEFPIN_DUE(69, 0, A); _DEFPIN_DUE(70, 17, A); _DEFPIN_DUE(71, 18, A);
+_DEFPIN_DUE(72, 30, C); _DEFPIN_DUE(73, 21, A); _DEFPIN_DUE(74, 25, A); _DEFPIN_DUE(75, 26, A);
+_DEFPIN_DUE(76, 27, A); _DEFPIN_DUE(77, 28, A); _DEFPIN_DUE(78, 23, B);
+
+// digix pins
+// Some of these alias port bits already mapped above (102 and 78 are both B23, 108 and 73 are
+// both A21, 109 and 72 are both C30).
+_DEFPIN_DUE(90, 0, B); _DEFPIN_DUE(91, 1, B); _DEFPIN_DUE(92, 2, B); _DEFPIN_DUE(93, 3, B);
+_DEFPIN_DUE(94, 4, B); _DEFPIN_DUE(95, 5, B); _DEFPIN_DUE(96, 6, B); _DEFPIN_DUE(97, 7, B);
+_DEFPIN_DUE(98, 8, B); _DEFPIN_DUE(99, 9, B); _DEFPIN_DUE(100, 5, A); _DEFPIN_DUE(101, 22, B);
+_DEFPIN_DUE(102, 23, B); _DEFPIN_DUE(103, 24, B); _DEFPIN_DUE(104, 27, C); _DEFPIN_DUE(105, 20, C);
+_DEFPIN_DUE(106, 11, C); _DEFPIN_DUE(107, 10, C); _DEFPIN_DUE(108, 21, A); _DEFPIN_DUE(109, 30, C);
+_DEFPIN_DUE(110, 29, B); _DEFPIN_DUE(111, 30, B); _DEFPIN_DUE(112, 31, B); _DEFPIN_DUE(113, 28, B);
+
+// Pins used by the hardware SPI driver, and the feature flags other headers test for.
+#define SPI_DATA 75
+#define SPI_CLOCK 76
+#define ARM_HARDWARE_SPI
+#define HAS_HARDWARE_PIN_SUPPORT
+
+#endif
+
+#endif // FASTLED_FORCE_SOFTWARE_PINS
+
+FASTLED_NAMESPACE_END
+
+
+#endif // __INC_FASTPIN_ARM_SAM_H
--- a/libraries/FastLED-3.2.0/platforms/arm/sam/fastspi_arm_sam.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/sam/fastspi_arm_sam.h
@@ -0,0 +1,163 @@
+#ifndef __INC_FASTSPI_ARM_SAM_H
+#define __INC_FASTSPI_ARM_SAM_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(__SAM3X8E__)
+#define m_SPI ((Spi*)SPI0)
+
+/// Hardware SPI output driver for the SAM3X8E, using the SPI0 peripheral via m_SPI.
+/// _DATA_PIN / _CLOCK_PIN are the pin numbers of the data-out and clock lines;
+/// _SPI_CLOCK_DIVIDER is written to the SCBR field of chip-select register 0.
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+class SAMHardwareSPIOutput {
+	// Optional chip-select object; NULL means select()/release() do nothing.
+	Selectable *m_pSelect;
+
+	// Spin until the TDRE status flag is set, i.e. the transmit data register can take another value.
+	static inline void waitForEmpty() { while ((m_SPI->SPI_SR & SPI_SR_TDRE) == 0); }
+
+	// Clear / set the write-protect enable bit of the SPI write-protection mode register.
+	void enableConfig() { m_SPI->SPI_WPMR &= ~SPI_WPMR_WPEN; }
+	void disableConfig() { m_SPI->SPI_WPMR |= SPI_WPMR_WPEN; }
+
+	// Control-register commands: enable, disable and software-reset the peripheral.
+	void enableSPI() { m_SPI->SPI_CR = SPI_CR_SPIEN; }
+	void disableSPI() { m_SPI->SPI_CR = SPI_CR_SPIDIS; }
+	void resetSPI() { m_SPI->SPI_CR = SPI_CR_SWRST; }
+
+	// Program chip-select register 0 for transfers of `bits` bits (the field stores bits - 8),
+	// together with the clock phase, CS-active-after-transfer, delay and clock divider settings.
+	static inline void readyTransferBits(register uint32_t bits) {
+		bits -= 8;
+		// don't change the number of transfer bits while data is still being transferred from TDR to the shift register
+		waitForEmpty();
+		m_SPI->SPI_CSR[0] = SPI_CSR_NCPHA | SPI_CSR_CSAAT | (bits << SPI_CSR_BITS_Pos) | SPI_CSR_DLYBCT(1) | SPI_CSR_SCBR(_SPI_CLOCK_DIVIDER);
+	}
+
+	// Queue one value for transmission.  BITS is not used here: the transfer width is whatever
+	// readyTransferBits() last programmed (init() sets 8).
+	template<int BITS> static inline void writeBits(uint16_t w) {
+		waitForEmpty();
+		m_SPI->SPI_TDR = (uint32_t)w | SPI_PCS(0);
+	}
+
+public:
+	SAMHardwareSPIOutput() { m_pSelect = NULL; }
+	SAMHardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
+
+	// set the object representing the selectable (NULL disables chip-select handling)
+	void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
+
+	// initialize the SPI subsystem
+	void init() {
+		// m_SPI = SPI0;
+
+		// set the output pins master out, master in, clock.  Note doing this here because I still don't
+		// know how I want to expose this type of functionality in FastPin.
+		// (_DATA_PIN-1 is configured as the second data line; all three are assumed to be on PIOA.)
+		PIO_Configure(PIOA, PIO_PERIPH_A, FastPin<_DATA_PIN>::mask(), PIO_DEFAULT);
+		PIO_Configure(PIOA, PIO_PERIPH_A, FastPin<_DATA_PIN-1>::mask(), PIO_DEFAULT);
+		PIO_Configure(PIOA, PIO_PERIPH_A, FastPin<_CLOCK_PIN>::mask(), PIO_DEFAULT);
+
+		release();
+
+		// Configure the SPI clock, divider between 1-255
+		// SCBR = _SPI_CLOCK_DIVIDER
+		pmc_enable_periph_clk(ID_SPI0);
+		disableSPI();
+
+		// reset twice (what the sam code does, not sure why?)
+		resetSPI();
+		resetSPI();
+
+		// Configure SPI as master, enable
+		// Bits we want in MR: master, disable mode fault detection, variable peripheral select
+		m_SPI->SPI_MR = SPI_MR_MSTR | SPI_MR_MODFDIS | SPI_MR_PS;
+
+		enableSPI();
+
+		// Send everything out in 8 bit chunks, other sizes appear to work, poorly...
+		readyTransferBits(8);
+	}
+
+	// latch the CS select
+	void inline select() __attribute__((always_inline)) { if(m_pSelect != NULL) { m_pSelect->select(); } }
+
+	// release the CS select
+	void inline release() __attribute__((always_inline)) { if(m_pSelect != NULL) { m_pSelect->release(); } }
+
+	// wait until all queued up data has been written
+	void waitFully() { while((m_SPI->SPI_SR & SPI_SR_TXEMPTY) == 0); }
+
+	// write a byte out via SPI (returns immediately on writing register)
+	static void writeByte(uint8_t b) {
+		writeBits<8>(b);
+	}
+
+	// write a word out via SPI (returns immediately on writing register)
+	static void writeWord(uint16_t w) {
+		writeBits<16>(w);
+	}
+
+	// A raw set of writing byte values, assumes setup/init/waiting done elsewhere
+	static void writeBytesValueRaw(uint8_t value, int len) {
+		while(len--) { writeByte(value); }
+	}
+
+	// A full cycle of writing a value for len bytes, including select, release, and waiting
+	void writeBytesValue(uint8_t value, int len) {
+		select();
+		writeBytesValueRaw(value, len);
+		// drain the transmitter before dropping chip select, as writeBytes() does
+		waitFully();
+		release();
+	}
+
+	// Write len bytes from data, passing each through D::adjust, then run D::postBlock(len),
+	// wait for the transmitter to drain and release chip select.
+	template <class D> void writeBytes(register uint8_t *data, int len) {
+		uint8_t *end = data + len;
+		select();
+		// could be optimized to write 16bit words out instead of 8bit bytes
+		while(data != end) {
+			writeByte(D::adjust(*data++));
+		}
+		D::postBlock(len);
+		waitFully();
+		release();
+	}
+
+	void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+	// write a single bit out, which bit from the passed in byte is determined by template parameter
+	// not the most efficient mechanism in the world - but should be enough for sm16716 and friends
+	template <uint8_t BIT> inline void writeBit(uint8_t b) {
+		// need to wait for all existing data to go out the door, first
+		waitFully();
+		disableSPI();
+		if(b & (1 << BIT)) {
+			FastPin<_DATA_PIN>::hi();
+		} else {
+			FastPin<_DATA_PIN>::lo();
+		}
+
+		// one manual clock pulse, then hand the pins back to the peripheral
+		FastPin<_CLOCK_PIN>::hi();
+		FastPin<_CLOCK_PIN>::lo();
+		enableSPI();
+	}
+
+	// write a block of uint8_ts out in groups of three.  len is the total number of uint8_ts to write out.  The template
+	// parameters indicate how many uint8_ts to skip at the beginning and/or end of each grouping
+	template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
+		select();
+		int len = pixels.mLen;
+
+		if(FLAGS & FLAG_START_BIT) {
+			// first channel of every pixel carries an extra leading 1 bit (bit 8)
+			while(pixels.has(1)) {
+				writeBits<9>((1<<8) | D::adjust(pixels.loadAndScale0()));
+				writeByte(D::adjust(pixels.loadAndScale1()));
+				writeByte(D::adjust(pixels.loadAndScale2()));
+				pixels.advanceData();
+				pixels.stepDithering();
+			}
+		} else {
+			while(pixels.has(1)) {
+				writeByte(D::adjust(pixels.loadAndScale0()));
+				writeByte(D::adjust(pixels.loadAndScale1()));
+				writeByte(D::adjust(pixels.loadAndScale2()));
+				pixels.advanceData();
+				pixels.stepDithering();
+			}
+		}
+		D::postBlock(len);
+		// drain the transmitter before dropping chip select, as writeBytes() does
+		waitFully();
+		release();
+	}
+};
+
+#endif
+
+FASTLED_NAMESPACE_END
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/sam/led_sysdefs_arm_sam.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/sam/led_sysdefs_arm_sam.h
@@ -0,0 +1,39 @@
+#ifndef __INC_LED_SYSDEFS_ARM_SAM_H
+#define __INC_LED_SYSDEFS_ARM_SAM_H
+
+
+#define FASTLED_ARM
+
+// Setup DUE timer defines/channels/etc...
+// The timer group and the channel within it can each be overridden independently by defining
+// them before this header is included; each default is guarded by its own name.
+#ifndef DUE_TIMER_GROUP
+#define DUE_TIMER_GROUP 0
+#endif
+
+#ifndef DUE_TIMER_CHANNEL
+#define DUE_TIMER_CHANNEL 0
+#endif
+
+// Timer block selected by DUE_TIMER_GROUP: 0 -> TC0, 1 -> TC1, anything else -> TC2.
+#define DUE_TIMER ((DUE_TIMER_GROUP==0) ? TC0 : ((DUE_TIMER_GROUP==1) ? TC1 : TC2))
+// Peripheral id of the selected channel: ids advance by 3 per group starting at ID_TC0.
+#define DUE_TIMER_ID (ID_TC0 + (DUE_TIMER_GROUP*3) + DUE_TIMER_CHANNEL)
+// Current counter value, doubled.  NOTE(review): presumably converts timer ticks to CPU clocks
+// for a timer clocked at half the CPU rate -- confirm against the timer setup code.
+#define DUE_TIMER_VAL (DUE_TIMER->TC_CHANNEL[DUE_TIMER_CHANNEL].TC_CV << 1)
+// True while the channel's clock-enabled status bit is set.
+#define DUE_TIMER_RUNNING ((DUE_TIMER->TC_CHANNEL[DUE_TIMER_CHANNEL].TC_SR & TC_SR_CLKSTA) != 0)
+
+#ifndef INTERRUPT_THRESHOLD
+#define INTERRUPT_THRESHOLD 1
+#endif
+
+// Default to allowing interrupts
+#ifndef FASTLED_ALLOW_INTERRUPTS
+#define FASTLED_ALLOW_INTERRUPTS 1
+#endif
+
+#if FASTLED_ALLOW_INTERRUPTS == 1
+#define FASTLED_ACCURATE_CLOCK
+#endif
+
+// reusing/abusing cli/sei defs for due
+// Each macro expands to TWO statements, so it must not be used as the unbraced body of an
+// if/else/loop: only the first statement would be controlled by the condition.
+#define cli()  __disable_irq(); __disable_fault_irq();
+#define sei() __enable_irq(); __enable_fault_irq();
+
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/stm32/clockless_arm_stm32.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/stm32/clockless_arm_stm32.h
@@ -0,0 +1,126 @@
+#ifndef __INC_CLOCKLESS_ARM_STM32_H
+#define __INC_CLOCKLESS_ARM_STM32_H
+
+FASTLED_NAMESPACE_BEGIN
+// Definition for a single channel clockless controller for the stm32 family of chips, like that used in the spark core
+// See clockless.h for detailed info on how the template parameters are used.
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+// Single-pin clockless LED controller for STM32, bit-banging the data line against a cycle counter.
+// T1/T2/T3 are the bit timing segments in cycles, XTRA0 adds extra bits per byte, and WAIT_TIME
+// is the minimum gap (in microseconds) enforced between frames.  FLIP is not referenced in this class.
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class ClocklessController : public CPixelLEDController<RGB_ORDER> {
+  typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+  typedef typename FastPin<DATA_PIN>::port_t data_t;
+
+  data_t mPinMask;
+  data_ptr_t mPort;
+  CMinWait<WAIT_TIME> mWait;
+public:
+  // Configure the data pin as an output and cache its mask and port address.
+  virtual void init() {
+    FastPin<DATA_PIN>::setOutput();
+    mPinMask = FastPin<DATA_PIN>::mask();
+    mPort = FastPin<DATA_PIN>::port();
+  }
+
+	virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+protected:
+
+  // Send one frame.  If the first attempt reports failure (returns 0), re-enable interrupts,
+  // pause for WAIT_TIME microseconds and try exactly once more.
+  virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+    mWait.wait();
+    if(!showRGBInternal(pixels)) {
+      sei(); delayMicroseconds(WAIT_TIME); cli();
+      showRGBInternal(pixels);
+    }
+    mWait.mark();
+  }
+
+// Raw access to the counter used for bit timing.
+// NOTE(review): presumably the same register as DWT->CYCCNT used below -- confirm the address.
+#define _CYCCNT (*(volatile uint32_t*)(0xE0001004UL))
+
+  // Clock out BITS bits of b, most significant bit first.  The counter is reset at the start of
+  // every bit, so all waits are measured from that bit's rising edge.  next_mark is not used here.
+  template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t & b)  {
+    // first BITS-1 bits; the last bit is handled separately below
+    for(register uint32_t i = BITS-1; i > 0; i--) {
+      // wait out the remainder of the previous bit period, then raise the line
+      while(_CYCCNT < (T1+T2+T3-20));
+      FastPin<DATA_PIN>::fastset(port, hi);
+      _CYCCNT = 4;
+      if(b&0x80) {
+        // 1 bit: hold the line high through T1+T2
+        while(_CYCCNT < (T1+T2-20));
+        FastPin<DATA_PIN>::fastset(port, lo);
+      } else {
+        // 0 bit: drop the line after T1
+        while(_CYCCNT < (T1-10));
+        FastPin<DATA_PIN>::fastset(port, lo);
+      }
+      b <<= 1;
+    }
+
+    // final bit: same waveform, but b is not shifted afterwards
+    while(_CYCCNT < (T1+T2+T3-20));
+    FastPin<DATA_PIN>::fastset(port, hi);
+    _CYCCNT = 4;
+
+    if(b&0x80) {
+      while(_CYCCNT < (T1+T2-20));
+      FastPin<DATA_PIN>::fastset(port, lo);
+    } else {
+      while(_CYCCNT < (T1-10));
+      FastPin<DATA_PIN>::fastset(port, lo);
+    }
+  }
+
+  // Write every pixel out on the data pin.  Returns the final cycle count, or 0 when an interrupt
+  // delayed output too long and the frame was abandoned (only possible with FASTLED_ALLOW_INTERRUPTS == 1).
+  // pixels is taken by value, so a retry from showPixels starts again from the caller's unmodified copy.
+  // (The original note about keeping this static to free register Y applies to the AVR version of this code.)
+  static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+    // Get access to the clock
+    CoreDebug->DEMCR  |= CoreDebug_DEMCR_TRCENA_Msk;
+    DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk;
+    DWT->CYCCNT = 0;
+
+    // precompute the full-port values with the data bit set / cleared, and start with the line low
+    register data_ptr_t port = FastPin<DATA_PIN>::port();
+    register data_t hi = *port | FastPin<DATA_PIN>::mask();;
+    register data_t lo = *port & ~FastPin<DATA_PIN>::mask();;
+    *port = lo;
+
+    // Setup the pixel controller and load/scale the first byte
+    pixels.preStepFirstByteDithering();
+    register uint8_t b = pixels.loadAndScale0();
+
+    cli();
+
+    uint32_t next_mark = (T1+T2+T3);
+
+    DWT->CYCCNT = 0;
+    while(pixels.has(1)) {
+      pixels.stepDithering();
+      #if (FASTLED_ALLOW_INTERRUPTS == 1)
+      cli();
+      // if interrupts pushed us more than (WAIT_TIME - INTERRUPT_THRESHOLD) microseconds past
+      // next_mark, punt on the current frame
+      if(DWT->CYCCNT > next_mark) {
+        if((DWT->CYCCNT-next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return 0; }
+      }
+
+      // re-read the port: an interrupt handler may have changed other pins on it
+      hi = *port | FastPin<DATA_PIN>::mask();
+      lo = *port & ~FastPin<DATA_PIN>::mask();
+      #endif
+
+      // Write first byte, read next byte
+      writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+      b = pixels.loadAndScale1();
+
+      // Write second byte, read 3rd byte
+      writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+      b = pixels.loadAndScale2();
+
+      // Write third byte, read 1st byte of next pixel
+      writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+      b = pixels.advanceAndLoadAndScale0();
+      #if (FASTLED_ALLOW_INTERRUPTS == 1)
+      sei();
+      #endif
+    };
+
+    sei();
+    return DWT->CYCCNT;
+  }
+};
+
+FASTLED_NAMESPACE_END
+
+  #endif
--- a/libraries/FastLED-3.2.0/platforms/arm/stm32/fastled_arm_stm32.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/stm32/fastled_arm_stm32.h
@@ -0,0 +1,9 @@
+#ifndef __INC_FASTLED_ARM_STM32_H
+#define __INC_FASTLED_ARM_STM32_H
+
+// Include the stm32 headers.  The guard is specific to this file so it cannot collide with
+// fastled_arm_sam.h, which owns __INC_FASTLED_ARM_SAM_H.
+#include "fastpin_arm_stm32.h"
+// #include "fastspi_arm_stm32.h"
+#include "clockless_arm_stm32.h"
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/arm/stm32/fastpin_arm_stm32.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/stm32/fastpin_arm_stm32.h
@@ -0,0 +1,105 @@
+#ifndef __FASTPIN_ARM_STM32_H
+#define __FASTPIN_ARM_STM32_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_FORCE_SOFTWARE_PINS)
+#warning "Software pin support forced, pin access will be sloightly slower."
+#define NO_HARDWARE_PIN_SUPPORT
+#undef HAS_HARDWARE_PIN_SUPPORT
+
+#else
+
+/// Template definition for STM32 style ARM pins, providing direct access to the various GPIO registers.  Note that this
+/// uses the full port GPIO registers.  In theory, in some way, bit-band register access -should- be faster, however I have found
+/// that something about the way gcc does register allocation results in the bit-band code being slower.  It will need more fine tuning.
+/// Registers are reached through the GPIO_TypeDef pointer returned by _GPIO::r(): ODR (data output), BSRR (set output) and BRR (clear output).
+
+template<uint8_t PIN, uint8_t _BIT, uint32_t _MASK, typename _GPIO> class _ARMPIN {
+public:
+  typedef volatile uint32_t * port_ptr_t;
+  typedef uint32_t port_t;
+
+  // Disabled direct-register direction setup, kept for reference only.
+  #if 0
+  inline static void setOutput() {
+    if(_BIT<8) {
+      _CRL::r() = (_CRL::r() & (0xF << (_BIT*4)) | (0x1 << (_BIT*4));
+    } else {
+      _CRH::r() = (_CRH::r() & (0xF << ((_BIT-8)*4))) | (0x1 << ((_BIT-8)*4));
+    }
+  }
+  inline static void setInput() { /* TODO */ } // TODO: preform MUX config { _PDDR::r() &= ~_MASK; }
+  #endif
+
+  // Pin direction is delegated to pinMode().
+  inline static void setOutput() {
+    pinMode(PIN, OUTPUT);
+  }
+  inline static void setInput() {
+    pinMode(PIN, INPUT);
+  }
+
+  // Drive this pin high / low through the bit set (BSRR) and bit reset (BRR) registers.
+  __attribute__ ((always_inline)) inline static void hi() {
+    _GPIO::r()->BSRR = _MASK;
+  }
+  __attribute__ ((always_inline)) inline static void lo() {
+    _GPIO::r()->BRR = _MASK;
+  }
+  // inline static void lo() __attribute__ ((always_inline)) { _GPIO::r()->BSRR = (_MASK<<16); }
+
+  // Overwrite the whole output data register with val.
+  __attribute__ ((always_inline)) inline static void set(register port_t val) {
+    _GPIO::r()->ODR = val;
+  }
+
+  // Read the current output state and drive the pin to the opposite level.
+  __attribute__ ((always_inline)) inline static void toggle() {
+    if(_GPIO::r()->ODR & _MASK) {
+      lo();
+    } else {
+      hi();
+    }
+  }
+
+  // Two toggles back to back: a short pulse that ends in the starting state.
+  __attribute__ ((always_inline)) inline static void strobe() {
+    toggle();
+    toggle();
+  }
+
+  // Pointer-taking variants; hi/lo ignore the pointer and use BSRR/BRR.
+  __attribute__ ((always_inline)) inline static void hi(register port_ptr_t port) {
+    hi();
+  }
+  __attribute__ ((always_inline)) inline static void lo(register port_ptr_t port) {
+    lo();
+  }
+  __attribute__ ((always_inline)) inline static void fastset(register port_ptr_t port, register port_t val) {
+    *port = val;
+  }
+
+  // Current ODR contents with this pin's bit forced on / off.
+  __attribute__ ((always_inline)) inline static port_t hival() {
+    return _GPIO::r()->ODR | _MASK;
+  }
+  __attribute__ ((always_inline)) inline static port_t loval() {
+    return _GPIO::r()->ODR & ~_MASK;
+  }
+
+  // Addresses of ODR, BSRR and BRR, and this pin's mask.
+  __attribute__ ((always_inline)) inline static port_ptr_t port() {
+    return &_GPIO::r()->ODR;
+  }
+  __attribute__ ((always_inline)) inline static port_ptr_t sport() {
+    return &_GPIO::r()->BSRR;
+  }
+  __attribute__ ((always_inline)) inline static port_ptr_t cport() {
+    return &_GPIO::r()->BRR;
+  }
+  __attribute__ ((always_inline)) inline static port_t mask() {
+    return _MASK;
+  }
+};
+
+// _R(T) names the wrapper struct for GPIO block T; _RD32(T) defines it with a static r()
+// that returns T as a volatile GPIO_TypeDef pointer.
+#define _R(T) struct __gen_struct_ ## T
+#define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline volatile GPIO_TypeDef * r() { return T; } };
+
+// Define the wrapper for port letter L (GPIOA, GPIOB, ...).
+#define _IO32(L) _RD32(GPIO ## L)
+
+// Specialize FastPin<PIN> for the pin that is bit BIT of port L.
+#define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, BIT, 1 << BIT, _R(GPIO ## L)> {};
+
+// Actual pin definitions
+#if defined(SPARK)
+
+// Wrappers for ports A-G; the pin definitions below only use A and B.
+_IO32(A); _IO32(B); _IO32(C); _IO32(D); _IO32(E); _IO32(F); _IO32(G);
+
+
+// Each entry is _DEFPIN_ARM(pin number, bit within the port, port letter).
+#define MAX_PIN 19
+_DEFPIN_ARM(0, 7, B);
+_DEFPIN_ARM(1, 6, B);
+_DEFPIN_ARM(2, 5, B);
+_DEFPIN_ARM(3, 4, B);
+_DEFPIN_ARM(4, 3, B);
+_DEFPIN_ARM(5, 15, A);
+_DEFPIN_ARM(6, 14, A);
+_DEFPIN_ARM(7, 13, A);
+_DEFPIN_ARM(8, 8, A);
+_DEFPIN_ARM(9, 9, A);
+_DEFPIN_ARM(10, 0, A);
+_DEFPIN_ARM(11, 1, A);
+_DEFPIN_ARM(12, 4, A);
+_DEFPIN_ARM(13, 5, A);
+_DEFPIN_ARM(14, 6, A);
+_DEFPIN_ARM(15, 7, A);
+_DEFPIN_ARM(16, 0, B);
+_DEFPIN_ARM(17, 1, B);
+_DEFPIN_ARM(18, 3, A);
+_DEFPIN_ARM(19, 2, A);
+
+
+// SPI pin numbers for this board, and the flag other headers test for hardware pin support.
+#define SPI_DATA 15
+#define SPI_CLOCK 13
+
+#define HAS_HARDWARE_PIN_SUPPORT
+
+#endif
+
+#endif // FASTLED_FORCE_SOFTWARE_PINS
+
+FASTLED_NAMESPACE_END
+
+#endif // __FASTPIN_ARM_STM32_H
--- a/libraries/FastLED-3.2.0/platforms/arm/stm32/led_sysdefs_arm_stm32.h
+++ b/libraries/FastLED-3.2.0/platforms/arm/stm32/led_sysdefs_arm_stm32.h
@@ -0,0 +1,47 @@
+// System definitions for the STM32 (Spark Core) build of FastLED.
+// The include guard is specific to this file; led_sysdefs_arm_sam.h owns __INC_LED_SYSDEFS_ARM_SAM_H.
+#ifndef __INC_LED_SYSDEFS_ARM_STM32_H
+#define __INC_LED_SYSDEFS_ARM_STM32_H
+
+#include <application.h>
+
+// On this platform the library lives in its own namespace.
+#define FASTLED_NAMESPACE_BEGIN namespace NSFastLED {
+#define FASTLED_NAMESPACE_END }
+#define FASTLED_USING_NAMESPACE using namespace NSFastLED;
+
+#define FASTLED_ARM
+
+#ifndef INTERRUPT_THRESHOLD
+#define INTERRUPT_THRESHOLD 1
+#endif
+
+// Default to NOT allowing interrupts while writing LED data; define as 1 before including to opt in
+#ifndef FASTLED_ALLOW_INTERRUPTS
+#define FASTLED_ALLOW_INTERRUPTS 0
+#endif
+
+#if FASTLED_ALLOW_INTERRUPTS == 1
+#define FASTLED_ACCURATE_CLOCK
+#endif
+
+// reusing/abusing the AVR cli/sei names on ARM.
+// Each macro expands to TWO statements, so it must not be used as the unbraced body of an
+// if/else/loop: only the first statement would be controlled by the condition.
+#define cli()  __disable_irq(); __disable_fault_irq();
+#define sei() __enable_irq(); __enable_fault_irq();
+
+// pgmspace definitions: PROGMEM expands to nothing and reads are plain pointer dereferences
+#define PROGMEM
+#define pgm_read_dword(addr) (*(const unsigned long *)(addr))
+#define pgm_read_dword_near(addr) pgm_read_dword(addr)
+
+// Default to NOT using PROGMEM here
+#ifndef FASTLED_USE_PROGMEM
+#define FASTLED_USE_PROGMEM 0
+#endif
+
+// data type defs: both register types are volatile 8-bit here (RoReg is not const-qualified)
+typedef volatile       uint8_t RoReg; /**< "Read only" 8-bit register (volatile uint8_t) */
+typedef volatile       uint8_t RwReg; /**< Read-Write 8-bit register (volatile uint8_t) */
+
+#define FASTLED_NO_PINMAP
+
+// CPU clock in Hz assumed by the timing code
+#define F_CPU 72000000
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/avr/clockless_trinket.h
+++ b/libraries/FastLED-3.2.0/platforms/avr/clockless_trinket.h
@@ -0,0 +1,464 @@
+#ifndef __INC_CLOCKLESS_TRINKET_H
+#define __INC_CLOCKLESS_TRINKET_H
+
+#include "../../controller.h"
+#include "../../lib8tion.h"
+#include <avr/interrupt.h> // for cli/se definitions
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_AVR)
+
+// Scaling macro choice
+#ifndef TRINKET_SCALE
+#define TRINKET_SCALE 1
+// whether or not to use dithering
+#define DITHER 1
+#endif
+
+#if (F_CPU==8000000)
+#define FASTLED_SLOW_CLOCK_ADJUST // asm __volatile__ ("mov r0,r0\n\t");
+#else
+#define FASTLED_SLOW_CLOCK_ADJUST
+#endif
+
+#define US_PER_TICK (64 / (F_CPU/1000000))
+
+// Cycle-delay helpers (variations on the functions in delay.h) - a loop var is passed in to preserve registers across calls by the optimizer/compiler
+template<int CYCLES> inline void _dc(register uint8_t & loopvar);
+// _dc_AVR: first pads with _dc<PAD>, then runs a countdown loop that loads loopvar with _LOOP and decrements it to zero
+template<int _LOOP, int PAD> __attribute__((always_inline)) inline void _dc_AVR(register uint8_t & loopvar) {
+	_dc<PAD>(loopvar);
+	// Two copies of the countdown loop, selected by BRCS on the incoming carry flag; the carry-set copy finishes with BSET 0.
+	asm __volatile__ (  "BRCS L_PC%=\n\t"
+						"        LDI %[loopvar], %[_LOOP]\n\tL_%=: DEC %[loopvar]\n\t BRNE L_%=\n\tBREQ L_DONE%=\n\t"
+						"L_PC%=: LDI %[loopvar], %[_LOOP]\n\tLL_%=: DEC %[loopvar]\n\t BRNE LL_%=\n\tBSET 0\n\t"
+						"L_DONE%=:\n\t"
+						:
+							[loopvar] "+a" (loopvar) : [_LOOP] "M" (_LOOP) : );
+}
+// Generic case: split CYCLES into CYCLES/6 loop iterations plus CYCLES%6 of padding; the specializations below handle -6..20 directly
+template<int CYCLES> __attribute__((always_inline)) inline void _dc(register uint8_t & loopvar) {
+	_dc_AVR<CYCLES/6,CYCLES%6>(loopvar);
+}
+template<> __attribute__((always_inline)) inline void _dc<-6>(register uint8_t & ) {}
+template<> __attribute__((always_inline)) inline void _dc<-5>(register uint8_t & ) {}
+template<> __attribute__((always_inline)) inline void _dc<-4>(register uint8_t & ) {}
+template<> __attribute__((always_inline)) inline void _dc<-3>(register uint8_t & ) {}
+template<> __attribute__((always_inline)) inline void _dc<-2>(register uint8_t & ) {}
+template<> __attribute__((always_inline)) inline void _dc<-1>(register uint8_t & ) {}
+template<> __attribute__((always_inline)) inline void _dc< 0>(register uint8_t & ) {}
+template<> __attribute__((always_inline)) inline void _dc< 1>(register uint8_t & ) {asm __volatile__("mov r0,r0":::);} // one filler instruction
+template<> __attribute__((always_inline)) inline void _dc< 2>(register uint8_t & ) {asm __volatile__("rjmp .+0":::);} // jump to the next instruction as filler
+template<> __attribute__((always_inline)) inline void _dc< 3>(register uint8_t & loopvar) { _dc<2>(loopvar); _dc<1>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc< 4>(register uint8_t & loopvar) { _dc<2>(loopvar); _dc<2>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc< 5>(register uint8_t & loopvar) { _dc<2>(loopvar); _dc<3>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc< 6>(register uint8_t & loopvar) { _dc<2>(loopvar); _dc<2>(loopvar); _dc<2>(loopvar);}
+template<> __attribute__((always_inline)) inline void _dc< 7>(register uint8_t & loopvar) { _dc<4>(loopvar); _dc<3>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc< 8>(register uint8_t & loopvar) { _dc<4>(loopvar); _dc<4>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc< 9>(register uint8_t & loopvar) { _dc<5>(loopvar); _dc<4>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc<10>(register uint8_t & loopvar) { _dc<6>(loopvar); _dc<4>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc<11>(register uint8_t & loopvar) { _dc<10>(loopvar); _dc<1>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc<12>(register uint8_t & loopvar) { _dc<10>(loopvar); _dc<2>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc<13>(register uint8_t & loopvar) { _dc<10>(loopvar); _dc<3>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc<14>(register uint8_t & loopvar) { _dc<10>(loopvar); _dc<4>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc<15>(register uint8_t & loopvar) { _dc<10>(loopvar); _dc<5>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc<16>(register uint8_t & loopvar) { _dc<10>(loopvar); _dc<6>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc<17>(register uint8_t & loopvar) { _dc<10>(loopvar); _dc<7>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc<18>(register uint8_t & loopvar) { _dc<10>(loopvar); _dc<8>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc<19>(register uint8_t & loopvar) { _dc<10>(loopvar); _dc<9>(loopvar); }
+template<> __attribute__((always_inline)) inline void _dc<20>(register uint8_t & loopvar) { _dc<10>(loopvar); _dc<10>(loopvar); }
+
+#define DINTPIN(T,ADJ,PINADJ) (T-(PINADJ+ADJ)>0) ? _dc<T-(PINADJ+ADJ)>(loopvar) : _dc<0>(loopvar);
+#define DINT(T,ADJ) if(AVR_PIN_CYCLES(DATA_PIN)==1) { DINTPIN(T,ADJ,1) } else { DINTPIN(T,ADJ,2); }
+#define D1(ADJ) DINT(T1,ADJ)
+#define D2(ADJ) DINT(T2,ADJ)
+#define D3(ADJ) DINT(T3,ADJ)
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Base template for clockless controllers.  These controllers have 3 control points in their cycle for each bit.  The first point
+// is where the line is raised hi.  The second point is where the line is dropped low for a zero.  The third point is where the
+// line is dropped low for a one.  T1, T2, and T3 correspond to the timings for those three in clock cycles.
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#if (!defined(NO_CORRECTION) || (NO_CORRECTION == 0)) && (FASTLED_ALLOW_INTERRUPTS == 0)
+static uint8_t gTimeErrorAccum256ths;
+#endif
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+template <uint8_t DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 10>
+class ClocklessController : public CPixelLEDController<RGB_ORDER> {
+	static_assert(T1 >= 2 && T2 >= 2 && T3 >= 3, "Not enough cycles - use a higher clock speed");
+
+	typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPin<DATA_PIN>::port_t data_t;
+
+	CMinWait<WAIT_TIME> mWait;
+public:
+	virtual void init() {
+		FastPin<DATA_PIN>::setOutput();
+	}
+
+	virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+protected:
+
+	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+		// Wait on mWait, write the whole frame with interrupts disabled, then (optionally) correct MS_COUNTER for the time spent.
+		mWait.wait();
+		cli();
+
+		showRGBInternal(pixels);
+
+		// Interrupts were off while the frame was written, so credit the estimated elapsed time back to MS_COUNTER
+#if (!defined(NO_CORRECTION) || (NO_CORRECTION == 0)) && (FASTLED_ALLOW_INTERRUPTS == 0)
+        uint32_t microsTaken = (uint32_t)pixels.size() * (uint32_t)CLKS_TO_MICROS(24 * (T1 + T2 + T3));
+
+        // adjust for approximate observed actual runtime (as of January 2015)
+        // roughly 9.6 cycles per pixel, which is 0.6us/pixel at 16MHz
+        // microsTaken += nLeds * 0.6 * CLKS_TO_MICROS(16);
+        microsTaken += scale16by8(pixels.size(),(0.6 * 256) + 1) * CLKS_TO_MICROS(16);
+
+        // if less than 1000us, there is NO timer impact,
+        // this is because the ONE interrupt that might come in while interrupts
+        // are disabled is queued up, and it will be serviced as soon as
+        // interrupts are re-enabled.
+        // This actually should technically also account for the runtime of the
+        // interrupt handler itself, but we're just not going to worry about that.
+        if( microsTaken > 1000) {
+
+            // Since up to one timer tick will be queued, we don't need
+            // to adjust the MS_COUNTER for that one.
+            microsTaken -= 1000;
+
+            // Now convert microseconds to 256ths of a millisecond, approximately like this:
+            // 250ths = (us/4)
+            // 256ths = 250ths * (263/256);
+            uint16_t x256ths = microsTaken >> 2;
+            x256ths += scale16by8(x256ths,7);
+
+            x256ths += gTimeErrorAccum256ths;
+            MS_COUNTER += (x256ths >> 8);
+            gTimeErrorAccum256ths = x256ths & 0xFF;
+        }
+
+#if 0
+        // For pixel counts of 30 and under at 16Mhz, no correction is necessary.
+        // For pixel counts of 15 and under at 8Mhz, no correction is necessary.
+        //
+        // This code, below, is smaller, and quicker clock correction, which drifts much
+        // more significantly, but is a few bytes smaller.  Presented here for consideration
+        // as an alternate on the ATtiny, which can't have more than about 150 pixels MAX
+        // anyway, meaning that microsTaken will never be more than about 4,500, which fits in
+        // a 16-bit variable.  The difference between /1000 and /1024 only starts showing
+        // up in the range of about 100 pixels, so many ATtiny projects won't even
+        // see a clock difference due to the approximation there.
+		uint16_t microsTaken = (uint32_t)nLeds * (uint32_t)CLKS_TO_MICROS((24) * (T1 + T2 + T3));
+        MS_COUNTER += (microsTaken >> 10);
+#endif
+
+#endif
+
+		sei();
+		mWait.mark();
+	}
+#define USE_ASM_MACROS
+
+// The variables that our various asm statements use.  The same block of variables needs to be declared for
+// all the asm blocks, because otherwise GCC would happily clobber variables or optimize code away too aggressively
+#define ASM_VARS : /* write variables */				\
+				[count] "+x" (count),					\
+				[data] "+z" (data),						\
+				[b1] "+a" (b1),							\
+				[d0] "+r" (d0),							\
+				[d1] "+r" (d1),							\
+				[d2] "+r" (d2),							\
+				[loopvar] "+a" (loopvar),				\
+				[scale_base] "+a" (scale_base)			\
+				: /* use variables */					\
+				[ADV] "r" (advanceBy),					\
+				[b0] "a" (b0),							\
+				[hi] "r" (hi),							\
+				[lo] "r" (lo),							\
+				[s0] "r" (s0),					  		\
+				[s1] "r" (s1),							\
+				[s2] "r" (s2),							\
+				[e0] "r" (e0),							\
+				[e1] "r" (e1),							\
+				[e2] "r" (e2),							\
+				[PORT] "M" (FastPin<DATA_PIN>::port()-0x20),		\
+				[O0] "M" (RGB_BYTE0(RGB_ORDER)),		\
+				[O1] "M" (RGB_BYTE1(RGB_ORDER)),		\
+				[O2] "M" (RGB_BYTE2(RGB_ORDER))		\
+				: "cc" /* clobber registers */
+
+
+// Note: the code in the else in HI1/LO1 will be turned into an sts (2 cycle, 2 word) opcode
+// 1 cycle, write hi to the port
+#define HI1 FASTLED_SLOW_CLOCK_ADJUST if((int)(FastPin<DATA_PIN>::port())-0x20 < 64) { asm __volatile__("out %[PORT], %[hi]" ASM_VARS ); } else { *FastPin<DATA_PIN>::port()=hi; }
+// 1 cycle, write lo to the port
+#define LO1 if((int)(FastPin<DATA_PIN>::port())-0x20 < 64) { asm __volatile__("out %[PORT], %[lo]" ASM_VARS ); } else { *FastPin<DATA_PIN>::port()=lo; }
+
+// 2 cycles, sbrs on flipping the line to lo if we're pushing out a 0
+#define QLO2(B, N) asm __volatile__("sbrs %[" #B "], " #N ASM_VARS ); LO1;
+// load a byte from ram into the given var with the given offset
+#define LD2(B,O) asm __volatile__("ldd %[" #B "], Z + %[" #O "]\n\t" ASM_VARS );
+// 4 cycles - load a byte from ram into the scaling scratch space with the given offset, clear the target var, clear carry
+#define LDSCL4(B,O) asm __volatile__("ldd %[scale_base], Z + %[" #O "]\n\tclr %[" #B "]\n\tclc\n\t" ASM_VARS );
+
+#if (DITHER==1)
+// apply dithering value  before we do anything with scale_base
+#define PRESCALE4(D) asm __volatile__("cpse %[scale_base], __zero_reg__\n\t add %[scale_base],%[" #D "]\n\tbrcc L_%=\n\tldi %[scale_base], 0xFF\n\tL_%=:\n\t" ASM_VARS);
+
+// Do the add for the prescale
+#define PRESCALEA2(D) asm __volatile__("cpse %[scale_base], __zero_reg__\n\t add %[scale_base],%[" #D "]\n\t" ASM_VARS);
+
+// Do the clamp for the prescale, clear carry when we're done - NOTE: Must ensure carry flag state is preserved!
+#define PRESCALEB4(D) asm __volatile__("brcc L_%=\n\tldi %[scale_base], 0xFF\n\tL_%=:\n\tneg %[" #D "]\n\tCLC" ASM_VARS);
+
+// Clamp for prescale, increment data, since we won't ever wrap 65k, this also effectively clears carry for us
+#define PSBIDATA4(D) asm __volatile__("brcc L_%=\n\tldi %[scale_base], 0xFF\n\tL_%=:\n\tadd %A[data], %[ADV]\n\tadc %B[data], __zero_reg__\n\t" ASM_VARS);
+
+#else
+#define PRESCALE4(D) _dc<4>(loopvar);
+#define PRESCALEA2(D) _dc<2>(loopvar);
+#define PRESCALEB4(D) _dc<4>(loopvar);
+#define PSBIDATA4(D) asm __volatile__( "add %A[data], %[ADV]\n\tadc %B[data], __zero_reg__\n\trjmp .+0\n\t" ASM_VARS );
+#endif
+
+// 2 cycles - perform one step of the scaling (if a given bit is set in scale, add scale-base to the scratch space)
+#define _SCALE02(B, N) "sbrc %[s0], " #N "\n\tadd %[" #B "], %[scale_base]\n\t"
+#define _SCALE12(B, N) "sbrc %[s1], " #N "\n\tadd %[" #B "], %[scale_base]\n\t"
+#define _SCALE22(B, N) "sbrc %[s2], " #N "\n\tadd %[" #B "], %[scale_base]\n\t"
+#define SCALE02(B,N) asm __volatile__( _SCALE02(B,N) ASM_VARS );
+#define SCALE12(B,N) asm __volatile__( _SCALE12(B,N) ASM_VARS );
+#define SCALE22(B,N) asm __volatile__( _SCALE22(B,N) ASM_VARS );
+
+// 1 cycle - rotate right, pulling in from carry (_ROR1 is the bare asm string, ROR1 the standalone statement)
+#define _ROR1(B) "ror %[" #B "]\n\t"
+#define ROR1(B) asm __volatile__( _ROR1(B) ASM_VARS);
+
+// 1 cycle, clear the carry bit
+#define _CLC1 "clc\n\t"
+#define CLC1 asm __volatile__( _CLC1 ASM_VARS );
+
+// 2 cycles, rotate right, pulling in from carry, then clear the carry bit
+#define RORCLC2(B) asm __volatile__( _ROR1(B) _CLC1 ASM_VARS );
+
+// 4 cycles, rotate, clear carry, scale next bit (the 0/1/2 in the name selects _SCALE02/_SCALE12/_SCALE22)
+#define RORSC04(B, N) asm __volatile__( _ROR1(B) _CLC1 _SCALE02(B, N) ASM_VARS );
+#define RORSC14(B, N) asm __volatile__( _ROR1(B) _CLC1 _SCALE12(B, N) ASM_VARS );
+#define RORSC24(B, N) asm __volatile__( _ROR1(B) _CLC1 _SCALE22(B, N) ASM_VARS );
+
+// 4 cycles, scale bit, rotate, clear carry (same 0/1/2 naming as above)
+#define SCROR04(B, N) asm __volatile__( _SCALE02(B,N) _ROR1(B) _CLC1 ASM_VARS );
+#define SCROR14(B, N) asm __volatile__( _SCALE12(B,N) _ROR1(B) _CLC1 ASM_VARS );
+#define SCROR24(B, N) asm __volatile__( _SCALE22(B,N) _ROR1(B) _CLC1 ASM_VARS );
+
+/////////////////////////////////////////////////////////////////////////////////////
+// Loop life cycle
+
+// dither adjustment macro - should be kept in sync w/what's in stepDithering
+// #define ADJDITHER2(D, E) D = E - D;
+#define _NEGD1(D) "neg %[" #D "]\n\t"
+#define _ADJD1(D,E) "add %[" #D "], %[" #E "]\n\t"
+#define ADJDITHER2(D, E) asm __volatile__ ( _NEGD1(D) _ADJD1(D, E) ASM_VARS);
+#define ADDDE1(D, E) asm __volatile__ ( _ADJD1(D, E) ASM_VARS );
+
+// #define xstr(a) str(a)
+// #define str(a) #a
+// #define ADJDITHER2(D,E) asm __volatile__("subi %[" #D "], " xstr(DUSE) "\n\tand %[" #D "], %[" #E "]\n\t" ASM_VARS);
+
+// define the beginning of the loop
+#define LOOP asm __volatile__("1:" ASM_VARS );
+// define the end of the loop
+#define DONE asm __volatile__("2:" ASM_VARS );
+
+// 2 cycles - increment the data pointer
+#define IDATA2 asm __volatile__("add %A[data], %[ADV]\n\tadc %B[data], __zero_reg__\n\t"  ASM_VARS );
+#define IDATACLC3 asm __volatile__("add %A[data], %[ADV]\n\tadc %B[data], __zero_reg__\n\t" _CLC1  ASM_VARS );
+
+// 1 cycle mov
+#define _MOV1(B1, B2) "mov %[" #B1 "], %[" #B2 "]\n\t"
+
+#define MOV1(B1, B2) asm __volatile__( _MOV1(B1,B2) ASM_VARS );
+
+// 3 cycle mov - skip if scale fix is happening
+#if (FASTLED_SCALE8_FIXED == 1)
+#define _MOV_FIX03(B1, B2) "mov %[" #B1 "], %[scale_base]\n\tcpse %[s0], __zero_reg__\n\t" _MOV1(B1, B2)
+#define _MOV_FIX13(B1, B2) "mov %[" #B1 "], %[scale_base]\n\tcpse %[s1], __zero_reg__\n\t" _MOV1(B1, B2)
+#define _MOV_FIX23(B1, B2) "mov %[" #B1 "], %[scale_base]\n\tcpse %[s2], __zero_reg__\n\t" _MOV1(B1, B2)
+#else
+// if we haven't fixed scale8, just do the move and nop the 2 cycles that would be used to
+// do the fixed adjustment
+#define _MOV_FIX03(B1, B2) _MOV1(B1, B2) "rjmp .+0\n\t"
+#define _MOV_FIX13(B1, B2) _MOV1(B1, B2) "rjmp .+0\n\t"
+#define _MOV_FIX23(B1, B2) _MOV1(B1, B2) "rjmp .+0\n\t"
+#endif
+
+// 3 cycle mov + negate D for dither adjustment
+#define MOV_NEGD04(B1, B2, D) asm __volatile( _MOV_FIX03(B1, B2) _NEGD1(D) ASM_VARS );
+#define MOV_ADDDE04(B1, B2, D, E) asm __volatile( _MOV_FIX03(B1, B2) _ADJD1(D, E) ASM_VARS );
+#define MOV_NEGD14(B1, B2, D) asm __volatile( _MOV_FIX13(B1, B2) _NEGD1(D) ASM_VARS );
+#define MOV_ADDDE14(B1, B2, D, E) asm __volatile( _MOV_FIX13(B1, B2) _ADJD1(D, E) ASM_VARS );
+#define MOV_NEGD24(B1, B2, D) asm __volatile( _MOV_FIX23(B1, B2) _NEGD1(D) ASM_VARS );
+
+// 2 cycles - decrement the counter
+#define DCOUNT2 asm __volatile__("sbiw %[count], 1" ASM_VARS );
+// 2 cycles - jump to the beginning of the loop (local label 1, emitted by LOOP)
+#define JMPLOOP2 asm __volatile__("rjmp 1b" ASM_VARS );
+// leave the loop (rjmp to local label 2, emitted by DONE) when the zero flag is set; otherwise brne skips ahead to local label 3 and continues
+#define BRLOOP1 asm __volatile__("brne 3f\n\trjmp 2f\n\t3:" ASM_VARS );
+
+// 5 cycles 2 sbiw, 3 for the breq/rjmp
+#define ENDLOOP5 asm __volatile__("sbiw %[count], 1\n\tbreq L_%=\n\trjmp 1b\n\tL_%=:\n\t" ASM_VARS);
+
+// NOP using the variables, forcing a move
+#define DNOP asm __volatile__("mov r0,r0" ASM_VARS);
+
+#define DADVANCE 3
+#define DUSE (0xFF - (DADVANCE-1))
+
+	// Writes every pixel out on DATA_PIN using the cycle-counted asm macros defined above.  This method is static to keep register Y
+	// available for data on AVR - if the method is non-static, then gcc will use register Y for the this pointer.
+	static void /*__attribute__((optimize("O0")))*/  /*__attribute__ ((always_inline))*/  showRGBInternal(PixelController<RGB_ORDER> & pixels)  {
+		uint8_t *data = (uint8_t*)pixels.mData;
+		data_ptr_t port = FastPin<DATA_PIN>::port();
+		data_t mask = FastPin<DATA_PIN>::mask();
+		uint8_t scale_base = 0;
+
+		// register uint8_t *end = data + nLeds;
+		data_t hi = *port | mask; // port value with the data pin's bit set
+		data_t lo = *port & ~mask; // port value with the data pin's bit cleared
+		*port = lo; // start with the line low
+
+		// the byte currently being written out
+		uint8_t b0 = 0;
+		// the byte currently being worked on to write the next out
+		uint8_t b1 = 0;
+
+		// Setup the pixel controller
+		pixels.preStepFirstByteDithering();
+
+		// pull the dithering/adjustment values out of the pixels object for direct asm access
+		uint8_t advanceBy = pixels.advanceBy();
+		uint16_t count = pixels.mLen;
+
+		uint8_t s0 = pixels.mScale.raw[RO(0)];
+		uint8_t s1 = pixels.mScale.raw[RO(1)];
+		uint8_t s2 = pixels.mScale.raw[RO(2)];
+#if (FASTLED_SCALE8_FIXED==1)
+		s0++; s1++; s2++;
+#endif
+		uint8_t d0 = pixels.d[RO(0)];
+		uint8_t d1 = pixels.d[RO(1)];
+		uint8_t d2 = pixels.d[RO(2)];
+		uint8_t e0 = pixels.e[RO(0)];
+		uint8_t e1 = pixels.e[RO(1)];
+		uint8_t e2 = pixels.e[RO(2)];
+
+		uint8_t loopvar=0;
+
+		// Prime b0 with the dithered + scaled first byte; this has to be done in asm to keep gcc from messing up the asm code further down
+		b0 = data[RO(0)];
+		{
+			LDSCL4(b0,O0) 	PRESCALEA2(d0)
+			PRESCALEB4(d0)	SCALE02(b0,0)
+			RORSC04(b0,1) 	ROR1(b0) CLC1
+			SCROR04(b0,2)		SCALE02(b0,3)
+			RORSC04(b0,4) 	ROR1(b0) CLC1
+			SCROR04(b0,5) 	SCALE02(b0,6)
+			RORSC04(b0,7) 	ROR1(b0) CLC1
+			MOV_ADDDE04(b1,b0,d0,e0)
+			MOV1(b0,b1)
+		}
+
+		{
+			// while(--count)
+			{
+				// Loop beginning
+				DNOP;
+				LOOP;
+
+				// Sum of the clock counts across each row should be 10 for 8Mhz, WS2811
+				// The values in the D1/D2/D3 indicate how many cycles the previous column takes
+				// to allow things to line back up.
+				//
+				// While writing out byte 0, we're loading up byte 1, applying the dithering adjustment,
+				// then scaling it using 8 cycles of shift/add interleaved in between writing the bits
+				// out.  When doing byte 1, we're doing the above for byte 2.  When we're doing byte 2,
+				// we're cycling back around and doing the above for byte 0.
+
+				// Inline scaling - RGB ordering
+				// DNOP
+				HI1 D1(1) QLO2(b0, 7) LDSCL4(b1,O1) 	D2(4)	LO1	PRESCALEA2(d1)	D3(2)
+				HI1	D1(1) QLO2(b0, 6) PRESCALEB4(d1)	D2(4)	LO1	SCALE12(b1,0)	D3(2)
+				HI1 D1(1) QLO2(b0, 5) RORSC14(b1,1) 	D2(4)	LO1 RORCLC2(b1)		D3(2)
+				HI1 D1(1) QLO2(b0, 4) SCROR14(b1,2)		D2(4)	LO1 SCALE12(b1,3)	D3(2)
+				HI1 D1(1) QLO2(b0, 3) RORSC14(b1,4) 	D2(4)	LO1 RORCLC2(b1) 	D3(2)
+				HI1 D1(1) QLO2(b0, 2) SCROR14(b1,5) 	D2(4)	LO1 SCALE12(b1,6)	D3(2)
+				HI1 D1(1) QLO2(b0, 1) RORSC14(b1,7) 	D2(4)	LO1 RORCLC2(b1) 	D3(2)
+				HI1 D1(1) QLO2(b0, 0)
+				switch(XTRA0) { // cases fall through on purpose: XTRA0 extra bit periods are emitted, each re-testing bit 0 of b0
+					case 4: D2(0) LO1 D3(0) HI1 D1(1) QLO2(b0,0)
+					case 3: D2(0) LO1 D3(0) HI1 D1(1) QLO2(b0,0)
+					case 2: D2(0) LO1 D3(0) HI1 D1(1) QLO2(b0,0)
+					case 1: D2(0) LO1 D3(0) HI1 D1(1) QLO2(b0,0)
+				}
+				MOV_ADDDE14(b0,b1,d1,e1) D2(4) LO1 D3(0)
+
+				HI1 D1(1) QLO2(b0, 7) LDSCL4(b1,O2) 	D2(4)	LO1	PRESCALEA2(d2)	D3(2)
+				HI1	D1(1) QLO2(b0, 6) PSBIDATA4(d2)		D2(4)	LO1	SCALE22(b1,0)	D3(2)
+				HI1 D1(1) QLO2(b0, 5) RORSC24(b1,1) 	D2(4)	LO1 RORCLC2(b1) 	D3(2)
+				HI1 D1(1) QLO2(b0, 4) SCROR24(b1,2)		D2(4)	LO1 SCALE22(b1,3)	D3(2)
+				HI1 D1(1) QLO2(b0, 3) RORSC24(b1,4) 	D2(4)	LO1 RORCLC2(b1) 	D3(2)
+				HI1 D1(1) QLO2(b0, 2) SCROR24(b1,5) 	D2(4)	LO1 SCALE22(b1,6)	D3(2)
+				HI1 D1(1) QLO2(b0, 1) RORSC24(b1,7) 	D2(4)	LO1 RORCLC2(b1) 	D3(2)
+				HI1 D1(1) QLO2(b0, 0)
+				switch(XTRA0) { // same intentional fall-through as for the first byte
+					case 4: D2(0) LO1 D3(0) HI1 D1(1) QLO2(b0,0)
+					case 3: D2(0) LO1 D3(0) HI1 D1(1) QLO2(b0,0)
+					case 2: D2(0) LO1 D3(0) HI1 D1(1) QLO2(b0,0)
+					case 1: D2(0) LO1 D3(0) HI1 D1(1) QLO2(b0,0)
+				}
+
+				// Because Prescale on the middle byte also increments the data counter,
+				// we have to do both halves of updating d2 here - negating it (in the
+				// MOV_NEGD24 macro) and then adding E back into it
+				MOV_NEGD24(b0,b1,d2) D2(4) LO1 ADDDE1(d2,e2) D3(1)
+				HI1 D1(1) QLO2(b0, 7) LDSCL4(b1,O0) 	D2(4)	LO1	PRESCALEA2(d0)	D3(2)
+				HI1	D1(1) QLO2(b0, 6) PRESCALEB4(d0)	D2(4)	LO1	SCALE02(b1,0)	D3(2)
+				HI1 D1(1) QLO2(b0, 5) RORSC04(b1,1) 	D2(4)	LO1 RORCLC2(b1) 	D3(2)
+				HI1 D1(1) QLO2(b0, 4) SCROR04(b1,2)		D2(4)	LO1 SCALE02(b1,3)	D3(2)
+				HI1 D1(1) QLO2(b0, 3) RORSC04(b1,4) 	D2(4)	LO1 RORCLC2(b1)  	D3(2)
+				HI1 D1(1) QLO2(b0, 2) SCROR04(b1,5) 	D2(4)	LO1 SCALE02(b1,6)	D3(2)
+				HI1 D1(1) QLO2(b0, 1) RORSC04(b1,7) 	D2(4)	LO1 RORCLC2(b1) 	D3(2)
+				HI1 D1(1) QLO2(b0, 0)
+				switch(XTRA0) { // same intentional fall-through as for the first byte
+					case 4: D2(0) LO1 D3(0) HI1 D1(1) QLO2(b0,0)
+					case 3: D2(0) LO1 D3(0) HI1 D1(1) QLO2(b0,0)
+					case 2: D2(0) LO1 D3(0) HI1 D1(1) QLO2(b0,0)
+					case 1: D2(0) LO1 D3(0) HI1 D1(1) QLO2(b0,0)
+				}
+				MOV_ADDDE04(b0,b1,d0,e0) D2(4) LO1 D3(5)
+				ENDLOOP5
+			}
+			DONE;
+		}
+
+		#if (FASTLED_ALLOW_INTERRUPTS == 1)
+		// stop using the clock juggler
+		TCCR0A &= ~0x30;
+		#endif
+	}
+
+};
+
+#endif
+
+FASTLED_NAMESPACE_END
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/avr/fastled_avr.h
+++ b/libraries/FastLED-3.2.0/platforms/avr/fastled_avr.h
@@ -0,0 +1,13 @@
+#ifndef __INC_FASTLED_AVR_H
+#define __INC_FASTLED_AVR_H
+
+#include "fastpin_avr.h"
+#include "fastspi_avr.h"
+#include "clockless_trinket.h"
+
+// Default to using PROGMEM
+#ifndef FASTLED_USE_PROGMEM
+#define FASTLED_USE_PROGMEM 1
+#endif
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/avr/fastpin_avr.h
+++ b/libraries/FastLED-3.2.0/platforms/avr/fastpin_avr.h
@@ -0,0 +1,341 @@
+#ifndef __INC_FASTPIN_AVR_H
+#define __INC_FASTPIN_AVR_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_FORCE_SOFTWARE_PINS)
+#warning "Software pin support forced, pin access will be slightly slower."
+#define NO_HARDWARE_PIN_SUPPORT
+#undef HAS_HARDWARE_PIN_SUPPORT
+
+#else
+
+#define AVR_PIN_CYCLES(_PIN) ((((int)FastPin<_PIN>::port())-0x20 < 64) ? 1 : 2)
+
+/// Pin class for the case where the port registers are known at compile time: _PORT, _DDR and _PIN are types whose static r()
+/// returns the register, and _MASK is the pin's bit mask, so the inlined hi/lo methods devolve to a single io register write/bitset.
+template<uint8_t PIN, uint8_t _MASK, typename _PORT, typename _DDR, typename _PIN> class _AVRPIN {
+public:
+	typedef volatile uint8_t * port_ptr_t;
+	typedef uint8_t port_t;
+
+	inline static void setOutput() { _DDR::r() |= _MASK; }
+	inline static void setInput() { _DDR::r() &= ~_MASK; }
+
+	inline static void hi() __attribute__ ((always_inline)) { _PORT::r() |= _MASK; }
+	inline static void lo() __attribute__ ((always_inline)) { _PORT::r() &= ~_MASK; }
+	inline static void set(register uint8_t val) __attribute__ ((always_inline)) { _PORT::r() = val; } // overwrites the whole port register, not just this pin
+
+	inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+	// writes the mask to the _PIN register; presumably the AVR "write 1 to PINx toggles the output" feature - confirm for the target part
+	inline static void toggle() __attribute__ ((always_inline)) { _PIN::r() = _MASK; }
+	// the port-pointer overloads below ignore their port argument and forward to the compile-time versions
+	inline static void hi(register port_ptr_t /*port*/) __attribute__ ((always_inline)) { hi(); }
+	inline static void lo(register port_ptr_t /*port*/) __attribute__ ((always_inline)) { lo(); }
+	inline static void fastset(register port_ptr_t /*port*/, register uint8_t val) __attribute__ ((always_inline)) { set(val); }
+	// hival()/loval() read the current port value and return it with this pin's bit set/cleared, without writing it back
+	inline static port_t hival() __attribute__ ((always_inline)) { return _PORT::r() | _MASK; }
+	inline static port_t loval() __attribute__ ((always_inline)) { return _PORT::r() & ~_MASK; }
+	inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_PORT::r(); }
+	inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+};
+
+
+
+/// AVR definitions for pins.  Getting around  the fact that I can't pass GPIO register addresses in as template arguments by instead creating
+/// a custom type for each GPIO register with a single, static, aggressively inlined function that returns that specific GPIO register.  A similar
+/// trick is used a bit further below for the ARM GPIO registers (of which there are far more than on AVR!)
+typedef volatile uint8_t & reg8_t;
+#define _R(T) struct __gen_struct_ ## T
+#define _RD8(T) struct __gen_struct_ ## T { static inline reg8_t r() { return T; }};
+#define _IO(L) _RD8(DDR ## L); _RD8(PORT ## L); _RD8(PIN ## L);
+#define _DEFPIN_AVR(_PIN, MASK, L) template<> class FastPin<_PIN> : public _AVRPIN<_PIN, MASK, _R(PORT ## L), _R(DDR ## L), _R(PIN ## L)> {};
+
+#if defined(__AVR_ATtiny85__) || defined(__AVR_ATtiny45__) || defined(__AVR_ATtiny25__) // this branch only instantiates port B
+_IO(B);
+
+#define MAX_PIN 5
+
+_DEFPIN_AVR(0, 0x01, B); _DEFPIN_AVR(1, 0x02, B); _DEFPIN_AVR(2, 0x04, B); _DEFPIN_AVR(3, 0x08, B);
+_DEFPIN_AVR(4, 0x10, B); _DEFPIN_AVR(5, 0x20, B);
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(__AVR_ATtiny841__) || defined(__AVR_ATtiny441__)
+#define MAX_PIN 11
+_IO(A); _IO(B);
+
+_DEFPIN_AVR(0, 0x01, B); _DEFPIN_AVR(1, 0x02, B); _DEFPIN_AVR(2, 0x04, B);
+_DEFPIN_AVR(3, 0x80, A); _DEFPIN_AVR(4, 0x40, A); _DEFPIN_AVR(5, 0x20, A);
+_DEFPIN_AVR(6, 0x10, A); _DEFPIN_AVR(7, 0x08, A); _DEFPIN_AVR(8, 0x04, A);
+_DEFPIN_AVR(9, 0x02, A); _DEFPIN_AVR(10, 0x01, A); _DEFPIN_AVR(11, 0x08, B);
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(ARDUINO_AVR_DIGISPARK) // digispark pin layout
+#define MAX_PIN 5
+#define HAS_HARDWARE_PIN_SUPPORT 1
+_IO(A); _IO(B);
+
+_DEFPIN_AVR(0, 0x01, B); _DEFPIN_AVR(1, 0x02, B); _DEFPIN_AVR(2, 0x04, B);
+_DEFPIN_AVR(3, 0x80, A); _DEFPIN_AVR(4, 0x40, A); _DEFPIN_AVR(5, 0x20, A);
+
+#elif defined(__AVR_ATtiny24__) || defined(__AVR_ATtiny44__) || defined(__AVR_ATtiny84__) // this branch instantiates ports A and B
+_IO(A); _IO(B);
+
+#define MAX_PIN 10
+
+_DEFPIN_AVR(0, 0x01, A); _DEFPIN_AVR(1, 0x02, A); _DEFPIN_AVR(2, 0x04, A); _DEFPIN_AVR(3, 0x08, A);
+_DEFPIN_AVR(4, 0x10, A); _DEFPIN_AVR(5, 0x20, A); _DEFPIN_AVR(6, 0x40, A); _DEFPIN_AVR(7, 0x80, A);
+_DEFPIN_AVR(8, 0x04, B); _DEFPIN_AVR(9, 0x02, B); _DEFPIN_AVR(10, 0x01, B);
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(ARDUINO_AVR_DIGISPARKPRO)
+
+_IO(A); _IO(B);
+#define MAX_PIN 12
+
+_DEFPIN_AVR(0, 0x01, B); _DEFPIN_AVR(1, 0x02, B); _DEFPIN_AVR(2, 0x04, B); _DEFPIN_AVR(3, 0x20, B);
+_DEFPIN_AVR(4, 0x08, B); _DEFPIN_AVR(5, 0x80, A); _DEFPIN_AVR(6, 0x01, A); _DEFPIN_AVR(7, 0x02, A);
+_DEFPIN_AVR(8, 0x04, A); _DEFPIN_AVR(9, 0x08, A); _DEFPIN_AVR(10, 0x10, A); _DEFPIN_AVR(11, 0x20, A);
+_DEFPIN_AVR(12, 0x40, A);
+
+#elif defined(__AVR_ATtiny167__) || defined(__AVR_ATtiny87__)
+_IO(A); _IO(B);
+
+#define MAX_PIN 15
+
+_DEFPIN_AVR(0, 0x01, A);  _DEFPIN_AVR(1, 0x02, A);   _DEFPIN_AVR(2, 0x04, A);  _DEFPIN_AVR(3, 0x08, A);
+_DEFPIN_AVR(4, 0x10, A);  _DEFPIN_AVR(5, 0x20, A);   _DEFPIN_AVR(6, 0x40, A);  _DEFPIN_AVR(7, 0x80, A);
+_DEFPIN_AVR(8, 0x01, B);  _DEFPIN_AVR(9, 0x02, B);   _DEFPIN_AVR(10, 0x04, B); _DEFPIN_AVR(11, 0x08, B);
+_DEFPIN_AVR(12, 0x10, B); _DEFPIN_AVR(13, 0x20, B); _DEFPIN_AVR(14, 0x40, B); _DEFPIN_AVR(15, 0x80, B);
+
+#define SPI_DATA 4
+#define SPI_CLOCK 5
+#define AVR_HARDWARE_SPI 1
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+#elif defined(ARDUINO_HOODLOADER2) && (defined(__AVR_ATmega32U2__) || defined(__AVR_ATmega16U2__) || defined(__AVR_ATmega8U2__)) || defined(__AVR_AT90USB82__) || defined(__AVR_AT90USB162__)
+
+_IO(D); _IO(B); _IO(C);
+
+#define MAX_PIN 20
+
+_DEFPIN_AVR( 0, 0x01, B); _DEFPIN_AVR( 1, 0x02, B); _DEFPIN_AVR( 2, 0x04, B); _DEFPIN_AVR( 3, 0x08, B);
+_DEFPIN_AVR( 4, 0x10, B); _DEFPIN_AVR( 5, 0x20, B); _DEFPIN_AVR( 6, 0x40, B); _DEFPIN_AVR( 7, 0x80, B);
+
+_DEFPIN_AVR( 8, 0x80, C); _DEFPIN_AVR( 9, 0x40, C); _DEFPIN_AVR( 10, 0x20,C); _DEFPIN_AVR( 11, 0x10, C);
+_DEFPIN_AVR( 12, 0x04, C); _DEFPIN_AVR( 13, 0x01, D); _DEFPIN_AVR( 14, 0x02, D); _DEFPIN_AVR(15, 0x04, D);
+_DEFPIN_AVR( 16, 0x08, D); _DEFPIN_AVR( 17, 0x10, D); _DEFPIN_AVR( 18, 0x20, D); _DEFPIN_AVR( 19, 0x40, D);
+_DEFPIN_AVR( 20, 0x80, D);
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+// #define SPI_DATA 2
+// #define SPI_CLOCK 1
+// #define AVR_HARDWARE_SPI 1
+
+#elif defined(IS_BEAN)
+
+// Accelerated port definitions for arduino avrs
+_IO(D); _IO(B); _IO(C);
+
+#define MAX_PIN 19
+_DEFPIN_AVR( 0, 0x40, D); _DEFPIN_AVR( 1, 0x02, B); _DEFPIN_AVR( 2, 0x04, B); _DEFPIN_AVR( 3, 0x08, B);
+_DEFPIN_AVR( 4, 0x10, B); _DEFPIN_AVR( 5, 0x20, B); _DEFPIN_AVR( 6, 0x01, D); _DEFPIN_AVR( 7, 0x80, D);
+_DEFPIN_AVR( 8, 0x01, B); _DEFPIN_AVR( 9, 0x02, D); _DEFPIN_AVR(10, 0x04, D); _DEFPIN_AVR(11, 0x08, D);
+_DEFPIN_AVR(12, 0x10, D); _DEFPIN_AVR(13, 0x20, D); _DEFPIN_AVR(14, 0x01, C); _DEFPIN_AVR(15, 0x02, C);
+_DEFPIN_AVR(16, 0x04, C); _DEFPIN_AVR(17, 0x08, C); _DEFPIN_AVR(18, 0x10, C); _DEFPIN_AVR(19, 0x20, C);
+
+#define SPI_DATA 3
+#define SPI_CLOCK 5
+#define SPI_SELECT 2
+#define AVR_HARDWARE_SPI 1
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#ifndef __AVR_ATmega8__
+#define SPI_UART0_DATA 9
+#define SPI_UART0_CLOCK 12
+#endif
+
+#elif defined(__AVR_ATmega328P__)  || defined(__AVR_ATmega328__) || defined(__AVR_ATmega168__) || defined(__AVR_ATmega168P__) || defined(__AVR_ATmega8__)
+// Accelerated port definitions for arduino avrs
+_IO(D); _IO(B); _IO(C);
+
+#define MAX_PIN 19
+_DEFPIN_AVR( 0, 0x01, D); _DEFPIN_AVR( 1, 0x02, D); _DEFPIN_AVR( 2, 0x04, D); _DEFPIN_AVR( 3, 0x08, D);
+_DEFPIN_AVR( 4, 0x10, D); _DEFPIN_AVR( 5, 0x20, D); _DEFPIN_AVR( 6, 0x40, D); _DEFPIN_AVR( 7, 0x80, D);
+_DEFPIN_AVR( 8, 0x01, B); _DEFPIN_AVR( 9, 0x02, B); _DEFPIN_AVR(10, 0x04, B); _DEFPIN_AVR(11, 0x08, B);
+_DEFPIN_AVR(12, 0x10, B); _DEFPIN_AVR(13, 0x20, B); _DEFPIN_AVR(14, 0x01, C); _DEFPIN_AVR(15, 0x02, C);
+_DEFPIN_AVR(16, 0x04, C); _DEFPIN_AVR(17, 0x08, C); _DEFPIN_AVR(18, 0x10, C); _DEFPIN_AVR(19, 0x20, C);
+
+#define SPI_DATA 11
+#define SPI_CLOCK 13
+#define SPI_SELECT 10
+#define AVR_HARDWARE_SPI 1
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#ifndef __AVR_ATmega8__
+#define SPI_UART0_DATA 1
+#define SPI_UART0_CLOCK 4
+#endif
+
+#elif defined(__AVR_ATmega1284P__)
+
+_IO(A); _IO(B); _IO(C); _IO(D);
+
+#define MAX_PIN 31
+_DEFPIN_AVR(0, 1<<0, B); _DEFPIN_AVR(1, 1<<1, B); _DEFPIN_AVR(2, 1<<2, B); _DEFPIN_AVR(3, 1<<3, B);
+_DEFPIN_AVR(4, 1<<4, B); _DEFPIN_AVR(5, 1<<5, B); _DEFPIN_AVR(6, 1<<6, B); _DEFPIN_AVR(7, 1<<7, B);
+_DEFPIN_AVR(8, 1<<0, D); _DEFPIN_AVR(9, 1<<1, D); _DEFPIN_AVR(10, 1<<2, D); _DEFPIN_AVR(11, 1<<3, D);
+_DEFPIN_AVR(12, 1<<4, D); _DEFPIN_AVR(13, 1<<5, D); _DEFPIN_AVR(14, 1<<6, D); _DEFPIN_AVR(15, 1<<7, D);
+_DEFPIN_AVR(16, 1<<0, C); _DEFPIN_AVR(17, 1<<1, C); _DEFPIN_AVR(18, 1<<2, C); _DEFPIN_AVR(19, 1<<3, C);
+_DEFPIN_AVR(20, 1<<4, C); _DEFPIN_AVR(21, 1<<5, C); _DEFPIN_AVR(22, 1<<6, C); _DEFPIN_AVR(23, 1<<7, C);
+_DEFPIN_AVR(24, 1<<0, A); _DEFPIN_AVR(25, 1<<1, A); _DEFPIN_AVR(26, 1<<2, A); _DEFPIN_AVR(27, 1<<3, A);
+_DEFPIN_AVR(28, 1<<4, A); _DEFPIN_AVR(29, 1<<5, A); _DEFPIN_AVR(30, 1<<6, A); _DEFPIN_AVR(31, 1<<7, A);
+
+#define SPI_DATA 5
+#define SPI_CLOCK 7
+#define SPI_SELECT 4
+#define AVR_HARDWARE_SPI 1
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif  defined(__AVR_ATmega128RFA1__) || defined(__AVR_ATmega256RFR2__)
+
+// AKA the Pinoccio
+
+_IO(A); _IO(B); _IO(C); _IO(D); _IO(E); _IO(F);
+
+_DEFPIN_AVR( 0, 1<<0, E); _DEFPIN_AVR( 1, 1<<1, E); _DEFPIN_AVR( 2, 1<<7, B); _DEFPIN_AVR( 3, 1<<3, E);
+_DEFPIN_AVR( 4, 1<<4, E); _DEFPIN_AVR( 5, 1<<5, E); _DEFPIN_AVR( 6, 1<<2, E); _DEFPIN_AVR( 7, 1<<6, E);
+_DEFPIN_AVR( 8, 1<<5, D); _DEFPIN_AVR( 9, 1<<0, B); _DEFPIN_AVR(10, 1<<2, B); _DEFPIN_AVR(11, 1<<3, B);
+_DEFPIN_AVR(12, 1<<1, B); _DEFPIN_AVR(13, 1<<2, D); _DEFPIN_AVR(14, 1<<3, D); _DEFPIN_AVR(15, 1<<0, D);
+_DEFPIN_AVR(16, 1<<1, D); _DEFPIN_AVR(17, 1<<4, D); _DEFPIN_AVR(18, 1<<7, E); _DEFPIN_AVR(19, 1<<6, D);
+_DEFPIN_AVR(20, 1<<7, D); _DEFPIN_AVR(21, 1<<4, B); _DEFPIN_AVR(22, 1<<5, B); _DEFPIN_AVR(23, 1<<6, B);
+_DEFPIN_AVR(24, 1<<0, F); _DEFPIN_AVR(25, 1<<1, F); _DEFPIN_AVR(26, 1<<2, F); _DEFPIN_AVR(27, 1<<3, F);
+_DEFPIN_AVR(28, 1<<4, F); _DEFPIN_AVR(29, 1<<5, F); _DEFPIN_AVR(30, 1<<6, F); _DEFPIN_AVR(31, 1<<7, F);
+
+#define SPI_DATA 10
+#define SPI_CLOCK 12
+#define SPI_SELECT 9
+
+#define AVR_HARDWARE_SPI 1
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(__AVR_ATmega1280__) || defined(__AVR_ATmega2560__)
+// megas
+
+_IO(A); _IO(B); _IO(C); _IO(D); _IO(E); _IO(F); _IO(G); _IO(H); _IO(J); _IO(K); _IO(L);
+
+#define MAX_PIN 69
+_DEFPIN_AVR(0, 1, E); _DEFPIN_AVR(1, 2, E); _DEFPIN_AVR(2, 16, E); _DEFPIN_AVR(3, 32, E);
+_DEFPIN_AVR(4, 32, G); _DEFPIN_AVR(5, 8, E); _DEFPIN_AVR(6, 8, H); _DEFPIN_AVR(7, 16, H);
+_DEFPIN_AVR(8, 32, H); _DEFPIN_AVR(9, 64, H); _DEFPIN_AVR(10, 16, B); _DEFPIN_AVR(11, 32, B);
+_DEFPIN_AVR(12, 64, B); _DEFPIN_AVR(13, 128, B); _DEFPIN_AVR(14, 2, J); _DEFPIN_AVR(15, 1, J);
+_DEFPIN_AVR(16, 2, H); _DEFPIN_AVR(17, 1, H); _DEFPIN_AVR(18, 8, D); _DEFPIN_AVR(19, 4, D);
+_DEFPIN_AVR(20, 2, D); _DEFPIN_AVR(21, 1, D); _DEFPIN_AVR(22, 1, A); _DEFPIN_AVR(23, 2, A);
+_DEFPIN_AVR(24, 4, A); _DEFPIN_AVR(25, 8, A); _DEFPIN_AVR(26, 16, A); _DEFPIN_AVR(27, 32, A);
+_DEFPIN_AVR(28, 64, A); _DEFPIN_AVR(29, 128, A); _DEFPIN_AVR(30, 128, C); _DEFPIN_AVR(31, 64, C);
+_DEFPIN_AVR(32, 32, C); _DEFPIN_AVR(33, 16, C); _DEFPIN_AVR(34, 8, C); _DEFPIN_AVR(35, 4, C);
+_DEFPIN_AVR(36, 2, C); _DEFPIN_AVR(37, 1, C); _DEFPIN_AVR(38, 128, D); _DEFPIN_AVR(39, 4, G);
+_DEFPIN_AVR(40, 2, G); _DEFPIN_AVR(41, 1, G); _DEFPIN_AVR(42, 128, L); _DEFPIN_AVR(43, 64, L);
+_DEFPIN_AVR(44, 32, L); _DEFPIN_AVR(45, 16, L); _DEFPIN_AVR(46, 8, L); _DEFPIN_AVR(47, 4, L);
+_DEFPIN_AVR(48, 2, L); _DEFPIN_AVR(49, 1, L); _DEFPIN_AVR(50, 8, B); _DEFPIN_AVR(51, 4, B);
+_DEFPIN_AVR(52, 2, B); _DEFPIN_AVR(53, 1, B); _DEFPIN_AVR(54, 1, F); _DEFPIN_AVR(55, 2, F);
+_DEFPIN_AVR(56, 4, F); _DEFPIN_AVR(57, 8, F); _DEFPIN_AVR(58, 16, F); _DEFPIN_AVR(59, 32, F);
+_DEFPIN_AVR(60, 64, F); _DEFPIN_AVR(61, 128, F); _DEFPIN_AVR(62, 1, K); _DEFPIN_AVR(63, 2, K);
+_DEFPIN_AVR(64, 4, K); _DEFPIN_AVR(65, 8, K); _DEFPIN_AVR(66, 16, K); _DEFPIN_AVR(67, 32, K);
+_DEFPIN_AVR(68, 64, K); _DEFPIN_AVR(69, 128, K);
+
+#define SPI_DATA 51
+#define SPI_CLOCK 52
+#define SPI_SELECT 53
+#define AVR_HARDWARE_SPI 1
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+// Leonardo, teensy, blinkm
+#elif defined(__AVR_ATmega32U4__) && defined(CORE_TEENSY)
+
+// teensy defs
+_IO(B); _IO(C); _IO(D); _IO(E); _IO(F);
+
+#define MAX_PIN 23
+_DEFPIN_AVR(0, 1, B); _DEFPIN_AVR(1, 2, B); _DEFPIN_AVR(2, 4, B); _DEFPIN_AVR(3, 8, B);
+_DEFPIN_AVR(4, 128, B); _DEFPIN_AVR(5, 1, D); _DEFPIN_AVR(6, 2, D); _DEFPIN_AVR(7, 4, D);
+_DEFPIN_AVR(8, 8, D); _DEFPIN_AVR(9, 64, C); _DEFPIN_AVR(10, 128, C); _DEFPIN_AVR(11, 64, D);
+_DEFPIN_AVR(12, 128, D); _DEFPIN_AVR(13, 16, B); _DEFPIN_AVR(14, 32, B); _DEFPIN_AVR(15, 64, B);
+_DEFPIN_AVR(16, 128, F); _DEFPIN_AVR(17, 64, F); _DEFPIN_AVR(18, 32, F); _DEFPIN_AVR(19, 16, F);
+_DEFPIN_AVR(20, 2, F); _DEFPIN_AVR(21, 1, F); _DEFPIN_AVR(22, 16, D); _DEFPIN_AVR(23, 32, D);
+
+#define SPI_DATA 2
+#define SPI_CLOCK 1
+#define SPI_SELECT 0
+#define AVR_HARDWARE_SPI 1
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+// PD3/PD5
+#define SPI_UART1_DATA 8
+#define SPI_UART1_CLOCK 23
+
+#elif defined(__AVR_AT90USB646__) || defined(__AVR_AT90USB1286__)
+// teensy++ 2 defs
+
+_IO(A); _IO(B); _IO(C); _IO(D); _IO(E); _IO(F);
+
+#define MAX_PIN 45
+_DEFPIN_AVR(0, 1, D); _DEFPIN_AVR(1, 2, D); _DEFPIN_AVR(2, 4, D); _DEFPIN_AVR(3, 8, D);
+_DEFPIN_AVR(4, 16, D); _DEFPIN_AVR(5, 32, D); _DEFPIN_AVR(6, 64, D); _DEFPIN_AVR(7, 128, D);
+_DEFPIN_AVR(8, 1, E); _DEFPIN_AVR(9, 2, E); _DEFPIN_AVR(10, 1, C); _DEFPIN_AVR(11, 2, C);
+_DEFPIN_AVR(12, 4, C); _DEFPIN_AVR(13, 8, C); _DEFPIN_AVR(14, 16, C); _DEFPIN_AVR(15, 32, C);
+_DEFPIN_AVR(16, 64, C); _DEFPIN_AVR(17, 128, C); _DEFPIN_AVR(18, 64, E); _DEFPIN_AVR(19, 128, E);
+_DEFPIN_AVR(20, 1, B); _DEFPIN_AVR(21, 2, B); _DEFPIN_AVR(22, 4, B); _DEFPIN_AVR(23, 8, B);
+_DEFPIN_AVR(24, 16, B); _DEFPIN_AVR(25, 32, B); _DEFPIN_AVR(26, 64, B); _DEFPIN_AVR(27, 128, B);
+_DEFPIN_AVR(28, 1, A); _DEFPIN_AVR(29, 2, A); _DEFPIN_AVR(30, 4, A); _DEFPIN_AVR(31, 8, A);
+_DEFPIN_AVR(32, 16, A); _DEFPIN_AVR(33, 32, A); _DEFPIN_AVR(34, 64, A); _DEFPIN_AVR(35, 128, A);
+_DEFPIN_AVR(36, 16, E); _DEFPIN_AVR(37, 32, E); _DEFPIN_AVR(38, 1, F); _DEFPIN_AVR(39, 2, F);
+_DEFPIN_AVR(40, 4, F); _DEFPIN_AVR(41, 8, F); _DEFPIN_AVR(42, 16, F); _DEFPIN_AVR(43, 32, F);
+_DEFPIN_AVR(44, 64, F); _DEFPIN_AVR(45, 128, F);
+
+#define SPI_DATA 22
+#define SPI_CLOCK 21
+#define SPI_SELECT 20
+#define AVR_HARDWARE_SPI 1
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+// PD3/PD5
+#define SPI_UART1_DATA 3
+#define SPI_UART1_CLOCK 5
+
+
+#elif defined(__AVR_ATmega32U4__)
+
+// leonardo defs
+_IO(B); _IO(C); _IO(D); _IO(E); _IO(F);
+
+#define MAX_PIN 30
+_DEFPIN_AVR(0, 4, D); _DEFPIN_AVR(1, 8, D); _DEFPIN_AVR(2, 2, D); _DEFPIN_AVR(3, 1, D);
+_DEFPIN_AVR(4, 16, D); _DEFPIN_AVR(5, 64, C); _DEFPIN_AVR(6, 128, D); _DEFPIN_AVR(7, 64, E);
+_DEFPIN_AVR(8, 16, B); _DEFPIN_AVR(9, 32, B); _DEFPIN_AVR(10, 64, B); _DEFPIN_AVR(11, 128, B);
+_DEFPIN_AVR(12, 64, D); _DEFPIN_AVR(13, 128, C); _DEFPIN_AVR(14, 8, B); _DEFPIN_AVR(15, 2, B);
+_DEFPIN_AVR(16, 4, B); _DEFPIN_AVR(17, 1, B); _DEFPIN_AVR(18, 128, F); _DEFPIN_AVR(19, 64, F);
+_DEFPIN_AVR(20, 32, F); _DEFPIN_AVR(21, 16, F); _DEFPIN_AVR(22, 2, F); _DEFPIN_AVR(23, 1, F);
+_DEFPIN_AVR(24, 16, D); _DEFPIN_AVR(25, 128, D); _DEFPIN_AVR(26, 16, B); _DEFPIN_AVR(27, 32, B);
+_DEFPIN_AVR(28, 64, B); _DEFPIN_AVR(29, 64, D); _DEFPIN_AVR(30, 32, D);
+
+#define SPI_DATA 16
+#define SPI_CLOCK 15
+#define AVR_HARDWARE_SPI 1
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+// PD3/PD5
+#define SPI_UART1_DATA 1
+#define SPI_UART1_CLOCK 30
+
+
+#endif
+
+#endif // FASTLED_FORCE_SOFTWARE_PINS
+
+FASTLED_NAMESPACE_END
+
+#endif // __INC_FASTPIN_AVR_H
--- a/libraries/FastLED-3.2.0/platforms/avr/fastspi_avr.h
+++ b/libraries/FastLED-3.2.0/platforms/avr/fastspi_avr.h
@@ -0,0 +1,505 @@
+#ifndef __INC_FASTSPI_AVR_H
+#define __INC_FASTSPI_AVR_H
+
+FASTLED_NAMESPACE_BEGIN
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Hardware SPI support using USART registers and friends
+//
+// TODO: Complete/test implementation - right now this doesn't work
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// uno/mini/duemilanove
+#if defined(AVR_HARDWARE_SPI)
+
+#if defined(UBRR1)
+
+#ifndef UCPHA1
+#define UCPHA1 1
+#endif
+
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+class AVRUSART1SPIOutput {
+	Selectable *m_pSelect;
+
+public:
+	AVRUSART1SPIOutput() { m_pSelect = NULL; }
+	AVRUSART1SPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
+	void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
+
+	void init() {
+		UBRR1 = 0;
+
+		/* Set MSPI mode of operation and SPI data mode 0. */
+		UCSR1C = (1<<UMSEL11)|(1<<UMSEL10)|(0<<UCPHA1)|(0<<UCPOL1);
+		/* Enable receiver and transmitter. */
+		UCSR1B = (1<<RXEN1)|(1<<TXEN1);
+
+		FastPin<_CLOCK_PIN>::setOutput();
+		FastPin<_DATA_PIN>::setOutput();
+
+		// must be done last, see page 206
+		setSPIRate();
+	}
+
+	void setSPIRate() {
+		if(_SPI_CLOCK_DIVIDER > 2) {
+			UBRR1 = (_SPI_CLOCK_DIVIDER/2)-1;
+		} else {
+			UBRR1 = 0;
+		}
+	}
+
+
+	static void stop() {
+		// TODO: stop the uart spi output
+	}
+
+	static bool shouldWait(bool wait = false) __attribute__((always_inline)) { // returns the flag stored by the previous call, then stores 'wait' as the new flag
+		static bool sWait=false; // persists between calls; the write*() helpers set it to true after loading UDR1
+		if(sWait) {
+			sWait = wait; return true;
+		} else {
+			sWait = wait; return false;
+		}
+		// return true;
+	}
+	static void wait() __attribute__((always_inline)) {
+		if(shouldWait()) {
+			while(!(UCSR1A & (1<<UDRE1)));
+		}
+	}
+	static void waitFully() __attribute__((always_inline)) { wait(); }
+
+	static void writeWord(uint16_t w) __attribute__((always_inline)) { writeByte(w>>8); writeByte(w&0xFF); }
+
+	static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); UDR1=b;  shouldWait(true); }
+	static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { UDR1=b; shouldWait(true); wait(); }
+	static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { UDR1=b; shouldWait(true); }
+
+
+	template <uint8_t BIT> inline static void writeBit(uint8_t b) { // bit-bang bit number BIT of b: set the data pin, then pulse the clock pin
+		if(b & (1 << BIT)) { // bitwise test of the selected bit; a logical '&&' here is true for any non-zero b
+			FastPin<_DATA_PIN>::hi();
+		} else {
+			FastPin<_DATA_PIN>::lo();
+		}
+
+		FastPin<_CLOCK_PIN>::hi(); // clock pin high, then low again
+		FastPin<_CLOCK_PIN>::lo();
+	}
+
+	void enable_pins() { }
+	void disable_pins() { }
+
+	void select() {
+		if(m_pSelect != NULL) {
+			m_pSelect->select();
+		}
+		enable_pins();
+		setSPIRate();
+	}
+
+	void release() {
+		if(m_pSelect != NULL) {
+			m_pSelect->release();
+		}
+		disable_pins();
+	}
+
+	static void writeBytesValueRaw(uint8_t value, int len) {
+		while(len--) {
+			writeByte(value);
+		}
+	}
+
+	void writeBytesValue(uint8_t value, int len) {
+		//setSPIRate();
+		select();
+		while(len--) {
+			writeByte(value);
+		}
+		release();
+	}
+
+	// Write a block of n uint8_ts out
+	template <class D> void writeBytes(register uint8_t *data, int len) {
+		//setSPIRate();
+		uint8_t *end = data + len;
+		select();
+		while(data != end) {
+			// a slight touch of delay here helps optimize the timing of the status register check loop (not used on ARM)
+			writeByte(D::adjust(*data++)); delaycycles<3>();
+		}
+		release();
+	}
+
+	void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+	// Write every pixel in the PixelController out as three bytes (loadAndScale0/1/2), each passed through D::adjust().
+	// When FLAGS has FLAG_START_BIT set, a single 1 bit is bit-banged ahead of each pixel's three bytes.
+	template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
+		int len = pixels.mLen;
+
+		select();
+		while(pixels.has(1)) {
+			if(FLAGS & FLAG_START_BIT) {
+				writeBit<0>(1);
+				writeBytePostWait(D::adjust(pixels.loadAndScale0()));
+				writeBytePostWait(D::adjust(pixels.loadAndScale1()));
+				writeBytePostWait(D::adjust(pixels.loadAndScale2()));
+			} else {
+				writeByte(D::adjust(pixels.loadAndScale0()));
+				writeByte(D::adjust(pixels.loadAndScale1()));
+				writeByte(D::adjust(pixels.loadAndScale2()));
+			}
+
+			pixels.advanceData();
+			pixels.stepDithering();
+		}
+		D::postBlock(len);
+		waitFully(); // wait on the last queued byte before releasing, as the USART0 and hardware SPI outputs do
+		release();
+	}
+};
+#endif
+
+#if defined(UBRR0)
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+class AVRUSART0SPIOutput {
+	Selectable *m_pSelect;
+
+public:
+	AVRUSART0SPIOutput() { m_pSelect = NULL; }
+	AVRUSART0SPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
+	void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
+
+	void init() {
+		UBRR0 = 0;
+
+		/* Set MSPI mode of operation and SPI data mode 0. */
+		UCSR0C = (1<<UMSEL01)|(1<<UMSEL00)/*|(0<<UCPHA0)*/|(0<<UCPOL0);
+		/* Enable receiver and transmitter. */
+		UCSR0B = (1<<RXEN0)|(1<<TXEN0);
+
+		FastPin<_CLOCK_PIN>::setOutput();
+		FastPin<_DATA_PIN>::setOutput();
+
+
+		// must be done last, see page 206
+		setSPIRate();
+	}
+
+	void setSPIRate() {
+		if(_SPI_CLOCK_DIVIDER > 2) {
+			UBRR0 = (_SPI_CLOCK_DIVIDER/2)-1;
+		} else {
+			UBRR0 = 0;
+		}
+	}
+
+	static void stop() {
+		// TODO: stop the uart spi output
+	}
+
+	static bool shouldWait(bool wait = false) __attribute__((always_inline)) { // returns the flag stored by the previous call, then stores 'wait' as the new flag
+		static bool sWait=false; // persists between calls; the write*() helpers set it to true after loading UDR0
+		if(sWait) {
+			sWait = wait; return true;
+		} else {
+			sWait = wait; return false;
+		}
+		// return true;
+	}
+	static void wait() __attribute__((always_inline)) {
+		if(shouldWait()) {
+			while(!(UCSR0A & (1<<UDRE0)));
+		}
+	}
+	static void waitFully() __attribute__((always_inline)) { wait(); }
+
+	static void writeWord(uint16_t w) __attribute__((always_inline)) { writeByte(w>>8); writeByte(w&0xFF); }
+
+	static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); UDR0=b;  shouldWait(true); }
+	static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { UDR0=b; shouldWait(true); wait(); }
+	static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { UDR0=b; shouldWait(true); }
+
+
+	template <uint8_t BIT> inline static void writeBit(uint8_t b) { // bit-bang bit number BIT of b: set the data pin, then pulse the clock pin
+		if(b & (1 << BIT)) { // bitwise test of the selected bit; a logical '&&' here is true for any non-zero b
+			FastPin<_DATA_PIN>::hi();
+		} else {
+			FastPin<_DATA_PIN>::lo();
+		}
+
+		FastPin<_CLOCK_PIN>::hi(); // clock pin high, then low again
+		FastPin<_CLOCK_PIN>::lo();
+	}
+
+	void enable_pins() { }
+	void disable_pins() { }
+
+	void select() {
+		if(m_pSelect != NULL) {
+			m_pSelect->select();
+		}
+		enable_pins();
+		setSPIRate();
+	}
+
+		void release() {
+			if(m_pSelect != NULL) {
+				m_pSelect->release();
+			}
+			disable_pins();
+		}
+
+	static void writeBytesValueRaw(uint8_t value, int len) {
+		while(len--) {
+			writeByte(value);
+		}
+	}
+
+	void writeBytesValue(uint8_t value, int len) {
+		//setSPIRate();
+		select();
+		while(len--) {
+			writeByte(value);
+		}
+		release();
+	}
+
+	// Write a block of n uint8_ts out
+	template <class D> void writeBytes(register uint8_t *data, int len) {
+		//setSPIRate();
+		uint8_t *end = data + len;
+		select();
+		while(data != end) {
+			// a slight touch of delay here helps optimize the timing of the status register check loop (not used on ARM)
+			writeByte(D::adjust(*data++)); delaycycles<3>();
+		}
+		release();
+	}
+
+	void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+	// Write every pixel in the PixelController out as three bytes (loadAndScale0/1/2), each passed through D::adjust().
+	// When FLAGS has FLAG_START_BIT set, a single 1 bit is bit-banged ahead of each pixel's three bytes.
+	template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
+		//setSPIRate();
+		int len = pixels.mLen;
+
+		select();
+		while(pixels.has(1)) {
+			if(FLAGS & FLAG_START_BIT) {
+				writeBit<0>(1);
+				writeBytePostWait(D::adjust(pixels.loadAndScale0()));
+				writeBytePostWait(D::adjust(pixels.loadAndScale1()));
+				writeBytePostWait(D::adjust(pixels.loadAndScale2()));
+			} else {
+				writeByte(D::adjust(pixels.loadAndScale0()));
+				writeByte(D::adjust(pixels.loadAndScale1()));
+				writeByte(D::adjust(pixels.loadAndScale2()));
+			}
+
+			pixels.advanceData();
+			pixels.stepDithering();
+		}
+		D::postBlock(len);
+		waitFully();
+		release();
+	}
+};
+
+#endif
+
+
+#if defined(SPSR)
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Hardware SPI support using SPDR registers and friends
+//
+// Technically speaking, this uses the AVR SPI registers.  This will work on the Teensy 3.0 because Paul made a set of compatibility
+// classes that map the AVR SPI registers to ARM's, however this caps the performance of output.
+//
+// TODO: implement ARMHardwareSPIOutput
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+class AVRHardwareSPIOutput {
+	Selectable *m_pSelect;
+	bool mWait;
+public:
+	AVRHardwareSPIOutput() { m_pSelect = NULL; mWait = false;}
+	AVRHardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; mWait = false; } // set mWait here as well, matching the default constructor
+	void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
+
+	void setSPIRate() {
+		SPCR &= ~ ( (1<<SPR1) | (1<<SPR0) ); 	// clear out the prescalar bits
+
+	    bool b2x = false;
+
+	    if(_SPI_CLOCK_DIVIDER >= 128) { SPCR |= (1<<SPR1); SPCR |= (1<<SPR0); }
+	    else if(_SPI_CLOCK_DIVIDER >= 64) { SPCR |= (1<<SPR1);}
+	    else if(_SPI_CLOCK_DIVIDER >= 32) { SPCR |= (1<<SPR1); b2x = true;  }
+	    else if(_SPI_CLOCK_DIVIDER >= 16) { SPCR |= (1<<SPR0); }
+	    else if(_SPI_CLOCK_DIVIDER >= 8) { SPCR |= (1<<SPR0); b2x = true; }
+	    else if(_SPI_CLOCK_DIVIDER >= 4) { /* do nothing - default rate */ }
+	    else { b2x = true; }
+
+	    if(b2x) { SPSR |= (1<<SPI2X); }
+	    else { SPSR &= ~ (1<<SPI2X); }
+	}
+
+	void init() {
+		volatile uint8_t clr;
+
+		// set the pins to output
+		FastPin<_DATA_PIN>::setOutput();
+		FastPin<_CLOCK_PIN>::setOutput();
+#ifdef SPI_SELECT
+		// Make sure the slave select line is set to output, or arduino will block us
+		FastPin<SPI_SELECT>::setOutput();
+		FastPin<SPI_SELECT>::lo();
+#endif
+
+		SPCR |= ((1<<SPE) | (1<<MSTR) ); 		// enable SPI as master
+		SPCR &= ~ ( (1<<SPR1) | (1<<SPR0) ); 	// clear out the prescalar bits
+
+		clr = SPSR; // clear SPI status register
+		clr = SPDR; // clear SPI data register
+		clr;
+
+	    bool b2x = false;
+
+	    if(_SPI_CLOCK_DIVIDER >= 128) { SPCR |= (1<<SPR1); SPCR |= (1<<SPR0); }
+	    else if(_SPI_CLOCK_DIVIDER >= 64) { SPCR |= (1<<SPR1);}
+	    else if(_SPI_CLOCK_DIVIDER >= 32) { SPCR |= (1<<SPR1); b2x = true;  }
+	    else if(_SPI_CLOCK_DIVIDER >= 16) { SPCR |= (1<<SPR0); }
+	    else if(_SPI_CLOCK_DIVIDER >= 8) { SPCR |= (1<<SPR0); b2x = true; }
+	    else if(_SPI_CLOCK_DIVIDER >= 4) { /* do nothing - default rate */ }
+	    else { b2x = true; }
+
+	    if(b2x) { SPSR |= (1<<SPI2X); }
+	    else { SPSR &= ~ (1<<SPI2X); }
+
+	    SPDR=0;
+	    shouldWait(false);
+			release();
+		}
+
+	static bool shouldWait(bool wait = false) __attribute__((always_inline)) { // returns the flag stored by the previous call, then stores 'wait' as the new flag
+		static bool sWait=false; // persists between calls; set to true after SPDR is loaded, cleared by init() and writeBit()
+		if(sWait) { sWait = wait; return true; } else { sWait = wait; return false; }
+		// return true;
+	}
+	static void wait() __attribute__((always_inline)) { if(shouldWait()) { while(!(SPSR & (1<<SPIF))); } }
+	static void waitFully() __attribute__((always_inline)) { wait(); }
+
+	static void writeWord(uint16_t w) __attribute__((always_inline)) { writeByte(w>>8); writeByte(w&0xFF); }
+
+	static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); SPDR=b;  shouldWait(true); }
+	static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { SPDR=b; shouldWait(true); wait(); }
+	static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { SPDR=b; shouldWait(true); }
+
+	template <uint8_t BIT> inline static void writeBit(uint8_t b) {
+		SPCR &= ~(1 << SPE);
+		if(b & (1 << BIT)) {
+			FastPin<_DATA_PIN>::hi();
+		} else {
+			FastPin<_DATA_PIN>::lo();
+		}
+
+		FastPin<_CLOCK_PIN>::hi();
+		FastPin<_CLOCK_PIN>::lo();
+		SPCR |= 1 << SPE;
+		shouldWait(false);
+	}
+
+	void enable_pins() {
+		SPCR |= ((1<<SPE) | (1<<MSTR) ); 		// enable SPI as master
+	}
+
+	void disable_pins() {
+		SPCR &= ~(((1<<SPE) | (1<<MSTR) )); // disable SPI
+	}
+
+	void select() {
+		if(m_pSelect != NULL) { m_pSelect->select(); }
+		enable_pins();
+		setSPIRate();
+	}
+
+	void release() {
+		if(m_pSelect != NULL) { m_pSelect->release(); }
+		disable_pins();
+	}
+
+	static void writeBytesValueRaw(uint8_t value, int len) {
+		while(len--) { writeByte(value); }
+	}
+
+	void writeBytesValue(uint8_t value, int len) {
+		//setSPIRate();
+		select();
+		while(len--) {
+			writeByte(value);
+		}
+		release();
+	}
+
+	// Write a block of n uint8_ts out
+	template <class D> void writeBytes(register uint8_t *data, int len) {
+		//setSPIRate();
+		uint8_t *end = data + len;
+		select();
+		while(data != end) {
+			// a slight touch of delay here helps optimize the timing of the status register check loop (not used on ARM)
+			writeByte(D::adjust(*data++)); delaycycles<3>();
+		}
+		release();
+	}
+
+	void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+	// Write every pixel in the PixelController out as three bytes (loadAndScale0/1/2), each passed through D::adjust().
+	// When FLAGS has FLAG_START_BIT set, a single 1 bit is bit-banged ahead of each pixel's three bytes.
+	template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
+		//setSPIRate();
+		int len = pixels.mLen;
+
+		select();
+		while(pixels.has(1)) {
+			if(FLAGS & FLAG_START_BIT) {
+				writeBit<0>(1);
+				writeBytePostWait(D::adjust(pixels.loadAndScale0()));
+				writeBytePostWait(D::adjust(pixels.loadAndScale1()));
+				writeBytePostWait(D::adjust(pixels.loadAndScale2()));
+			} else {
+				writeByte(D::adjust(pixels.loadAndScale0()));
+				writeByte(D::adjust(pixels.loadAndScale1()));
+				writeByte(D::adjust(pixels.loadAndScale2()));
+			}
+
+			pixels.advanceData();
+			pixels.stepDithering();
+		}
+		D::postBlock(len);
+		waitFully();
+		release();
+	}
+};
+#endif
+
+#else
+// #define FASTLED_FORCE_SOFTWARE_SPI
+#endif
+
+FASTLED_NAMESPACE_END;
+
+
+#endif
--- a/libraries/FastLED-3.2.0/platforms/avr/led_sysdefs_avr.h
+++ b/libraries/FastLED-3.2.0/platforms/avr/led_sysdefs_avr.h
@@ -0,0 +1,67 @@
+#ifndef __INC_LED_SYSDEFS_AVR_H
+#define __INC_LED_SYSDEFS_AVR_H
+
+#define FASTLED_AVR
+
+#ifndef INTERRUPT_THRESHOLD
+#define INTERRUPT_THRESHOLD 2
+#endif
+
+#define FASTLED_SPI_BYTE_ONLY
+
+#include <avr/io.h>
+#include <avr/interrupt.h> // for cli/se definitions
+
+// Define the register types
+typedef volatile       uint8_t RoReg; /**< Read only 8-bit register type (declared as volatile uint8_t, without const) */
+typedef volatile       uint8_t RwReg; /**< Read-Write 8-bit register type (volatile uint8_t) */
+
+
+// Default to disallowing interrupts (may want to gate this on teensy2 vs. other arm platforms, since the
+// teensy2 has a good, fast millis interrupt implementation)
+#ifndef FASTLED_ALLOW_INTERRUPTS
+#define FASTLED_ALLOW_INTERRUPTS 0
+#endif
+
+#if FASTLED_ALLOW_INTERRUPTS == 1
+#define FASTLED_ACCURATE_CLOCK
+#endif
+
+
+// Default to using PROGMEM here
+#ifndef FASTLED_USE_PROGMEM
+#define FASTLED_USE_PROGMEM 1
+#endif
+
+#if defined(ARDUINO_AVR_DIGISPARK) || defined(ARDUINO_AVR_DIGISPARKPRO)
+#ifndef NO_CORRECTION
+#define NO_CORRECTION 1
+#endif
+#endif
+
+extern "C" {
+#  if defined(CORE_TEENSY) || defined(TEENSYDUINO)
+extern volatile unsigned long timer0_millis_count;
+#    define MS_COUNTER timer0_millis_count
+#  elif defined(ATTINY_CORE)
+extern volatile unsigned long millis_timer_millis;
+#    define MS_COUNTER millis_timer_millis
+#  else
+extern volatile unsigned long timer0_millis;
+#    define MS_COUNTER timer0_millis
+#  endif
+};
+
+// special defs for the tiny environments
+#if defined(__AVR_ATmega32U2__) || defined(__AVR_ATmega16U2__) || defined(__AVR_ATmega8U2__) || defined(__AVR_AT90USB162__) || defined(__AVR_ATtiny24__) || defined(__AVR_ATtiny44__) || defined(__AVR_ATtiny84__) || defined(__AVR_ATtiny25__) || defined(__AVR_ATtiny45__) || defined(__AVR_ATtiny85__) || defined(__AVR_ATtiny167__) || defined(__AVR_ATtiny87__) || defined(__AVR_ATtinyX41__)
+#define LIB8_ATTINY 1
+#define FASTLED_NEEDS_YIELD
+#endif
+
+#if defined(ARDUINO) && (ARDUINO > 150) && !defined(IS_BEAN) && !defined (ARDUINO_AVR_DIGISPARK) && !defined (LIB8_TINY)
+// don't need YIELD defined by the library 
+#else 
+#define FASTLED_NEEDS_YIELD
+extern "C" void yield();
+#endif
+#endif
--- a/libraries/FastLED-3.2.0/platforms/esp/32/clockless_block_esp32.h
+++ b/libraries/FastLED-3.2.0/platforms/esp/32/clockless_block_esp32.h
@@ -0,0 +1,168 @@
+#ifndef __INC_CLOCKLESS_BLOCK_ESP8266_H
+#define __INC_CLOCKLESS_BLOCK_ESP8266_H
+
+#define FASTLED_HAS_BLOCKLESS 1
+
+#define PORT_MASK (((1<<LANES)-1) & 0x0000FFFFL)
+#define MIN(X,Y) (((X)<(Y)) ? (X):(Y))
+#define USED_LANES (MIN(LANES,4))
+#define REAL_FIRST_PIN 12
+#define LAST_PIN (12 + USED_LANES - 1)
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+extern uint32_t _frame_cnt;
+extern uint32_t _retry_cnt;
+#endif
+
+template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, PORT_MASK> {
+    typedef typename FastPin<FIRST_PIN>::port_ptr_t data_ptr_t;
+    typedef typename FastPin<FIRST_PIN>::port_t data_t;
+
+    data_t mPinMask;
+    data_ptr_t mPort;
+    CMinWait<WAIT_TIME> mWait;
+public:
+    virtual int size() { return CLEDController::size() * LANES; }
+
+    virtual void showPixels(PixelController<RGB_ORDER, LANES, PORT_MASK> & pixels) { // send one frame, retrying while showRGBInternal() returns 0
+	// mWait.wait();
+	/*uint32_t clocks = */
+	int cnt=FASTLED_INTERRUPT_RETRY_COUNT;
+	while(!showRGBInternal(pixels) && cnt--) { // showRGBInternal() returns 0 when it abandons a frame; cnt bounds the retries
+	    ets_intr_unlock();
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+	    _retry_cnt++;
+#endif
+	    delayMicroseconds(WAIT_TIME * 10); // pause before the next attempt
+	    ets_intr_lock();
+	}
+	// #if FASTLED_ALLOW_INTERRUPTS == 0
+	// Adjust the timer
+	// long microsTaken = CLKS_TO_MICROS(clocks);
+	// MS_COUNTER += (1 + (microsTaken / 1000));
+	// #endif
+	
+	// mWait.mark();
+    }
+
+    template<int PIN> static void initPin() {
+	if(PIN >= REAL_FIRST_PIN && PIN <= LAST_PIN) {
+	    _ESPPIN<PIN, 1<<(PIN & 0xFF)>::setOutput();
+	    // FastPin<PIN>::setOutput();
+	}
+    }
+
+    virtual void init() {
+	// Only supported on pins 12-15
+        // SZG: This probably won't work (check pins definitions in fastpin_esp32)
+	initPin<12>(); // initPin() only configures a pin that lies within REAL_FIRST_PIN..LAST_PIN
+	initPin<13>();
+	initPin<14>();
+	initPin<15>();
+	mPinMask = FastPin<FIRST_PIN>::mask();
+	mPort = FastPin<FIRST_PIN>::port();
+	
+	// Serial.print("Mask is "); Serial.println(PORT_MASK);
+    }
+
+    virtual uint16_t getMaxRefreshRate() const { return 400; }
+    
+    typedef union {
+	uint8_t bytes[8];
+	uint16_t shorts[4];
+	uint32_t raw[2];
+    } Lines;
+
+#define ESP_ADJUST 0 // (2*(F_CPU/24000000))
+#define ESP_ADJUST2 0
+    template<int BITS,int PX> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & last_mark, register Lines & b, PixelController<RGB_ORDER, LANES, PORT_MASK> &pixels) { // , register uint32_t & b2)  {
+	Lines b2 = b;
+	transpose8x1_noinline(b.bytes,b2.bytes);
+	
+	register uint8_t d = pixels.template getd<PX>(pixels);
+	register uint8_t scale = pixels.template getscale<PX>(pixels);
+	
+	for(register uint32_t i = 0; i < USED_LANES; i++) {
+	    while((__clock_cycles() - last_mark) < (T1+T2+T3));
+	    last_mark = __clock_cycles();
+	    *FastPin<FIRST_PIN>::sport() = PORT_MASK << REAL_FIRST_PIN;
+	    
+	    uint32_t nword = ((uint32_t)(~b2.bytes[7-i]) & PORT_MASK) << REAL_FIRST_PIN;
+	    while((__clock_cycles() - last_mark) < (T1-6));
+	    *FastPin<FIRST_PIN>::cport() = nword;
+	    
+	    while((__clock_cycles() - last_mark) < (T1+T2));
+	    *FastPin<FIRST_PIN>::cport() = PORT_MASK << REAL_FIRST_PIN;
+	    
+	    b.bytes[i] = pixels.template loadAndScale<PX>(pixels,i,d,scale);
+	}
+
+	for(register uint32_t i = USED_LANES; i < 8; i++) {
+	    while((__clock_cycles() - last_mark) < (T1+T2+T3));
+	    last_mark = __clock_cycles();
+	    *FastPin<FIRST_PIN>::sport() = PORT_MASK << REAL_FIRST_PIN;
+	    
+	    uint32_t nword = ((uint32_t)(~b2.bytes[7-i]) & PORT_MASK) << REAL_FIRST_PIN;
+	    while((__clock_cycles() - last_mark) < (T1-6));
+	    *FastPin<FIRST_PIN>::cport() = nword;
+	    
+	    while((__clock_cycles() - last_mark) < (T1+T2));
+	    *FastPin<FIRST_PIN>::cport() = PORT_MASK << REAL_FIRST_PIN;
+	}
+    }
+
+    // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+    // gcc will use register Y for the this pointer.
+    static uint32_t showRGBInternal(PixelController<RGB_ORDER, LANES, PORT_MASK> &allpixels) {
+	
+	// Setup the pixel controller and load/scale the first byte
+	Lines b0;
+	
+	for(int i = 0; i < USED_LANES; i++) {
+	    b0.bytes[i] = allpixels.loadAndScale0(i);
+	}
+	allpixels.preStepFirstByteDithering();
+	
+	ets_intr_lock();
+	uint32_t _start = __clock_cycles();
+	uint32_t last_mark = _start;
+	
+	while(allpixels.has(1)) {
+	    // Write first byte, read next byte
+	    writeBits<8+XTRA0,1>(last_mark, b0, allpixels);
+	    
+	    // Write second byte, read 3rd byte
+	    writeBits<8+XTRA0,2>(last_mark, b0, allpixels);
+	    allpixels.advanceData();
+	    
+	    // Write third byte
+	    writeBits<8+XTRA0,0>(last_mark, b0, allpixels);
+	    
+#if (FASTLED_ALLOW_INTERRUPTS == 1)
+	    ets_intr_unlock();
+#endif
+	    
+	    allpixels.stepDithering();
+	    
+#if (FASTLED_ALLOW_INTERRUPTS == 1)
+	    ets_intr_lock();
+	    // if interrupts took longer than 45µs, punt on the current frame
+	    if((int32_t)(__clock_cycles()-last_mark) > 0) {
+		if((int32_t)(__clock_cycles()-last_mark) > (T1+T2+T3+((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US))) { ets_intr_unlock(); return 0; }
+	    }
+#endif
+	};
+	
+	ets_intr_unlock();
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+	_frame_cnt++;
+#endif
+	return __clock_cycles() - _start;
+    }
+};
+
+FASTLED_NAMESPACE_END
+#endif
--- a/libraries/FastLED-3.2.0/platforms/esp/32/clockless_esp32.h
+++ b/libraries/FastLED-3.2.0/platforms/esp/32/clockless_esp32.h
@@ -0,0 +1,567 @@
+/*
+ * Integration into FastLED ClocklessController
+ * Copyright (c) 2018 Samuel Z. Guyer
+ * Copyright (c) 2017 Thomas Basler
+ * Copyright (c) 2017 Martin F. Falatic
+ *
+ * ESP32 support is provided using the RMT peripheral device -- a unit
+ * on the chip designed specifically for generating (and receiving)
+ * precisely-timed digital signals. Nominally for use in infrared
+ * remote controls, we use it to generate the signals for clockless
+ * LED strips. The main advantage of using the RMT device is that,
+ * once programmed, it generates the signal asynchronously, allowing
+ * the CPU to continue executing other code. It is also not vulnerable
+ * to interrupts or other timing problems that could disrupt the signal.
+ *
+ * The implementation strategy is borrowed from previous work and from
+ * the RMT support built into the ESP32 IDF. The RMT device has 8
+ * channels, which can be programmed independently to send sequences
+ * of high/low bits. Memory for each channel is limited, however, so
+ * in order to send a long sequence of bits, we need to continuously
+ * refill the buffer until all the data is sent. To do this, we fill
+ * half the buffer and then set an interrupt to go off when that half
+ * is sent. Then we refill that half while the second half is being
+ * sent. This strategy effectively overlaps computation (by the CPU)
+ * and communication (by the RMT).
+ *
+ * Since the RMT device only has 8 channels, we need a strategy to
+ * allow more than 8 LED controllers. Our driver assigns controllers
+ * to channels on the fly, queuing up controllers as necessary until a
+ * channel is free. The main showPixels routine just fires off the
+ * first 8 controllers; the interrupt handler starts new controllers
+ * asynchronously as previous ones finish. So, for example, it can
+ * send the data for 8 controllers simultaneously, but 16 controllers
+ * would take approximately twice as much time.
+ *
+ * There is a #define that allows a program to control the total
+ * number of channels that the driver is allowed to use. It defaults
+ * to 8 -- use all the channels. Setting it to 1, for example, results
+ * in fully serial output:
+ *
+ *     #define FASTLED_RMT_MAX_CHANNELS 1
+ *
+ * OTHER RMT APPLICATIONS
+ *
+ * The default FastLED driver takes over control of the RMT interrupt
+ * handler, making it hard to use the RMT device for other
+ * (non-FastLED) purposes. You can change its behavior to use the ESP
+ * core driver instead, allowing other RMT applications to
+ * co-exist. To switch to this mode, add the following directive
+ * before you include FastLED.h:
+ *
+ *      #define FASTLED_RMT_BUILTIN_DRIVER true
+ *
+ * There may be a performance penalty for using this mode. We need to
+ * compute the RMT signal for the entire LED strip ahead of time,
+ * rather than overlapping it with communication. We also need a large
+ * buffer to hold the signal specification. Each bit of pixel data is
+ * represented by a 32-bit pulse specification, so it is a 32X blow-up
+ * in memory use.
+ *
+ *
+ * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
+ * http://insentricity.com *
+ *
+ */
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "esp32-hal.h"
+#include "esp_intr.h"
+#include "driver/gpio.h"
+#include "driver/rmt.h"
+#include "driver/periph_ctrl.h"
+#include "freertos/semphr.h"
+#include "soc/rmt_struct.h"
+
+#include "esp_log.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+// -- Read the CPU cycle counter (the "ccount" special register) and return it
+__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
+  uint32_t ticks;
+  __asm__ __volatile__ (
+    "rsr %0,ccount"
+    : "=a" (ticks)
+  );
+  return ticks;
+}
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+// -- Configuration constants
+//    DIVIDER is handed to the RMT as clk_div; MAX_PULSES is the threshold
+//    passed to rmt_set_tx_thr_intr_en.
+#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
+#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
+
+// -- Convert ESP32 cycles back into nanoseconds
+//    (integer arithmetic, so the result is truncated; F_CPU_MHZ is defined elsewhere)
+#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
+
+// -- Convert nanoseconds into RMT cycles
+//    With DIVIDER == 2, CYCLES_PER_SEC is 40000000 and NS_PER_CYCLE is 25.
+//    NS_TO_CYCLES truncates towards zero.
+#define F_CPU_RMT       (  80000000L)
+#define NS_PER_SEC      (1000000000L)
+#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
+#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
+#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
+
+// -- Convert ESP32 cycles to RMT cycles
+#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
+
+// -- Number of cycles to signal the strip to latch
+//    (50000 ns expressed in RMT cycles)
+#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
+
+// -- Core or custom driver
+//    The macro is tested in plain `if (...)` statements, so a user
+//    override must expand to an expression (e.g. true), not to nothing.
+#ifndef FASTLED_RMT_BUILTIN_DRIVER
+#define FASTLED_RMT_BUILTIN_DRIVER false
+#endif
+
+// -- Max number of controllers we can support
+//    (size of the gControllers table)
+#ifndef FASTLED_RMT_MAX_CONTROLLERS
+#define FASTLED_RMT_MAX_CONTROLLERS 32
+#endif
+
+// -- Number of RMT channels to use (up to 8)
+//    Redefine this value to 1 to force serial output
+#ifndef FASTLED_RMT_MAX_CHANNELS
+#define FASTLED_RMT_MAX_CHANNELS 8
+#endif
+
+// -- Array of all controllers
+//    Filled by ClocklessController::init(); gNumControllers is the fill count.
+static CLEDController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
+
+// -- Current set of active controllers, indexed by the RMT
+//    channel assigned to them.
+//    An entry is NULL while its channel is idle.
+static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
+
+// -- Frame bookkeeping shared by all controllers:
+//    gNumControllers: controllers registered by init()
+//    gNumStarted:     controllers whose showPixels has run in the current frame
+//    gNumDone:        controllers that have finished transmitting in the current frame
+//    gNext:           index into gControllers of the next controller to start
+//    NOTE(review): gNumDone and gNext are also written from doneOnChannel/startNext,
+//    which can run in interrupt context, and are not declared volatile.
+static int gNumControllers = 0;
+static int gNumStarted = 0;
+static int gNumDone = 0;
+static int gNext = 0;
+
+// -- Handle returned by esp_intr_alloc for the custom RMT interrupt handler
+static intr_handle_t gRMT_intr_handle = NULL;
+
+// -- Global semaphore for the whole show process
+//    Semaphore is not given until all data has been sent
+static xSemaphoreHandle gTX_sem = NULL;
+
+// -- Set by initRMT() once the one-time RMT setup has been done
+static bool gInitialized = false;
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class ClocklessController : public CPixelLEDController<RGB_ORDER>
+{
+    // -- RMT has 8 channels, numbered 0 to 7
+    //    (assigned in startOnChannel each time this controller is started)
+    rmt_channel_t  mRMT_channel;
+
+    // -- Store the GPIO pin
+    gpio_num_t     mPin;
+
+    // -- This instantiation forces a check on the pin choice
+    FastPin<DATA_PIN> mFastPin;
+
+    // -- Timing values for zero and one bits, derived from T1, T2, and T3
+    rmt_item32_t   mZero;
+    rmt_item32_t   mOne;
+
+    // -- State information for keeping track of where we are in the pixel data
+    //    mPixelData/mSize: raw byte copy of the strip made by copyPixelData
+    //    mCurByte: next byte of mPixelData to encode
+    //    mCurPulse: next RMT pulse slot to write
+    uint8_t *      mPixelData = NULL;
+    int            mSize = 0;
+    int            mCurByte = 0;
+    uint16_t       mCurPulse = 0;
+
+    // -- Buffer to hold all of the pulses. For the version that uses
+    //    the RMT driver built into the ESP core.
+    //    Must start out NULL: convertAllPixelData decides whether to
+    //    allocate by testing this pointer.
+    rmt_item32_t * mBuffer = NULL;
+    uint16_t       mBufferSize = 0;
+
+public:
+
+    // -- Per-controller setup
+    //    Precomputes the RMT items for a zero bit and a one bit, registers
+    //    this controller in the global table and records its GPIO pin.
+    void init()
+    {
+        // -- Precompute rmt items corresponding to a zero bit and a one bit
+        //    according to the timing values given in the template instantiation
+        // T1H
+        mOne.level0 = 1;
+        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
+        // T1L
+        mOne.level1 = 0;
+        mOne.duration1 = TO_RMT_CYCLES(T3);
+
+        // T0H
+        mZero.level0 = 1;
+        mZero.duration0 = TO_RMT_CYCLES(T1);
+        // T0L
+        mZero.level1 = 0;
+        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
+
+        // -- Register this controller. gControllers has a fixed size, so
+        //    never write past its end; a controller beyond the limit is
+        //    simply not registered (raise FASTLED_RMT_MAX_CONTROLLERS to
+        //    support more strips).
+        if (gNumControllers < FASTLED_RMT_MAX_CONTROLLERS) {
+            gControllers[gNumControllers] = this;
+            gNumControllers++;
+        }
+
+        mPin = gpio_num_t(DATA_PIN);
+    }
+
+    // -- Maximum refresh rate advertised by this controller (fixed at 400)
+    virtual uint16_t getMaxRefreshRate() const
+    {
+        return 400;
+    }
+
+protected:
+
+    // -- One-time global RMT setup
+    //    Configures every channel in [0, FASTLED_RMT_MAX_CHANNELS) for
+    //    transmission, creates the frame semaphore and, for the custom
+    //    driver, installs the shared interrupt handler. Later calls
+    //    return immediately because gInitialized is set.
+    void initRMT()
+    {
+        // -- Only need to do this once
+        if (gInitialized) return;
+
+        for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
+            gOnChannel[i] = NULL;
+
+            // -- RMT configuration for transmission
+            //    Value-initialize first so that every field not assigned
+            //    below is zero rather than leftover stack contents.
+            rmt_config_t rmt_tx = rmt_config_t();
+            rmt_tx.channel = rmt_channel_t(i);
+            rmt_tx.rmt_mode = RMT_MODE_TX;
+            rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
+            rmt_tx.mem_block_num = 1;
+            rmt_tx.clk_div = DIVIDER;
+            rmt_tx.tx_config.loop_en = false;
+            rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
+            rmt_tx.tx_config.carrier_en = false;
+            rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
+            rmt_tx.tx_config.idle_output_en = true;
+
+            // -- Apply the configuration
+            rmt_config(&rmt_tx);
+
+            if (FASTLED_RMT_BUILTIN_DRIVER) {
+                rmt_driver_install(rmt_channel_t(i), 0, 0);
+            } else {
+                // -- Set up the RMT to send 1/2 of the pulse buffer and then
+                //    generate an interrupt. When we get this interrupt we
+                //    fill the other half in preparation (kind of like double-buffering)
+                rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
+            }
+        }
+
+        // -- Create a semaphore to block execution until all the controllers are done
+        //    It is given once right away so that the first take in showPixels succeeds.
+        if (gTX_sem == NULL) {
+            gTX_sem = xSemaphoreCreateBinary();
+            xSemaphoreGive(gTX_sem);
+        }
+
+        if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
+            // -- Allocate the interrupt if we have not done so yet. This
+            //    interrupt handler must work for all different kinds of
+            //    strips, so it delegates to the refill function for each
+            //    specific instantiation of ClocklessController.
+            if (gRMT_intr_handle == NULL)
+                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
+        }
+
+        gInitialized = true;
+    }
+
+    // -- Show pixels
+    //    This is the main entry point for the controller. Every call
+    //    snapshots its own pixel data; the call that brings gNumStarted up
+    //    to gNumControllers starts the transmissions and blocks until the
+    //    semaphore has been given back.
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    {
+        // -- First controller of the frame: make sure everything is set
+        //    up, then claim the semaphore so the wait at the end blocks
+        if (gNumStarted == 0) {
+            initRMT();
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+        }
+
+        // -- Snapshot the pixel data in the form the selected driver
+        //    needs. A copy is required because pixels belongs to the
+        //    caller and the data has to outlive this call.
+        if (FASTLED_RMT_BUILTIN_DRIVER) {
+            convertAllPixelData(pixels);
+        } else {
+            copyPixelData(pixels);
+        }
+
+        // -- Count this strip; every call except the last one ends here
+        gNumStarted++;
+        if (gNumStarted != gNumControllers) return;
+
+        // -- The last call does all of the actual work: start one
+        //    controller on each available channel
+        gNext = 0;
+        for (int channel = 0;
+             channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers;
+             channel++) {
+            startNext(channel);
+        }
+
+        // -- Wait here while the rest of the data is sent. The interrupt
+        //    handler keeps refilling the RMT buffers until everything is
+        //    out, then gives the semaphore back; release it again at once
+        //    so the next frame can take it.
+        xSemaphoreTake(gTX_sem, portMAX_DELAY);
+        xSemaphoreGive(gTX_sem);
+
+        // -- Reset the counters for the next frame
+        gNumStarted = 0;
+        gNumDone = 0;
+        gNext = 0;
+    }
+
+    // -- Copy pixel data
+    //    Make a safe copy of the pixel data, so that the FastLED show
+    //    function can continue to the next controller while the RMT
+    //    device starts sending this data asynchronously.
+    //
+    //    mSize is kept equal to the number of valid bytes in mPixelData,
+    //    because fillHalfRMTBuffer uses it as the transmit length. If the
+    //    allocation fails, mSize is 0 and nothing is copied.
+    virtual void copyPixelData(PixelController<RGB_ORDER> & pixels)
+    {
+        // -- Make sure we have a buffer of exactly the right size
+        //    (3 bytes per pixel). Reallocate on any size change, not only
+        //    on growth, so a shortened strip does not send stale bytes.
+        int size_needed = pixels.size() * 3;
+        if (size_needed != mSize) {
+            free(mPixelData);
+            mPixelData = (size_needed > 0) ? (uint8_t *) malloc(size_needed) : NULL;
+            mSize = (mPixelData != NULL) ? size_needed : 0;
+        }
+
+        // -- Nothing to copy into (empty strip or out of memory)
+        if (mPixelData == NULL) return;
+
+        // -- Cycle through the R,G, and B values in the right order,
+        //    storing the resulting raw pixel data in the buffer.
+        //    The bound on cur keeps every write inside the allocation.
+        int cur = 0;
+        while (pixels.has(1) && cur + 3 <= mSize) {
+            mPixelData[cur++] = pixels.loadAndScale0();
+            mPixelData[cur++] = pixels.loadAndScale1();
+            mPixelData[cur++] = pixels.loadAndScale2();
+            pixels.advanceData();
+            pixels.stepDithering();
+        }
+    }
+
+    // -- Convert all pixels to RMT pulses
+    //    This function is only used when the user chooses to use the
+    //    built-in RMT driver, which needs all of the RMT pulses
+    //    up-front.
+    //
+    //    On return mBuffer holds mBufferSize pulses; mBufferSize is 0 when
+    //    the strip is empty or the allocation failed.
+    //    NOTE(review): the pulse count is stored in a uint16_t, so strips
+    //    whose pixels*24 exceeds 65535 are truncated - confirm the limit.
+    //    NOTE(review): startOnChannel still calls rmt_write_items when
+    //    mBufferSize is 0; verify how the driver reacts to that.
+    virtual void convertAllPixelData(PixelController<RGB_ORDER> & pixels)
+    {
+        // -- Compute the pulse values for the whole strip at once.
+        //    Requires a large buffer (3 bytes x 8 bits per pixel)
+        uint16_t pulses_needed = pixels.size() * 3 * 8;
+
+        // -- (Re)allocate when there is no buffer yet or the strip size
+        //    changed; a buffer sized for an earlier, shorter strip must
+        //    not be reused.
+        if (mBuffer == NULL || pulses_needed != mBufferSize) {
+            free(mBuffer);
+            mBuffer = (pulses_needed > 0)
+                ? (rmt_item32_t *) calloc( pulses_needed, sizeof(rmt_item32_t))
+                : NULL;
+        }
+        mBufferSize = (mBuffer != NULL) ? pulses_needed : 0;
+        mCurPulse = 0;
+
+        // -- Empty strip or out of memory: nothing to convert
+        if (mBuffer == NULL) return;
+
+        // -- Cycle through the R,G, and B values in the right order,
+        //    storing the pulses in the big buffer. Each pixel needs 24
+        //    slots; the bound keeps convertByte inside the allocation.
+        uint32_t byteval;
+        while (pixels.has(1) && mCurPulse + 24 <= mBufferSize) {
+            byteval = pixels.loadAndScale0();
+            convertByte(byteval);
+            byteval = pixels.loadAndScale1();
+            convertByte(byteval);
+            byteval = pixels.loadAndScale2();
+            convertByte(byteval);
+            pixels.advanceData();
+            pixels.stepDithering();
+        }
+
+        // -- Stretch the low phase of the final pulse into the latch time
+        if (mCurPulse > 0) {
+            mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+        }
+        assert(mCurPulse == mBufferSize);
+    }
+
+    // -- Append the RMT pulses for one byte to the big buffer
+    //    The low eight bits of byteval are emitted most-significant bit
+    //    first, one item (mOne or mZero) per bit, advancing mCurPulse.
+    void convertByte(uint32_t byteval)
+    {
+        for (uint32_t mask = 0x80; mask != 0; mask >>= 1) {
+            if (byteval & mask) {
+                mBuffer[mCurPulse] = mOne;
+            } else {
+                mBuffer[mCurPulse] = mZero;
+            }
+            mCurPulse++;
+        }
+    }
+
+    // -- Start up the next controller
+    //    Static so that it can be called without an instance; it picks
+    //    gControllers[gNext], starts it on the given channel and then
+    //    advances gNext. Does nothing once every controller has been started.
+    static void startNext(int channel)
+    {
+        if (gNext >= gNumControllers) return;
+
+        CLEDController * entry = gControllers[gNext];
+        static_cast<ClocklessController*>(entry)->startOnChannel(channel);
+        gNext++;
+    }
+
+    // -- Start this controller on the given channel
+    //    This function just initiates the RMT write; it does not wait
+    //    for it to finish.
+    //    Completion is reported through doneOnChannel: via the tx-end
+    //    callback for the built-in driver, via interruptHandler otherwise.
+    void startOnChannel(int channel)
+    {
+        // -- Assign this channel and configure the RMT
+        mRMT_channel = rmt_channel_t(channel);
+
+        // -- Store a reference to this controller, so we can get it
+        //    inside the interrupt handler
+        gOnChannel[channel] = this;
+
+        // -- Assign the pin to this channel
+        rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
+
+        if (FASTLED_RMT_BUILTIN_DRIVER) {
+            // -- Use the built-in RMT driver to send all the data in one shot
+            //    (last argument false: do not wait for the transmission to end)
+            rmt_register_tx_end_callback(doneOnChannel, 0);
+            rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
+        } else {
+            // -- Use our custom driver to send the data incrementally
+
+            // -- Turn on the interrupts
+            //    NOTE(review): the same call is repeated after the buffer is
+            //    filled below; one of the two looks redundant.
+            rmt_set_tx_intr_en(mRMT_channel, true);
+        
+            // -- Initialize the counters that keep track of where we are in
+            //    the pixel data.
+            mCurPulse = 0;
+            mCurByte = 0;
+
+            // -- Fill both halves of the buffer
+            fillHalfRMTBuffer();
+            fillHalfRMTBuffer();
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+            
+            // -- Start the RMT TX operation
+            rmt_tx_start(mRMT_channel, true);
+        }
+    }
+
+    // -- A controller is done
+    //    This function is called when a controller finishes writing
+    //    its data. It is called either by the custom interrupt
+    //    handler (below), or as a callback from the built-in
+    //    interrupt handler. It is static because we don't know which
+    //    controller is done until we look it up.
+    //
+    //    channel: RMT channel that finished; arg: unused.
+    //    Completions for channels outside our range, or for a channel on
+    //    which no controller is active, are ignored.
+    static void doneOnChannel(rmt_channel_t channel, void * arg)
+    {
+        // -- The built-in driver callback is registered without a channel
+        //    argument, so do not assume the channel is one of ours: check
+        //    the index before using it on gOnChannel.
+        if (int(channel) < 0 || int(channel) >= FASTLED_RMT_MAX_CHANNELS) return;
+
+        ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+        if (controller == NULL) return;
+
+        portBASE_TYPE HPTaskAwoken = 0;
+
+        // -- Turn off output on the pin
+        gpio_matrix_out(controller->mPin, 0x100, 0, 0);
+
+        gOnChannel[channel] = NULL;
+        gNumDone++;
+
+        if (gNumDone == gNumControllers) {
+            // -- If this is the last controller, signal that we are all done
+            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+            if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+        } else {
+            // -- Otherwise, if there are still controllers waiting, then
+            //    start the next one on this channel
+            if (gNext < gNumControllers)
+                startNext(channel);
+        }
+    }
+    
+    // -- Custom interrupt handler
+    //    This interrupt handler handles two cases: a controller is
+    //    done writing its data, or a controller needs to fill the
+    //    next half of the RMT buffer with data.
+    //    Only channels that currently have a controller in gOnChannel are
+    //    serviced; arg is unused.
+    static IRAM_ATTR void interruptHandler(void *arg)
+    {
+        // -- The basic structure of this code is borrowed from the
+        //    interrupt handler in esp-idf/components/driver/rmt.c
+        // -- Snapshot of the interrupt status, taken once for all channels
+        uint32_t intr_st = RMT.int_st.val;
+        uint8_t channel;
+
+        for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
+            // -- Status bit positions for this channel, as named: "transmit
+            //    done" and "next half needed" (threshold).
+            //    NOTE(review): layout taken as-is from the code; confirm
+            //    against the RMT register description.
+            int tx_done_bit = channel * 3;
+            int tx_next_bit = channel + 24;
+
+            if (gOnChannel[channel] != NULL) {
+
+                // -- More to send on this channel
+                if (intr_st & BIT(tx_next_bit)) {
+                    RMT.int_clr.val |= BIT(tx_next_bit);
+                    
+                    // -- Refill the half of the buffer that we just finished,
+                    //    allowing the other half to proceed.
+                    ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+                    controller->fillHalfRMTBuffer();
+                } else {
+                    // -- Transmission is complete on this channel
+                    //    (only examined when the threshold bit was not set in
+                    //    this snapshot)
+                    if (intr_st & BIT(tx_done_bit)) {
+                        RMT.int_clr.val |= BIT(tx_done_bit);
+                        doneOnChannel(rmt_channel_t(channel), 0);
+                    }
+                }
+            }
+        }
+    }
+
+    // -- Fill the RMT buffer
+    //    This function fills the next MAX_PULSES slots in the RMT write
+    //    buffer with pixel data. It also handles the case where the
+    //    pixel data is exhausted, so we need to fill the RMT buffer
+    //    with zeros to signal that it's done.
+    //    mCurByte and mCurPulse carry the position from one call to the next.
+    void fillHalfRMTBuffer()
+    {
+        uint32_t one_val = mOne.val;
+        uint32_t zero_val = mZero.val;
+
+        // -- Convert (up to) MAX_PULSES bits of the raw pixel data
+        //    into RMT pulses that encode the zeros and ones.
+        //    MAX_PULSES is used instead of a literal so this stays in step
+        //    with the interrupt threshold and the wrap check below.
+        int pulses = 0;
+        uint32_t byteval;
+        while (pulses < MAX_PULSES && mCurByte < mSize) {
+            // -- Get one byte
+            byteval = mPixelData[mCurByte++];
+            byteval <<= 24;
+            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (uint32_t j = 0; j < 8; j++) {
+                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+                byteval <<= 1;
+                mCurPulse++;
+            }
+            pulses += 8;
+        }
+
+        // -- When we reach the end of the pixel data, fill the rest of the
+        //    RMT buffer with 0's, which signals to the device that we're done.
+        if (mCurByte == mSize) {
+            while (pulses < MAX_PULSES) {
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+                mCurPulse++;
+                pulses++;
+            }
+        }
+
+        // -- When we have filled the back half the buffer, reset the position to the first half
+        if (mCurPulse >= MAX_PULSES*2)
+            mCurPulse = 0;
+    }
+};
+
+FASTLED_NAMESPACE_END
--- a/libraries/FastLED-3.2.0/platforms/esp/32/clockless_esp32.h.orig
+++ b/libraries/FastLED-3.2.0/platforms/esp/32/clockless_esp32.h.orig
@@ -0,0 +1,786 @@
+/*
+ * Integration into FastLED ClocklessController 2017 Thomas Basler
+ *
+ * Modifications Copyright (c) 2017 Martin F. Falatic
+ *
+ * Modifications Copyright (c) 2018 Samuel Z. Guyer
+ *
+ * ESP32 support is provided using the RMT peripheral device -- a unit
+ * on the chip designed specifically for generating (and receiving)
+ * precisely-timed digital signals. Nominally for use in infrared
+ * remote controls, we use it to generate the signals for clockless
+ * LED strips. The main advantage of using the RMT device is that,
+ * once programmed, it generates the signal asynchronously, allowing
+ * the CPU to continue executing other code. It is also not vulnerable
+ * to interrupts or other timing problems that could disrupt the signal.
+ *
+ * The implementation strategy is borrowed from previous work and from
+ * the RMT support built into the ESP32 IDF. The RMT device has 8
+ * channels, which can be programmed independently to send sequences
+ * of high/low bits. Memory for each channel is limited, however, so
+ * in order to send a long sequence of bits, we need to continuously
+ * refill the buffer until all the data is sent. To do this, we fill
+ * half the buffer and then set an interrupt to go off when that half
+ * is sent. Then we refill that half while the second half is being
+ * sent. This strategy effectively overlaps computation (by the CPU)
+ * and communication (by the RMT).
+ *
+ * Since the RMT device only has 8 channels, we need a strategy to
+ * allow more than 8 LED controllers. Our driver assigns controllers
+ * to channels on the fly, queuing up controllers as necessary until a
+ * channel is free. The main showPixels routine just fires off the
+ * first 8 controllers; the interrupt handler starts new controllers
+ * asynchronously as previous ones finish. So, for example, it can
+ * send the data for 8 controllers simultaneously, but 16 controllers
+ * would take approximately twice as much time.
+ *
+ * There is a #define that allows a program to control the total
+ * number of channels that the driver is allowed to use. It defaults
+ * to 8 -- use all the channels. Setting it to 1, for example, results
+ * in fully serial output:
+ *
+ *     #define FASTLED_RMT_MAX_CHANNELS 1
+ *
+ * OTHER RMT APPLICATIONS
+ *
+ * The default FastLED driver takes over control of the RMT interrupt
+ * handler, making it hard to use the RMT device for other
+ * (non-FastLED) purposes. You can change its behavior to use the ESP
+ * core driver instead, allowing other RMT applications to
+ * co-exist. To switch to this mode, add the following directive
+ * before you include FastLED.h:
+ *
+ *      #define FASTLED_RMT_BUILTIN_DRIVER true
+ *
+ * There may be a performance penalty for using this mode. We need to
+ * compute the RMT signal for the entire LED strip ahead of time,
+ * rather than overlapping it with communication. We also need a large
+ * buffer to hold the signal specification. Each bit of pixel data is
+ * represented by a 32-bit pulse specification, so it is a 32X blow-up
+ * in memory use.
+ *
+ *
+ * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
+ * http://insentricity.com *
+ *
+ */
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "esp32-hal.h"
+#include "esp_intr.h"
+#include "driver/gpio.h"
+#include "driver/rmt.h"
+#include "driver/periph_ctrl.h"
+#include "freertos/semphr.h"
+#include "soc/rmt_struct.h"
+
+#include "esp_log.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+// -- Read the CPU cycle counter (the "ccount" special register) and return it
+__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
+  uint32_t ticks;
+  __asm__ __volatile__ (
+    "rsr %0,ccount"
+    : "=a" (ticks)
+  );
+  return ticks;
+}
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+// -- Configuration constants
+//    DIVIDER is handed to the RMT as clk_div; MAX_PULSES is the threshold
+//    passed to rmt_set_tx_thr_intr_en.
+#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
+#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
+
+// -- Convert ESP32 cycles back into nanoseconds
+//    (integer arithmetic, so the result is truncated; F_CPU_MHZ is defined elsewhere)
+#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
+
+// -- Convert nanoseconds into RMT cycles
+//    With DIVIDER == 2, CYCLES_PER_SEC is 40000000 and NS_PER_CYCLE is 25.
+//    NS_TO_CYCLES truncates towards zero.
+#define F_CPU_RMT       (  80000000L)
+#define NS_PER_SEC      (1000000000L)
+#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
+#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
+#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
+
+// -- Convert ESP32 cycles to RMT cycles
+#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
+
+// -- Number of cycles to signal the strip to latch
+//    (50000 ns expressed in RMT cycles)
+#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
+
+// -- Core or custom driver
+//    The macro is tested in plain `if (...)` statements, so a user
+//    override must expand to an expression (e.g. true), not to nothing.
+#ifndef FASTLED_RMT_BUILTIN_DRIVER
+#define FASTLED_RMT_BUILTIN_DRIVER false
+#endif
+
+// -- Max number of controllers we can support
+//    (size of the gControllers table)
+#ifndef FASTLED_RMT_MAX_CONTROLLERS
+#define FASTLED_RMT_MAX_CONTROLLERS 32
+#endif
+
+// -- Number of RMT channels to use (up to 8)
+//    Redefine this value to 1 to force serial output
+#ifndef FASTLED_RMT_MAX_CHANNELS
+#define FASTLED_RMT_MAX_CHANNELS 8
+#endif
+
+// -- Array of all controllers
+//    Filled by ClocklessController::init(); gNumControllers is the fill count.
+static CLEDController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
+
+// -- Current set of active controllers, indexed by the RMT
+//    channel assigned to them.
+static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
+
+// -- Frame bookkeeping shared by all controllers:
+//    gNumControllers: controllers registered by init()
+//    gNumStarted:     controllers whose showPixels has run in the current frame
+//    gNumDone:        presumably controllers finished in the current frame - TODO confirm
+//    gNext:           index into gControllers of the next controller to start
+static int gNumControllers = 0;
+static int gNumStarted = 0;
+static int gNumDone = 0;
+static int gNext = 0;
+
+// -- Handle returned by esp_intr_alloc for the custom RMT interrupt handler
+static intr_handle_t gRMT_intr_handle = NULL;
+
+// -- Global semaphore for the whole show process
+//    Semaphore is not given until all data has been sent
+static xSemaphoreHandle gTX_sem = NULL;
+
+// -- Set by initRMT() once the one-time RMT setup has been done
+static bool gInitialized = false;
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class ClocklessController : public CPixelLEDController<RGB_ORDER>
+{
+    // -- RMT has 8 channels, numbered 0 to 7
+    rmt_channel_t  mRMT_channel;
+
+    // -- Store the GPIO pin
+    gpio_num_t     mPin;
+
+    // -- This instantiation forces a check on the pin choice
+    //    (merge conflict resolved in favour of the HEAD side, which is the
+    //    only side that declares this member)
+    FastPin<DATA_PIN> mFastPin;
+
+    // -- Timing values for zero and one bits, derived from T1, T2, and T3
+    rmt_item32_t   mZero;
+    rmt_item32_t   mOne;
+
+    // -- State information for keeping track of where we are in the pixel data
+    PixelController<RGB_ORDER> * mPixels = NULL;
+    void *         mPixelSpace = NULL;
+    uint8_t        mRGB_channel = 0;
+    uint16_t       mCurPulse = 0;
+
+    // -- Buffer to hold all of the pulses. For the version that uses
+    //    the RMT driver built into the ESP core.
+    //    Initialized so the pointer is never read while indeterminate.
+    rmt_item32_t * mBuffer = NULL;
+    uint16_t       mBufferSize = 0;
+
+public:
+
+    // -- Per-controller setup
+    //    Precomputes the RMT items for a zero bit and a one bit, registers
+    //    this controller in the global table and records its GPIO pin.
+    virtual void init()
+    {
+        // -- Precompute rmt items corresponding to a zero bit and a one bit
+        //    according to the timing values given in the template instantiation
+        // T1H
+        mOne.level0 = 1;
+        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
+        // T1L
+        mOne.level1 = 0;
+        mOne.duration1 = TO_RMT_CYCLES(T3);
+
+        // T0H
+        mZero.level0 = 1;
+        mZero.duration0 = TO_RMT_CYCLES(T1);
+        // T0L
+        mZero.level1 = 0;
+        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
+
+        // -- Register this controller (both sides of the former merge
+        //    conflict contained the same statements). gControllers has a
+        //    fixed size, so never write past its end.
+        if (gNumControllers < FASTLED_RMT_MAX_CONTROLLERS) {
+            gControllers[gNumControllers] = this;
+            gNumControllers++;
+        }
+
+        mPin = gpio_num_t(DATA_PIN);
+    }
+
+    // -- Maximum refresh rate advertised by this controller (fixed at 400)
+    virtual uint16_t getMaxRefreshRate() const
+    {
+        return 400;
+    }
+
+protected:
+
+    // -- One-time setup of the RMT peripheral, shared by all controllers.
+    //    Configures each of the FASTLED_RMT_MAX_CHANNELS channels for TX,
+    //    creates the gTX_sem semaphore and, when the built-in driver is not
+    //    used, allocates the shared RMT interrupt. Calls after the first
+    //    return immediately (guarded by gInitialized).
+    void initRMT()
+    {
+        // -- Only need to do this once
+        if (gInitialized) return;
+
+        for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
+            gOnChannel[i] = NULL;
+
+            // -- RMT configuration for transmission
+            rmt_config_t rmt_tx;
+            rmt_tx.channel = rmt_channel_t(i);
+            rmt_tx.rmt_mode = RMT_MODE_TX;
+            rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
+            rmt_tx.mem_block_num = 1;
+            rmt_tx.clk_div = DIVIDER;
+            rmt_tx.tx_config.loop_en = false;
+            rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
+            rmt_tx.tx_config.carrier_en = false;
+            rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
+            rmt_tx.tx_config.idle_output_en = true;
+
+            // -- Apply the configuration
+            rmt_config(&rmt_tx);
+
+            if (FASTLED_RMT_BUILTIN_DRIVER) {
+                rmt_driver_install(rmt_channel_t(i), 0, 0);
+            } else {
+                // -- Set up the RMT to send 1/2 of the pulse buffer and then
+                //    generate an interrupt. When we get this interrupt we
+                //    fill the other half in preparation (kind of like double-buffering)
+                rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
+            }
+        }
+
+        // -- Create a semaphore to block execution until all the controllers are done
+        if (gTX_sem == NULL) {
+            gTX_sem = xSemaphoreCreateBinary();
+            xSemaphoreGive(gTX_sem);
+        }
+
+        if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
+            // -- Allocate the interrupt if we have not done so yet. This
+            //    interrupt handler must work for all different kinds of
+            //    strips, so it delegates to the refill function for each
+            //    specific instantiation of ClocklessController.
+            if (gRMT_intr_handle == NULL)
+                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
+        }
+
+        gInitialized = true;
+    }
+
+    // -- Record this controller's pixel data for the current frame. The
+    //    first call of a frame (gNumStarted == 0) runs initRMT() and takes
+    //    gTX_sem; the call that brings gNumStarted up to gNumControllers
+    //    starts the transmissions, blocks until the semaphore is given back,
+    //    and then resets the per-frame counters.
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    {
+        if (gNumStarted == 0) {
+            // -- First controller: make sure everything is set up
+            initRMT();
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+        }
+
+        // -- Initialize the local state, save a pointer to the pixel
+        //    data. We need to make a copy because pixels is a local
+        //    variable in the calling function, and this data structure
+        //    needs to outlive this call to showPixels.
+
+        if (mPixels != NULL) delete mPixels;
+        mPixels = new PixelController<RGB_ORDER>(pixels);
+
+        // -- Keep track of the number of strips we've seen
+        gNumStarted++;
+
+        // -- The last call to showPixels is the one responsible for doing
+        //    all of the actual work
+        if (gNumStarted == gNumControllers) {
+            gNext = 0;
+
+            // -- First, fill all the available channels
+            int channel = 0;
+            while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
+                startNext(channel);
+                channel++;
+            }
+
+            // -- Wait here while the rest of the data is sent. The interrupt handler
+            //    will keep refilling the RMT buffers until it is all sent; then it
+            //    gives the semaphore back.
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+            xSemaphoreGive(gTX_sem);
+
+            // -- Reset the counters
+            gNumStarted = 0;
+            gNumDone = 0;
+            gNext = 0;
+        }
+    }
+
+    // -- Start up the next controller
+    //    This method is static so that it can dispatch to the appropriate
+    //    startOnChannel method of the given controller. Does nothing when
+    //    every registered controller has already been started.
+    static void startNext(int channel)
+    {
+        if (gNext < gNumControllers) {
+            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
+            pController->startOnChannel(channel);
+            gNext++;
+        }
+    }
+
+    // -- Bind this controller to the given RMT channel and begin sending.
+    //    With the built-in driver the whole strip is converted and written
+    //    in one shot; otherwise both halves of the channel's pulse buffer
+    //    are pre-filled and the transmission is started, to be kept fed by
+    //    interruptHandler().
+    virtual void startOnChannel(int channel)
+    {
+        // -- Assign this channel and configure the RMT
+        mRMT_channel = rmt_channel_t(channel);
+
+        // -- Store a reference to this controller, so we can get it
+        //    inside the interrupt handler
+        gOnChannel[channel] = this;
+
+        // -- Assign the pin to this channel
+        rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
+
+        if (FASTLED_RMT_BUILTIN_DRIVER) {
+            // -- Use the built-in RMT driver to send all the data in one shot
+            rmt_register_tx_end_callback(doneOnChannel, 0);
+            writeAllRMTItems();
+        } else {
+            // -- Use our custom driver to send the data incrementally
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+
+            // -- Initialize the counters that keep track of where we are in
+            //    the pixel data.
+            mCurPulse = 0;
+            mRGB_channel = 0;
+
+            // -- Fill both halves of the buffer
+            fillHalfRMTBuffer();
+            fillHalfRMTBuffer();
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+
+            // -- Start the RMT TX operation
+            rmt_tx_start(mRMT_channel, true);
+        }
+    }
+
+    // -- Called when a channel has finished sending (from interruptHandler,
+    //    or as the built-in driver's tx-end callback). Detaches the pin,
+    //    frees the channel slot, and either gives gTX_sem back when every
+    //    controller is done or starts the next waiting controller on this
+    //    channel. The arg parameter is not used.
+    static void doneOnChannel(rmt_channel_t channel, void * arg)
+    {
+        ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+        portBASE_TYPE HPTaskAwoken = 0;
+
+        // -- Turn off output on the pin
+        gpio_matrix_out(controller->mPin, 0x100, 0, 0);
+
+        gOnChannel[channel] = NULL;
+        gNumDone++;
+
+        if (gNumDone == gNumControllers) {
+            // -- If this is the last controller, signal that we are all done
+            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+            if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+        } else {
+            // -- Otherwise, if there are still controllers waiting, then
+            //    start the next one on this channel
+            if (gNext < gNumControllers)
+                startNext(channel);
+        }
+    }
+    
+    // -- Shared RMT interrupt handler, registered by initRMT() when the
+    //    built-in driver is not used. For every channel that currently has
+    //    a controller attached it services two events taken from the
+    //    interrupt status word: "threshold reached" (bit channel + 24),
+    //    which refills half of the pulse buffer, and "transmission done"
+    //    (bit channel * 3), which hands off to doneOnChannel(). The arg
+    //    parameter is not used.
+    static IRAM_ATTR void interruptHandler(void *arg)
+    {
+        // -- The basic structure of this code is borrowed from the
+        //    interrupt handler in esp-idf/components/driver/rmt.c
+        uint32_t intr_st = RMT.int_st.val;
+        uint8_t channel;
+
+        for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
+            int tx_done_bit = channel * 3;
+            int tx_next_bit = channel + 24;
+
+            if (gOnChannel[channel] != NULL) {
+
+                ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+
+                // -- More to send on this channel
+                if (intr_st & BIT(tx_next_bit)) {
+                    RMT.int_clr.val |= BIT(tx_next_bit);
+
+                    // -- Refill the half of the buffer that we just finished,
+                    //    allowing the other half to proceed.
+                    controller->fillHalfRMTBuffer();
+                }
+
+                // -- Transmission is complete on this channel
+                if (intr_st & BIT(tx_done_bit)) {
+                    RMT.int_clr.val |= BIT(tx_done_bit);
+                    doneOnChannel(rmt_channel_t(channel), 0);
+                }
+            }
+        }
+    }
+
+    // -- Convert the next chunk of pixel data into RMT pulse items, filling
+    //    one half (MAX_PULSES items) of this channel's pulse buffer starting
+    //    at mCurPulse. Once the pixel data runs out, the rest of the half is
+    //    padded with zero-valued items.
+    virtual void fillHalfRMTBuffer()
+    {
+        // -- Fill half of the RMT pulse buffer
+
+        //    The buffer holds 64 total pulse items, so this loop converts
+        //    as many pixels as can fit in half of the buffer (MAX_PULSES =
+        //    32 items). In our case, each pixel consists of three bytes,
+        //    each bit turns into one pulse item -- 24 items per pixel. So,
+        //    each half of the buffer can hold 1 and 1/3 of a pixel.
+
+        //    The member variable mCurPulse keeps track of which of the 64
+        //    items we are writing. During the first call to this method it
+        //    fills 0-31; in the second call it fills 32-63, and then wraps
+        //    back around to zero.
+
+        //    When we run out of pixel data, just fill the remaining items
+        //    with zero pulses.
+
+        uint16_t pulse_count = 0; // Ranges from 0-31 (half a buffer)
+        uint32_t byteval = 0;
+        uint32_t one_val = mOne.val;
+        uint32_t zero_val = mZero.val;
+        bool done_strip = false;
+
+        while (pulse_count < MAX_PULSES) {
+            if (! mPixels->has(1)) {
+                // -- Out of pixel data. The last pulse is deliberately NOT
+                //    extended here to force the strip to latch; the statement
+                //    that would do so is kept only for reference:
+                //    RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+                done_strip = true;
+                break;
+            }
+
+            // -- Cycle through the R,G, and B values in the right order
+            switch (mRGB_channel) {
+            case 0:
+                byteval = mPixels->loadAndScale0();
+                mRGB_channel = 1;
+                break;
+            case 1:
+                byteval = mPixels->loadAndScale1();
+                mRGB_channel = 2;
+                break;
+            case 2:
+                byteval = mPixels->loadAndScale2();
+                mPixels->advanceData();
+                mPixels->stepDithering();
+                mRGB_channel = 0;
+                break;
+            default:
+                break;
+            }
+
+            // -- Move the byte into the top 8 bits so the MSB test below
+            //    sees the data bits in order.
+            byteval <<= 24;
+            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (register uint32_t j = 0; j < 8; j++) {
+                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+                byteval <<= 1;
+                mCurPulse++;
+                pulse_count++;
+            }
+        }
+
+        if (done_strip) {
+            // -- And fill the remaining items with zero pulses. The zero values triggers
+            //    the tx_done interrupt.
+            while (pulse_count < MAX_PULSES) {
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+                mCurPulse++;
+                pulse_count++;
+            }
+        }
+
+        // -- When we have filled the back half the buffer, reset the position to the first half
+        if (mCurPulse >= MAX_PULSES*2)
+            mCurPulse = 0;
+    }
+
+    // -- Built-in-driver path: convert the entire strip into RMT items in
+    //    mBuffer (one item per bit, 24 per pixel), stretch the low phase of
+    //    the final item to RMT_RESET_DURATION, and hand the buffer to
+    //    rmt_write_items() without waiting for completion.
+    virtual void writeAllRMTItems()
+    {
+        // -- Compute the pulse values for the whole strip at once.
+        //    Requires a large buffer
+        mBufferSize = mPixels->size() * 3 * 8;
+
+        // TODO: need a specific number here
+        // NOTE(review): the buffer is allocated only on the first call and the
+        // calloc result is not checked -- confirm the strip size cannot grow
+        // between frames and decide how an allocation failure should be handled.
+        if (mBuffer == NULL) {
+            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
+        }
+
+        mCurPulse = 0;
+        mRGB_channel = 0;
+        uint32_t byteval = 0;
+        while (mPixels->has(1)) {
+            // -- Cycle through the R,G, and B values in the right order
+            switch (mRGB_channel) {
+            case 0:
+                byteval = mPixels->loadAndScale0();
+                mRGB_channel = 1;
+                break;
+            case 1:
+                byteval = mPixels->loadAndScale1();
+                mRGB_channel = 2;
+                break;
+            case 2:
+                byteval = mPixels->loadAndScale2();
+                mPixels->advanceData();
+                mPixels->stepDithering();
+                mRGB_channel = 0;
+                break;
+            default:
+                break;
+            }
+
+            // -- Move the byte into the top 8 bits so the MSB test below
+            //    sees the data bits in order.
+            byteval <<= 24;
+            // Shift bits out, MSB first, setting mBuffer[x] to the
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (register uint32_t j = 0; j < 8; j++) {
+                mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
+                byteval <<= 1;
+                mCurPulse++;
+            }
+        }
+
+        // -- Lengthen the low phase of the very last item.
+        // NOTE(review): this indexes mBuffer[-1] if the strip has no pixels.
+        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+        assert(mCurPulse == mBufferSize);
+
+        rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
+    }
+};
+
+FASTLED_NAMESPACE_END
--- a/libraries/FastLED-3.2.0/platforms/esp/32/fastled_esp32.h
+++ b/libraries/FastLED-3.2.0/platforms/esp/32/fastled_esp32.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#include "fastpin_esp32.h"
+#include "clockless_esp32.h"
+// #include "clockless_block_esp32.h"
--- a/libraries/FastLED-3.2.0/platforms/esp/32/fastpin_esp32.h
+++ b/libraries/FastLED-3.2.0/platforms/esp/32/fastpin_esp32.h
@@ -0,0 +1,116 @@
+#pragma once
+
+FASTLED_NAMESPACE_BEGIN
+
+// Compile-time descriptor for a single ESP32 GPIO pin. PIN chooses between
+// the GPIO.out* registers (PIN < 32) and the GPIO.out1* registers (otherwise);
+// MASK is the bit pattern written to, or tested against, the chosen register.
+template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
+
+public:
+  typedef volatile uint32_t * port_ptr_t;
+  typedef uint32_t port_t;
+
+  // Pin direction is delegated to pinMode().
+  inline static void setOutput() {
+    pinMode(PIN, OUTPUT);
+  }
+  inline static void setInput() {
+    pinMode(PIN, INPUT);
+  }
+
+  // Write MASK to the out_w1ts / out1_w1ts register for this pin.
+  inline static void hi() __attribute__ ((always_inline)) {
+    if (PIN < 32) {
+      GPIO.out_w1ts = MASK;
+    } else {
+      GPIO.out1_w1ts.val = MASK;
+    }
+  }
+
+  // Write MASK to the out_w1tc / out1_w1tc register for this pin.
+  inline static void lo() __attribute__ ((always_inline)) {
+    if (PIN < 32) {
+      GPIO.out_w1tc = MASK;
+    } else {
+      GPIO.out1_w1tc.val = MASK;
+    }
+  }
+
+  // Overwrite the whole out / out1 register with val.
+  inline static void set(register port_t val) __attribute__ ((always_inline)) {
+    if (PIN < 32) {
+      GPIO.out = val;
+    } else {
+      GPIO.out1.val = val;
+    }
+  }
+
+  // Two back-to-back toggles.
+  inline static void strobe() __attribute__ ((always_inline)) {
+    toggle();
+    toggle();
+  }
+
+  // XOR MASK into the out / out1 register (read-modify-write).
+  inline static void toggle() __attribute__ ((always_inline)) {
+    if (PIN < 32) {
+      GPIO.out ^= MASK;
+    } else {
+      GPIO.out1.val ^= MASK;
+    }
+  }
+
+  // Port-pointer overloads: the pointer argument is ignored by hi()/lo();
+  // fastset() stores val through it directly.
+  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) {
+    hi();
+  }
+  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) {
+    lo();
+  }
+  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) {
+    *port = val;
+  }
+
+  // Current out / out1 register value with MASK's bits set.
+  inline static port_t hival() __attribute__ ((always_inline)) {
+    if (PIN < 32) {
+      return GPIO.out | MASK;
+    } else {
+      return GPIO.out1.val | MASK;
+    }
+  }
+
+  // Current out / out1 register value with MASK's bits cleared.
+  inline static port_t loval() __attribute__ ((always_inline)) {
+    if (PIN < 32) {
+      return GPIO.out & ~MASK;
+    } else {
+      return GPIO.out1.val & ~MASK;
+    }
+  }
+
+  // Address of the out / out1 register for this pin.
+  inline static port_ptr_t port() __attribute__ ((always_inline)) {
+    if (PIN < 32) {
+      return &GPIO.out;
+    } else {
+      return &GPIO.out1.val;
+    }
+  }
+
+  // Address of the out_w1ts / out1_w1ts register for this pin.
+  inline static port_ptr_t sport() __attribute__ ((always_inline)) {
+    if (PIN < 32) {
+      return &GPIO.out_w1ts;
+    } else {
+      return &GPIO.out1_w1ts.val;
+    }
+  }
+
+  // Address of the out_w1tc / out1_w1tc register for this pin.
+  inline static port_ptr_t cport() __attribute__ ((always_inline)) {
+    if (PIN < 32) {
+      return &GPIO.out_w1tc;
+    } else {
+      return &GPIO.out1_w1tc.val;
+    }
+  }
+
+  // The MASK template argument.
+  inline static port_t mask() __attribute__ ((always_inline)) {
+    return MASK;
+  }
+
+  // True when any of MASK's bits is set in the out / out1 register.
+  inline static bool isset() __attribute__ ((always_inline)) {
+    if (PIN < 32) {
+      return GPIO.out & MASK;
+    } else {
+      return GPIO.out1.val & MASK;
+    }
+  }
+};
+
+// Specialize FastPin<PIN> as an _ESPPIN whose mask is bit number PIN.
+#define _DEFPIN_ESP32(PIN)  template<> class FastPin<PIN> : public _ESPPIN<PIN, ((uint32_t)1 << PIN)> {};
+// Same, for pins numbered 32 and up: the mask is bit (PIN-32), and _ESPPIN
+// directs any PIN >= 32 to the GPIO.out1* registers.
+#define _DEFPIN_32_33_ESP32(PIN) template<> class FastPin<PIN> : public _ESPPIN<PIN, ((uint32_t)1 << (PIN-32))> {};
+
+_DEFPIN_ESP32(0);
+_DEFPIN_ESP32(1); // WARNING: Using TX causes flashiness when uploading
+_DEFPIN_ESP32(2); 
+_DEFPIN_ESP32(3); // WARNING: Using RX causes flashiness when uploading
+_DEFPIN_ESP32(4);
+_DEFPIN_ESP32(5);
+
+// -- These pins are not safe to use:
+// _DEFPIN_ESP32(6,6); _DEFPIN_ESP32(7,7); _DEFPIN_ESP32(8,8); 
+// _DEFPIN_ESP32(9,9); _DEFPIN_ESP32(10,10); _DEFPIN_ESP32(11,11); 
+
+_DEFPIN_ESP32(12);
+_DEFPIN_ESP32(13);
+_DEFPIN_ESP32(14);
+_DEFPIN_ESP32(15);
+_DEFPIN_ESP32(16);
+_DEFPIN_ESP32(17);
+_DEFPIN_ESP32(18);
+_DEFPIN_ESP32(19);
+
+// No pin 20 : _DEFPIN_ESP32(20,20); 
+
+_DEFPIN_ESP32(21); // Works, but note that GPIO21 is I2C SDA
+_DEFPIN_ESP32(22); // Works, but note that GPIO22 is I2C SCL
+_DEFPIN_ESP32(23); 
+
+// No pin 24 : _DEFPIN_ESP32(24,24); 
+
+_DEFPIN_ESP32(25);
+_DEFPIN_ESP32(26);
+_DEFPIN_ESP32(27); 
+
+// No pin 28-31: _DEFPIN_ESP32(28,28); _DEFPIN_ESP32(29,29); _DEFPIN_ESP32(30,30); _DEFPIN_ESP32(31,31);
+
+// Need special handling for pins > 31
+_DEFPIN_32_33_ESP32(32); 
+_DEFPIN_32_33_ESP32(33);
+
+#define HAS_HARDWARE_PIN_SUPPORT
+
+FASTLED_NAMESPACE_END
--- a/libraries/FastLED-3.2.0/platforms/esp/32/led_sysdefs_esp32.h
+++ b/libraries/FastLED-3.2.0/platforms/esp/32/led_sysdefs_esp32.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#ifndef ESP32
+#define ESP32
+#endif
+
+#define FASTLED_ESP32
+
+// Use system millis timer
+#define FASTLED_HAS_MILLIS
+
+typedef volatile uint32_t RoReg;
+typedef volatile uint32_t RwReg;
+typedef unsigned long prog_uint32_t;
+typedef bool boolean;
+
+// Default to NOT using PROGMEM here
+#ifndef FASTLED_USE_PROGMEM
+# define FASTLED_USE_PROGMEM 0
+#endif
+
+#ifndef FASTLED_ALLOW_INTERRUPTS
+# define FASTLED_ALLOW_INTERRUPTS 1
+# define INTERRUPT_THRESHOLD 0
+#endif
+
+#define NEED_CXX_BITS
+
+// These can be overridden
+#   define FASTLED_ESP32_RAW_PIN_ORDER
+
+// #define cli() os_intr_lock();
+// #define sei() os_intr_lock();
--- a/libraries/FastLED-3.2.0/platforms/esp/8266/clockless_block_esp8266.h
+++ b/libraries/FastLED-3.2.0/platforms/esp/8266/clockless_block_esp8266.h
@@ -0,0 +1,159 @@
+#ifndef __INC_CLOCKLESS_BLOCK_ESP8266_H
+#define __INC_CLOCKLESS_BLOCK_ESP8266_H
+
+#define FASTLED_HAS_BLOCKLESS 1
+
+// Map a lane bit pattern onto GPIO bit positions: bits 0-3 move up to bits
+// 12-15 and bits 4-5 stay in place; every other bit is dropped. This lines
+// up with the pin list used by InlineBlockClocklessController::init()
+// (12, 13, 14, 15, 4, 5).
+#define FIX_BITS(bits) (((bits & 0x0fL) << 12) | (bits & 0x30))
+
+#define MIN(X,Y) (((X)<(Y)) ? (X):(Y))
+// Number of lanes actually driven: LANES, capped at 6.
+#define USED_LANES (MIN(LANES, 6))
+// One bit per used lane, starting at bit 0.
+#define PORT_MASK (((1 << USED_LANES)-1) & 0x0000FFFFL)
+// PORT_MASK translated to GPIO bit positions through FIX_BITS.
+#define PIN_MASK FIX_BITS(PORT_MASK)
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+extern uint32_t _frame_cnt;
+extern uint32_t _retry_cnt;
+#endif
+
+// Multi-lane clockless LED controller for the ESP8266: up to USED_LANES
+// strips are driven in parallel by writing PIN_MASK-style bit patterns to
+// the set/clear port registers of FastPin<FIRST_PIN>, with timing taken
+// from __clock_cycles(). T1, T2 and T3 are compared directly against
+// cycle-counter differences.
+template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, PORT_MASK> {
+	typedef typename FastPin<FIRST_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPin<FIRST_PIN>::port_t data_t;
+
+	CMinWait<WAIT_TIME> mWait;
+public:
+	// Total size across all lanes: the base controller's size() times LANES.
+	virtual int size() { return CLEDController::size() * LANES; }
+
+	// Send one frame. showRGBInternal() returns 0 when it abandoned the frame;
+	// in that case the frame is retried (bounded by FASTLED_INTERRUPT_RETRY_COUNT)
+	// after unlocking interrupts and waiting WAIT_TIME * 10 microseconds.
+	virtual void showPixels(PixelController<RGB_ORDER, LANES, PORT_MASK> & pixels) {
+		// mWait.wait();
+		/*uint32_t clocks = */
+		int cnt=FASTLED_INTERRUPT_RETRY_COUNT;
+		while(!showRGBInternal(pixels) && cnt--) {
+      os_intr_unlock();
+			#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+			_retry_cnt++;
+			#endif
+      delayMicroseconds(WAIT_TIME * 10);
+      os_intr_lock();
+    }
+		// #if FASTLED_ALLOW_INTTERUPTS == 0
+		// Adjust the timer
+		// long microsTaken = CLKS_TO_MICROS(clocks);
+		// MS_COUNTER += (1 + (microsTaken / 1000));
+		// #endif
+
+		// mWait.mark();
+	}
+
+  // Configure the given pin as an output.
+  template<int PIN> static void initPin() {
+			_ESPPIN<PIN, 1<<(PIN & 0xFF)>::setOutput();
+  }
+
+  // Configure the first USED_LANES pins of the fixed list
+  // 12, 13, 14, 15, 4, 5 as outputs.
+  virtual void init() {
+		void (* funcs[])() ={initPin<12>, initPin<13>, initPin<14>, initPin<15>, initPin<4>, initPin<5>};
+
+		for (uint8_t i = 0; i < USED_LANES; ++i) {
+			funcs[i]();
+		}
+  }
+
+  // Maximum refresh rate reported for this controller (a fixed 400).
+  virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+	// Eight bytes of lane data, viewable as bytes, 16-bit or 32-bit words.
+	typedef union {
+		uint8_t bytes[8];
+		uint16_t shorts[4];
+		uint32_t raw[2];
+	} Lines;
+
+#define ESP_ADJUST 0 // (2*(F_CPU/24000000))
+#define ESP_ADJUST2 0
+  // Clock out the byte currently held in b for every lane and, while doing
+  // so, load the next byte (selected by PX) for each used lane back into b.
+  // The bytes are first transposed into b2 by transpose8x1_noinline(); each
+  // of the 8 iterations then:
+  //   1. waits until T1+T2+T3 cycles have passed since last_mark, re-marks,
+  //      and writes PIN_MASK to the set port;
+  //   2. after T1-6 cycles writes FIX_BITS(~b2.bytes[7-i]) to the clear port;
+  //   3. after T1+T2 cycles writes PIN_MASK to the clear port.
+  // The loop bound is a literal 8; the BITS template argument is not
+  // referenced in the body.
+  template<int BITS,int PX> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & last_mark, register Lines & b, PixelController<RGB_ORDER, LANES, PORT_MASK> &pixels) { // , register uint32_t & b2)  {
+	  Lines b2 = b;
+		transpose8x1_noinline(b.bytes,b2.bytes);
+
+		register uint8_t d = pixels.template getd<PX>(pixels);
+		register uint8_t scale = pixels.template getscale<PX>(pixels);
+
+		// First USED_LANES iterations: emit one bit and also fetch the next
+		// byte for lane i.
+		for(register uint32_t i = 0; i < USED_LANES; i++) {
+			while((__clock_cycles() - last_mark) < (T1+T2+T3));
+			last_mark = __clock_cycles();
+			*FastPin<FIRST_PIN>::sport() = PIN_MASK;
+
+			uint32_t nword = (uint32_t)(~b2.bytes[7-i]);
+			while((__clock_cycles() - last_mark) < (T1-6));
+			*FastPin<FIRST_PIN>::cport() = FIX_BITS(nword);
+
+			while((__clock_cycles() - last_mark) < (T1+T2));
+			*FastPin<FIRST_PIN>::cport() = PIN_MASK;
+
+			b.bytes[i] = pixels.template loadAndScale<PX>(pixels,i,d,scale);
+		}
+
+		// Remaining iterations up to 8: emit the bit only.
+		for(register uint32_t i = USED_LANES; i < 8; i++) {
+			while((__clock_cycles() - last_mark) < (T1+T2+T3));
+			last_mark = __clock_cycles();
+			*FastPin<FIRST_PIN>::sport() = PIN_MASK;
+
+			uint32_t nword = (uint32_t)(~b2.bytes[7-i]);
+			while((__clock_cycles() - last_mark) < (T1-6));
+			*FastPin<FIRST_PIN>::cport() = FIX_BITS(nword);
+
+			while((__clock_cycles() - last_mark) < (T1+T2));
+			*FastPin<FIRST_PIN>::cport() = PIN_MASK;
+		}
+	}
+
+  // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+	// gcc will use register Y for the this pointer.
+	//
+	// Sends the whole frame with interrupts locked. Returns the number of
+	// clock cycles between start and finish, or 0 when
+	// FASTLED_ALLOW_INTERRUPTS is 1 and the gap measured after re-locking
+	// interrupts exceeded the threshold (the frame is abandoned).
+		static uint32_t ICACHE_RAM_ATTR showRGBInternal(PixelController<RGB_ORDER, LANES, PORT_MASK> &allpixels) {
+
+		// Setup the pixel controller and load/scale the first byte
+		Lines b0;
+
+		for(int i = 0; i < USED_LANES; i++) {
+			b0.bytes[i] = allpixels.loadAndScale0(i);
+		}
+		allpixels.preStepFirstByteDithering();
+
+		os_intr_lock();
+		uint32_t _start = __clock_cycles();
+		uint32_t last_mark = _start;
+
+		while(allpixels.has(1)) {
+			// Write first byte, read next byte
+			writeBits<8+XTRA0,1>(last_mark, b0, allpixels);
+
+			// Write second byte, read 3rd byte
+			writeBits<8+XTRA0,2>(last_mark, b0, allpixels);
+			allpixels.advanceData();
+
+			// Write third byte
+			writeBits<8+XTRA0,0>(last_mark, b0, allpixels);
+
+			// Briefly open an interrupt window between pixels.
+      #if (FASTLED_ALLOW_INTERRUPTS == 1)
+			os_intr_unlock();
+			#endif
+
+			allpixels.stepDithering();
+
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+      os_intr_lock();
+			// if interrupts took longer than 45µs, punt on the current frame
+			if((int32_t)(__clock_cycles()-last_mark) > 0) {
+				if((int32_t)(__clock_cycles()-last_mark) > (T1+T2+T3+((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US))) { os_intr_unlock(); return 0; }
+			}
+			#endif
+		};
+
+    os_intr_unlock();
+		#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+		_frame_cnt++;
+		#endif
+    return __clock_cycles() - _start;
+	}
+};
+
+FASTLED_NAMESPACE_END
+#endif
--- a/libraries/FastLED-3.2.0/platforms/esp/8266/clockless_esp8266.h
+++ b/libraries/FastLED-3.2.0/platforms/esp/8266/clockless_esp8266.h
@@ -0,0 +1,117 @@
+#pragma once
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+extern uint32_t _frame_cnt;
+extern uint32_t _retry_cnt;
+#endif
+
+// Info on reading cycle counter from https://github.com/kbeckmann/nodemcu-firmware/blob/ws2812-dual/app/modules/ws2812.c
+// Returns the current value of the `ccount` special register, read with `rsr`.
+__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
+  uint32_t ticks;
+  __asm__ __volatile__ ("rsr %0,ccount" : "=a" (ticks));
+  return ticks;
+}
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+// Single-pin clockless LED controller for the ESP8266. Bits are produced by
+// toggling FastPin<DATA_PIN> with timing taken from __clock_cycles(); T1, T2
+// and T3 are compared directly against cycle-counter differences.
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class ClocklessController : public CPixelLEDController<RGB_ORDER> {
+	typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPin<DATA_PIN>::port_t data_t;
+
+	data_t mPinMask;
+	data_ptr_t mPort;
+	CMinWait<WAIT_TIME> mWait;
+public:
+	// Make the data pin an output and cache its mask and port address.
+	virtual void init() {
+		FastPin<DATA_PIN>::setOutput();
+		mPinMask = FastPin<DATA_PIN>::mask();
+		mPort = FastPin<DATA_PIN>::port();
+	}
+
+	// Maximum refresh rate reported for this controller (a fixed 400).
+	virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+protected:
+
+	// Send one frame. showRGBInternal() returns 0 when it abandoned the frame;
+	// in that case the frame is retried (bounded by FASTLED_INTERRUPT_RETRY_COUNT)
+	// after unlocking interrupts and waiting WAIT_TIME microseconds.
+	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+    // mWait.wait();
+		int cnt = FASTLED_INTERRUPT_RETRY_COUNT;
+    while((showRGBInternal(pixels)==0) && cnt--) {
+      #ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+      _retry_cnt++;
+      #endif
+      os_intr_unlock();
+      delayMicroseconds(WAIT_TIME);
+      os_intr_lock();
+    }
+    // mWait.mark();
+  }
+
+#define _ESP_ADJ (0)
+#define _ESP_ADJ2 (0)
+
+	// Clock out the top BITS bits of the low byte of b, MSB first. The value is
+	// inverted up front, so a set bit in the shifted word means the data bit
+	// was 0. For each bit: wait until T1+T2+T3 cycles have passed since
+	// last_mark, re-mark and raise the pin; at T1 lower it early if the data
+	// bit was 0; at T1+T2 lower it unconditionally.
+	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & last_mark, register uint32_t b)  {
+    b = ~b; b <<= 24;
+    for(register uint32_t i = BITS; i > 0; i--) {
+      while((__clock_cycles() - last_mark) < (T1+T2+T3));
+			last_mark = __clock_cycles();
+      FastPin<DATA_PIN>::hi();
+
+      while((__clock_cycles() - last_mark) < T1);
+      if(b & 0x80000000L) { FastPin<DATA_PIN>::lo(); }
+      b <<= 1;
+
+      while((__clock_cycles() - last_mark) < (T1+T2));
+      FastPin<DATA_PIN>::lo();
+		}
+	}
+
+	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+	// gcc will use register Y for the this pointer.
+	//
+	// Sends the whole frame with interrupts locked; pixels is taken by value,
+	// so the caller's controller is not advanced. Returns the number of clock
+	// cycles between start and finish, or 0 when FASTLED_ALLOW_INTERRUPTS is 1
+	// and the gap measured after re-locking interrupts exceeded the threshold
+	// (the frame is abandoned).
+	static uint32_t ICACHE_RAM_ATTR showRGBInternal(PixelController<RGB_ORDER> pixels) {
+		// Setup the pixel controller and load/scale the first byte
+		// NOTE(review): preStepFirstByteDithering() is called both before and
+		// after loadAndScale0() -- confirm the second call is intended.
+		pixels.preStepFirstByteDithering();
+		register uint32_t b = pixels.loadAndScale0();
+    pixels.preStepFirstByteDithering();
+		os_intr_lock();
+    uint32_t start = __clock_cycles();
+		uint32_t last_mark = start;
+		while(pixels.has(1)) {
+			// Write first byte, read next byte
+			writeBits<8+XTRA0>(last_mark, b);
+			b = pixels.loadAndScale1();
+
+			// Write second byte, read 3rd byte
+			writeBits<8+XTRA0>(last_mark, b);
+			b = pixels.loadAndScale2();
+
+			// Write third byte, read 1st byte of next pixel
+			writeBits<8+XTRA0>(last_mark, b);
+      b = pixels.advanceAndLoadAndScale0();
+
+			// Briefly open an interrupt window between pixels.
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			os_intr_unlock();
+			#endif
+
+      pixels.stepDithering();
+
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			os_intr_lock();
+			// if interrupts took longer than 45µs, punt on the current frame
+			// NOTE(review): this bail-out path re-enables interrupts with sei(),
+			// while every other path in this class pairs os_intr_lock() with
+			// os_intr_unlock() -- confirm the two are interchangeable here.
+			if((int32_t)(__clock_cycles()-last_mark) > 0) {
+				if((int32_t)(__clock_cycles()-last_mark) > (T1+T2+T3+((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US))) { sei(); return 0; }
+			}
+			#endif
+		};
+
+		os_intr_unlock();
+    #ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+    _frame_cnt++;
+    #endif
+		return __clock_cycles() - start;
+	}
+};
+
+FASTLED_NAMESPACE_END
--- a/libraries/FastLED-3.2.0/platforms/esp/8266/fastled_esp8266.h
+++ b/libraries/FastLED-3.2.0/platforms/esp/8266/fastled_esp8266.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#include "fastpin_esp8266.h"
+#include "clockless_esp8266.h"
+#include "clockless_block_esp8266.h"
--- a/libraries/FastLED-3.2.0/platforms/esp/8266/fastpin_esp8266.h
+++ b/libraries/FastLED-3.2.0/platforms/esp/8266/fastpin_esp8266.h
@@ -0,0 +1,101 @@
+#pragma once
+
+FASTLED_NAMESPACE_BEGIN
+
+struct FASTLED_ESP_IO { // Three consecutive 32-bit GPIO output registers; reached on hardware through the _GPB macro below.
+  volatile uint32_t _GPO;  // full output word: set() assigns it, toggle() XORs the pin mask into it
+  volatile uint32_t _GPOS; // hi() writes the pin mask here (presumably a write-1-to-set register -- confirm against the register map)
+  volatile uint32_t _GPOC; // lo() writes the pin mask here (presumably a write-1-to-clear register -- confirm against the register map)
+};
+
+#define _GPB (*(FASTLED_ESP_IO*)(0x60000000+(0x300))) // a FASTLED_ESP_IO lvalue located at address 0x60000300
+
+
+// Compile-time accessor for a single ESP8266 GPIO; every member is static.
+//
+// PIN  - GPIO number. Values below 16 are driven through the _GPB register block
+//        (_GPO / _GPOS / _GPOC); any other value is driven through GP16O.
+// MASK - this pin's bit inside the _GPB registers.
+//
+// GP16O holds the GPIO16 output level in bit 0 (the ESP8266 Arduino core drives the
+// pin with `GP16O |= 1` / `GP16O &= ~1`). That is not the bit _DEFPIN_ESP8266
+// computes for pin 16 (1 << 16), so every GP16O access below uses GP16_BIT rather
+// than MASK. mask() still returns MASK unchanged for all pins.
+template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
+
+public:
+  typedef volatile uint32_t * port_ptr_t;
+  typedef uint32_t port_t;
+
+  // Bit of GP16O that carries the GPIO16 output level.
+  static const port_t GP16_BIT = 1;
+
+  // Pin direction is delegated to pinMode().
+  inline static void setOutput() { pinMode(PIN, OUTPUT); }
+  inline static void setInput() { pinMode(PIN, INPUT); }
+
+  // Drive the pin high / low: a single write of MASK to _GPOS / _GPOC for pins
+  // below 16, a read-modify-write of bit 0 of GP16O otherwise.
+  inline static void hi() __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPOS = MASK; } else { GP16O |= GP16_BIT; } }
+  inline static void lo() __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPOC = MASK; } else { GP16O &= ~GP16_BIT; } }
+  // Overwrite the whole output register of the pin's port with val.
+  inline static void set(register port_t val) __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPO = val; } else { GP16O = val; }}
+
+  // Two toggles in a row: the pin ends in the state it started in.
+  inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+
+  // Invert the pin's bit in its output register.
+  inline static void toggle() __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPO ^= MASK; } else { GP16O ^= GP16_BIT; } }
+
+  // Port-pointer overloads; the pointer is ignored by hi()/lo() and used as the
+  // write target by fastset().
+  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+
+  // Current value of the register returned by port(), with this pin's bit forced
+  // high (hival) or low (loval).
+  inline static port_t hival() __attribute__ ((always_inline)) { if (PIN<16) { return GPO | MASK;  } else { return GP16O | GP16_BIT; } }
+  inline static port_t loval() __attribute__ ((always_inline)) { if (PIN<16) { return GPO & ~MASK; } else { return GP16O & ~GP16_BIT; } }
+  // Address of the output register that holds this pin.
+  inline static port_ptr_t port() __attribute__ ((always_inline)) { if(PIN<16) { return &_GPB._GPO; } else { return &GP16O; } }
+  // Addresses of the _GPB set / clear registers; these are returned for every PIN.
+  inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &_GPB._GPOS; } // there is no GP16O support for this
+  inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &_GPB._GPOC; }
+  // The MASK template argument, unchanged (also for pin 16).
+  inline static port_t mask() __attribute__ ((always_inline)) { return MASK; }
+
+  // True when this pin's bit is set in its output register.
+  inline static bool isset() __attribute__ ((always_inline)) { return (PIN < 16) ? (GPO & MASK) : (GP16O & GP16_BIT); }
+};
+
+#define _DEFPIN_ESP8266(PIN, REAL_PIN) template<> class FastPin<PIN> : public _ESPPIN<REAL_PIN, (1<<(REAL_PIN & 0xFF))> {}; // Specializes FastPin<PIN> as GPIO REAL_PIN with MASK = 1 << REAL_PIN (so REAL_PIN 16 gets bit 16).
+
+
+#ifdef FASTLED_ESP8266_RAW_PIN_ORDER // raw order: FastPin<N> is GPIO N
+#define MAX_PIN 16 // highest FastPin index defined in this branch
+_DEFPIN_ESP8266(0,0); _DEFPIN_ESP8266(1,1); _DEFPIN_ESP8266(2,2); _DEFPIN_ESP8266(3,3);
+_DEFPIN_ESP8266(4,4); _DEFPIN_ESP8266(5,5);
+
+// These pins should be disabled, as they always cause WDT resets (presumably because GPIO 6-11 serve the SPI flash -- confirm)
+// _DEFPIN_ESP8266(6,6); _DEFPIN_ESP8266(7,7);
+// _DEFPIN_ESP8266(8,8); _DEFPIN_ESP8266(9,9); _DEFPIN_ESP8266(10,10); _DEFPIN_ESP8266(11,11);
+
+_DEFPIN_ESP8266(12,12); _DEFPIN_ESP8266(13,13); _DEFPIN_ESP8266(14,14); _DEFPIN_ESP8266(15,15);
+_DEFPIN_ESP8266(16,16);
+
+#define PORTA_FIRST_PIN 12 // NOTE(review): consumer not visible in this header -- presumably the block/parallel output driver; confirm
+#elif defined(FASTLED_ESP8266_D1_PIN_ORDER) // D1 order: FastPin<N> is the GPIO listed in the table below
+#define MAX_PIN 15 // highest FastPin index defined in this branch
+_DEFPIN_ESP8266(0,3);
+_DEFPIN_ESP8266(1,1);
+_DEFPIN_ESP8266(2,16);
+_DEFPIN_ESP8266(3,5);
+_DEFPIN_ESP8266(4,4);
+_DEFPIN_ESP8266(5,14);
+_DEFPIN_ESP8266(6,12);
+_DEFPIN_ESP8266(7,13);
+_DEFPIN_ESP8266(8,0);
+_DEFPIN_ESP8266(9,2);
+_DEFPIN_ESP8266(10,15);
+_DEFPIN_ESP8266(11,13); // indices 11-15 alias GPIOs already mapped above (13, 12, 14, 4, 5)
+_DEFPIN_ESP8266(12,12);
+_DEFPIN_ESP8266(13,14);
+_DEFPIN_ESP8266(14,4);
+_DEFPIN_ESP8266(15,5);
+
+#define PORTA_FIRST_PIN 12 // NOTE(review): consumer not visible in this header -- confirm
+
+#else // if defined(FASTLED_ESP8266_NODEMCU_PIN_ORDER)
+#define MAX_PIN 10 // highest FastPin index defined in this branch
+
+// This seems to be the standard Dxx pin mapping on most of the ESP boards that I've found: FastPin<N> is the GPIO behind board pin DN
+_DEFPIN_ESP8266(0,16); _DEFPIN_ESP8266(1,5); _DEFPIN_ESP8266(2,4); _DEFPIN_ESP8266(3,0);
+_DEFPIN_ESP8266(4,2); _DEFPIN_ESP8266(5,14); _DEFPIN_ESP8266(6,12); _DEFPIN_ESP8266(7,13);
+_DEFPIN_ESP8266(8,15); _DEFPIN_ESP8266(9,3); _DEFPIN_ESP8266(10,1);
+
+#define PORTA_FIRST_PIN 6 // NOTE(review): consumer not visible in this header -- confirm
+
+// The rest of the pins (GPIO 6-11) - these are generally not available, so their mappings stay disabled
+// _DEFPIN_ESP8266(11,6);
+// _DEFPIN_ESP8266(12,7); _DEFPIN_ESP8266(13,8); _DEFPIN_ESP8266(14,9); _DEFPIN_ESP8266(15,10);
+// _DEFPIN_ESP8266(16,11);
+
+#endif
+
+// NOTE(review): presumably tells the rest of FastLED that FastPin<> specializations
+// exist for this platform -- confirm against the consumer of this macro.
+#define HAS_HARDWARE_PIN_SUPPORT
+
+// Close the namespace opened by FASTLED_NAMESPACE_BEGIN at the top of this header.
+// (This must be an invocation of the macro, not a redefinition of it.)
+FASTLED_NAMESPACE_END
--- a/libraries/FastLED-3.2.0/platforms/esp/8266/led_sysdefs_esp8266.h
+++ b/libraries/FastLED-3.2.0/platforms/esp/8266/led_sysdefs_esp8266.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#ifndef ESP8266 // make sure the ESP8266 macro exists even when the build did not define it
+#define ESP8266
+#endif
+
+#define FASTLED_ESP8266 // defined unconditionally by this header
+
+// Use system millis timer
+#define FASTLED_HAS_MILLIS
+
+typedef volatile uint32_t RoReg; // RoReg and RwReg are the same type here: a volatile 32-bit word
+typedef volatile uint32_t RwReg;
+typedef uint32_t prog_uint32_t; // plain uint32_t alias
+typedef uint8_t boolean; // 8-bit boolean alias
+
+// Default to NOT using PROGMEM here (a value defined before this header wins)
+#ifndef FASTLED_USE_PROGMEM
+# define FASTLED_USE_PROGMEM 0
+#endif
+
+// Interrupts are allowed between pixels unless the sketch decided otherwise.
+#ifndef FASTLED_ALLOW_INTERRUPTS
+# define FASTLED_ALLOW_INTERRUPTS 1
+#endif
+
+// The clockless driver reads INTERRUPT_THRESHOLD whenever FASTLED_ALLOW_INTERRUPTS == 1,
+// so it needs a default of its own: a sketch that defines FASTLED_ALLOW_INTERRUPTS
+// itself must not end up without INTERRUPT_THRESHOLD.
+#ifndef INTERRUPT_THRESHOLD
+# define INTERRUPT_THRESHOLD 0
+#endif
+
+#define NEED_CXX_BITS // NOTE(review): consumer not visible in this header -- confirm what it enables
+
+// These can be overridden: when the sketch selected none of the RAW / NODEMCU / D1 pin orders, default to NODEMCU order on ARDUINO_ESP8266_NODEMCU builds and to RAW order otherwise
+#if !defined(FASTLED_ESP8266_RAW_PIN_ORDER) && !defined(FASTLED_ESP8266_NODEMCU_PIN_ORDER) && !defined(FASTLED_ESP8266_D1_PIN_ORDER)
+# ifdef ARDUINO_ESP8266_NODEMCU
+#   define FASTLED_ESP8266_NODEMCU_PIN_ORDER
+# else
+#   define FASTLED_ESP8266_RAW_PIN_ORDER
+# endif
+#endif
+
+// #define cli() os_intr_lock();
+// #define sei() os_intr_lock();   (both mappings are disabled; note that this sei() would lock rather than unlock)