aboutsummaryrefslogtreecommitdiff
path: root/circuitpython/lib/protomatter/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--circuitpython/lib/protomatter/src/Adafruit_Protomatter.cpp141
-rw-r--r--circuitpython/lib/protomatter/src/Adafruit_Protomatter.h152
-rw-r--r--circuitpython/lib/protomatter/src/arch/arch.h209
-rw-r--r--circuitpython/lib/protomatter/src/arch/esp32.h215
-rw-r--r--circuitpython/lib/protomatter/src/arch/nrf52.h216
-rw-r--r--circuitpython/lib/protomatter/src/arch/rp2040.h245
-rw-r--r--circuitpython/lib/protomatter/src/arch/samd-common.h98
-rw-r--r--circuitpython/lib/protomatter/src/arch/samd21.h150
-rw-r--r--circuitpython/lib/protomatter/src/arch/samd51.h216
-rw-r--r--circuitpython/lib/protomatter/src/arch/stm32.h146
-rw-r--r--circuitpython/lib/protomatter/src/arch/teensy4.h172
-rw-r--r--circuitpython/lib/protomatter/src/core.c1302
-rw-r--r--circuitpython/lib/protomatter/src/core.h274
13 files changed, 3536 insertions, 0 deletions
diff --git a/circuitpython/lib/protomatter/src/Adafruit_Protomatter.cpp b/circuitpython/lib/protomatter/src/Adafruit_Protomatter.cpp
new file mode 100644
index 0000000..c9c2951
--- /dev/null
+++ b/circuitpython/lib/protomatter/src/Adafruit_Protomatter.cpp
@@ -0,0 +1,141 @@
+/*!
+ * @file Adafruit_Protomatter.cpp
+ *
+ * @mainpage Adafruit Protomatter RGB LED matrix library.
+ *
+ * @section intro_sec Introduction
+ *
+ * This is documentation for Adafruit's protomatter library for HUB75-style
+ * RGB LED matrices. It is designed to work with various matrices sold by
+ * Adafruit ("HUB75" is a vague term and other similar matrices are not
+ * guaranteed to work). This file is the Arduino-specific calls; the
+ * underlying C code is more platform-neutral.
+ *
+ * Adafruit invests time and resources providing this open source code,
+ * please support Adafruit and open-source hardware by purchasing products
+ * from Adafruit!
+ *
+ * @section dependencies Dependencies
+ *
+ * This library depends on
+ * <a href="https://github.com/adafruit/Adafruit-GFX-Library">Adafruit_GFX</a>
+ * being present on your system. Please make sure you have installed the
+ * latest version before using this library.
+ *
+ * @section author Author
+ *
+ * Written by Phil "Paint Your Dragon" Burgess and Jeff Epler for
+ * Adafruit Industries, with contributions from the open source community.
+ *
+ * @section license License
+ *
+ * BSD license, all text here must be included in any redistribution.
+ *
+ */
+
+// Arduino-specific wrapper for the Protomatter C library (provides
+// constructor and so forth, builds on Adafruit_GFX). There should
+// not be any device-specific #ifdefs here. See notes in core.c and
+// arch/arch.h regarding portability.
+
+#include "Adafruit_Protomatter.h" // Also includes core.h & Adafruit_GFX.h
+
+extern Protomatter_core *_PM_protoPtr; ///< In core.c (via arch.h)
+
+// Constructor. Sizes the GFXcanvas16 base at bitWidth pixels wide by
+// (2 << addrCount) * rgbCount * |tile| pixels tall, where addrCount and
+// rgbCount are each capped at 5 and a tile value of 0 counts as 1, then
+// forwards every argument to the C-side initializer.
+Adafruit_Protomatter::Adafruit_Protomatter(uint16_t bitWidth, uint8_t bitDepth,
+                                           uint8_t rgbCount, uint8_t *rgbList,
+                                           uint8_t addrCount, uint8_t *addrList,
+                                           uint8_t clockPin, uint8_t latchPin,
+                                           uint8_t oePin, bool doubleBuffer,
+                                           int8_t tile, void *timer)
+    : GFXcanvas16(bitWidth, (2 << min((int)addrCount, 5)) *
+                                min((int)rgbCount, 5) *
+                                (tile ? abs(tile) : 1)) {
+  // The canvas holds RGB565 pixels, so 6 bits is the most any channel has.
+  bitDepth = (bitDepth > 6) ? 6 : bitDepth;
+
+  // _PM_init() (core.c) validates its input and does some minor allocation.
+  // Its status is discarded on purpose: a C++ constructor has no way to
+  // hand it back. Per the original notes, begin() is where a failed setup
+  // is expected to surface (the check itself lives in core.c).
+  (void)_PM_init(&core, bitWidth, bitDepth, rgbCount, rgbList, addrCount,
+                 addrList, clockPin, latchPin, oePin, doubleBuffer, tile,
+                 timer);
+}
+
+// Destructor: pass the core struct to _PM_deallocate() (core.c) first, and
+// only afterwards clear the global pointer that the timer callback reads.
+Adafruit_Protomatter::~Adafruit_Protomatter(void) {
+  _PM_deallocate(&this->core);
+  _PM_protoPtr = NULL;
+}
+
+// Publish this object's core through the global pointer read by the timer
+// callback, then start the matrix. The status from _PM_begin() is returned
+// unchanged.
+ProtomatterStatus Adafruit_Protomatter::begin(void) {
+  Protomatter_core *const active = &core;
+  _PM_protoPtr = active;
+  return _PM_begin(active);
+}
+
+// Push the canvas to the display: the GFXcanvas16 buffer is converted into
+// the matrix framebuffer's own internal layout by _PM_convert_565(), after
+// which _PM_swapbuffer_maybe() is called. Both helpers are in core.c,
+// where the reasoning behind the format is explained.
+void Adafruit_Protomatter::show(void) {
+  Protomatter_core *const target = &core;
+  _PM_convert_565(target, getBuffer(), WIDTH);
+  _PM_swapbuffer_maybe(target);
+}
+
+// Returns current value of this object's frame counter and resets it to
+// zero. Two calls to this, timed one second apart (or use math with other
+// intervals), can be used to get a rough frames-per-second value for
+// the matrix (since this is difficult to estimate beforehand).
+// The counter is read from this instance's own core struct, the same one
+// show(), stop() and resume() operate on, rather than from the global
+// _PM_protoPtr, which is NULL until begin() has run and may refer to a
+// different object.
+uint32_t Adafruit_Protomatter::getFrameCount(void) {
+  return _PM_getFrameCount(&core);
+}
+
+// Hue/saturation/value to packed RGB565. Derived from the HSV function in
+// Adafruit_NeoPixel.cpp (see that code for the full explanation of the
+// math), but producing a 16-bit GFX color instead of 24-bit RGB.
+uint16_t Adafruit_Protomatter::colorHSV(uint16_t hue, uint8_t sat,
+                                        uint8_t val) {
+  // Rescale the 16-bit hue onto a 0..1530 wheel (six 255-step segments).
+  const uint16_t wheel = (hue * 1530L + 32768) / 65536;
+
+  // Channels not named in a branch stay at 0. Branches are tested from the
+  // top of the wheel downward.
+  uint8_t red = 0, green = 0, blue = 0;
+  if (wheel >= 1530) { // Last 0.5 red (quicker than % operator)
+    red = 255;
+  } else if (wheel >= 1275) { // Magenta to Red-1
+    red = 255;
+    blue = 1530 - wheel; // 255 down to 1
+  } else if (wheel >= 1020) { // Blue to Magenta-1
+    red = wheel - 1020; // 0 up to 254
+    blue = 255;
+  } else if (wheel >= 765) { // Cyan to Blue-1
+    green = 1020 - wheel; // 255 down to 1
+    blue = 255;
+  } else if (wheel >= 510) { // Green to Cyan-1
+    green = 255;
+    blue = wheel - 510; // 0 up to 254
+  } else if (wheel >= 255) { // Yellow to Green-1
+    red = 510 - wheel; // 255 down to 1
+    green = 255;
+  } else { // Red to Yellow-1
+    red = 255;
+    green = wheel; // 0 up to 254
+  }
+
+  // Blend in saturation and value. The +1 offsets give 1..256 multipliers
+  // so a >>8 can stand in for a division by 255.
+  const uint32_t valueMul = 1 + val;  // 1 to 256
+  const uint16_t satMul = 1 + sat;    // 1 to 256
+  const uint8_t satFloor = 255 - sat; // 255 to 0
+
+  const uint32_t red16 = (((red * satMul) >> 8) + satFloor) * valueMul;
+  const uint32_t green16 = (((green * satMul) >> 8) + satFloor) * valueMul;
+  const uint32_t blue16 = (((blue * satMul) >> 8) + satFloor) * valueMul;
+
+  // Keep the top 5/6/5 bits of each 16-bit channel and pack as RGB565.
+  return (red16 & 0xF800) | ((green16 & 0xFC00) >> 5) | (blue16 >> 11);
+}
diff --git a/circuitpython/lib/protomatter/src/Adafruit_Protomatter.h b/circuitpython/lib/protomatter/src/Adafruit_Protomatter.h
new file mode 100644
index 0000000..a3656a3
--- /dev/null
+++ b/circuitpython/lib/protomatter/src/Adafruit_Protomatter.h
@@ -0,0 +1,152 @@
+// Arduino-specific header, accompanies Adafruit_Protomatter.cpp.
+// There should not be any device-specific #ifdefs here.
+
+#pragma once
+
+#include "core.h"
+#include <Adafruit_GFX.h>
+
+/*!
+ @brief Class representing the Arduino-facing side of the Protomatter
+ library. Subclass of Adafruit_GFX's GFXcanvas16 to allow all
+ the drawing operations.
+*/
+class Adafruit_Protomatter : public GFXcanvas16 {
+public:
+ /*!
+ @brief Adafruit_Protomatter constructor.
+ @param bitWidth Total width of RGB matrix chain, in pixels.
+ Usually a multiple of 32, though exceptions may exist.
+ @param bitDepth Color "depth" in bitplanes, determines range of
+ shades of red, green and blue. e.g. passing 4
+ bits = 16 shades ea. R,G,B = 16x16x16 = 4096
+ colors. Max is 6, since the GFX library works
+ with "565" RGB colors (6 bits green, 5 red/blue).
+ @param rgbCount Number of "sets" of RGB data pins, each set
+ containing 6 pins (2 ea. R,G,B). Typically 1,
+ indicating a single matrix (or matrix chain).
+ In theory (but not yet extensively tested),
+ multiple sets of pins can be driven in parallel,
+ up to 5 on some devices (if the hardware design
+ provides all those bits on one PORT).
+ @param rgbList A uint8_t array of pins (Arduino pin numbering),
+ 6X the prior rgbCount value, corresponding to
+ the 6 output color bits for a matrix (or chain).
+ Order is upper-half red, green, blue, lower-half
+ red, green, blue (repeat for each add'l chain).
+ All the RGB pins (plus the clock pin below on
+ some architectures) MUST be on the same PORT
+ register. It's recommended (but not required)
+ that all RGB pins (and clock depending on arch)
+ be within the same byte of a PORT (but do not
+ need to be sequential or contiguous within that
+ byte) for more efficient RAM utilization. For
+ two concurrent chains, same principle but 16-bit
+ word instead of byte.
+ @param addrCount Number of row address lines required of matrix.
+ Total pixel height is then 2 x 2^addrCount, e.g.
+ 32-pixel-tall matrices have 4 row address lines.
+ The constructor further multiplies the canvas
+ height by rgbCount and by the vertical tile count.
+ @param addrList A uint8_t array of pins (Arduino pin numbering),
+ one per row address line.
+ @param clockPin RGB clock pin (Arduino pin #).
+ @param latchPin RGB data latch pin (Arduino pin #).
+ @param oePin Output enable pin (Arduino pin #), active low.
+ @param doubleBuffer If true, two matrix buffers are allocated,
+ so changing display contents doesn't introduce
+ artifacts mid-conversion. Requires ~2X RAM.
+ @param tile If multiple matrices are chained and stacked
+ vertically (rather than or in addition to
+ horizontally), the number of vertical tiles is
+ specified here. Positive values indicate a
+ "progressive" arrangement (always left-to-right),
+ negative for a "serpentine" arrangement (alternating
+ 180 degree orientation). Horizontal tiles are implied
+ in the 'bitWidth' argument.
+ @param timer Pointer to timer peripheral or timer-related
+ struct (architecture-dependent), or NULL to
+ use a default timer ID (also arch-dependent).
+ */
+ Adafruit_Protomatter(uint16_t bitWidth, uint8_t bitDepth, uint8_t rgbCount,
+ uint8_t *rgbList, uint8_t addrCount, uint8_t *addrList,
+ uint8_t clockPin, uint8_t latchPin, uint8_t oePin,
+ bool doubleBuffer, int8_t tile = 1, void *timer = NULL);
+ ~Adafruit_Protomatter(void);
+
+ /*!
+ @brief Start a Protomatter matrix display running -- initialize
+ pins, timer and interrupt into existence.
+ @return A ProtomatterStatus status, one of:
+ PROTOMATTER_OK if everything is good.
+ PROTOMATTER_ERR_PINS if data and/or clock pins are split
+ across different PORTs.
+ PROTOMATTER_ERR_MALLOC if insufficient RAM to allocate
+ display memory.
+ PROTOMATTER_ERR_ARG if a bad value was passed to the
+ constructor.
+ */
+ ProtomatterStatus begin(void);
+
+ /*!
+ @brief Process data from GFXcanvas16 to the matrix framebuffer's
+ internal format for display.
+ */
+ void show(void);
+
+ /*!
+ @brief Disable (but do not deallocate) a Protomatter matrix.
+ */
+ void stop(void) { _PM_stop(&core); }
+
+ /*!
+ @brief Resume a previously-stopped matrix.
+ */
+ void resume(void) { _PM_resume(&core); }
+
+ /*!
+ @brief Returns current value of frame counter and resets its value
+ to zero. Two calls to this, timed one second apart (or use
+ math with other intervals), can be used to get a rough
+ frames-per-second value for the matrix (since this is
+ difficult to estimate beforehand).
+ @return Frame count since previous call to function, as a uint32_t.
+ */
+ uint32_t getFrameCount(void);
+
+ /*!
+ @brief Converts 24-bit color (8 bits red, green, blue) used in a lot
+ of existing graphics code down to the "565" color format
+ used by Adafruit_GFX. Might get further quantized by matrix if
+ using less than 6-bit depth.
+ @param red Red brightness, 0 (min) to 255 (max).
+ @param green Green brightness, 0 (min) to 255 (max).
+ @param blue Blue brightness, 0 (min) to 255 (max).
+ @return Packed 16-bit (uint16_t) color value suitable for GFX drawing
+ functions.
+ */
+ uint16_t color565(uint8_t red, uint8_t green, uint8_t blue) {
+ return ((red & 0xF8) << 8) | ((green & 0xFC) << 3) | (blue >> 3);
+ }
+
+ /*!
+ @brief Convert hue, saturation and value into a packed 16-bit RGB color
+ that can be passed to GFX drawing functions.
+ @param hue An unsigned 16-bit value, 0 to 65535, representing one full
+ loop of the color wheel, which allows 16-bit hues to "roll
+ over" while still doing the expected thing (and allowing
+ more precision than the wheel() function that was common to
+ older graphics examples).
+ @param sat Saturation, 8-bit value, 0 (min or pure grayscale) to 255
+ (max or pure hue). Default of 255 if unspecified.
+ @param val Value (brightness), 8-bit value, 0 (min / black / off) to
+ 255 (max or full brightness). Default of 255 if unspecified.
+ @return Packed 16-bit '565' RGB color. Result is linearly but not
+ perceptually correct (no gamma correction).
+ */
+ uint16_t colorHSV(uint16_t hue, uint8_t sat = 255, uint8_t val = 255);
+
+private:
+ Protomatter_core core; // Underlying C struct, handed to every _PM_*() call
+ void convert_byte(uint8_t *dest); // GFXcanvas16-to-matrix conversion, 8-bit buffer
+ void convert_word(uint16_t *dest); // Same, 16-bit buffer
+ void convert_long(uint32_t *dest); // Same, 32-bit buffer. NOTE(review): none of these three is defined in Adafruit_Protomatter.cpp (show() calls _PM_convert_565() instead) -- possibly vestigial, confirm
+};
diff --git a/circuitpython/lib/protomatter/src/arch/arch.h b/circuitpython/lib/protomatter/src/arch/arch.h
new file mode 100644
index 0000000..2e28d2f
--- /dev/null
+++ b/circuitpython/lib/protomatter/src/arch/arch.h
@@ -0,0 +1,209 @@
+/*!
+ * @file arch.h
+ *
+ * Part of Adafruit's Protomatter library for HUB75-style RGB LED matrices.
+ * This file establishes some very low-level things and includes headers
+ * specific to each supported device. This should ONLY be included by
+ * core.c, nowhere else. Ever.
+ *
+ * Adafruit invests time and resources providing this open source code,
+ * please support Adafruit and open-source hardware by purchasing
+ * products from Adafruit!
+ *
+ * Written by Phil "Paint Your Dragon" Burgess and Jeff Epler for
+ * Adafruit Industries, with contributions from the open source community.
+ *
+ * BSD license, all text here must be included in any redistribution.
+ *
+ */
+
+#pragma once
+
+#include <string.h>
+
+/*
+Common ground for architectures to support this library:
+
+- 32-bit device (e.g. ARM core, ESP32, potentially others in the future)
+- One or more 32-bit GPIO PORTs with atomic bitmask SET and CLEAR registers.
+ A TOGGLE register, if present, may improve performance but is NOT required.
+- Tolerate 8-bit or word-aligned 16-bit accesses within the 32-bit PORT
+ registers (e.g. writing just one of four bytes, rather than the whole
+ 32 bits). The library does not use any unaligned accesses (i.e. the
+ "middle word" of a 32-bit register), even if a device tolerates such.
+
+"Pin" as used in this code is always a uint8_t value, but the semantics
+of what it means may vary between Arduino and non-Arduino situations.
+In Arduino, it's the pin index one would pass to functions such as
+digitalWrite(), and doesn't necessarily correspond to physical hardware
+pins or any other arrangement. Some may have names like 'A0' that really
+just map to higher indices.
+In non-Arduino settings (CircuitPython, other languages, etc.), how a
+pin index relates to hardware is entirely implementation dependent, and
+how to get from one to the other is what must be implemented in this file.
+Quite often an environment will follow the Arduino pin designations
+(since the numbers are on a board's silkscreen) and will have an internal
+table mapping those indices to registers and bitmasks...but probably not
+an identically-named and -structured table to the Arduino code, hence the
+reason for many "else" situations in this code.
+
+Each architecture defines the following macros and/or functions (the _PM_
+prefix on each is to reduce likelihood of naming collisions...especially
+on ESP32, which has some similarly-named timer functions):
+
+GPIO-related macros/functions:
+
+_PM_portOutRegister(pin): Get address of PORT out register. Code calling
+ this can cast it to whatever type's needed.
+_PM_portSetRegister(pin): Get address of PORT set-bits register.
+_PM_portClearRegister(pin): Get address of PORT clear-bits register.
+_PM_portToggleRegister(pin): Get address of PORT toggle-bits register.
+ Not all devices support this, in which case
+ it must be left undefined.
+_PM_portBitMask(pin): Get bit mask within PORT register corresponding
+ to a pin number. When compiling for Arduino,
+ this just maps to digitalPinToBitMask(), other
+ environments will need an equivalent.
+_PM_byteOffset(pin): Get index of byte (0 to 3) within 32-bit PORT
+ corresponding to a pin number.
+_PM_wordOffset(pin): Get index of word (0 or 1) within 32-bit PORT
+ corresponding to a pin number.
+_PM_pinOutput(pin): Set a pin to output mode. In Arduino this maps
+ to pinMode(pin, OUTPUT). Other environments
+ will need an equivalent.
+_PM_pinInput(pin): Set a pin to input mode, no pullup. In Arduino
+ this maps to pinMode(pin, INPUT).
+_PM_pinHigh(pin): Set an output pin to a high or 1 state. In
+ Arduino this maps to digitalWrite(pin, HIGH).
+_PM_pinLow(pin): Set an output pin to a low or 0 state. In
+ Arduino this maps to digitalWrite(pin, LOW).
+
+Timer-related macros/functions:
+
+_PM_timerFreq: A numerical constant - the source clock rate
+ (in Hz) that's fed to the timer peripheral.
+_PM_timerInit(void*): Initialize (but do not start) timer.
+_PM_timerStart(void*,count): (Re)start timer for a given timer-tick interval.
+_PM_timerStop(void*): Stop timer, return current timer counter value.
+_PM_timerGetCount(void*): Get current timer counter value (whether timer
+ is running or stopped).
+A timer interrupt service routine is also required, syntax for which varies
+between architectures.
+The void* argument passed to the timer functions is some indeterminate type
+used to uniquely identify a timer peripheral within a given environment. For
+example, in the Arduino wrapper for this library, compiling for SAMD chips,
+it's just a pointer directly to a timer/counter peripheral base address. If
+an implementation needs more data associated alongside a peripheral, this
+could instead be a pointer to a struct, or an integer index.
+
+Other macros/functions:
+
+_PM_chunkSize: Matrix bitmap width (both in RAM and as issued
+ to the device) is rounded up (if necessary) to
+ a multiple of this value as a way of explicitly
+ unrolling the innermost data-stuffing loops.
+ So far all HUB75 displays I've encountered are
+ a multiple of 32 pixels wide, but in case
+ something new comes along, or if a larger
+ unroll actually decreases performance due to
+ cache size, this can be set to whatever works
+ best (any additional data is simply shifted
+ out the other end of the matrix). Default if
+ unspecified is 8 (e.g. four loop passes on a
+ 32-pixel matrix, eight if 64-pixel). Only
+ certain chunkSizes are actually implemented,
+ see core.c (avoiding GCC-specific tricks
+ that would handle arbitrary chunk sizes).
+_PM_delayMicroseconds(us): Function or macro to delay some number of
+ microseconds. For Arduino, this just maps to
+ delayMicroseconds(). Other environments will
+ need to provide their own or map to an
+ equivalent function.
+_PM_clockHoldHigh: Additional code (typically some number of NOPs)
+ needed to delay the clock fall after RGB data is
+ written to PORT. Only required on fast devices.
+ If left undefined, no delay happens.
+_PM_clockHoldLow: Additional code (e.g. NOPs) needed to delay
+ clock rise after writing RGB data to PORT.
+ No delay if left undefined.
+_PM_minMinPeriod: Minimum value for the "minPeriod" class member,
+ so bit-angle-modulation time always doubles with
+ each bitplane (else lower bits may be the same).
+_PM_allocate: Memory allocation function, should return a
+ pointer to a buffer of requested size, aligned
+ to the architecture's largest native type.
+ If not defined, malloc() is used.
+_PM_free: Corresponding deallocator for _PM_allocate().
+ If not defined, free() is used.
+*/
+
+// ENVIRONMENT-SPECIFIC DECLARATIONS ---------------------------------------
+
+#if defined(ARDUINO) // COMPILING FOR ARDUINO ------------------------------
+
+#include <Arduino.h> // Supplies pinMode(), digitalWrite(), delayMicroseconds(), digitalPinToBitMask() used below.
+
+#define _PM_delayMicroseconds(us) delayMicroseconds(us)
+#define _PM_pinOutput(pin) pinMode(pin, OUTPUT)
+#define _PM_pinInput(pin) pinMode(pin, INPUT)
+#define _PM_pinHigh(pin) digitalWrite(pin, HIGH)
+#define _PM_pinLow(pin) digitalWrite(pin, LOW)
+#define _PM_portBitMask(pin) digitalPinToBitMask(pin)
+
+#elif defined(CIRCUITPY) // COMPILING FOR CIRCUITPYTHON --------------------
+
+#include "py/mphal.h"
+#include "shared-bindings/microcontroller/Pin.h"
+
+#define _PM_delayMicroseconds(us) mp_hal_delay_us(us)
+
+// No #else here. Only the delay macro is device-neutral in CircuitPython; the
+// pin macros are declared in the arch-specific files below...unless other
+// environments provide device-neutral functions, which could go here (w/#elif).
+
+#endif // END ARDUINO / CIRCUITPYTHON --------------------------------------
+
+// ARCHITECTURE-SPECIFIC HEADERS -------------------------------------------
+
+#include "esp32.h"
+#include "nrf52.h"
+#include "rp2040.h"
+#include "samd-common.h"
+#include "samd21.h"
+#include "samd51.h"
+#include "stm32.h"
+#include "teensy4.h"
+
+// DEFAULTS IF NOT DEFINED ABOVE -------------------------------------------
+
+#if !defined(_PM_chunkSize)
+#define _PM_chunkSize 8 ///< Default unroll size of data-stuffing loop (bitmap width is rounded up to a multiple of this)
+#endif
+
+#if !defined(_PM_clockHoldHigh)
+#define _PM_clockHoldHigh ///< Default: empty, no extra cycles on clock HIGH signal
+#endif
+
+#if !defined(_PM_clockHoldLow)
+#define _PM_clockHoldLow ///< Default: empty, no extra cycles on clock LOW signal
+#endif
+
+#if !defined(_PM_minMinPeriod)
+#define _PM_minMinPeriod 100 ///< Default lower bound on the "minPeriod" timer interval for the least bit
+#endif
+
+#if !defined(_PM_allocate)
+#define _PM_allocate(x) (malloc((x))) ///< Default memory alloc call: malloc()
+#endif
+
+#if !defined(_PM_free)
+#define _PM_free(x) (free((x))) ///< Default memory free call: free(), pairs with _PM_allocate()
+#endif
+
+#if !defined(IRAM_ATTR)
+#define IRAM_ATTR ///< Neutralize ESP32-specific attribute in core.c
+#endif
+
+#if !defined(_PM_PORT_TYPE)
+#define _PM_PORT_TYPE uint32_t ///< Default PORT register size/type: 32 bits
+#endif
diff --git a/circuitpython/lib/protomatter/src/arch/esp32.h b/circuitpython/lib/protomatter/src/arch/esp32.h
new file mode 100644
index 0000000..7171f9a
--- /dev/null
+++ b/circuitpython/lib/protomatter/src/arch/esp32.h
@@ -0,0 +1,215 @@
+/*!
+ * @file esp32.h
+ *
+ * Part of Adafruit's Protomatter library for HUB75-style RGB LED matrices.
+ * This file contains ESP32-SPECIFIC CODE.
+ *
+ * Adafruit invests time and resources providing this open source code,
+ * please support Adafruit and open-source hardware by purchasing
+ * products from Adafruit!
+ *
+ * Written by Phil "Paint Your Dragon" Burgess and Jeff Epler for
+ * Adafruit Industries, with contributions from the open source community.
+ *
+ * BSD license, all text here must be included in any redistribution.
+ *
+ */
+
+#pragma once
+
+#if defined(ESP32)
+
+#include "driver/timer.h"
+
+// GPIO register lookup. When built for the ESP32-C3 every pin resolves to
+// the GPIO.out / out_w1ts / out_w1tc registers; on the other targets pins
+// below 32 use those registers and the remaining pins use the out1
+// equivalents. Arguments and expansions are fully parenthesized so that an
+// expression passed as 'pin', or an operator applied to the result, groups
+// as intended.
+#ifdef CONFIG_IDF_TARGET_ESP32C3
+#define _PM_portOutRegister(pin) ((volatile uint32_t *)&GPIO.out)
+#define _PM_portSetRegister(pin) ((volatile uint32_t *)&GPIO.out_w1ts)
+#define _PM_portClearRegister(pin) ((volatile uint32_t *)&GPIO.out_w1tc)
+#else
+#define _PM_portOutRegister(pin)                                               \
+  ((volatile uint32_t *)(((pin) < 32) ? &GPIO.out : &GPIO.out1.val))
+#define _PM_portSetRegister(pin)                                               \
+  ((volatile uint32_t *)(((pin) < 32) ? &GPIO.out_w1ts : &GPIO.out1_w1ts.val))
+#define _PM_portClearRegister(pin)                                             \
+  ((volatile uint32_t *)(((pin) < 32) ? &GPIO.out_w1tc : &GPIO.out1_w1tc.val))
+#endif
+
+// Index of the byte (0 to 3) and of the 16-bit word (0 or 1) that holds a
+// pin's bit inside its 32-bit register; mirrored on big-endian builds.
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define _PM_byteOffset(pin) (((pin) & 31) / 8)
+#define _PM_wordOffset(pin) (((pin) & 31) / 16)
+#else
+#define _PM_byteOffset(pin) (3 - (((pin) & 31) / 8))
+#define _PM_wordOffset(pin) (1 - (((pin) & 31) / 16))
+#endif
+
+// As written, because it's tied to a specific timer right now, the
+// Arduino lib only permits one instance of the Protomatter_core struct,
+// which it sets up when calling begin().
+void *_PM_protoPtr = NULL;
+
+#define _PM_timerFreq 40000000 // 40 MHz (1:2 prescale)
+
+#if defined(ARDUINO) // COMPILING FOR ARDUINO ------------------------------
+
+// ESP32 requires a custom PEW declaration (issues one set of RGB color bits
+// followed by clock pulse). Turns out the bit set/clear registers are not
+// actually atomic. If two writes are made in quick succession, the second
+// has no effect. One option is NOPs, other is to write a 0 (no effect) to
+// the opposing register (set vs clear) to synchronize the next write.
+#define PEW \
+ *set = *data++; /* Set RGB data high */ \
+ *clear_full = 0; /* ESP32 MUST sync before 2nd 'set' */ \
+ *set_full = clock; /* Set clock high */ \
+ *clear_full = rgbclock; /* Clear RGB data + clock */ \
+ ///< Bitbang one set of RGB data bits to matrix
+
+#define _PM_timerNum 0 // Timer #0 (can be 0-3)
+
+// This is the default aforementioned singular timer. IN THEORY, other
+// timers could be used, IF an Arduino sketch passes the address of its
+// own hw_timer_t* to the Protomatter constructor and initializes that
+// timer using ESP32's timerBegin(). All of the timer-related functions
+// below pass around a handle rather than accessing _PM_esp32timer
+// directly, in case that's ever actually used in the future.
+static hw_timer_t *_PM_esp32timer = NULL;
+#define _PM_TIMER_DEFAULT &_PM_esp32timer
+
+extern IRAM_ATTR void _PM_row_handler(Protomatter_core *core);
+
+// Timer interrupt handler. This, _PM_row_handler() and any functions
+// called by _PM_row_handler() should all have the IRAM_ATTR attribute
+// (RAM-resident functions). This isn't really the ISR itself, but a
+// callback invoked by the real ISR (in arduino-esp32's esp32-hal-timer.c)
+// which takes care of interrupt status bits & such.
+// Nothing is done while _PM_protoPtr is NULL (no matrix registered), the
+// same guard the CircuitPython variant of this callback applies.
+IRAM_ATTR static void _PM_esp32timerCallback(void) {
+  if (_PM_protoPtr) {
+    _PM_row_handler(_PM_protoPtr); // In core.c
+  }
+}
+
+// Prepare the timer without starting it. tptr is a hw_timer_t** in
+// disguise: when it designates the library's default handle the timer is
+// created here, otherwise the handle it points to is used as-is. Either
+// way the row-handler callback is attached to it.
+void _PM_timerInit(void *tptr) {
+  hw_timer_t **handle = (hw_timer_t **)tptr;
+  if (handle == _PM_TIMER_DEFAULT) {
+    // 1:2 prescale, count up
+    *handle = timerBegin(_PM_timerNum, 2, true);
+  }
+  timerAttachInterrupt(*handle, &_PM_esp32timerCallback, true);
+}
+
+// Load the alarm with 'period' (third argument true), enable the alarm,
+// then start the timer. tptr points at a hw_timer_t* handle.
+IRAM_ATTR inline void _PM_timerStart(void *tptr, uint32_t period) {
+  hw_timer_t *hw = *(hw_timer_t **)tptr;
+  timerAlarmWrite(hw, period, true);
+  timerAlarmEnable(hw);
+  timerStart(hw);
+}
+
+// Read the timer's current count, truncated to 32 bits, whether or not the
+// timer is running. The timer must have been initialized beforehand.
+IRAM_ATTR inline uint32_t _PM_timerGetCount(void *tptr) {
+  hw_timer_t **handle = (hw_timer_t **)tptr;
+  return (uint32_t)timerRead(*handle);
+}
+
+// Halt the timer, then report the count it stopped at.
+// The timer must have been initialized beforehand.
+IRAM_ATTR uint32_t _PM_timerStop(void *tptr) {
+  timerStop(*(hw_timer_t **)tptr);
+  return _PM_timerGetCount(tptr);
+}
+
+#elif defined(CIRCUITPY) // COMPILING FOR CIRCUITPYTHON --------------------
+
+// ESP32 CircuitPython magic goes here. If any of the above Arduino-specific
+// defines, structs or functions are useful as-is, don't copy them, just
+// move them above the ARDUINO check so fixes/changes carry over, thx.
+
+// ESP32 requires a custom PEW declaration (issues one set of RGB color bits
+// followed by clock pulse). Turns out the bit set/clear registers are not
+// actually atomic. If two writes are made in quick succession, the second
+// has no effect. One option is NOPs, other is to write a 0 (no effect) to
+// the opposing register (set vs clear) to synchronize the next write.
+#define PEW \
+ *set = (*data++) << shift; /* Set RGB data high, moved into position by 'shift' */ \
+ *clear_full = 0; /* ESP32 MUST sync before 2nd 'set' */ \
+ *set = clock; /* Set clock high (written through 'set' here; the Arduino PEW uses 'set_full') */ \
+ *clear_full = rgbclock; /* Clear RGB data + clock */ \
+ ///< Bitbang one set of RGB data bits to matrix; expects set, clear_full, data, shift, clock and rgbclock in the expanding scope
+
+#include "driver/gpio.h"
+#include "hal/timer_ll.h"
+#include "peripherals/timer.h"
+
+#define _PM_STRICT_32BIT_IO (1)
+
+#define _PM_TIMER_DEFAULT NULL
+
+#define _PM_pinOutput(pin) gpio_set_direction((pin), GPIO_MODE_OUTPUT)
+
+#define _PM_pinLow(pin) gpio_set_level((pin), false)
+
+#define _PM_pinHigh(pin) gpio_set_level((pin), true)
+
+#define _PM_portBitMask(pin) (1U << ((pin)&31))
+
+// Timer alarm callback. This, _PM_row_handler() and any functions called
+// by _PM_row_handler() should all have the IRAM_ATTR attribute
+// (RAM-resident functions). This isn't the ISR itself: it is the callback
+// that _PM_timerInit() registers through timer_isr_callback_add().
+// The argument is unused. Nothing is done while _PM_protoPtr is NULL.
+// Always returns false (per the ESP-IDF timer driver, the return value
+// tells the ISR whether a task switch is needed on exit).
+IRAM_ATTR bool _PM_esp32timerCallback(void *unused) {
+  if (_PM_protoPtr) {
+    _PM_row_handler(_PM_protoPtr); // In core.c
+  }
+  return false;
+}
+
+// Prepare the timer without starting it. tptr is a timer_index_t*. The
+// timer is configured with alarm and counter both off, level interrupt,
+// up-counting, auto-reload and a divider of 2; the row-handler callback is
+// then registered and the timer interrupt enabled.
+void _PM_timerInit(void *tptr) {
+  timer_index_t *t = (timer_index_t *)tptr;
+
+  const timer_config_t setup = {
+      .alarm_en = false,
+      .counter_en = false,
+      .intr_type = TIMER_INTR_LEVEL,
+      .counter_dir = TIMER_COUNT_UP,
+      .auto_reload = true,
+      .divider = 2 // 40MHz
+  };
+
+  timer_init(t->group, t->idx, &setup);
+  timer_isr_callback_add(t->group, t->idx, _PM_esp32timerCallback, NULL, 0);
+  timer_enable_intr(t->group, t->idx);
+}
+
+// (Re)start the timer for 'period' ticks: the counter is halted, zeroed,
+// given the new alarm value with the alarm enabled, and only then set
+// counting again. The order of these register writes is kept as-is.
+IRAM_ATTR void _PM_timerStart(void *tptr, uint32_t period) {
+  timer_index_t *t = (timer_index_t *)tptr;
+  timer_ll_set_counter_enable(t->hw, t->idx, false); // Halt while updating
+  timer_ll_set_counter_value(t->hw, t->idx, 0);
+  timer_ll_set_alarm_value(t->hw, t->idx, period);
+  timer_ll_set_alarm_enable(t->hw, t->idx, true);
+  timer_ll_set_counter_enable(t->hw, t->idx, true); // Go
+}
+
+// Return the timer's current count (the 'lo' word of the counter). A 1 is
+// written to the timer's update field before the read -- presumably to
+// latch the live count into the readable registers; confirm against the
+// chip's timer-group documentation. Register field names differ on the
+// ESP32-S3, hence the two variants.
+IRAM_ATTR uint32_t _PM_timerGetCount(void *tptr) {
+  timer_index_t *t = (timer_index_t *)tptr;
+#ifdef CONFIG_IDF_TARGET_ESP32S3
+  t->hw->hw_timer[t->idx].update.tn_update = 1;
+  return t->hw->hw_timer[t->idx].lo.tn_lo;
+#else
+  t->hw->hw_timer[t->idx].update.tx_update = 1;
+  return t->hw->hw_timer[t->idx].lo.tx_lo;
+#endif
+}
+
+// Halt the counter, then report the count it stopped at.
+// The timer must have been initialized beforehand.
+IRAM_ATTR uint32_t _PM_timerStop(void *tptr) {
+  timer_index_t *t = (timer_index_t *)tptr;
+  timer_ll_set_counter_enable(t->hw, t->idx, false);
+  return _PM_timerGetCount(tptr);
+}
+
+#endif // END CIRCUITPYTHON ------------------------------------------------
+
+#endif // END ESP32
diff --git a/circuitpython/lib/protomatter/src/arch/nrf52.h b/circuitpython/lib/protomatter/src/arch/nrf52.h
new file mode 100644
index 0000000..c46cf1e
--- /dev/null
+++ b/circuitpython/lib/protomatter/src/arch/nrf52.h
@@ -0,0 +1,216 @@
+/*!
+ * @file nrf52.h
+ *
+ * Part of Adafruit's Protomatter library for HUB75-style RGB LED matrices.
+ * This file contains NRF52-SPECIFIC CODE.
+ *
+ * Adafruit invests time and resources providing this open source code,
+ * please support Adafruit and open-source hardware by purchasing
+ * products from Adafruit!
+ *
+ * Written by Phil "Paint Your Dragon" Burgess and Jeff Epler for
+ * Adafruit Industries, with contributions from the open source community.
+ *
+ * BSD license, all text here must be included in any redistribution.
+ *
+ */
+
+#pragma once
+
+#if defined(NRF52_SERIES)
+
+#if defined(ARDUINO) // COMPILING FOR ARDUINO ------------------------------
+
+// digitalPinToPort, g_ADigitalPinMap[] are Arduino specific:
+
+// Address of the OUT register of the GPIO port that holds Arduino 'pin'.
+void *_PM_portOutRegister(uint32_t pin) {
+  return &(digitalPinToPort(pin))->OUT;
+}
+
+// Address of the OUTSET register of the GPIO port that holds Arduino 'pin'.
+void *_PM_portSetRegister(uint32_t pin) {
+  return &(digitalPinToPort(pin))->OUTSET;
+}
+
+// Address of the OUTCLR register of the GPIO port that holds Arduino 'pin'.
+void *_PM_portClearRegister(uint32_t pin) {
+  return &(digitalPinToPort(pin))->OUTCLR;
+}
+
+// Leave _PM_portToggleRegister(pin) undefined on nRF!
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define _PM_byteOffset(pin) ((g_ADigitalPinMap[pin] & 0x1F) / 8)
+#define _PM_wordOffset(pin) ((g_ADigitalPinMap[pin] & 0x1F) / 16)
+#else
+#define _PM_byteOffset(pin) (3 - ((g_ADigitalPinMap[pin] & 0x1F) / 8))
+#define _PM_wordOffset(pin) (1 - ((g_ADigitalPinMap[pin] & 0x1F) / 16))
+#endif
+
+// Because it's tied to a specific timer right now, there can be only
+// one instance of the Protomatter_core struct. The Arduino library
+// sets up this pointer when calling begin().
+void *_PM_protoPtr = NULL;
+
+// Arduino implementation is tied to a specific timer/counter,
+// Partly because IRQs must be declared at compile-time.
+#define _PM_IRQ_HANDLER TIMER4_IRQHandler
+#define _PM_timerFreq 16000000
+#define _PM_TIMER_DEFAULT NRF_TIMER4
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Timer interrupt service routine: acknowledge a pending compare-0 event
+// on the default timer, then run the matrix row handler.
+void _PM_IRQ_HANDLER(void) {
+  if (_PM_TIMER_DEFAULT->EVENTS_COMPARE[0] != 0) {
+    _PM_TIMER_DEFAULT->EVENTS_COMPARE[0] = 0; // Clear the event flag
+  }
+  _PM_row_handler(_PM_protoPtr); // In core.c
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#elif defined(CIRCUITPY) // COMPILING FOR CIRCUITPYTHON --------------------
+
+#include "nrf_gpio.h"
+
+// Address of the OUT register of the GPIO port that 'pin' decodes to.
+volatile uint32_t *_PM_portOutRegister(uint32_t pin) {
+  return &nrf_gpio_pin_port_decode(&pin)->OUT;
+}
+
+// Address of the OUTSET register of the GPIO port that 'pin' decodes to.
+volatile uint32_t *_PM_portSetRegister(uint32_t pin) {
+  return &nrf_gpio_pin_port_decode(&pin)->OUTSET;
+}
+
+// Address of the OUTCLR register of the GPIO port that 'pin' decodes to.
+volatile uint32_t *_PM_portClearRegister(uint32_t pin) {
+  return &nrf_gpio_pin_port_decode(&pin)->OUTCLR;
+}
+#define _PM_pinOutput(pin) \
+ nrf_gpio_cfg(pin, NRF_GPIO_PIN_DIR_OUTPUT, NRF_GPIO_PIN_INPUT_DISCONNECT, \
+ NRF_GPIO_PIN_NOPULL, NRF_GPIO_PIN_H0H1, NRF_GPIO_PIN_NOSENSE)
+#define _PM_pinInput(pin) nrf_gpio_cfg_input(pin)
+#define _PM_pinHigh(pin) nrf_gpio_pin_set(pin)
+#define _PM_pinLow(pin) nrf_gpio_pin_clear(pin)
+#define _PM_portBitMask(pin) (1u << ((pin)&31))
+
+// Byte / 16-bit-word index, within the 32-bit port register, of the bit
+// that drives 'pin'. The argument is parenthesized (as _PM_portBitMask
+// already does) so expression arguments such as (a | b) expand correctly.
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define _PM_byteOffset(pin) (((pin)&31) / 8)
+#define _PM_wordOffset(pin) (((pin)&31) / 16)
+#else
+#define _PM_byteOffset(pin) (3 - (((pin)&31) / 8))
+#define _PM_wordOffset(pin) (1 - (((pin)&31) / 16))
+#endif
+
+// CircuitPython implementation is tied to a specific freq (but the counter
+// is dynamically allocated):
+#define _PM_timerFreq 16000000
+
+// As currently implemented, there can be only one instance of the
+// Protomatter_core struct, because the ISR below locates it through this
+// one global pointer. The pointer is set up when starting the matrix.
+void *_PM_protoPtr = NULL;
+
+// Timer interrupt service routine. The timer peripheral is taken from the
+// active Protomatter_core instead of being fixed at compile time.
+void _PM_IRQ_HANDLER(void) {
+  Protomatter_core *core = (Protomatter_core *)_PM_protoPtr;
+  NRF_TIMER_Type *tmr = core->timer;
+  if (tmr->EVENTS_COMPARE[0] != 0) {
+    tmr->EVENTS_COMPARE[0] = 0; // Acknowledge the compare-0 event
+  }
+
+  _PM_row_handler(_PM_protoPtr); // In core.c
+}
+
+#else // END CIRCUITPYTHON -------------------------------------------------
+
+// Byte offset macros, timer and ISR work for other environments go here.
+
+#endif
+
+// CODE COMMON TO ALL ENVIRONMENTS -----------------------------------------
+
+// Initialize, but do not start, the timer whose peripheral base address is
+// passed in tptr. If tptr does not match any timer in the table below, the
+// function returns without touching any hardware.
+void _PM_timerInit(void *tptr) {
+ // Known timer peripherals and their interrupt numbers; entries exist
+ // only for the NRF_TIMERn symbols that are defined.
+ static const struct {
+ NRF_TIMER_Type *tc; // -> Timer peripheral base address
+ IRQn_Type IRQn; // Interrupt number
+ } timer[] = {
+#if defined(NRF_TIMER0)
+ {NRF_TIMER0, TIMER0_IRQn},
+#endif
+#if defined(NRF_TIMER1)
+ {NRF_TIMER1, TIMER1_IRQn},
+#endif
+#if defined(NRF_TIMER2)
+ {NRF_TIMER2, TIMER2_IRQn},
+#endif
+#if defined(NRF_TIMER3)
+ {NRF_TIMER3, TIMER3_IRQn},
+#endif
+#if defined(NRF_TIMER4)
+ {NRF_TIMER4, TIMER4_IRQn},
+#endif
+ };
+#define NUM_TIMERS (sizeof timer / sizeof timer[0])
+
+ // Determine IRQn from timer address
+ uint8_t timerNum = 0;
+ while ((timerNum < NUM_TIMERS) && (timer[timerNum].tc != tptr)) {
+ timerNum++;
+ }
+ if (timerNum >= NUM_TIMERS)
+ return; // Not a timer listed above; nothing is configured
+
+ NRF_TIMER_Type *tc = timer[timerNum].tc;
+
+ tc->TASKS_STOP = 1; // Stop timer
+ tc->MODE = TIMER_MODE_MODE_Timer; // Timer (not counter) mode
+ tc->TASKS_CLEAR = 1; // Reset count to 0
+ tc->BITMODE = TIMER_BITMODE_BITMODE_16Bit
+ << TIMER_BITMODE_BITMODE_Pos; // 16-bit timer res
+ tc->PRESCALER = 0; // 1:1 prescale (16 MHz)
+ tc->INTENSET = TIMER_INTENSET_COMPARE0_Enabled
+ << TIMER_INTENSET_COMPARE0_Pos; // Event 0 interrupt
+ // The disable / clear-pending / set-priority steps are intentionally
+ // left commented out; only the enable is performed.
+ // NVIC_DisableIRQ(timer[timerNum].IRQn);
+ // NVIC_ClearPendingIRQ(timer[timerNum].IRQn);
+ // NVIC_SetPriority(timer[timerNum].IRQn, 0); // Top priority
+ NVIC_EnableIRQ(timer[timerNum].IRQn);
+}
+
+// Stop and zero the timer, load 'period' into compare register 0, then
+// start counting.
+inline void _PM_timerStart(void *tptr, uint32_t period) {
+  volatile NRF_TIMER_Type *timer = (volatile NRF_TIMER_Type *)tptr;
+  timer->TASKS_STOP = 1;  // Halt
+  timer->TASKS_CLEAR = 1; // Count back to 0
+  timer->CC[0] = period;  // Compare-0 value
+  timer->TASKS_START = 1; // Run
+}
+
+// Return the timer's current count by triggering capture task 0 and
+// reading back CC[0].
+// NOTE(review): CC[0] is also where _PM_timerStart() stores the period, so
+// this capture overwrites that value until the next _PM_timerStart() call.
+// Capturing into a different CC[] slot may be safer -- confirm on hardware.
+inline uint32_t _PM_timerGetCount(void *tptr) {
+ volatile NRF_TIMER_Type *tc = (volatile NRF_TIMER_Type *)tptr;
+ tc->TASKS_CAPTURE[0] = 1; // Capture timer to CC[n] register
+ return tc->CC[0];
+}
+
+// Stop the timer. The count is still read via _PM_timerGetCount() but is
+// deliberately discarded (see note below); a fixed placeholder of 100 is
+// returned instead.
+uint32_t _PM_timerStop(void *tptr) {
+ volatile NRF_TIMER_Type *tc = (volatile NRF_TIMER_Type *)tptr;
+ tc->TASKS_STOP = 1; // Stop timer
+ // Marked unused so the discarded value does not raise a warning
+ __attribute__((unused)) uint32_t count = _PM_timerGetCount(tptr);
+ // NOTE TO FUTURE SELF: I don't know why the GetCount code isn't
+ // working. It does the expected thing in a small test program but
+ // not here. I need to get on with testing on an actual matrix, so
+ // this is just a nonsense fudge value for now:
+ return 100;
+ // return count;
+}
+
+#define _PM_clockHoldHigh asm("nop; nop");
+
+#define _PM_minMinPeriod 100
+
+#endif // END NRF52_SERIES
diff --git a/circuitpython/lib/protomatter/src/arch/rp2040.h b/circuitpython/lib/protomatter/src/arch/rp2040.h
new file mode 100644
index 0000000..b098251
--- /dev/null
+++ b/circuitpython/lib/protomatter/src/arch/rp2040.h
@@ -0,0 +1,245 @@
+/*!
+ * @file rp2040.h
+ *
+ * Part of Adafruit's Protomatter library for HUB75-style RGB LED matrices.
+ * This file contains RP2040 (Raspberry Pi Pico, etc.) SPECIFIC CODE.
+ *
+ * Adafruit invests time and resources providing this open source code,
+ * please support Adafruit and open-source hardware by purchasing
+ * products from Adafruit!
+ *
+ * Written by Phil "Paint Your Dragon" Burgess and Jeff Epler for
+ * Adafruit Industries, with contributions from the open source community.
+ *
+ * BSD license, all text here must be included in any redistribution.
+ *
+ * RP2040 NOTES: This initial implementation does NOT use PIO. That's normal
+ * for Protomatter, which was written for simple GPIO + timer interrupt for
+ * broadest portability. While not entirely optimal, it's not pessimal
+ * either...no worse than any other platform where we're not taking
+ * advantage of device-specific DMA or peripherals. Would require changes to
+ * the 'blast' functions or possibly the whole _PM_row_handler() (both
+ * currently in core.c). CPU load is just a few percent for a 64x32
+ * matrix @ 6-bit depth, so I'm not losing sleep over this.
+ *
+ */
+
+#pragma once
+
+// TO DO: PUT A *PROPER* RP2040 CHECK HERE
+#if defined(PICO_BOARD) || defined(__RP2040__)
+
+#include "../../hardware_pwm/include/hardware/pwm.h"
+#include "hardware/irq.h"
+#include "hardware/timer.h"
+#include "pico/stdlib.h" // For sio_hw, etc.
+
+// RP2040 only allows full 32-bit aligned writes to GPIO.
+#define _PM_STRICT_32BIT_IO ///< Change core.c behavior for long accesses only
+
+// TEMPORARY: FORCING ARDUINO COMPILATION FOR INITIAL C TESTING
+#if !defined(CIRCUITPY)
+#define ARDUINO
+#endif
+
+// Enable this to use PWM for bitplane timing, else a timer alarm is used.
+// PWM has finer resolution, but alarm is adequate -- this is more about
+// which peripheral we'd rather use, as both are finite resources.
+#ifndef _PM_CLOCK_PWM
+#define _PM_CLOCK_PWM (1)
+#endif
+
+#if _PM_CLOCK_PWM // Use PWM for timing
+static void _PM_PWM_ISR(void);
+#else // Use timer alarm for timing
+static void _PM_timerISR(void);
+#endif
+
+#if defined(ARDUINO) // COMPILING FOR ARDUINO ------------------------------
+
+// 'pin' here is GPXX # -- that might change in Arduino implementation.
+// Byte / 16-bit-word index, within the 32-bit GPIO register, of the bit
+// for 'pin'. The argument is parenthesized so expression arguments
+// expand correctly.
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define _PM_byteOffset(pin) (((pin)&31) / 8)
+#define _PM_wordOffset(pin) (((pin)&31) / 16)
+#else
+#define _PM_byteOffset(pin) (3 - (((pin)&31) / 8))
+#define _PM_wordOffset(pin) (1 - (((pin)&31) / 16))
+#endif
+
+#if _PM_CLOCK_PWM
+
+// Arduino implementation is tied to a specific PWM slice & frequency
+#define _PM_PWM_SLICE 0
+#define _PM_PWM_DIV 3 // ~41.6 MHz, similar to SAMD
+#define _PM_timerFreq (125000000 / _PM_PWM_DIV)
+#define _PM_TIMER_DEFAULT NULL
+
+#else // Use alarm for timing
+
+// Arduino implementation is tied to a specific timer alarm & frequency
+#define _PM_ALARM_NUM 1
+#define _PM_IRQ_HANDLER TIMER_IRQ_1
+#define _PM_timerFreq 1000000
+#define _PM_TIMER_DEFAULT NULL
+
+#endif // _PM_CLOCK_PWM -- closed here so _PM_timerInit() exists in both modes
+
+// Initialize, but do not start, timer.
+// tptr is not used in the Arduino build; the PWM slice / alarm number are
+// fixed by the macros above.
+void _PM_timerInit(void *tptr) {
+#if _PM_CLOCK_PWM
+  // Enable PWM wrap interrupt
+  pwm_clear_irq(_PM_PWM_SLICE);
+  pwm_set_irq_enabled(_PM_PWM_SLICE, true);
+  irq_set_exclusive_handler(PWM_IRQ_WRAP, _PM_PWM_ISR);
+  irq_set_enabled(PWM_IRQ_WRAP, true);
+
+  // Config but do not start PWM: the final 'start' argument is false so
+  // the slice stays stopped until _PM_timerStart() enables it.
+  pwm_config config = pwm_get_default_config();
+  pwm_config_set_clkdiv_int(&config, _PM_PWM_DIV);
+  pwm_init(_PM_PWM_SLICE, &config, false);
+#else
+  timer_hw->alarm[_PM_ALARM_NUM] = timer_hw->timerawl; // Clear any timer
+  hw_set_bits(&timer_hw->inte, 1u << _PM_ALARM_NUM);
+  irq_set_exclusive_handler(_PM_IRQ_HANDLER, _PM_timerISR); // Set IRQ handler
+#endif
+}
+
+#elif defined(CIRCUITPY) // COMPILING FOR CIRCUITPYTHON --------------------
+
+// 'pin' here is GPXX #.
+// Byte / 16-bit-word index, within the 32-bit GPIO register, of the bit
+// for 'pin'. The argument is parenthesized so expression arguments
+// expand correctly.
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define _PM_byteOffset(pin) (((pin)&31) / 8)
+#define _PM_wordOffset(pin) (((pin)&31) / 16)
+#else
+#define _PM_byteOffset(pin) (3 - (((pin)&31) / 8))
+#define _PM_wordOffset(pin) (1 - (((pin)&31) / 16))
+#endif
+
+#if _PM_CLOCK_PWM
+
+int _PM_pwm_slice;
+#define _PM_PWM_SLICE (_PM_pwm_slice & 0xff)
+#define _PM_PWM_DIV 3 // ~41.6 MHz, similar to SAMD
+#define _PM_timerFreq (125000000 / _PM_PWM_DIV)
+#define _PM_TIMER_DEFAULT NULL
+
+#else // Use alarm for timing
+
+// Currently tied to a specific timer alarm & frequency
+#define _PM_ALARM_NUM 1
+#define _PM_IRQ_HANDLER TIMER_IRQ_1
+#define _PM_timerFreq 1000000
+#define _PM_TIMER_DEFAULT NULL
+
+#endif
+
+// Initialize, but do not start, timer.
+// In PWM mode, tptr is treated as an integer rather than a pointer: its
+// low 8 bits select the PWM slice, which is remembered in _PM_pwm_slice.
+void _PM_timerInit(void *tptr) {
+#if _PM_CLOCK_PWM
+  _PM_pwm_slice = (int)tptr & 0xff;
+  // Enable PWM wrap interrupt
+  pwm_clear_irq(_PM_PWM_SLICE);
+  pwm_set_irq_enabled(_PM_PWM_SLICE, true);
+  irq_set_exclusive_handler(PWM_IRQ_WRAP, _PM_PWM_ISR);
+  irq_set_enabled(PWM_IRQ_WRAP, true);
+
+  // Config but do not start PWM: the final 'start' argument is false so
+  // the slice stays stopped until _PM_timerStart() enables it.
+  pwm_config config = pwm_get_default_config();
+  pwm_config_set_clkdiv_int(&config, _PM_PWM_DIV);
+  pwm_init(_PM_PWM_SLICE, &config, false);
+#else
+  timer_hw->alarm[_PM_ALARM_NUM] = timer_hw->timerawl; // Clear any timer
+  hw_set_bits(&timer_hw->inte, 1u << _PM_ALARM_NUM);
+  irq_set_exclusive_handler(_PM_IRQ_HANDLER, _PM_timerISR); // Set IRQ handler
+#endif
+}
+
+#endif
+
+#if !_PM_CLOCK_PWM
+// Unlike timers on other devices, on RP2040 you don't reset a counter to
+// zero at the start of a cycle. To emulate that behavior (for determining
+// elapsed times), the timer start time must be saved somewhere...
+static volatile uint32_t _PM_timerSave;
+
+#endif
+
+// Because it's tied to a specific timer right now, there can be only
+// one instance of the Protomatter_core struct. The Arduino library
+// sets up this pointer when calling begin().
+void *_PM_protoPtr = NULL;
+
+#define _PM_portOutRegister(pin) ((void *)&sio_hw->gpio_out)
+#define _PM_portSetRegister(pin) ((volatile uint32_t *)&sio_hw->gpio_set)
+#define _PM_portClearRegister(pin) ((volatile uint32_t *)&sio_hw->gpio_clr)
+#define _PM_portToggleRegister(pin) ((volatile uint32_t *)&sio_hw->gpio_togl)
+// 'pin' here is GPXX # -- that might change in Arduino implementation
+#define _PM_portBitMask(pin) (1UL << pin)
+// Same for these -- using GPXX #, but Arduino might assign different order
+#define _PM_pinOutput(pin) \
+ { \
+ gpio_init(pin); \
+ gpio_set_dir(pin, GPIO_OUT); \
+ }
+#define _PM_pinLow(pin) gpio_clr_mask(1UL << pin)
+#define _PM_pinHigh(pin) gpio_set_mask(1UL << pin)
+
+#ifndef _PM_delayMicroseconds
+#define _PM_delayMicroseconds(n) sleep_us(n)
+#endif
+
+#if _PM_CLOCK_PWM // Use PWM for timing
+// PWM wrap interrupt handler: clears the wrap interrupt for the slice in
+// use, then processes the next matrix row.
+static void _PM_PWM_ISR(void) {
+ pwm_clear_irq(_PM_PWM_SLICE); // Reset PWM wrap interrupt
+ _PM_row_handler(_PM_protoPtr); // In core.c
+}
+#else // Use timer alarm for timing
+// Timer alarm interrupt handler: clears the alarm's interrupt flag, then
+// processes the next matrix row.
+static void _PM_timerISR(void) {
+ hw_clear_bits(&timer_hw->intr, 1u << _PM_ALARM_NUM); // Clear alarm flag
+ _PM_row_handler(_PM_protoPtr); // In core.c
+}
+#endif
+
+// Set timer period and enable timer.
+// PWM mode: zero the slice counter, use 'period' as the wrap value, then
+// enable the slice.
+// Alarm mode: enable the alarm IRQ, record the current timerawl reading in
+// _PM_timerSave, and set the alarm 'period' ticks after that reading.
+// tptr is not used here.
+inline void _PM_timerStart(void *tptr, uint32_t period) {
+#if _PM_CLOCK_PWM
+ pwm_set_counter(_PM_PWM_SLICE, 0);
+ pwm_set_wrap(_PM_PWM_SLICE, period);
+ pwm_set_enabled(_PM_PWM_SLICE, true);
+#else
+ irq_set_enabled(_PM_IRQ_HANDLER, true); // Enable alarm IRQ
+ _PM_timerSave = timer_hw->timerawl; // Time at start
+ timer_hw->alarm[_PM_ALARM_NUM] = _PM_timerSave + period; // Time at end
+#endif
+}
+
+// Return current count value (timer enabled or not).
+// Timer must be previously initialized.
+// PWM mode: the slice counter itself. Alarm mode: the current timerawl
+// reading minus the start time saved by _PM_timerStart(). tptr is not
+// used here.
+inline uint32_t _PM_timerGetCount(void *tptr) {
+#if _PM_CLOCK_PWM
+ return pwm_get_counter(_PM_PWM_SLICE);
+#else
+ return timer_hw->timerawl - _PM_timerSave;
+#endif
+}
+
+// Disable timer and return current count value.
+// Timer must be previously initialized.
+// PWM mode disables the slice; alarm mode only disables the alarm IRQ.
+// The count is read after the disable.
+uint32_t _PM_timerStop(void *tptr) {
+#if _PM_CLOCK_PWM
+ pwm_set_enabled(_PM_PWM_SLICE, false);
+#else
+ irq_set_enabled(_PM_IRQ_HANDLER, false); // Disable alarm IRQ
+#endif
+ return _PM_timerGetCount(tptr);
+}
+
+#define _PM_chunkSize 8
+#define _PM_clockHoldLow asm("nop; nop;");
+#if _PM_CLOCK_PWM
+#define _PM_minMinPeriod 100
+#else
+#define _PM_minMinPeriod 8
+#endif
+
+#endif // END PICO_BOARD
diff --git a/circuitpython/lib/protomatter/src/arch/samd-common.h b/circuitpython/lib/protomatter/src/arch/samd-common.h
new file mode 100644
index 0000000..d2e039d
--- /dev/null
+++ b/circuitpython/lib/protomatter/src/arch/samd-common.h
@@ -0,0 +1,98 @@
+/*!
+ * @file samd-common.h
+ *
+ * Part of Adafruit's Protomatter library for HUB75-style RGB LED matrices.
+ * This file contains SAMD-SPECIFIC CODE (SAMD51 & SAMD21).
+ *
+ * Adafruit invests time and resources providing this open source code,
+ * please support Adafruit and open-source hardware by purchasing
+ * products from Adafruit!
+ *
+ * Written by Phil "Paint Your Dragon" Burgess and Jeff Epler for
+ * Adafruit Industries, with contributions from the open source community.
+ *
+ * BSD license, all text here must be included in any redistribution.
+ *
+ */
+
+#pragma once
+
+#if defined(__SAMD51__) || defined(SAM_D5X_E5X) || defined(_SAMD21_) || \
+ defined(SAMD21)
+
+#if defined(ARDUINO) // COMPILING FOR ARDUINO ------------------------------
+
+// g_APinDescription[] table and pin indices are Arduino specific:
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define _PM_byteOffset(pin) (g_APinDescription[pin].ulPin / 8)
+#define _PM_wordOffset(pin) (g_APinDescription[pin].ulPin / 16)
+#else
+#define _PM_byteOffset(pin) (3 - (g_APinDescription[pin].ulPin / 8))
+#define _PM_wordOffset(pin) (1 - (g_APinDescription[pin].ulPin / 16))
+#endif
+
+// Arduino implementation is tied to a specific timer/counter & freq:
+#if defined(TC4)
+#define _PM_TIMER_DEFAULT TC4
+#define _PM_IRQ_HANDLER TC4_Handler
+#else // No TC4 on some M4's
+#define _PM_TIMER_DEFAULT TC3
+#define _PM_IRQ_HANDLER TC3_Handler
+#endif
+#define _PM_timerFreq 48000000
+// Partly because IRQs must be declared at compile-time, and partly
+// because we know Arduino's already set up one of the GCLK sources
+// for 48 MHz.
+
+// Because it's tied to a specific timer right now, there can be only
+// one instance of the Protomatter_core struct. The Arduino library
+// sets up this pointer when calling begin().
+void *_PM_protoPtr = NULL;
+
+// Timer interrupt service routine for the compile-time-selected default
+// timer/counter (_PM_IRQ_HANDLER expands to that timer's handler name).
+void _PM_IRQ_HANDLER(void) {
+ // Clear overflow flag:
+ _PM_TIMER_DEFAULT->COUNT16.INTFLAG.reg = TC_INTFLAG_OVF;
+ _PM_row_handler(_PM_protoPtr); // In core.c
+}
+
+#elif defined(CIRCUITPY) // COMPILING FOR CIRCUITPYTHON --------------------
+
+#include "hal_gpio.h"
+
+#define _PM_pinOutput(pin) gpio_set_pin_direction(pin, GPIO_DIRECTION_OUT)
+#define _PM_pinInput(pin) gpio_set_pin_direction(pin, GPIO_DIRECTION_IN)
+#define _PM_pinHigh(pin) gpio_set_pin_level(pin, 1)
+#define _PM_pinLow(pin) gpio_set_pin_level(pin, 0)
+#define _PM_portBitMask(pin) (1u << ((pin)&31))
+
+// Byte / 16-bit-word index, within the 32-bit port register, of the bit
+// that drives 'pin'. The argument is parenthesized (as _PM_portBitMask
+// already does) so expression arguments expand correctly.
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define _PM_byteOffset(pin) (((pin)&31) / 8)
+#define _PM_wordOffset(pin) (((pin)&31) / 16)
+#else
+#define _PM_byteOffset(pin) (3 - (((pin)&31) / 8))
+#define _PM_wordOffset(pin) (1 - (((pin)&31) / 16))
+#endif
+
+// CircuitPython implementation is tied to a specific freq (but the counter
+// is dynamically allocated):
+#define _PM_timerFreq 48000000
+
+// As currently implemented, there can be only one instance of the
+// Protomatter_core struct. This pointer is set up when starting the matrix.
+void *_PM_protoPtr = NULL;
+
+// Timer interrupt service routine. Clears the overflow flag on whichever
+// timer/counter the active Protomatter_core points at, then processes the
+// next row.
+void _PM_IRQ_HANDLER(void) {
+  Protomatter_core *core = (Protomatter_core *)_PM_protoPtr;
+  Tc *tc = (Tc *)(core->timer);
+  tc->COUNT16.INTFLAG.reg = TC_INTFLAG_OVF;
+  _PM_row_handler(_PM_protoPtr); // In core.c
+}
+
+#else // END CIRCUITPYTHON -------------------------------------------------
+
+// Byte offset macros, timer and ISR work for other environments go here.
+
+#endif
+
+#endif // END SAMD5x/SAME5x/SAMD21
diff --git a/circuitpython/lib/protomatter/src/arch/samd21.h b/circuitpython/lib/protomatter/src/arch/samd21.h
new file mode 100644
index 0000000..25deef3
--- /dev/null
+++ b/circuitpython/lib/protomatter/src/arch/samd21.h
@@ -0,0 +1,150 @@
+/*!
+ * @file samd21.h
+ *
+ * Part of Adafruit's Protomatter library for HUB75-style RGB LED matrices.
+ * This file contains SAMD21-SPECIFIC CODE.
+ *
+ * Adafruit invests time and resources providing this open source code,
+ * please support Adafruit and open-source hardware by purchasing
+ * products from Adafruit!
+ *
+ * Written by Phil "Paint Your Dragon" Burgess and Jeff Epler for
+ * Adafruit Industries, with contributions from the open source community.
+ *
+ * BSD license, all text here must be included in any redistribution.
+ *
+ */
+
+#pragma once
+
+#if defined(_SAMD21_) || defined(SAMD21) // Arduino, Circuitpy SAMD21 defs
+
+#if defined(ARDUINO) // COMPILING FOR ARDUINO ------------------------------
+
+// g_APinDescription[] table and pin indices are Arduino specific:
+#define _PM_portOutRegister(pin) \
+ &PORT_IOBUS->Group[g_APinDescription[pin].ulPort].OUT.reg
+
+#define _PM_portSetRegister(pin) \
+ &PORT_IOBUS->Group[g_APinDescription[pin].ulPort].OUTSET.reg
+
+#define _PM_portClearRegister(pin) \
+ &PORT_IOBUS->Group[g_APinDescription[pin].ulPort].OUTCLR.reg
+
+#define _PM_portToggleRegister(pin) \
+ &PORT_IOBUS->Group[g_APinDescription[pin].ulPort].OUTTGL.reg
+
+#else // END ARDUINO -------------------------------------------------------
+
+// Non-Arduino port register lookups go here, if not already declared
+// in samd-common.h.
+
+#endif
+
+// CODE COMMON TO ALL ENVIRONMENTS -----------------------------------------
+
+// Initialize, but do not start, timer.
+// tptr is the Tc peripheral base address. It must match one of the entries
+// in the table below; otherwise the function returns without touching any
+// hardware.
+void _PM_timerInit(void *tptr) {
+  static const struct {
+    Tc *tc;         // -> Timer/counter peripheral base address
+    IRQn_Type IRQn; // Interrupt number
+    uint8_t GCM_ID; // GCLK selection ID
+  } timer[] = {
+#if defined(TC0)
+      {TC0, TC0_IRQn, GCM_TCC0_TCC1},
+#endif
+#if defined(TC1)
+      {TC1, TC1_IRQn, GCM_TCC0_TCC1},
+#endif
+#if defined(TC2)
+      {TC2, TC2_IRQn, GCM_TCC2_TC3},
+#endif
+#if defined(TC3)
+      {TC3, TC3_IRQn, GCM_TCC2_TC3},
+#endif
+#if defined(TC4)
+      {TC4, TC4_IRQn, GCM_TC4_TC5},
+#endif
+  };
+#define NUM_TIMERS (sizeof timer / sizeof timer[0])
+
+  Tc *tc = (Tc *)tptr; // Cast peripheral address passed in
+
+  // Look up the table entry (IRQ number, GCLK ID) for this peripheral
+  uint8_t timerNum = 0;
+  while ((timerNum < NUM_TIMERS) && (timer[timerNum].tc != tc)) {
+    timerNum++;
+  }
+  if (timerNum >= NUM_TIMERS)
+    return; // Not a timer listed above; nothing is configured
+
+  // Enable GCLK for timer/counter
+  GCLK->CLKCTRL.reg = (uint16_t)(GCLK_CLKCTRL_CLKEN | GCLK_CLKCTRL_GEN_GCLK0 |
+                                 GCLK_CLKCTRL_ID(timer[timerNum].GCM_ID));
+  while (GCLK->STATUS.bit.SYNCBUSY == 1)
+    ;
+
+  // Counter must first be disabled to configure it
+  tc->COUNT16.CTRLA.bit.ENABLE = 0;
+  while (tc->COUNT16.STATUS.bit.SYNCBUSY)
+    ;
+
+  tc->COUNT16.CTRLA.reg =       // Configure timer counter
+      TC_CTRLA_PRESCALER_DIV1 | // 1:1 Prescale
+      TC_CTRLA_WAVEGEN_MFRQ |   // Match frequency generation mode (MFRQ)
+      TC_CTRLA_MODE_COUNT16;    // 16-bit counter mode
+  while (tc->COUNT16.STATUS.bit.SYNCBUSY)
+    ;
+
+  // Count up. This is a TC register, so use the TC_ constant rather than
+  // the TCC_ one (same bit value, correct peripheral).
+  tc->COUNT16.CTRLBCLR.reg = TC_CTRLBCLR_DIR;
+  while (tc->COUNT16.STATUS.bit.SYNCBUSY)
+    ;
+
+  // Overflow interrupt
+  tc->COUNT16.INTENSET.reg = TC_INTENSET_OVF;
+
+  NVIC_DisableIRQ(timer[timerNum].IRQn);
+  NVIC_ClearPendingIRQ(timer[timerNum].IRQn);
+  NVIC_SetPriority(timer[timerNum].IRQn, 0); // Top priority
+  NVIC_EnableIRQ(timer[timerNum].IRQn);
+
+  // Timer is configured but NOT enabled by default
+}
+
+// Zero the count, load 'period' into compare channel 0 and enable the
+// timer, waiting for register synchronization after each write.
+// The timer must have been put in 16-bit mode by _PM_timerInit() and must
+// be inactive when this is called.
+inline void _PM_timerStart(void *tptr, uint32_t period) {
+  Tc *counter = (Tc *)tptr; // Peripheral address passed in
+  counter->COUNT16.COUNT.reg = 0;
+  while (counter->COUNT16.STATUS.bit.SYNCBUSY) {
+  }
+  counter->COUNT16.CC[0].reg = period;
+  while (counter->COUNT16.STATUS.bit.SYNCBUSY) {
+  }
+  counter->COUNT16.CTRLA.bit.ENABLE = 1;
+  while (counter->COUNT16.STATUS.bit.SYNCBUSY) {
+  }
+}
+
+// Read the current count (whether or not the timer is running).
+// Issues a continuous read request for register address 0x10, waits for
+// synchronization, then returns COUNT. Timer must be initialized first.
+inline uint32_t _PM_timerGetCount(void *tptr) {
+  Tc *counter = (Tc *)tptr; // Peripheral address passed in
+  counter->COUNT16.READREQ.reg = TC_READREQ_RCONT | TC_READREQ_ADDR(0x10);
+  while (counter->COUNT16.STATUS.bit.SYNCBUSY) {
+  }
+  return counter->COUNT16.COUNT.reg;
+}
+
+// Read the count, then disable the timer and return the count that was
+// read. Timer must be initialized first.
+inline uint32_t _PM_timerStop(void *tptr) {
+  Tc *counter = (Tc *)tptr; // Peripheral address passed in
+  const uint32_t elapsed = _PM_timerGetCount(tptr); // Read before disabling
+  counter->COUNT16.CTRLA.bit.ENABLE = 0;
+  while (counter->COUNT16.STATUS.bit.SYNCBUSY) {
+  }
+  return elapsed;
+}
+
+#endif // END _SAMD21_ || SAMD21
diff --git a/circuitpython/lib/protomatter/src/arch/samd51.h b/circuitpython/lib/protomatter/src/arch/samd51.h
new file mode 100644
index 0000000..278cc2d
--- /dev/null
+++ b/circuitpython/lib/protomatter/src/arch/samd51.h
@@ -0,0 +1,216 @@
+/*!
+ * @file samd51.h
+ *
+ * Part of Adafruit's Protomatter library for HUB75-style RGB LED matrices.
+ * This file contains SAMD51-SPECIFIC CODE.
+ *
+ * Adafruit invests time and resources providing this open source code,
+ * please support Adafruit and open-source hardware by purchasing
+ * products from Adafruit!
+ *
+ * Written by Phil "Paint Your Dragon" Burgess and Jeff Epler for
+ * Adafruit Industries, with contributions from the open source community.
+ *
+ * BSD license, all text here must be included in any redistribution.
+ *
+ */
+
+#pragma once
+
+#if defined(__SAMD51__) || \
+ defined(SAM_D5X_E5X) // Arduino, Circuitpy SAMD5x / E5x defs
+
+#if defined(ARDUINO) // COMPILING FOR ARDUINO ------------------------------
+
+// g_APinDescription[] table and pin indices are Arduino specific:
+#define _PM_portOutRegister(pin) \
+ &PORT->Group[g_APinDescription[pin].ulPort].OUT.reg
+
+#define _PM_portSetRegister(pin) \
+ &PORT->Group[g_APinDescription[pin].ulPort].OUTSET.reg
+
+#define _PM_portClearRegister(pin) \
+ &PORT->Group[g_APinDescription[pin].ulPort].OUTCLR.reg
+
+#define _PM_portToggleRegister(pin) \
+ &PORT->Group[g_APinDescription[pin].ulPort].OUTTGL.reg
+
+#elif defined(CIRCUITPY) // COMPILING FOR CIRCUITPYTHON --------------------
+
+#define _PM_portOutRegister(pin) (&PORT->Group[(pin / 32)].OUT.reg)
+
+#define _PM_portSetRegister(pin) (&PORT->Group[(pin / 32)].OUTSET.reg)
+
+#define _PM_portClearRegister(pin) (&PORT->Group[(pin / 32)].OUTCLR.reg)
+
+#define _PM_portToggleRegister(pin) (&PORT->Group[(pin / 32)].OUTTGL.reg)
+
+#define F_CPU (120000000)
+
+#else
+
+// Other port register lookups go here
+
+#endif
+
+// CODE COMMON TO ALL ENVIRONMENTS -----------------------------------------
+
+// Initialize, but do not start, timer
+// tptr is the Tc peripheral base address. It must match one of the entries
+// in the table below; otherwise the function returns without touching any
+// hardware.
+void _PM_timerInit(void *tptr) {
+ // Known timer/counters with their IRQ numbers and GCLK peripheral
+ // channels; entries exist only for the TCn symbols that are defined.
+ static const struct {
+ Tc *tc; // -> Timer/counter peripheral base address
+ IRQn_Type IRQn; // Interrupt number
+ uint8_t GCLK_ID; // Peripheral channel # for clock source
+ } timer[] = {
+#if defined(TC0)
+ {TC0, TC0_IRQn, TC0_GCLK_ID},
+#endif
+#if defined(TC1)
+ {TC1, TC1_IRQn, TC1_GCLK_ID},
+#endif
+#if defined(TC2)
+ {TC2, TC2_IRQn, TC2_GCLK_ID},
+#endif
+#if defined(TC3)
+ {TC3, TC3_IRQn, TC3_GCLK_ID},
+#endif
+#if defined(TC4)
+ {TC4, TC4_IRQn, TC4_GCLK_ID},
+#endif
+#if defined(TC5)
+ {TC5, TC5_IRQn, TC5_GCLK_ID},
+#endif
+#if defined(TC6)
+ {TC6, TC6_IRQn, TC6_GCLK_ID},
+#endif
+#if defined(TC7)
+ {TC7, TC7_IRQn, TC7_GCLK_ID},
+#endif
+#if defined(TC8)
+ {TC8, TC8_IRQn, TC8_GCLK_ID},
+#endif
+#if defined(TC9)
+ {TC9, TC9_IRQn, TC9_GCLK_ID},
+#endif
+#if defined(TC10)
+ {TC10, TC10_IRQn, TC10_GCLK_ID},
+#endif
+#if defined(TC11)
+ {TC11, TC11_IRQn, TC11_GCLK_ID},
+#endif
+#if defined(TC12)
+ {TC12, TC12_IRQn, TC12_GCLK_ID},
+#endif
+ };
+#define NUM_TIMERS (sizeof timer / sizeof timer[0])
+
+ Tc *tc = (Tc *)tptr; // Cast peripheral address passed in
+
+ // Look up the table entry (IRQ number, GCLK channel) for this peripheral
+ uint8_t timerNum = 0;
+ while ((timerNum < NUM_TIMERS) && (timer[timerNum].tc != tc)) {
+ timerNum++;
+ }
+ if (timerNum >= NUM_TIMERS)
+ return; // Not a timer listed above; nothing is configured
+
+ // Feed timer/counter off GCLK1 (already set 48 MHz by Arduino core).
+ // Sure, SAMD51 can run timers up to F_CPU (e.g. 120 MHz or up to
+ // 200 MHz with overclocking), but on higher bitplanes (which have
+ // progressively longer timer periods) I could see this possibly
+ // exceeding a 16-bit timer, and would have to switch prescalers.
+ // We don't actually need atomic precision on the timer -- point is
+ // simply that the period doubles with each bitplane, and this can
+ // work fine at 48 MHz.
+ GCLK->PCHCTRL[timer[timerNum].GCLK_ID].bit.CHEN = 0; // Disable
+ while (GCLK->PCHCTRL[timer[timerNum].GCLK_ID].bit.CHEN)
+ ; // Wait for it
+ GCLK_PCHCTRL_Type pchctrl; // Read-modify-store
+ pchctrl.reg = GCLK->PCHCTRL[timer[timerNum].GCLK_ID].reg;
+ pchctrl.bit.GEN = GCLK_PCHCTRL_GEN_GCLK1_Val;
+ pchctrl.bit.CHEN = 1;
+ GCLK->PCHCTRL[timer[timerNum].GCLK_ID].reg = pchctrl.reg;
+ while (!GCLK->PCHCTRL[timer[timerNum].GCLK_ID].bit.CHEN)
+ ; // Wait until the channel reads back as enabled
+
+ // Disable timer before configuring it
+ tc->COUNT16.CTRLA.bit.ENABLE = 0;
+ while (tc->COUNT16.SYNCBUSY.bit.ENABLE)
+ ;
+
+ // 16-bit counter mode, 1:1 prescale
+ // NOTE(review): PRESCALER is assigned a *_Val constant while MODE is
+ // assigned the non-_Val macro -- confirm both are the intended form
+ // for a bitfield assignment.
+ tc->COUNT16.CTRLA.bit.MODE = TC_CTRLA_MODE_COUNT16;
+ tc->COUNT16.CTRLA.bit.PRESCALER = TC_CTRLA_PRESCALER_DIV1_Val;
+
+ tc->COUNT16.WAVE.bit.WAVEGEN =
+ TC_WAVE_WAVEGEN_MFRQ_Val; // Match frequency generation mode (MFRQ)
+
+ tc->COUNT16.CTRLBCLR.reg = TC_CTRLBCLR_DIR; // Count up
+ while (tc->COUNT16.SYNCBUSY.bit.CTRLB)
+ ;
+
+ // Overflow interrupt
+ tc->COUNT16.INTENSET.reg = TC_INTENSET_OVF;
+
+ NVIC_DisableIRQ(timer[timerNum].IRQn);
+ NVIC_ClearPendingIRQ(timer[timerNum].IRQn);
+ NVIC_SetPriority(timer[timerNum].IRQn, 0); // Top priority
+ NVIC_EnableIRQ(timer[timerNum].IRQn);
+
+ // Timer is configured but NOT enabled by default
+}
+
+// Set timer period, initialize count value to zero, enable timer.
+// Timer must be initialized to 16-bit mode using the init function
+// above, but must be inactive before calling this.
+// Each register write is followed by a wait on its own SYNCBUSY bit.
+inline void _PM_timerStart(void *tptr, uint32_t period) {
+  Tc *tc = (Tc *)tptr; // Cast peripheral address passed in
+  tc->COUNT16.COUNT.reg = 0;
+  while (tc->COUNT16.SYNCBUSY.bit.COUNT)
+    ;
+  tc->COUNT16.CC[0].reg = period;
+  while (tc->COUNT16.SYNCBUSY.bit.CC0)
+    ;
+  tc->COUNT16.CTRLA.bit.ENABLE = 1;
+  // A CTRLA.ENABLE write is tracked by SYNCBUSY.ENABLE (the bit
+  // _PM_timerInit() waits on), not SYNCBUSY.STATUS.
+  while (tc->COUNT16.SYNCBUSY.bit.ENABLE)
+    ;
+}
+
+// Read the current count (whether or not the timer is running).
+// Writes command 0x4 to CTRLBSET.CMD, spins until the command field reads
+// back as zero, then returns COUNT. Timer must be initialized first.
+inline uint32_t _PM_timerGetCount(void *tptr) {
+  Tc *counter = (Tc *)tptr;                // Peripheral address passed in
+  counter->COUNT16.CTRLBSET.bit.CMD = 0x4; // Sync COUNT
+  while (counter->COUNT16.CTRLBSET.bit.CMD != 0) {
+    // Wait for command
+  }
+  return counter->COUNT16.COUNT.reg;
+}
+
+// Disable timer and return current count value.
+// Timer must be previously initialized.
+// The count is read before the timer is disabled.
+uint32_t _PM_timerStop(void *tptr) {
+  Tc *tc = (Tc *)tptr; // Cast peripheral address passed in
+  uint32_t count = _PM_timerGetCount(tptr);
+  tc->COUNT16.CTRLA.bit.ENABLE = 0;
+  // A CTRLA.ENABLE write is tracked by SYNCBUSY.ENABLE (the bit
+  // _PM_timerInit() waits on), not SYNCBUSY.STATUS.
+  while (tc->COUNT16.SYNCBUSY.bit.ENABLE)
+    ;
+  return count;
+}
+
+// See notes in core.c before the "blast" functions
+#if F_CPU >= 200000000
+#define _PM_clockHoldHigh asm("nop; nop; nop; nop; nop");
+#define _PM_clockHoldLow asm("nop; nop");
+#elif F_CPU >= 180000000
+#define _PM_clockHoldHigh asm("nop; nop; nop; nop");
+#define _PM_clockHoldLow asm("nop");
+#elif F_CPU >= 150000000
+#define _PM_clockHoldHigh asm("nop; nop; nop");
+#define _PM_clockHoldLow asm("nop");
+#else
+#define _PM_clockHoldHigh asm("nop; nop; nop");
+#define _PM_clockHoldLow asm("nop");
+#endif
+
+#define _PM_minMinPeriod 160
+
+#endif // END __SAMD51__ || SAM_D5X_E5X
diff --git a/circuitpython/lib/protomatter/src/arch/stm32.h b/circuitpython/lib/protomatter/src/arch/stm32.h
new file mode 100644
index 0000000..714fdc4
--- /dev/null
+++ b/circuitpython/lib/protomatter/src/arch/stm32.h
@@ -0,0 +1,146 @@
+/*!
+ * @file stm32.h
+ *
+ * Part of Adafruit's Protomatter library for HUB75-style RGB LED matrices.
+ * This file contains STM32-SPECIFIC CODE.
+ *
+ * Adafruit invests time and resources providing this open source code,
+ * please support Adafruit and open-source hardware by purchasing
+ * products from Adafruit!
+ *
+ * Written by Phil "Paint Your Dragon" Burgess and Jeff Epler for
+ * Adafruit Industries, with contributions from the open source community.
+ *
+ * BSD license, all text here must be included in any redistribution.
+ *
+ */
+
+#pragma once
+
+#if defined(STM32F4_SERIES) || defined(STM32F405xx) // Arduino, CircuitPy
+
+#if defined(ARDUINO) // COMPILING FOR ARDUINO ------------------------------
+
+// Arduino port register lookups go here, else ones in arch.h are used.
+
+#elif defined(CIRCUITPY) // COMPILING FOR CIRCUITPYTHON --------------------
+
+#include "timers.h"
+
+// Pin numbers in this file carry the bit-within-port in their low four
+// bits (pin & 15) and the port index above that (pin / 16). Every use of
+// the macro argument is parenthesized so that an arbitrary expression can
+// be passed without operator-precedence surprises.
+#undef _PM_portBitMask
+#define _PM_portBitMask(pin) (1u << ((pin)&15))
+#define _PM_byteOffset(pin) (((pin)&15) / 8)
+#define _PM_wordOffset(pin) (((pin)&15) / 16)
+
+#define _PM_pinOutput(pin_) /* Configure a pin as push-pull output, no pull, very-high speed */ \
+ do { \
+ int8_t pin = (pin_); /* NOTE(review): int8_t cannot hold pin numbers above 127 -- confirm range */ \
+ GPIO_InitTypeDef GPIO_InitStruct = {0}; \
+ GPIO_InitStruct.Pin = 1 << (pin & 15); /* Bit within the port */ \
+ GPIO_InitStruct.Mode = GPIO_MODE_OUTPUT_PP; \
+ GPIO_InitStruct.Pull = GPIO_NOPULL; \
+ GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH; \
+ HAL_GPIO_Init(pin_port(pin / 16), &GPIO_InitStruct); /* Port index is pin / 16 */ \
+ } while (0)
+#define _PM_pinInput(pin_) /* Configure a pin as an input with no pull resistor */ \
+ do { \
+ int8_t pin = (pin_); /* NOTE(review): int8_t cannot hold pin numbers above 127 -- confirm range */ \
+ GPIO_InitTypeDef GPIO_InitStruct = {0}; \
+ GPIO_InitStruct.Pin = 1 << (pin & 15); /* Bit within the port */ \
+ GPIO_InitStruct.Mode = GPIO_MODE_INPUT; \
+ GPIO_InitStruct.Pull = GPIO_NOPULL; \
+ GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH; \
+ HAL_GPIO_Init(pin_port(pin / 16), &GPIO_InitStruct); /* Port index is pin / 16 */ \
+ } while (0)
+// Drive a single pin high or low through the HAL. The port is selected
+// with pin / 16 and the bit with pin & 15; the macro argument is
+// parenthesized so that any expression may be passed safely.
+#define _PM_pinHigh(pin)                                                       \
+  HAL_GPIO_WritePin(pin_port((pin) / 16), 1 << ((pin)&15), GPIO_PIN_SET)
+#define _PM_pinLow(pin)                                                        \
+  HAL_GPIO_WritePin(pin_port((pin) / 16), 1 << ((pin)&15), GPIO_PIN_RESET)
+
+#define _PM_PORT_TYPE uint16_t ///< Width used for GPIO register accesses here (accessors below return uint16_t *)
+
+// Address of the ODR register of the port that 'pin' belongs to
+// (port index = pin / 16), viewed as a 16-bit register.
+volatile uint16_t *_PM_portOutRegister(uint32_t pin) {
+  uint16_t *const odr = (uint16_t *)&pin_port(pin / 16)->ODR;
+  return odr;
+}
+
+// Address of the first 16-bit half of the BSRR register of the port that
+// 'pin' belongs to; this is what core.c uses as the "set" register.
+volatile uint16_t *_PM_portSetRegister(uint32_t pin) {
+  uint16_t *const bsrr = (uint16_t *)&pin_port(pin / 16)->BSRR;
+  return bsrr;
+}
+
+// STM32F4xx has no stand-alone clear register: the set and clear bits
+// share BSRR, with the "clear" bits in its upper half. Stepping one
+// uint16_t past the start of BSRR addresses them.
+volatile uint16_t *_PM_portClearRegister(uint32_t pin) {
+  uint16_t *const bsrr = (uint16_t *)&pin_port(pin / 16)->BSRR;
+  return bsrr + 1;
+}
+
+// TODO: was this somehow specific to TIM6?
+#define _PM_timerFreq 42000000 // Timer tick rate (presumably Hz) used by core.c for refresh-period math
+
+// Because it's tied to a specific timer right now, there can be only
+// one instance of the Protomatter_core struct. The Arduino library
+// sets up this pointer when calling begin().
+// TODO: this is no longer true, should it change?
+void *_PM_protoPtr = NULL; // Passed to _PM_row_handler() by the ISR below
+
+STATIC TIM_HandleTypeDef tim_handle; // HAL timer handle, populated in _PM_timerInit()
+
+// Timer interrupt service routine; registered through stm_peripherals_timer_preinit() in _PM_timerInit().
+void _PM_IRQ_HANDLER(void) {
+ // Clear overflow flag: (disabled SAMD-style line kept below; on this port _PM_timerStart() zeroes tim->SR)
+ //_PM_TIMER_DEFAULT->COUNT16.INTFLAG.reg = TC_INTFLAG_OVF;
+ _PM_row_handler(_PM_protoPtr); // In core.c
+}
+
+// Reserve and configure the timer, leaving it stopped with its IRQ
+// disabled at the NVIC. _PM_timerStart() is what actually sets it going.
+void _PM_timerInit(void *tptr) {
+  TIM_TypeDef *timer = (TIM_TypeDef *)tptr;
+
+  stm_peripherals_timer_reserve(timer);
+  // Set IRQs at max priority and start clock
+  stm_peripherals_timer_preinit(timer, 0, _PM_IRQ_HANDLER);
+
+  // Up-counting, prescaler 0, clock division 1, auto-reload preload off.
+  tim_handle.Instance = timer;
+  tim_handle.Init.Period = 1000; // Placeholder only; immediately replaced.
+  tim_handle.Init.Prescaler = 0;
+  tim_handle.Init.ClockDivision = TIM_CLOCKDIVISION_DIV1;
+  tim_handle.Init.CounterMode = TIM_COUNTERMODE_UP;
+  tim_handle.Init.AutoReloadPreload = TIM_AUTORELOAD_PRELOAD_DISABLE;
+
+  HAL_TIM_Base_Init(&tim_handle);
+
+  // Disable the IRQ and discard any pending request before setting priority.
+  size_t irqNum = stm_peripherals_timer_get_irqnum(timer);
+  HAL_NVIC_DisableIRQ(irqNum);
+  NVIC_ClearPendingIRQ(irqNum);
+  NVIC_SetPriority(irqNum, 0); // Top priority
+}
+
+// Program the period, start the counter and enable its update interrupt,
+// then enable the timer's IRQ at the NVIC.
+inline void _PM_timerStart(void *tptr, uint32_t period) {
+  TIM_TypeDef *timer = (TIM_TypeDef *)tptr;
+  timer->SR = 0;               // Zero the status register
+  timer->ARR = period;         // Auto-reload register takes the period
+  timer->CR1 |= TIM_CR1_CEN;   // Set the CEN bit
+  timer->DIER |= TIM_DIER_UIE; // Set the UIE bit
+  HAL_NVIC_EnableIRQ(stm_peripherals_timer_get_irqnum(timer));
+}
+
+// Return the timer's current counter register (CNT) value.
+inline uint32_t _PM_timerGetCount(void *tptr) {
+  return ((TIM_TypeDef *)tptr)->CNT;
+}
+
+// Disable the timer's IRQ at the NVIC, clear its CEN and UIE bits, then
+// return the counter value as read after those writes.
+uint32_t _PM_timerStop(void *tptr) {
+  TIM_TypeDef *timer = (TIM_TypeDef *)tptr;
+  HAL_NVIC_DisableIRQ(stm_peripherals_timer_get_irqnum(timer));
+  timer->CR1 &= ~TIM_CR1_CEN;
+  timer->DIER &= ~TIM_DIER_UIE;
+  const uint32_t count = timer->CNT;
+  return count;
+}
+// Settings from M4 for >= 150MHz, we use this part at 168MHz. Macros include their own trailing ';'.
+#define _PM_clockHoldHigh asm("nop; nop; nop"); ///< 3 NOPs issued while the clock line is high
+#define _PM_clockHoldLow asm("nop"); ///< 1 NOP issued while the clock line is low
+
+#define _PM_minMinPeriod 140 ///< Floor that _PM_begin() in core.c applies to core->minPeriod
+
+#endif // END CIRCUITPYTHON ------------------------------------------------
+
+#endif // END STM32F4_SERIES || STM32F405xx
diff --git a/circuitpython/lib/protomatter/src/arch/teensy4.h b/circuitpython/lib/protomatter/src/arch/teensy4.h
new file mode 100644
index 0000000..75cd1f4
--- /dev/null
+++ b/circuitpython/lib/protomatter/src/arch/teensy4.h
@@ -0,0 +1,172 @@
+/*!
+ * @file teensy4.h
+ *
+ * Part of Adafruit's Protomatter library for HUB75-style RGB LED matrices.
+ * This file contains i.MX 1062 (Teensy 4.x) SPECIFIC CODE.
+ *
+ * Adafruit invests time and resources providing this open source code,
+ * please support Adafruit and open-source hardware by purchasing
+ * products from Adafruit!
+ *
+ * Written by Phil "Paint Your Dragon" Burgess and Jeff Epler for
+ * Adafruit Industries, with contributions from the open source community.
+ *
+ * BSD license, all text here must be included in any redistribution.
+ *
+ */
+
+#pragma once
+
+#if defined(__IMXRT1062__)
+
+// i.MX only allows full 32-bit aligned writes to GPIO.
+#define _PM_STRICT_32BIT_IO ///< Change core.c behavior for long accesses only
+
+#if defined(ARDUINO) // COMPILING FOR ARDUINO ------------------------------
+
+static const struct { // Per-pin GPIO lookup table, indexed by pin number (entries for pins 0-39)
+ volatile uint32_t *base; ///< GPIO base address for pin
+ uint8_t bit; ///< GPIO bit number for pin (0-31)
+} _PM_teensyPins[] = {
+ {&CORE_PIN0_PORTREG, CORE_PIN0_BIT},
+ {&CORE_PIN1_PORTREG, CORE_PIN1_BIT},
+ {&CORE_PIN2_PORTREG, CORE_PIN2_BIT},
+ {&CORE_PIN3_PORTREG, CORE_PIN3_BIT},
+ {&CORE_PIN4_PORTREG, CORE_PIN4_BIT},
+ {&CORE_PIN5_PORTREG, CORE_PIN5_BIT},
+ {&CORE_PIN6_PORTREG, CORE_PIN6_BIT},
+ {&CORE_PIN7_PORTREG, CORE_PIN7_BIT},
+ {&CORE_PIN8_PORTREG, CORE_PIN8_BIT},
+ {&CORE_PIN9_PORTREG, CORE_PIN9_BIT},
+ {&CORE_PIN10_PORTREG, CORE_PIN10_BIT},
+ {&CORE_PIN11_PORTREG, CORE_PIN11_BIT},
+ {&CORE_PIN12_PORTREG, CORE_PIN12_BIT},
+ {&CORE_PIN13_PORTREG, CORE_PIN13_BIT},
+ {&CORE_PIN14_PORTREG, CORE_PIN14_BIT},
+ {&CORE_PIN15_PORTREG, CORE_PIN15_BIT},
+ {&CORE_PIN16_PORTREG, CORE_PIN16_BIT},
+ {&CORE_PIN17_PORTREG, CORE_PIN17_BIT},
+ {&CORE_PIN18_PORTREG, CORE_PIN18_BIT},
+ {&CORE_PIN19_PORTREG, CORE_PIN19_BIT},
+ {&CORE_PIN20_PORTREG, CORE_PIN20_BIT},
+ {&CORE_PIN21_PORTREG, CORE_PIN21_BIT},
+ {&CORE_PIN22_PORTREG, CORE_PIN22_BIT},
+ {&CORE_PIN23_PORTREG, CORE_PIN23_BIT},
+ {&CORE_PIN24_PORTREG, CORE_PIN24_BIT},
+ {&CORE_PIN25_PORTREG, CORE_PIN25_BIT},
+ {&CORE_PIN26_PORTREG, CORE_PIN26_BIT},
+ {&CORE_PIN27_PORTREG, CORE_PIN27_BIT},
+ {&CORE_PIN28_PORTREG, CORE_PIN28_BIT},
+ {&CORE_PIN29_PORTREG, CORE_PIN29_BIT},
+ {&CORE_PIN30_PORTREG, CORE_PIN30_BIT},
+ {&CORE_PIN31_PORTREG, CORE_PIN31_BIT},
+ {&CORE_PIN32_PORTREG, CORE_PIN32_BIT},
+ {&CORE_PIN33_PORTREG, CORE_PIN33_BIT},
+ {&CORE_PIN34_PORTREG, CORE_PIN34_BIT},
+ {&CORE_PIN35_PORTREG, CORE_PIN35_BIT},
+ {&CORE_PIN36_PORTREG, CORE_PIN36_BIT},
+ {&CORE_PIN37_PORTREG, CORE_PIN37_BIT},
+ {&CORE_PIN38_PORTREG, CORE_PIN38_BIT},
+ {&CORE_PIN39_PORTREG, CORE_PIN39_BIT},
+}; // NOTE(review): the macros below index this table unchecked; a pin above 39 would read past the end
+
+#define _PM_SET_OFFSET 33 ///< 0x84 byte offset = 33 longs
+#define _PM_CLEAR_OFFSET 34 ///< 0x88 byte offset = 34 longs
+#define _PM_TOGGLE_OFFSET 35 ///< 0x8C byte offset = 35 longs
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ // Byte/word index of a pin's bit within its 32-bit register
+#define _PM_byteOffset(pin) (_PM_teensyPins[pin].bit / 8)
+#define _PM_wordOffset(pin) (_PM_teensyPins[pin].bit / 16)
+#else // Big-endian: same indices, counted from the other end
+#define _PM_byteOffset(pin) (3 - (_PM_teensyPins[pin].bit / 8))
+#define _PM_wordOffset(pin) (1 - (_PM_teensyPins[pin].bit / 16))
+#endif
+
+#define _PM_portOutRegister(pin) (void *)_PM_teensyPins[pin].base ///< GPIO base address from the pin table
+
+#define _PM_portSetRegister(pin) /* Base address plus _PM_SET_OFFSET longs */ \
+ ((volatile uint32_t *)_PM_teensyPins[pin].base + _PM_SET_OFFSET)
+
+#define _PM_portClearRegister(pin) /* Base address plus _PM_CLEAR_OFFSET longs */ \
+ ((volatile uint32_t *)_PM_teensyPins[pin].base + _PM_CLEAR_OFFSET)
+
+#define _PM_portToggleRegister(pin) /* Base address plus _PM_TOGGLE_OFFSET longs */ \
+ ((volatile uint32_t *)_PM_teensyPins[pin].base + _PM_TOGGLE_OFFSET)
+
+// As written, because it's tied to a specific timer right now, the
+// Arduino lib only permits one instance of the Protomatter_core struct,
+// which it sets up when calling begin().
+void *_PM_protoPtr = NULL; // Handed to _PM_row_handler() by _PM_timerISR()
+
+// Code as written works with the Periodic Interrupt Timer directly,
+// rather than using the Teensy IntervalTimer library, reason being we
+// need to be able to poll the current timer value in _PM_timerGetCount(),
+// but that's not available from IntervalTimer, and the timer base address
+// it keeps is a private member (possible alternative is to do dirty pool
+// and access the pointer directly, knowing it's the first element in the
+// IntervalTimer object, but this is fraught with peril).
+
+#define _PM_timerFreq 24000000 // 24 MHz
+#define _PM_timerNum 0 // PIT timer #0 (can be 0-3)
+#define _PM_TIMER_DEFAULT (IMXRT_PIT_CHANNELS + _PM_timerNum) // PIT channel *; used by _PM_init() when timer is NULL
+
+// Periodic Interrupt Timer handler: run the matrix row handler, then write
+// 1 to TFLG on the default PIT channel to clear the timer interrupt.
+static void _PM_timerISR(void) {
+  IMXRT_PIT_CHANNEL_t *pit = _PM_TIMER_DEFAULT;
+  _PM_row_handler(_PM_protoPtr); // In core.c
+  pit->TFLG = 1;                 // Clear timer interrupt
+}
+
+// Prepare the PIT channel for use without starting it.
+void _PM_timerInit(void *tptr) {
+  IMXRT_PIT_CHANNEL_t *pit = (IMXRT_PIT_CHANNEL_t *)tptr;
+
+  CCM_CCGR1 |= CCM_CCGR1_PIT(CCM_CCGR_ON); // Enable clock signal to PIT
+  PIT_MCR = 1;                             // Enable PIT
+  pit->TCTRL = 0;                          // Disable timer and interrupt
+  pit->LDVAL = 100000;                     // Timer initial load value
+
+  // The handler is attached and IRQ_PIT enabled here, but the channel's own
+  // TCTRL stays 0 (timer and interrupt off) until _PM_timerStart().
+  attachInterruptVector(IRQ_PIT, &_PM_timerISR);
+  NVIC_ENABLE_IRQ(IRQ_PIT);
+}
+
+// Load a new period into the PIT channel and (re)start it with its
+// interrupt enabled.
+inline void _PM_timerStart(void *tptr, uint32_t period) {
+  IMXRT_PIT_CHANNEL_t *pit = (IMXRT_PIT_CHANNEL_t *)tptr;
+  pit->TCTRL = 0;      // Disable timer and interrupt
+  pit->LDVAL = period; // Set load value
+  // pit->CVAL = period; // And current value (just in case?)
+  pit->TFLG = 1;  // Clear timer interrupt
+  pit->TCTRL = 3; // Enable timer and interrupt
+}
+
+// Report the count as LDVAL minus CVAL; works whether or not the timer is
+// running. The timer must have been initialized beforehand.
+inline uint32_t _PM_timerGetCount(void *tptr) {
+  IMXRT_PIT_CHANNEL_t *pit = (IMXRT_PIT_CHANNEL_t *)tptr;
+  return (pit->LDVAL - pit->CVAL);
+}
+
+// Switch the PIT channel (and its interrupt) off, then report the count
+// as read afterwards. The timer must have been initialized beforehand.
+uint32_t _PM_timerStop(void *tptr) {
+  ((IMXRT_PIT_CHANNEL_t *)tptr)->TCTRL = 0; // Disable timer and interrupt
+  return _PM_timerGetCount(tptr);
+}
+
+#define _PM_clockHoldHigh /* 14 NOPs, issued while the clock line is high */ \
+ asm("nop; nop; nop; nop; nop; nop; nop;"); \
+ asm("nop; nop; nop; nop; nop; nop; nop;"); // NOTE(review): expands to two statements; unsafe as an unbraced if/else body
+#define _PM_clockHoldLow /* 20 NOPs, issued while the clock line is low */ \
+ asm("nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;"); \
+ asm("nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;"); // NOTE(review): two statements, as above
+
+#define _PM_chunkSize 1 ///< DON'T unroll loop, Teensy 4 is SO FAST
+
+#elif defined(CIRCUITPY) // COMPILING FOR CIRCUITPYTHON --------------------
+
+// Teensy 4 CircuitPython magic goes here.
+
+#endif // END CIRCUITPYTHON ------------------------------------------------
+
+#endif // END __IMXRT1062__ (Teensy 4)
diff --git a/circuitpython/lib/protomatter/src/core.c b/circuitpython/lib/protomatter/src/core.c
new file mode 100644
index 0000000..8428797
--- /dev/null
+++ b/circuitpython/lib/protomatter/src/core.c
@@ -0,0 +1,1302 @@
+/*!
+ * @file core.c
+ *
+ * Part of Adafruit's Protomatter library for HUB75-style RGB LED matrices.
+ *
+ * Adafruit invests time and resources providing this open source code,
+ * please support Adafruit and open-source hardware by purchasing
+ * products from Adafruit!
+ *
+ * Written by Phil "Paint Your Dragon" Burgess and Jeff Epler for
+ * Adafruit Industries, with contributions from the open source community.
+ *
+ * BSD license, all text here must be included in any redistribution.
+ *
+ */
+
+// Device- and environment-neutral core matrix-driving functionality.
+// See notes near top of arch/arch.h regarding assumptions of hardware
+// "common ground." If you find yourself doing an "#ifdef ARDUINO" or
+// "#ifdef _SAMD21_" in this file, STOP. Idea is that the code in this
+// file is neutral and portable (within aforementioned assumptions).
+// Nonportable elements should appear in arch.h. If arch.h functionality
+// is lacking, extend it there, do not go making device- or environment-
+// specific cases within this file.
+
+// Function names are intentionally a little obtuse, idea is that one writes
+// a more sensible wrapper around this for specific environments (e.g. the
+// Arduino stuff in Adafruit_Protomatter.cpp). The "_PM_" prefix on most
+// things hopefully makes function and variable name collisions much less
+// likely with one's own code.
+
+#include "core.h" // enums and structs
+#include "arch/arch.h" // Do NOT include this in any other source files
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Overall matrix refresh rate (frames/second) is a function of matrix width
+// and chain length, number of address lines, number of bit planes, CPU speed
+// and whether or not a GPIO toggle register is available. There is no "this
+// will run at X-frames-per-second" constant figure. You typically just have
+// to try it out and perhaps trade off some bit planes for refresh rate until
+// the image looks good and stable. Anything over 100 Hz is usually passable,
+// around 250 Hz is where things firm up. And while this could proceed higher
+// in some situations, the tradeoff is that faster rates use progressively
+// more CPU time (because it's timer interrupt based and not using DMA or
+// special peripherals). So a throttle is set here, an approximate maximum
+// frame rate which the software will attempt to avoid exceeding (but may
+// refresh slower than this, and in many cases will...just need to set an
+// upper limit to avoid excessive CPU load). An incredibly long comment block
+// for a single constant, thank you for coming to my TED talk!
+#define _PM_MAX_REFRESH_HZ 250 ///< Approximate max matrix refresh rate (frames/second)
+
+// Time (in microseconds) to pause following any change in address lines
+// (individually or collectively). Some matrices respond slowly there...
+// must pause on change for matrix to catch up. Defined here (rather than
+// arch.h) because it's not architecture-specific.
+#define _PM_ROW_DELAY 8 ///< Delay time between row address line changes (us)
+
+// These are the lowest-level functions for issuing data to matrices.
+// There are three versions because it depends on how the six RGB data bits
+// (and clock bit) are arranged within a 32-bit PORT register. If all six
+// (seven) fit within one byte or word of the PORT, the library's memory
+// use (and corresponding data-issuing function) change. This will also have
+// an impact on parallel chains in the future, where the number of concurrent
+// RGB data bits isn't always six, but some multiple thereof (i.e. up to five
+// parallel outputs -- 30 RGB bits + clock -- on a 32-bit PORT, though that's
+// largely hypothetical as the chance of finding a PORT with that many bits
+// exposed and NOT interfering with other peripherals on a board is highly
+// improbable. But I could see four happening, maybe on a Grand Central or
+// other kitchen-sink board).
+static void blast_byte(Protomatter_core *core, uint8_t *data);
+static void blast_word(Protomatter_core *core, uint16_t *data);
+static void blast_long(Protomatter_core *core, uint32_t *data);
+
+#define _PM_clearReg(x) \
+ (*(volatile _PM_PORT_TYPE *)((x).clearReg) = \
+ ((x).bit)) ///< Clear a non-RGB-data-or-clock control line: writes x.bit to x.clearReg (x is a _PM_pin)
+#define _PM_setReg(x) \
+ (*(volatile _PM_PORT_TYPE *)((x).setReg) = \
+ ((x).bit)) ///< Set a non-RGB-data-or-clock control line: writes x.bit to x.setReg (x is a _PM_pin)
+
+// Check arguments and fill in the essential fields of the core structure.
+// The structure itself is NOT allocated here -- the caller supplies it.
+// (In the Arduino C++ library, it's part of the Protomatter class.)
+// Returns PROTOMATTER_OK on success, PROTOMATTER_ERR_ARG for a NULL core
+// (or a NULL timer on platforms with no default timer), and
+// PROTOMATTER_ERR_MALLOC if either pin-list copy cannot be allocated.
+ProtomatterStatus _PM_init(Protomatter_core *core, uint16_t bitWidth,
+                           uint8_t bitDepth, uint8_t rgbCount, uint8_t *rgbList,
+                           uint8_t addrCount, uint8_t *addrList,
+                           uint8_t clockPin, uint8_t latchPin, uint8_t oePin,
+                           bool doubleBuffer, int8_t tile, void *timer) {
+  if (core == NULL) {
+    return PROTOMATTER_ERR_ARG;
+  }
+
+  // bitDepth is deliberately NOT limited here; the calling layer handles
+  // that (it varies with implementation, e.g. the GFX lib tops out at 6
+  // bitplanes, but other environments might allow more or fewer).
+  rgbCount = (rgbCount > 5) ? 5 : rgbCount;    // Max 5 in parallel (32-bit PORT)
+  addrCount = (addrCount > 5) ? 5 : addrCount; // Max 5 address lines (A-E)
+  if (tile == 0) {
+    tile = 1; // Zero vertical tiling is meaningless; a single matrix is 1.
+  }
+
+  if (timer == NULL) {
+#if defined(_PM_TIMER_DEFAULT)
+    // A NULL timer (the constructor's default) selects the default from
+    // arch.h. For example, in the Arduino case it's tied to TC4.
+    timer = _PM_TIMER_DEFAULT;
+#else
+    return PROTOMATTER_ERR_ARG;
+#endif
+  }
+
+  core->timer = timer;
+  core->width = bitWidth; // Matrix chain width in bits (NOT including V tile)
+  core->tile = tile;      // Matrix chain vertical tiling
+  core->chainBits = bitWidth * abs(tile); // Total matrix chain bits
+  core->numPlanes = bitDepth;
+  core->parallel = rgbCount;
+  core->numAddressLines = addrCount;
+  core->clockPin = clockPin;
+  core->latch.pin = latchPin;
+  core->oe.pin = oePin;
+  core->doubleBuffer = doubleBuffer;
+  core->addr = NULL;
+  core->screenData = NULL;
+
+  // The rgbList and addrList tables are copied, since the caller may have
+  // passed stack locals or some other short-lived storage. screenData is
+  // NOT allocated here: the element size (byte, word, long) is unknown
+  // until the begin function has examined all the pin bitmasks.
+
+  const uint8_t rgbPinCount = rgbCount * 6; // Parallel count -> pin count
+
+  core->rgbPins = (uint8_t *)_PM_allocate(rgbPinCount * sizeof(uint8_t));
+  if (!core->rgbPins) {
+    return PROTOMATTER_ERR_MALLOC;
+  }
+
+  core->addr = (_PM_pin *)_PM_allocate(addrCount * sizeof(_PM_pin));
+  if (!core->addr) {
+    // Second allocation failed; give back the first.
+    _PM_free(core->rgbPins);
+    core->rgbPins = NULL;
+    return PROTOMATTER_ERR_MALLOC;
+  }
+
+  memcpy(core->rgbPins, rgbList, rgbPinCount * sizeof(uint8_t));
+  for (uint8_t line = 0; line < addrCount; line++) {
+    core->addr[line].pin = addrList[line];
+  }
+  return PROTOMATTER_OK;
+}
+
+// Allocate display buffers and populate additional elements.
+// Expects a core structure previously set up by _PM_init(). Returns
+// PROTOMATTER_OK on success, PROTOMATTER_ERR_ARG for a NULL core,
+// PROTOMATTER_ERR_MALLOC if the pin list from _PM_init() is missing or the
+// screen buffer cannot be allocated, and PROTOMATTER_ERR_PINS if the RGB
+// pins are not all on the same PORT as the clock pin. On success the pins
+// are configured as outputs and the timer is started via _PM_resume().
+ProtomatterStatus _PM_begin(Protomatter_core *core) {
+  if (!core)
+    return PROTOMATTER_ERR_ARG;
+
+  if (!core->rgbPins) { // NULL if copy failed to allocate
+    return PROTOMATTER_ERR_MALLOC;
+  }
+
+  // Verify that rgbPins and clockPin are all on the same PORT. If not,
+  // return an error. Pin list is not freed; please call dealloc function.
+  // Also get bitmask of which bits within 32-bit PORT register are
+  // referenced.
+  uint8_t *port = (uint8_t *)_PM_portOutRegister(core->clockPin);
+#if defined(_PM_portToggleRegister)
+  // If a bit-toggle register is present, the clock pin is included
+  // in determining which bytes of the PORT register are used (and thus
+  // the data storage efficiency).
+  uint32_t bitMask = _PM_portBitMask(core->clockPin);
+#else
+  // If no bit-toggle register, clock pin can be on any bit, doesn't
+  // affect storage efficiency.
+  uint32_t bitMask = 0;
+#endif
+
+  for (uint8_t i = 0; i < core->parallel * 6; i++) {
+    uint8_t *p2 = (uint8_t *)_PM_portOutRegister(core->rgbPins[i]);
+    if (p2 != port) {
+      return PROTOMATTER_ERR_PINS;
+    }
+    bitMask |= _PM_portBitMask(core->rgbPins[i]);
+  }
+
+  // RGB + clock are on same port, we can proceed...
+
+  // Determine data type for internal representation. If all the data
+  // bitmasks (and possibly clock bitmask, depending whether toggle-bits
+  // register is present) are in the same byte, this can be stored more
+  // compact than if they're spread across a word or long.
+  uint8_t byteMask = 0;
+  if (bitMask & 0xFF000000)
+    byteMask |= 0b1000;
+  if (bitMask & 0x00FF0000)
+    byteMask |= 0b0100;
+  if (bitMask & 0x0000FF00)
+    byteMask |= 0b0010;
+  if (bitMask & 0x000000FF)
+    byteMask |= 0b0001;
+  switch (byteMask) {
+  case 0b0001: // If all PORT bits are in the same byte...
+  case 0b0010:
+  case 0b0100:
+  case 0b1000:
+    core->bytesPerElement = 1; // Use 8-bit PORT accesses.
+    break;
+  case 0b0011: // If all PORT bits in upper/lower word...
+  case 0b1100:
+    core->bytesPerElement = 2; // Use 16-bit PORT accesses.
+    // Although some devices might tolerate unaligned 16-bit accesses
+    // ('middle' word of 32-bit PORT), that is NOT handled here.
+    // It's a portability liability.
+    break;
+  default:                     // Any other situation...
+    core->bytesPerElement = 4; // Use 32-bit PORT accesses.
+    break;
+  }
+
+  // Planning for screen data allocation...
+  core->numRowPairs = 1 << core->numAddressLines;
+  // 'chunks' must be wider than 8 bits: with a small _PM_chunkSize (e.g. 1)
+  // a chain of 256 or more bits would otherwise wrap around and the screen
+  // buffer below would be under-allocated.
+  uint16_t chunks = (core->chainBits + (_PM_chunkSize - 1)) / _PM_chunkSize;
+  uint16_t columns = chunks * _PM_chunkSize; // Padded matrix width
+  uint32_t screenBytes =
+      columns * core->numRowPairs * core->numPlanes * core->bytesPerElement;
+
+  core->bufferSize = screenBytes; // Bytes per matrix buffer (1 or 2)
+  if (core->doubleBuffer)
+    screenBytes *= 2; // Total for matrix buffer(s)
+  uint32_t rgbMaskBytes = core->parallel * 6 * core->bytesPerElement;
+
+  // Allocate matrix buffer(s). Don't worry about the return type...
+  // though we might be using words or longs for certain pin configs,
+  // _PM_allocate() by definition always aligns to the longest type.
+  if (!(core->screenData =
+            (uint8_t *)_PM_allocate(screenBytes + rgbMaskBytes))) {
+    return PROTOMATTER_ERR_MALLOC;
+  }
+
+  // rgbMask data follows the matrix buffer(s)
+  core->rgbMask = core->screenData + screenBytes;
+
+#if !defined(_PM_portToggleRegister)
+  // Clear entire screenData buffer so there's no cruft in any pad bytes
+  // (if using toggle register, each is set to clockMask below instead).
+  memset(core->screenData, 0, screenBytes);
+#endif
+
+  // Figure out clockMask and rgbAndClockMask, clear matrix buffers
+  if (core->bytesPerElement == 1) {
+    core->portOffset = _PM_byteOffset(core->rgbPins[0]);
+#if defined(_PM_portToggleRegister) && !defined(_PM_STRICT_32BIT_IO)
+    // Clock and rgbAndClockMask are 8-bit values
+    core->clockMask = _PM_portBitMask(core->clockPin) >> (core->portOffset * 8);
+    core->rgbAndClockMask =
+        (bitMask >> (core->portOffset * 8)) | core->clockMask;
+    memset(core->screenData, core->clockMask, screenBytes);
+#else
+    // Clock and rgbAndClockMask are 32-bit values
+    core->clockMask = _PM_portBitMask(core->clockPin);
+    core->rgbAndClockMask = bitMask | core->clockMask;
+#endif
+    for (uint8_t i = 0; i < core->parallel * 6; i++) {
+      ((uint8_t *)core->rgbMask)[i] = // Pin bitmasks are 8-bit
+          _PM_portBitMask(core->rgbPins[i]) >> (core->portOffset * 8);
+    }
+  } else if (core->bytesPerElement == 2) {
+    core->portOffset = _PM_wordOffset(core->rgbPins[0]);
+#if defined(_PM_portToggleRegister) && !defined(_PM_STRICT_32BIT_IO)
+    // Clock and rgbAndClockMask are 16-bit values
+    core->clockMask =
+        _PM_portBitMask(core->clockPin) >> (core->portOffset * 16);
+    core->rgbAndClockMask =
+        (bitMask >> (core->portOffset * 16)) | core->clockMask;
+    uint32_t elements = screenBytes / 2;
+    for (uint32_t i = 0; i < elements; i++) {
+      ((uint16_t *)core->screenData)[i] = core->clockMask;
+    }
+#else
+    // Clock and rgbAndClockMask are 32-bit values
+    core->clockMask = _PM_portBitMask(core->clockPin);
+    core->rgbAndClockMask = bitMask | core->clockMask;
+#if defined(_PM_portToggleRegister)
+    // TO DO: this ifdef and the one above can probably be wrapped up
+    // in a more cohesive case. Think something similar will be needed
+    // for the byte case. Will need Teensy 4.1 to test.
+    uint32_t elements = screenBytes / 2;
+    uint16_t mask = core->clockMask >> (core->portOffset * 16);
+    for (uint32_t i = 0; i < elements; i++) {
+      ((uint16_t *)core->screenData)[i] = mask;
+    }
+#endif
+#endif
+    for (uint8_t i = 0; i < core->parallel * 6; i++) {
+      ((uint16_t *)core->rgbMask)[i] = // Pin bitmasks are 16-bit
+          _PM_portBitMask(core->rgbPins[i]) >> (core->portOffset * 16);
+    }
+  } else {
+    core->portOffset = 0;
+    core->clockMask = _PM_portBitMask(core->clockPin);
+    core->rgbAndClockMask = bitMask | core->clockMask;
+#if defined(_PM_portToggleRegister)
+    uint32_t elements = screenBytes / 4;
+    for (uint32_t i = 0; i < elements; i++) {
+      ((uint32_t *)core->screenData)[i] = core->clockMask;
+    }
+#endif
+    for (uint8_t i = 0; i < core->parallel * 6; i++) {
+      ((uint32_t *)core->rgbMask)[i] = // Pin bitmasks are 32-bit
+          _PM_portBitMask(core->rgbPins[i]);
+    }
+  }
+
+  // Estimate minimum bitplane #0 period for _PM_MAX_REFRESH_HZ rate.
+  uint32_t minPeriodPerFrame = _PM_timerFreq / _PM_MAX_REFRESH_HZ;
+  uint32_t minPeriodPerLine = minPeriodPerFrame / core->numRowPairs;
+  core->minPeriod = minPeriodPerLine / ((1 << core->numPlanes) - 1);
+  if (core->minPeriod < _PM_minMinPeriod) {
+    core->minPeriod = _PM_minMinPeriod;
+  }
+  core->bitZeroPeriod = core->minPeriod;
+  // Actual frame rate may be lower than this...it's only an estimate
+  // and does not factor in things like address line selection delays
+  // or interrupt overhead. That's OK, just don't want to exceed this
+  // rate, as it'll eat all the CPU cycles.
+
+  core->activeBuffer = 0;
+
+  // Configure pins as outputs and initialize their states.
+
+  core->latch.setReg = _PM_portSetRegister(core->latch.pin);
+  core->latch.clearReg = _PM_portClearRegister(core->latch.pin);
+  core->latch.bit = _PM_portBitMask(core->latch.pin);
+  core->oe.setReg = _PM_portSetRegister(core->oe.pin);
+  core->oe.clearReg = _PM_portClearRegister(core->oe.pin);
+  core->oe.bit = _PM_portBitMask(core->oe.pin);
+
+  _PM_pinOutput(core->clockPin);
+  _PM_pinLow(core->clockPin); // Init clock LOW
+  _PM_pinOutput(core->latch.pin);
+  _PM_pinLow(core->latch.pin); // Init latch LOW
+  _PM_pinOutput(core->oe.pin);
+  _PM_pinHigh(core->oe.pin); // Init OE HIGH (disable output)
+
+  for (uint8_t i = 0; i < core->parallel * 6; i++) {
+    _PM_pinOutput(core->rgbPins[i]);
+    _PM_pinLow(core->rgbPins[i]);
+  }
+#if defined(_PM_portToggleRegister)
+  core->addrPortToggle = _PM_portToggleRegister(core->addr[0].pin);
+  core->singleAddrPort = 1;
+#endif
+  core->prevRow = (1 << core->numAddressLines) - 2;
+  for (uint8_t line = 0, bit = 1; line < core->numAddressLines;
+       line++, bit <<= 1) {
+    core->addr[line].setReg = _PM_portSetRegister(core->addr[line].pin);
+    core->addr[line].clearReg = _PM_portClearRegister(core->addr[line].pin);
+    core->addr[line].bit = _PM_portBitMask(core->addr[line].pin);
+    _PM_pinOutput(core->addr[line].pin);
+    if (core->prevRow & bit) {
+      _PM_pinHigh(core->addr[line].pin);
+    } else {
+      _PM_pinLow(core->addr[line].pin);
+    }
+#if defined(_PM_portToggleRegister)
+    // If address pin on different port than addr 0, no singleAddrPort.
+    if (_PM_portToggleRegister(core->addr[line].pin) != core->addrPortToggle) {
+      core->singleAddrPort = 0;
+    }
+#endif
+  }
+
+  // Get pointers to bit set and clear registers (and toggle, if present)
+  core->setReg = (uint8_t *)_PM_portSetRegister(core->clockPin);
+  core->clearReg = (uint8_t *)_PM_portClearRegister(core->clockPin);
+#if defined(_PM_portToggleRegister)
+  core->toggleReg = (uint8_t *)_PM_portToggleRegister(core->clockPin);
+#endif
+
+  // Reset plane/row counters, config and start timer
+  _PM_resume(core);
+
+  return PROTOMATTER_OK;
+}
+
+// Shut a Protomatter matrix down without freeing anything. OE is driven
+// HIGH and all-zero data is clocked into the matrix shift registers, so
+// the panel does not halt with LEDs lit.
+void _PM_stop(Protomatter_core *core) {
+  if (!core) {
+    return;
+  }
+  // screenData is NULL when _PM_begin failed; in that case none of the
+  // remaining "stop" steps make sense, so bail out early.
+  if (!core->screenData) {
+    return;
+  }
+
+  while (core->swapBuffers) {
+    // Wait for any pending buffer swap
+  }
+  _PM_timerStop(core->timer); // Halt timer
+  _PM_setReg(core->oe);       // Set OE HIGH (disable output)
+
+  // In PRINCIPLE, OE high would be enough on its own...but that pin might
+  // be shared with another function, such as the onboard LED (which
+  // pulses during bootloading), so the matrix shift registers are emptied
+  // as well for good measure. First, every RGB pin goes LOW...
+  for (uint8_t pinIdx = 0; pinIdx < core->parallel * 6; pinIdx++) {
+    _PM_pinLow(core->rgbPins[pinIdx]);
+  }
+  // ...then the clock is pulsed once per chain bit with RGBs held low.
+  for (uint32_t pulse = 0; pulse < core->chainBits; pulse++) {
+    _PM_pinHigh(core->clockPin);
+    _PM_clockHoldHigh;
+    _PM_pinLow(core->clockPin);
+    _PM_clockHoldLow;
+  }
+  // Finally, latch the (all-zero) data.
+  _PM_setReg(core->latch);
+  _PM_clearReg(core->latch);
+}
+
+// Reset the plane/row counters, put the address lines in the state that
+// matches prevRow, then configure and start the timer. Does nothing when
+// passed a NULL core.
+void _PM_resume(Protomatter_core *core) {
+  if (!core) {
+    return;
+  }
+
+  // Plane & row start at their max values so they roll over on the first
+  // interrupt.
+  core->plane = core->numPlanes - 1;
+  core->row = core->numRowPairs - 1;
+  if (core->numRowPairs > 1) {
+    core->prevRow = core->row - 1;
+  } else {
+    core->prevRow = 1;
+  }
+  core->swapBuffers = 0;
+  core->frameCount = 0;
+
+  // Each address line is made an output and driven to its bit of prevRow.
+  for (uint8_t line = 0; line < core->numAddressLines; line++) {
+    const uint8_t lineBit = (uint8_t)(1u << line);
+    _PM_pinOutput(core->addr[line].pin);
+    if (core->prevRow & lineBit) {
+      _PM_pinHigh(core->addr[line].pin);
+    } else {
+      _PM_pinLow(core->addr[line].pin);
+    }
+  }
+
+  _PM_timerInit(core->timer);        // Configure timer
+  _PM_timerStart(core->timer, 1000); // Start timer
+}
+
+// Free memory associated with core structure. Does NOT dealloc struct.
+// The matrix is stopped first (see _PM_stop()). Each freed pointer is
+// reset to NULL afterwards, so that calling this function again -- or
+// calling _PM_stop() later, which tests screenData -- finds "nothing
+// allocated" instead of a dangling pointer (avoiding a double free).
+void _PM_deallocate(Protomatter_core *core) {
+  if ((core)) {
+    _PM_stop(core);
+    // TO DO: Set all pins back to inputs here?
+    if (core->screenData) {
+      _PM_free(core->screenData);
+      core->screenData = NULL;
+    }
+    if (core->addr) {
+      _PM_free(core->addr);
+      core->addr = NULL;
+    }
+    if (core->rgbPins) {
+      _PM_free(core->rgbPins);
+      core->rgbPins = NULL;
+    }
+  }
+}
+
+// ISR function (in arch.h) calls this function which it extern'd.
+// Profuse apologies for the ESP32-specific IRAM_ATTR here -- the goal was
+// for all architecture-specific details to be in arch.h -- but the need
+// for one here caught me off guard. So, in arch.h, for all non-ESP32
+// devices, IRAM_ATTR is defined to nothing and is ignored here. If any
+// future architectures have their own attribute for making a function
+// RAM-resident, #define IRAM_ATTR to that in the corresponding device-
+// specific section of arch.h. Sorry. :/
+// Any functions called by this function should also be IRAM_ATTR'd.
+IRAM_ATTR void _PM_row_handler(Protomatter_core *core) {
+ // One pass of this handler: blank the display, latch the bitplane that
+ // was shifted out on the previous pass, update the row address lines
+ // if that was plane 0, advance the plane/row counters, restart the
+ // timer with the just-latched plane's display time, re-enable output,
+ // then shift out the next bitplane while the latched one is shown.
+
+ _PM_setReg(core->oe); // Disable LED output
+
+ // ESP32 requires this next line, but not wanting to put arch-specific
+ // ifdefs in this code...it's a trivial operation so just do it.
+ // Latch is already clear at this point, but we go through the motions
+ // to clear it again in order to sync up the setReg(OE) above with the
+ // setReg(latch) that follows. Reason being, bit set/clear operations
+ // on ESP32 aren't truly atomic, and if those two pins are on the same
+ // port (quite common) the second setReg will be ignored. The nonsense
+ // clearReg is used to sync up the two setReg operations. See also the
+ // ESP32-specific PEW define in arch.h, same deal.
+ _PM_clearReg(core->latch);
+
+ _PM_setReg(core->latch);
+ (void)_PM_timerStop(core->timer);
+ uint8_t prevPlane = core->plane; // Save that plane # for later timing
+ _PM_clearReg(core->latch); // (split to add a few cycles)
+
+ if (prevPlane == 0) { // Plane 0 just finished loading
+#if defined(_PM_portToggleRegister)
+ // If all address lines are on a single PORT (and bit toggle is
+ // available), do address line change all at once. Even doing all
+ // this math takes MUCH less time than the delays required when
+ // doing line-by-line changes.
+ if (core->singleAddrPort) {
+ // Make bitmasks of prior and new row bits
+ uint32_t priorBits = 0, newBits = 0;
+ for (uint8_t line = 0, bit = 1; line < core->numAddressLines;
+ line++, bit <<= 1) {
+ if (core->row & bit) {
+ newBits |= core->addr[line].bit;
+ }
+ if (core->prevRow & bit) {
+ priorBits |= core->addr[line].bit;
+ }
+ }
+ // XOR leaves only the address bits that differ between the two rows;
+ // writing that mask to the toggle register flips just those lines.
+ *(volatile _PM_PORT_TYPE *)core->addrPortToggle = newBits ^ priorBits;
+ _PM_delayMicroseconds(_PM_ROW_DELAY);
+ } else {
+#endif
+ // Configure row address lines individually, making changes
+ // (with delays) only where necessary.
+ for (uint8_t line = 0, bit = 1; line < core->numAddressLines;
+ line++, bit <<= 1) {
+ if ((core->row & bit) != (core->prevRow & bit)) {
+ if (core->row & bit) { // Set addr line high
+ _PM_setReg(core->addr[line]);
+ } else { // Set addr line low
+ _PM_clearReg(core->addr[line]);
+ }
+ _PM_delayMicroseconds(_PM_ROW_DELAY);
+ }
+ }
+#if defined(_PM_portToggleRegister)
+ }
+#endif
+ core->prevRow = core->row; // Remember what the address lines now show
+ }
+
+ // Advance bitplane index and/or row as necessary
+ if (++core->plane >= core->numPlanes) { // Next data bitplane, or
+ core->plane = 0; // roll over bitplane to start
+ if (++core->row >= core->numRowPairs) { // Next row, or
+ core->row = 0; // roll over row to start
+ // Switch matrix buffers if due (only if double-buffered)
+ if (core->swapBuffers) {
+ core->activeBuffer = 1 - core->activeBuffer;
+ core->swapBuffers = 0; // Swapped!
+ }
+ core->frameCount++; // One full pass over all rows and planes completed
+ }
+ }
+
+ // core->plane now is index of data to issue, NOT data to display.
+ // 'prevPlane' is the previously-loaded data, which gets displayed
+ // now while the next plane data is loaded.
+
+ // Set timer and enable LED output for data loaded on PRIOR pass:
+ // (the period doubles with each successive plane: bitZeroPeriod << plane)
+ _PM_timerStart(core->timer, core->bitZeroPeriod << prevPlane);
+ _PM_delayMicroseconds(1); // Appease Teensy4
+ _PM_clearReg(core->oe); // Enable LED output
+
+ // Locate the bitplane to issue. Each bitplane is elementsPerLine
+ // elements (chainBits rounded up to a multiple of _PM_chunkSize), and a
+ // row's planes are stored back to back, so the plane index within the
+ // buffer is (numPlanes * row + plane); multiplying by bytesPerElement
+ // turns the element offset into bytes.
+ // NOTE(review): srcOffset is added to screenData before the cast below,
+ // so this assumes screenData is a byte-sized pointer -- confirm in core.h.
+ uint32_t elementsPerLine =
+ _PM_chunkSize * ((core->chainBits + (_PM_chunkSize - 1)) / _PM_chunkSize);
+ uint32_t srcOffset = elementsPerLine *
+ (core->numPlanes * core->row + core->plane) *
+ core->bytesPerElement;
+ if (core->doubleBuffer) {
+ // Skip ahead by bufferSize for each step of the active-buffer index
+ srcOffset += core->bufferSize * core->activeBuffer;
+ }
+
+ // Issue the data using the routine that matches the element width
+ if (core->bytesPerElement == 1) {
+ blast_byte(core, (uint8_t *)(core->screenData + srcOffset));
+ } else if (core->bytesPerElement == 2) {
+ blast_word(core, (uint16_t *)(core->screenData + srcOffset));
+ } else {
+ blast_long(core, (uint32_t *)(core->screenData + srcOffset));
+ }
+
+ // core->plane data is now loaded, will be shown on NEXT pass
+
+ // On the last (longest) bitplane (note that 'plane' has already wrapped
+ // around earlier, so a value of 0 here indicates longest plane), take
+ // note of the elapsed timer value at this point...that's the number of
+ // cycles required to issue (not necessarily display) data for one plane,
+ // and the bare minimum display duration allowed for plane 0.
+ if ((core->numPlanes > 1) && (core->plane == 0)) {
+ // Determine number of timer cycles taken to issue the data.
+ // It can vary slightly if heavy interrupts happen, things like that.
+ // Timer is still running and counting up at this point.
+ uint32_t elapsed = _PM_timerGetCount(core->timer);
+ // Nudge the plane-zero time up or down (filtering to avoid jitter)
+ // (weighted 7:1 toward the old value; the +4 rounds the /8 to nearest)
+ core->bitZeroPeriod = ((core->bitZeroPeriod * 7) + elapsed + 4) / 8;
+ // But don't allow it to drop below the minimum period calculated during
+ // begin(), that's a hard limit and would just waste cycles.
+ if (core->bitZeroPeriod < core->minPeriod) {
+ core->bitZeroPeriod = core->minPeriod;
+ }
+ }
+}
+
+// Innermost data-stuffing loop functions
+
+// The presence of a bit-toggle register can make the data-stuffing loop a
+// fair bit faster (2 PORT accesses per column vs 3). But ironically, some
+// devices (e.g. SAMD51) can outpace the matrix max CLK speed, so we slow
+// them down with a few NOPs. These are defined in arch.h as needed.
+// _PM_clockHoldLow is whatever code necessary to delay the clock rise
+// after data is placed on the PORT. _PM_clockHoldHigh is code for delay
+// before setting the clock back low. If undefined, nothing goes there.
+
+#if !defined(PEW) // arch.h can define a custom PEW if needed (e.g. ESP32)
+
+// PEW expands to the statements that clock ONE element of matrix data
+// out of the GPIO port. It is only meaningful inside the blast_*()
+// functions, because it refers to their local variables by name:
+// 'data' and 'clock' in every variant, plus either 'toggle' (builds with
+// _PM_portToggleRegister) or 'set', 'clear_full' and 'rgbclock' (builds
+// without it). The non-strict set/clear variant additionally uses
+// 'set_full'; the _PM_STRICT_32BIT_IO variants additionally use 'shift'.
+
+#if !defined(_PM_STRICT_32BIT_IO) // Partial access to 32-bit GPIO OK
+
+#if defined(_PM_portToggleRegister)
+// Toggle-register variant: two PORT writes per element.
+#define PEW \
+ *toggle = *data++; /* Toggle in new data + toggle clock low */ \
+ _PM_clockHoldLow; \
+ *toggle = clock; /* Toggle clock high */ \
+ _PM_clockHoldHigh;
+#else
+// Set/clear variant: three PORT writes per element.
+#define PEW \
+ *set = *data++; /* Set RGB data high */ \
+ _PM_clockHoldLow; \
+ *set_full = clock; /* Set clock high */ \
+ _PM_clockHoldHigh; \
+ *clear_full = rgbclock; /* Clear RGB data + clock */ \
+ ///< Bitbang one set of RGB data bits to matrix
+#endif
+
+#else // ONLY 32-bit GPIO
+
+#if defined(_PM_portToggleRegister)
+// Toggle-register variant; data is shifted into position by 'shift'
+// because the register is written at full width.
+#define PEW \
+ *toggle = *data++ << shift; /* Toggle in new data + toggle clock low */ \
+ _PM_clockHoldLow; \
+ *toggle = clock; /* Toggle clock high */ \
+ _PM_clockHoldHigh;
+#else
+// Set/clear variant; data is shifted into position by 'shift' and the
+// clock is raised through the same 'set' pointer as the data.
+#define PEW \
+ *set = *data++ << shift; /* Set RGB data high */ \
+ _PM_clockHoldLow; \
+ *set = clock; /* Set clock high */ \
+ _PM_clockHoldHigh; \
+ *clear_full = rgbclock; /* Clear RGB data + clock */ \
+ ///< Bitbang one set of RGB data bits to matrix
+#endif
+
+#endif // end 32-bit GPIO
+
+#endif // end PEW
+
+// PEW_UNROLL repeats PEW once per element of a chunk, so that a single
+// pass of the 'while (chunks--)' loop in the blast_*() functions issues
+// _PM_chunkSize elements. Only the chunk sizes listed here (powers of two
+// from 1 to 64) are provided; any other value stops the build.
+#if _PM_chunkSize == 1
+#define PEW_UNROLL PEW
+#elif _PM_chunkSize == 2
+#define PEW_UNROLL PEW PEW ///< 2-way PEW unroll
+#elif _PM_chunkSize == 4
+#define PEW_UNROLL PEW PEW PEW PEW ///< 4-way PEW unroll
+#elif _PM_chunkSize == 8
+#define PEW_UNROLL PEW PEW PEW PEW PEW PEW PEW PEW ///< 8-way PEW unroll
+#elif _PM_chunkSize == 16
+#define PEW_UNROLL \
+ PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW
+#elif _PM_chunkSize == 32
+#define PEW_UNROLL \
+ PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW \
+ PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW
+#elif _PM_chunkSize == 64
+#define PEW_UNROLL \
+ PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW \
+ PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW \
+ PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW \
+ PEW PEW PEW PEW PEW PEW PEW PEW PEW PEW
+#else
+#error "Unimplemented _PM_chunkSize value"
+#endif
+
+// There are THREE COPIES of the following function -- one each for byte,
+// word and long. If changes are made in any one of them, the others MUST
+// be updated to match! (Decided against using macro tricks for the
+// function, too often ends in disaster...but must be vigilant in the
+// three-function maintenance then.)
+
+// Shift one bitplane of byte-sized elements out to the matrix.
+//   core: matrix state (register addresses, masks, chain length).
+//   data: first element of the bitplane to issue; one element is consumed
+//         per PEW, i.e. _PM_chunkSize elements per loop iteration.
+// Which GPIO registers get written depends on _PM_STRICT_32BIT_IO and
+// _PM_portToggleRegister.
+IRAM_ATTR static void blast_byte(Protomatter_core *core, uint8_t *data) {
+#if !defined(_PM_STRICT_32BIT_IO) // Partial access to 32-bit GPIO OK
+
+#if defined(_PM_portToggleRegister)
+  // If here, it was established in begin() that the RGB data bits and
+  // clock are all within the same byte of a PORT register, else we'd be
+  // in the word- or long-blasting functions now. So we just need an
+  // 8-bit pointer to the PORT.
+  volatile uint8_t *toggle =
+      (volatile uint8_t *)core->toggleReg + core->portOffset;
+#else
+  // No-toggle version is a little different. If here, RGB data is all
+  // in one byte of PORT register, clock can be any bit in 32-bit PORT.
+  volatile uint8_t *set;              // For RGB data set
+  volatile _PM_PORT_TYPE *set_full;   // For clock set
+  volatile _PM_PORT_TYPE *clear_full; // For RGB data + clock clear
+  set = (volatile uint8_t *)core->setReg + core->portOffset;
+  set_full = (volatile _PM_PORT_TYPE *)core->setReg;
+  clear_full = (volatile _PM_PORT_TYPE *)core->clearReg;
+  _PM_PORT_TYPE rgbclock = core->rgbAndClockMask; // RGB + clock bit
+#endif
+  _PM_PORT_TYPE clock = core->clockMask; // Clock bit
+  // Number of chunks to issue (chainBits rounded up to whole chunks).
+  // Kept in 32 bits: an 8-bit counter silently wraps once a chain needs
+  // 256 or more chunks, which would clock out the wrong number of columns.
+  uint32_t chunks = (core->chainBits + (_PM_chunkSize - 1)) / _PM_chunkSize;
+
+  // PORT has already been initialized with RGB data + clock bits
+  // all LOW, so we don't need to initialize that state here.
+
+  while (chunks--) {
+    PEW_UNROLL // _PM_chunkSize RGB+clock writes
+  }
+
+#if defined(_PM_portToggleRegister)
+  // Want the PORT left with RGB data and clock LOW on function exit
+  // (so it's easier to see on 'scope, and to prime it for the next call).
+  // This is implicit in the no-toggle case (due to how the PEW macro
+  // works), but toggle case requires explicitly clearing those bits.
+  // rgbAndClockMask is an 8-bit value when toggling, hence offset here.
+  *((volatile uint8_t *)core->clearReg + core->portOffset) =
+      core->rgbAndClockMask;
+#endif
+
+#else // ONLY 32-bit GPIO
+
+#if defined(_PM_portToggleRegister)
+  volatile _PM_PORT_TYPE *toggle = (volatile _PM_PORT_TYPE *)core->toggleReg;
+#else
+  volatile _PM_PORT_TYPE *set = (volatile _PM_PORT_TYPE *)core->setReg;
+  volatile _PM_PORT_TYPE *clear_full = (volatile _PM_PORT_TYPE *)core->clearReg;
+  _PM_PORT_TYPE rgbclock = core->rgbAndClockMask; // RGB + clock bit
+#endif
+  _PM_PORT_TYPE clock = core->clockMask; // Clock bit
+  // Registers are written at full width here, so each byte of data is
+  // shifted up to the byte lane selected by portOffset.
+  uint8_t shift = core->portOffset * 8;
+  // 32-bit chunk counter, see note in the partial-access branch above.
+  uint32_t chunks = (core->chainBits + (_PM_chunkSize - 1)) / _PM_chunkSize;
+
+  // PORT has already been initialized with RGB data + clock bits
+  // all LOW, so we don't need to initialize that state here.
+
+  while (chunks--) {
+    PEW_UNROLL // _PM_chunkSize RGB+clock writes
+  }
+
+#if defined(_PM_portToggleRegister)
+  // Leave RGB data and clock LOW on exit (toggle writes don't guarantee it)
+  *((volatile uint32_t *)core->clearReg) = core->rgbAndClockMask;
+#endif
+
+#endif // 32-bit GPIO
+}
+
+// Shift one bitplane of 16-bit elements out to the matrix.
+//   core: matrix state (register addresses, masks, chain length).
+//   data: first element of the bitplane to issue; one element is consumed
+//         per PEW, i.e. _PM_chunkSize elements per loop iteration.
+// Which GPIO registers get written depends on _PM_STRICT_32BIT_IO and
+// _PM_portToggleRegister.
+IRAM_ATTR static void blast_word(Protomatter_core *core, uint16_t *data) {
+#if !defined(_PM_STRICT_32BIT_IO) // Partial access to 32-bit GPIO OK
+
+#if defined(_PM_portToggleRegister)
+  // RGB data and clock share one 16-bit half of the PORT, so a 16-bit
+  // pointer (offset in units of words) addresses the toggle register.
+  volatile uint16_t *toggle =
+      (volatile uint16_t *)core->toggleReg + core->portOffset;
+#else
+  volatile uint16_t *set;             // For RGB data set
+  volatile _PM_PORT_TYPE *set_full;   // For clock set
+  volatile _PM_PORT_TYPE *clear_full; // For RGB data + clock clear
+  set = (volatile uint16_t *)core->setReg + core->portOffset;
+  set_full = (volatile _PM_PORT_TYPE *)core->setReg;
+  clear_full = (volatile _PM_PORT_TYPE *)core->clearReg;
+  _PM_PORT_TYPE rgbclock = core->rgbAndClockMask; // RGB + clock bit
+#endif
+  _PM_PORT_TYPE clock = core->clockMask; // Clock bit
+  // Number of chunks to issue (chainBits rounded up to whole chunks).
+  // Kept in 32 bits: an 8-bit counter silently wraps once a chain needs
+  // 256 or more chunks, which would clock out the wrong number of columns.
+  uint32_t chunks = (core->chainBits + (_PM_chunkSize - 1)) / _PM_chunkSize;
+  while (chunks--) {
+    PEW_UNROLL // _PM_chunkSize RGB+clock writes
+  }
+#if defined(_PM_portToggleRegister)
+  // rgbAndClockMask is a 16-bit value when toggling, hence offset here.
+  *((volatile uint16_t *)core->clearReg + core->portOffset) =
+      core->rgbAndClockMask;
+#endif
+
+#else // ONLY 32-bit GPIO
+
+#if defined(_PM_portToggleRegister)
+  volatile _PM_PORT_TYPE *toggle = (volatile _PM_PORT_TYPE *)core->toggleReg;
+#else
+  volatile _PM_PORT_TYPE *set = (volatile _PM_PORT_TYPE *)core->setReg;
+  volatile _PM_PORT_TYPE *clear_full = (volatile _PM_PORT_TYPE *)core->clearReg;
+  _PM_PORT_TYPE rgbclock = core->rgbAndClockMask; // RGB + clock bit
+#endif
+  _PM_PORT_TYPE clock = core->clockMask; // Clock bit
+  // Registers are written at full width here, so each word of data is
+  // shifted up to the 16-bit lane selected by portOffset.
+  uint8_t shift = core->portOffset * 16;
+  // 32-bit chunk counter, see note in the partial-access branch above.
+  uint32_t chunks = (core->chainBits + (_PM_chunkSize - 1)) / _PM_chunkSize;
+  while (chunks--) {
+    PEW_UNROLL // _PM_chunkSize RGB+clock writes
+  }
+#if defined(_PM_portToggleRegister)
+  // Leave RGB data and clock LOW on exit (toggle writes don't guarantee it)
+  *((volatile _PM_PORT_TYPE *)core->clearReg) = core->rgbAndClockMask;
+#endif
+
+#endif // 32-bit GPIO
+}
+
+// Shift one bitplane of 32-bit elements out to the matrix.
+//   core: matrix state (register addresses, masks, chain length).
+//   data: first element of the bitplane to issue; one element is consumed
+//         per PEW, i.e. _PM_chunkSize elements per loop iteration.
+// Elements already span the whole register, so no port offset is applied.
+IRAM_ATTR static void blast_long(Protomatter_core *core, uint32_t *data) {
+#if defined(_PM_portToggleRegister)
+  // Toggle register is accessed at full 32-bit width.
+  volatile uint32_t *toggle = (volatile uint32_t *)core->toggleReg;
+#else
+  // Note in this case two copies exist of the PORT set register.
+  // The optimizer will most likely simplify this; leaving as-is, not
+  // wanting a special case of the PEW macro due to divergence risk.
+  volatile uint32_t *set; // For RGB data set
+#if !defined(_PM_STRICT_32BIT_IO)
+  volatile _PM_PORT_TYPE *set_full; // For clock set
+  set_full = (volatile _PM_PORT_TYPE *)core->setReg;
+#endif
+  volatile _PM_PORT_TYPE *clear_full; // For RGB data + clock clear
+  set = (volatile uint32_t *)core->setReg;
+  clear_full = (volatile _PM_PORT_TYPE *)core->clearReg;
+  _PM_PORT_TYPE rgbclock = core->rgbAndClockMask; // RGB + clock bit
+#endif
+  _PM_PORT_TYPE clock = core->clockMask; // Clock bit
+#if defined(_PM_STRICT_32BIT_IO)
+  uint8_t shift = 0; // The strict PEW variants reference 'shift'; no-op here
+#endif
+  // Number of chunks to issue (chainBits rounded up to whole chunks).
+  // Kept in 32 bits: an 8-bit counter silently wraps once a chain needs
+  // 256 or more chunks, which would clock out the wrong number of columns.
+  uint32_t chunks = (core->chainBits + (_PM_chunkSize - 1)) / _PM_chunkSize;
+  while (chunks--) {
+    PEW_UNROLL // _PM_chunkSize RGB+clock writes
+  }
+#if defined(_PM_portToggleRegister)
+  // Leave RGB data and clock LOW on exit (toggle writes don't guarantee it)
+  *(volatile uint32_t *)core->clearReg = core->rgbAndClockMask;
+#endif
+}
+
+// Returns current value of frame counter and resets its value to zero.
+// Two calls to this, timed one second apart (or use math with other
+// intervals), can be used to get a rough frames-per-second value for
+// the matrix (since this is difficult to estimate beforehand).
+uint32_t _PM_getFrameCount(Protomatter_core *core) {
+  // Without a core there is nothing to report.
+  if (!core) {
+    return 0;
+  }
+  // Hand back the frames tallied so far and restart the tally, so the
+  // next call reports only frames shown since this one.
+  uint32_t frames = core->frameCount;
+  core->frameCount = 0;
+  return frames;
+}
+
+void _PM_swapbuffer_maybe(Protomatter_core *core) {
+  // Single-buffered: there is no second buffer to switch to.
+  if (!core->doubleBuffer) {
+    return;
+  }
+  // Raise the request flag, then spin until the row handler has flipped
+  // buffers and cleared it. Returning sooner would let the caller draw
+  // into the buffer that is still being shown.
+  core->swapBuffers = 1;
+  while (core->swapBuffers) {
+  }
+}
+
+#if defined(ARDUINO) || defined(CIRCUITPY)
+
+// Arduino and CircuitPython happen to use the same internal canvas
+// representation.
+
+// 16-bit (565) color conversion functions go here (rather than in the
+// Arduino lib .cpp) because knowledge is required of chunksize and the
+// toggle register (or lack thereof), which are only known to this file,
+// not the .cpp or anywhere else. However...this file knows nothing of
+// the GFXcanvas16 type (from Adafruit_GFX...another C++ lib), so the
+// .cpp just passes down some pointers and minimal info about the canvas
+// buffer. It's probably not ideal but this is my life now, oh well.
+
+// Different runtime environments (which might not use the 565 canvas
+// format) will need their own conversion functions.
+
+// There are THREE COPIES of the following function -- one each for byte,
+// word and long. If changes are made in any one of them, the others MUST
+// be updated to match! Note that they are not simple duplicates of each
+// other. The byte case, for example, doesn't need to handle parallel
+// matrix chains (matrix data can only be byte-sized if one chain).
+
+// width argument comes from GFX canvas width, which may be less than
+// core's bitWidth (due to padding). height isn't needed, it can be
+// inferred from core->numRowPairs and core->tile.
+// Convert a 565 canvas into byte-wide bitplane data for the matrix.
+//   core:   matrix state; output is written into core->screenData (into
+//           the buffer that is NOT currently active when double-buffered).
+//   source: canvas pixels, 16 bits each, 'width' pixels per scanline.
+//   width:  canvas width in pixels.
+// Each output byte holds the pin bits for one column: the upper-half pixel
+// uses pinMask[0..2] (R,G,B) and the lower-half pixel uses pinMask[3..5].
+__attribute__((noinline)) void _PM_convert_565_byte(Protomatter_core *core,
+ const uint16_t *source,
+ uint16_t width) {
+ uint8_t *pinMask = (uint8_t *)core->rgbMask; // Pin bitmasks
+ uint8_t *dest = (uint8_t *)core->screenData;
+ if (core->doubleBuffer) {
+ // Draw into whichever buffer is not being displayed
+ dest += core->bufferSize * (1 - core->activeBuffer);
+ }
+
+#if defined(_PM_portToggleRegister)
+#if !defined(_PM_STRICT_32BIT_IO)
+ // core->clockMask mask is already an 8-bit value
+ uint8_t clockMask = core->clockMask;
+#else
+ // core->clockMask mask is 32-bit, shift down to 8-bit for this func.
+ uint8_t clockMask = core->clockMask >> (core->portOffset * 8);
+#endif
+#endif
+
+ // No need to clear matrix buffer, loops below do a full overwrite
+ // (except for any scanline pad, which was already initialized in the
+ // begin() function and won't be touched here).
+
+ // Determine matrix bytes per bitplane & row (row pair really):
+
+ // Size of 1 plane of row pair (across full chain / tile set)
+ uint32_t bitplaneSize =
+ _PM_chunkSize * ((core->chainBits + (_PM_chunkSize - 1)) / _PM_chunkSize);
+ uint8_t pad = bitplaneSize - core->chainBits; // Plane-start pad
+
+ // Skip initial scanline padding if present (HUB75 matrices shift data
+ // in from right-to-left, so if we need scanline padding it occurs at
+ // the start of a line, rather than the usual end). The destination
+ // pointer computed above already accounts for double-buffering, so we
+ // don't need to handle that here, just the pad...
+ dest += pad;
+
+ uint32_t initialRedBit, initialGreenBit, initialBlueBit;
+ if (core->numPlanes == 6) {
+ // If numPlanes is 6, red and blue are expanded from 5 to 6 bits.
+ // This involves duplicating the MSB of the 5-bit value to the LSB
+ // of its corresponding 6-bit value...or in this case, bitmasks for
+ // red and blue are initially assigned to canvas MSBs, while green
+ // starts at LSB (because it's already 6-bit). Inner loop below then
+ // wraps red & blue after the first bitplane.
+ initialRedBit = 0b1000000000000000; // MSB red
+ initialGreenBit = 0b0000000000100000; // LSB green
+ initialBlueBit = 0b0000000000010000; // MSB blue
+ } else {
+ // If numPlanes is 1 to 5, no expansion is needed, and one or all
+ // three color components might be decimated by some number of bits.
+ // The initial bitmasks are set to the components' numPlanesth bit
+ // (e.g. for 5 planes, start at red & blue bit #0, green bit #1,
+ // for 4 planes, everything starts at the next bit up, etc.).
+ uint8_t shiftLeft = 5 - core->numPlanes;
+ initialRedBit = 0b0000100000000000 << shiftLeft;
+ initialGreenBit = 0b0000000001000000 << shiftLeft;
+ initialBlueBit = 0b0000000000000001 << shiftLeft;
+ }
+
+ // This works sequentially-ish through the destination buffer,
+ // reading from the canvas source pixels in repeated passes,
+ // beginning from the least bit.
+ for (uint8_t row = 0; row < core->numRowPairs; row++) {
+ // Each row pair starts over at the least significant plane's bitmasks
+ uint32_t redBit = initialRedBit;
+ uint32_t greenBit = initialGreenBit;
+ uint32_t blueBit = initialBlueBit;
+ for (uint8_t plane = 0; plane < core->numPlanes; plane++) {
+#if defined(_PM_portToggleRegister)
+ uint8_t prior = clockMask; // Set clock bit on 1st out
+#endif
+ uint8_t *d2 = dest; // Incremented per-pixel across all tiles
+
+ // Work from bottom tile to top, because data is issued in that order
+ for (int8_t tile = abs(core->tile) - 1; tile >= 0; tile--) {
+ const uint16_t *upperSrc, *lowerSrc; // Canvas scanline pointers
+ int16_t srcIdx;
+ int8_t srcInc;
+
+ // Source pointer to tile's upper-left pixel
+ // (a tile is numRowPairs * 2 canvas scanlines tall)
+ const uint16_t *srcTileUL =
+ source + tile * width * core->numRowPairs * 2;
+ if ((tile & 1) && (core->tile < 0)) {
+ // Special handling for serpentine tiles
+ // (negative core->tile, odd-numbered tile: scanline order and
+ // pixel order are both reversed relative to a progressive tile)
+ lowerSrc = srcTileUL + width * (core->numRowPairs - 1 - row);
+ upperSrc = lowerSrc + width * core->numRowPairs;
+ srcIdx = width - 1; // Work right to left
+ srcInc = -1;
+ } else {
+ // Progressive tile
+ upperSrc = srcTileUL + width * row; // Top row
+ lowerSrc = upperSrc + width * core->numRowPairs; // Bottom row
+ srcIdx = 0; // Left to right
+ srcInc = 1;
+ }
+
+ for (uint16_t x = 0; x < width; x++, srcIdx += srcInc) {
+ uint16_t upperRGB = upperSrc[srcIdx]; // Pixel in upper half
+ uint16_t lowerRGB = lowerSrc[srcIdx]; // Pixel in lower half
+ uint8_t result = 0;
+ if (upperRGB & redBit)
+ result |= pinMask[0];
+ if (upperRGB & greenBit)
+ result |= pinMask[1];
+ if (upperRGB & blueBit)
+ result |= pinMask[2];
+ if (lowerRGB & redBit)
+ result |= pinMask[3];
+ if (lowerRGB & greenBit)
+ result |= pinMask[4];
+ if (lowerRGB & blueBit)
+ result |= pinMask[5];
+#if defined(_PM_portToggleRegister)
+ // Store the bits that differ from what the previous write left on
+ // the pins (previous data plus the clock bit), not the raw levels
+ *d2++ = result ^ prior;
+ prior = result | clockMask; // Set clock bit on next out
+#else
+ *d2++ = result;
+#endif
+ } // end x
+ } // end tile
+
+ greenBit <<= 1;
+ if (plane || (core->numPlanes < 6)) {
+ // In most cases red & blue bit scoot 1 left...
+ redBit <<= 1;
+ blueBit <<= 1;
+ } else {
+ // Exception being after bit 0 with 6-plane display,
+ // in which case they're reset to red & blue LSBs
+ // (so 5-bit colors are expanded to 6 bits).
+ redBit = 0b0000100000000000;
+ blueBit = 0b0000000000000001;
+ }
+#if defined(_PM_portToggleRegister)
+ // If using bit-toggle register, erase the toggle bit on the
+ // first element of each bitplane & row pair. The matrix-driving
+ // interrupt functions correspondingly set the clock low before
+ // finishing. This is all done for legibility on oscilloscope --
+ // so idle clock appears LOW -- but really the matrix samples on
+ // a rising edge and we could leave it high, but at this stage
+ // in development just want the scope "readable."
+ dest[-pad] &= ~clockMask; // Negative index is legal & intentional
+#endif
+ dest += bitplaneSize; // Advance one scanline in dest buffer
+ } // end plane
+ } // end row
+}
+
+// Corresponding function for word output -- either 12 RGB bits (2 parallel
+// matrix chains), or 1 chain with RGB bits not in the same byte (but in the
+// same 16-bit word). Some of the comments have been stripped out since it's
+// largely the same operation, but changes are noted.
+// WORD OUTPUT IS UNTESTED AND ROW TILING MAY ESPECIALLY PRESENT ISSUES.
+// Convert a 565 canvas into 16-bit-wide bitplane data for the matrix.
+//   core:   matrix state; output is written into core->screenData (into
+//           the buffer that is NOT currently active when double-buffered).
+//   source: canvas pixels, 16 bits each, 'width' pixels per scanline; the
+//           canvas regions for successive parallel chains follow one
+//           another.
+//   width:  canvas width in pixels.
+// Each chain uses its own group of six pin masks and ORs its bits into the
+// same output elements as every other chain.
+void _PM_convert_565_word(Protomatter_core *core, uint16_t *source,
+                          uint16_t width) {
+  uint16_t *pinMask = (uint16_t *)core->rgbMask; // Pin bitmasks
+  uint16_t *dest = (uint16_t *)core->screenData;
+  if (core->doubleBuffer) {
+    // Draw into whichever buffer is not being displayed (offset counted
+    // in elements, hence the division by bytesPerElement)
+    dest += core->bufferSize / core->bytesPerElement * (1 - core->activeBuffer);
+  }
+
+  // Size of 1 plane of row pair (across full chain / tile set)
+  uint32_t bitplaneSize =
+      _PM_chunkSize * ((core->chainBits + (_PM_chunkSize - 1)) / _PM_chunkSize);
+  uint8_t pad = bitplaneSize - core->chainBits; // Plane-start pad
+
+  // Starting canvas bitmasks per color; see the 6-plane expansion note in
+  // the plane loop below.
+  uint32_t initialRedBit, initialGreenBit, initialBlueBit;
+  if (core->numPlanes == 6) {
+    initialRedBit = 0b1000000000000000;   // MSB red
+    initialGreenBit = 0b0000000000100000; // LSB green
+    initialBlueBit = 0b0000000000010000;  // MSB blue
+  } else {
+    uint8_t shiftLeft = 5 - core->numPlanes;
+    initialRedBit = 0b0000100000000000 << shiftLeft;
+    initialGreenBit = 0b0000000001000000 << shiftLeft;
+    initialBlueBit = 0b0000000000000001 << shiftLeft;
+  }
+
+  // Unlike the 565 byte converter, the word converter DOES clear out the
+  // matrix buffer (because each chain is OR'd into place). If a toggle
+  // register exists, "clear" really means the clock mask is set in all
+  // but the first element on a scanline (per bitplane). If no toggle
+  // register, can just zero everything out.
+#if defined(_PM_portToggleRegister)
+  // No per-chain loop is required; one clock bit handles all chains
+  uint32_t offset = 0; // Current position in the 'dest' buffer
+  uint16_t mask = core->clockMask >> (core->portOffset * 16);
+  for (uint8_t row = 0; row < core->numRowPairs; row++) {
+    for (uint8_t plane = 0; plane < core->numPlanes; plane++) {
+      dest[offset++] = 0; // First element of each plane
+      for (uint16_t x = 1; x < bitplaneSize; x++) { // All subsequent items
+        dest[offset++] = mask;
+      }
+    }
+  }
+#else
+  memset(dest, 0, core->bufferSize);
+#endif
+
+  dest += pad; // Pad value is in 'elements,' not bytes, so this is OK
+
+  for (uint8_t chain = 0; chain < core->parallel; chain++) {
+    // Every chain ORs its bits into the SAME buffer elements, so each
+    // chain starts again from the first post-pad element. Carrying one
+    // running pointer across chains would send the second and later
+    // chains past the end of the buffer.
+    uint16_t *planeDest = dest;
+    for (uint8_t row = 0; row < core->numRowPairs; row++) {
+      uint32_t redBit = initialRedBit;
+      uint32_t greenBit = initialGreenBit;
+      uint32_t blueBit = initialBlueBit;
+      for (uint8_t plane = 0; plane < core->numPlanes; plane++) {
+#if defined(_PM_portToggleRegister)
+        // Since we're ORing in bits over an existing clock bit,
+        // prior is 0 rather than clockMask as in the byte case.
+        uint16_t prior = 0;
+#endif
+        uint16_t *d2 = planeDest; // Incremented per-pixel across all tiles
+
+        // Work from bottom tile to top, because data is issued in that order
+        for (int8_t tile = abs(core->tile) - 1; tile >= 0; tile--) {
+          uint16_t *upperSrc, *lowerSrc; // Canvas scanline pointers
+          int16_t srcIdx;
+          int8_t srcInc;
+
+          // Source pointer to tile's upper-left pixel
+          uint16_t *srcTileUL = source + (chain * abs(core->tile) + tile) *
+                                             width * core->numRowPairs * 2;
+          if ((tile & 1) && (core->tile < 0)) {
+            // Special handling for serpentine tiles
+            lowerSrc = srcTileUL + width * (core->numRowPairs - 1 - row);
+            upperSrc = lowerSrc + width * core->numRowPairs;
+            srcIdx = width - 1; // Work right to left
+            srcInc = -1;
+          } else {
+            // Progressive tile
+            upperSrc = srcTileUL + width * row;              // Top row
+            lowerSrc = upperSrc + width * core->numRowPairs; // Bottom row
+            srcIdx = 0;                                      // Left to right
+            srcInc = 1;
+          }
+
+          for (uint16_t x = 0; x < width; x++, srcIdx += srcInc) {
+            uint16_t upperRGB = upperSrc[srcIdx]; // Pixel in upper half
+            uint16_t lowerRGB = lowerSrc[srcIdx]; // Pixel in lower half
+            uint16_t result = 0;
+            if (upperRGB & redBit)
+              result |= pinMask[0];
+            if (upperRGB & greenBit)
+              result |= pinMask[1];
+            if (upperRGB & blueBit)
+              result |= pinMask[2];
+            if (lowerRGB & redBit)
+              result |= pinMask[3];
+            if (lowerRGB & greenBit)
+              result |= pinMask[4];
+            if (lowerRGB & blueBit)
+              result |= pinMask[5];
+              // Main difference here vs byte converter is each chain
+              // ORs new bits into place (vs single-pass overwrite).
+#if defined(_PM_portToggleRegister)
+            *d2++ |= result ^ prior; // Bitwise OR
+            prior = result;
+#else
+            *d2++ |= result; // Bitwise OR
+#endif
+          } // end x
+        }   // end tile
+        greenBit <<= 1;
+        if (plane || (core->numPlanes < 6)) {
+          redBit <<= 1;
+          blueBit <<= 1;
+        } else {
+          // After plane 0 of a 6-plane display, red & blue restart at
+          // their LSBs (5-bit colors expanded to 6 bits)
+          redBit = 0b0000100000000000;
+          blueBit = 0b0000000000000001;
+        }
+        planeDest += bitplaneSize; // Advance one scanline in dest buffer
+      }                            // end plane
+    }                              // end row
+    pinMask += 6; // Next chain's RGB pin masks
+  }
+}
+
+// Corresponding function for long output -- either several parallel chains
+// (up to 5), or 1 chain with RGB bits scattered widely about the PORT.
+// Same deal, comments are pared back, see above functions for explanations.
+// LONG OUTPUT IS UNTESTED AND ROW TILING MAY ESPECIALLY PRESENT ISSUES.
+// Convert a 565 canvas into 32-bit-wide bitplane data for the matrix.
+//   core:   matrix state; output is written into core->screenData (into
+//           the buffer that is NOT currently active when double-buffered).
+//   source: canvas pixels, 16 bits each, 'width' pixels per scanline; the
+//           canvas regions for successive parallel chains follow one
+//           another.
+//   width:  canvas width in pixels.
+// Each chain uses its own group of six pin masks and ORs its bits into the
+// same output elements as every other chain.
+void _PM_convert_565_long(Protomatter_core *core, uint16_t *source,
+                          uint16_t width) {
+  uint32_t *pinMask = (uint32_t *)core->rgbMask; // Pin bitmasks
+  uint32_t *dest = (uint32_t *)core->screenData;
+  if (core->doubleBuffer) {
+    // Draw into whichever buffer is not being displayed (offset counted
+    // in elements, hence the division by bytesPerElement)
+    dest += core->bufferSize / core->bytesPerElement * (1 - core->activeBuffer);
+  }
+
+  // Size of 1 plane of row pair (across full chain / tile set)
+  uint32_t bitplaneSize =
+      _PM_chunkSize * ((core->chainBits + (_PM_chunkSize - 1)) / _PM_chunkSize);
+  uint8_t pad = bitplaneSize - core->chainBits; // Plane-start pad
+
+  // Starting canvas bitmasks per color; see the 6-plane expansion note in
+  // the plane loop below.
+  uint32_t initialRedBit, initialGreenBit, initialBlueBit;
+  if (core->numPlanes == 6) {
+    initialRedBit = 0b1000000000000000;   // MSB red
+    initialGreenBit = 0b0000000000100000; // LSB green
+    initialBlueBit = 0b0000000000010000;  // MSB blue
+  } else {
+    uint8_t shiftLeft = 5 - core->numPlanes;
+    initialRedBit = 0b0000100000000000 << shiftLeft;
+    initialGreenBit = 0b0000000001000000 << shiftLeft;
+    initialBlueBit = 0b0000000000000001 << shiftLeft;
+  }
+
+  // The buffer is pre-filled because each chain ORs its bits into place.
+  // With a toggle register, the fill is the clock mask in all but the
+  // first element of each bitplane; otherwise it is simply zeroed.
+#if defined(_PM_portToggleRegister)
+  // No per-chain loop is required; one clock bit handles all chains
+  uint32_t offset = 0; // Current position in the 'dest' buffer
+  for (uint8_t row = 0; row < core->numRowPairs; row++) {
+    for (uint8_t plane = 0; plane < core->numPlanes; plane++) {
+      dest[offset++] = 0; // First element of each plane
+      for (uint16_t x = 1; x < bitplaneSize; x++) { // All subsequent items
+        dest[offset++] = core->clockMask;
+      }
+    }
+  }
+#else
+  memset(dest, 0, core->bufferSize);
+#endif
+
+  dest += pad; // Pad value is in 'elements,' not bytes, so this is OK
+
+  for (uint8_t chain = 0; chain < core->parallel; chain++) {
+    // Every chain ORs its bits into the SAME buffer elements, so each
+    // chain starts again from the first post-pad element. Carrying one
+    // running pointer across chains would send the second and later
+    // chains past the end of the buffer.
+    uint32_t *planeDest = dest;
+    for (uint8_t row = 0; row < core->numRowPairs; row++) {
+      uint32_t redBit = initialRedBit;
+      uint32_t greenBit = initialGreenBit;
+      uint32_t blueBit = initialBlueBit;
+      for (uint8_t plane = 0; plane < core->numPlanes; plane++) {
+#if defined(_PM_portToggleRegister)
+        // Bits are OR'd over a pre-filled clock bit, so 'prior' starts at 0
+        uint32_t prior = 0;
+#endif
+        uint32_t *d2 = planeDest; // Incremented per-pixel across all tiles
+
+        // Work from bottom tile to top, because data is issued in that order
+        for (int8_t tile = abs(core->tile) - 1; tile >= 0; tile--) {
+          uint16_t *upperSrc, *lowerSrc; // Canvas scanline pointers
+          int16_t srcIdx;
+          int8_t srcInc;
+
+          // Source pointer to tile's upper-left pixel
+          uint16_t *srcTileUL = source + (chain * abs(core->tile) + tile) *
+                                             width * core->numRowPairs * 2;
+          if ((tile & 1) && (core->tile < 0)) {
+            // Special handling for serpentine tiles
+            lowerSrc = srcTileUL + width * (core->numRowPairs - 1 - row);
+            upperSrc = lowerSrc + width * core->numRowPairs;
+            srcIdx = width - 1; // Work right to left
+            srcInc = -1;
+          } else {
+            // Progressive tile
+            upperSrc = srcTileUL + width * row;              // Top row
+            lowerSrc = upperSrc + width * core->numRowPairs; // Bottom row
+            srcIdx = 0;                                      // Left to right
+            srcInc = 1;
+          }
+
+          for (uint16_t x = 0; x < width; x++, srcIdx += srcInc) {
+            uint16_t upperRGB = upperSrc[srcIdx]; // Pixel in upper half
+            uint16_t lowerRGB = lowerSrc[srcIdx]; // Pixel in lower half
+            uint32_t result = 0;
+            if (upperRGB & redBit)
+              result |= pinMask[0];
+            if (upperRGB & greenBit)
+              result |= pinMask[1];
+            if (upperRGB & blueBit)
+              result |= pinMask[2];
+            if (lowerRGB & redBit)
+              result |= pinMask[3];
+            if (lowerRGB & greenBit)
+              result |= pinMask[4];
+            if (lowerRGB & blueBit)
+              result |= pinMask[5];
+              // Main difference here vs byte converter is each chain
+              // ORs new bits into place (vs single-pass overwrite).
+#if defined(_PM_portToggleRegister)
+            *d2++ |= result ^ prior; // Bitwise OR
+            prior = result;
+#else
+            *d2++ |= result; // Bitwise OR
+#endif
+          } // end x
+        }   // end tile
+        greenBit <<= 1;
+        if (plane || (core->numPlanes < 6)) {
+          redBit <<= 1;
+          blueBit <<= 1;
+        } else {
+          // After plane 0 of a 6-plane display, red & blue restart at
+          // their LSBs (5-bit colors expanded to 6 bits)
+          redBit = 0b0000100000000000;
+          blueBit = 0b0000000000000001;
+        }
+        planeDest += bitplaneSize; // Advance one scanline in dest buffer
+      }                            // end plane
+    }                              // end row
+    pinMask += 6; // Next chain's RGB pin masks
+  }
+}
+
+void _PM_convert_565(Protomatter_core *core, uint16_t *source, uint16_t width) {
+  // Pick the converter that matches the matrix element width. Each
+  // converter works out its own destination (including the inactive
+  // buffer when double-buffering), so only the canvas pointer and its
+  // width in pixels are forwarded.
+  switch (core->bytesPerElement) {
+  case 1:
+    _PM_convert_565_byte(core, source, width);
+    break;
+  case 2:
+    _PM_convert_565_word(core, source, width);
+    break;
+  default: // Any other element size is handled as 32-bit
+    _PM_convert_565_long(core, source, width);
+    break;
+  }
+}
+
+#endif // END ARDUINO || CIRCUITPY
+
+/* NOTES TO FUTURE SELF ----------------------------------------------------
+
+ON BYTES, WORDS and LONGS:
+I've gone back and forth between implementing all this either as it
+currently is (with byte, word and long cases for various steps), or using
+a uint32_t[64] table for expanding RGB bit combos to PORT bit combos.
+The latter would certainly simplify the code a ton, and the additional
+table lookup step wouldn't significantly impact performance, especially
+going forward with faster processors (several devices already require a
+few NOPs in the innermost loop to avoid outpacing the matrix).
+BUT, the reason this is NOT currently done is that it only allows for a
+single matrix chain (doing parallel chains would require either an
+impractically large lookup table, or adding together multiple tables'
+worth of bitmasks, which would slow things down in the vital inner loop).
+Although parallel matrix chains aren't yet 100% implemented in this code
+right now, I wanted to leave that possibility for the future, as a way to
+handle larger matrix combos, because long chains will slow down the
+refresh rate.
+*/
diff --git a/circuitpython/lib/protomatter/src/core.h b/circuitpython/lib/protomatter/src/core.h
new file mode 100644
index 0000000..ef8e316
--- /dev/null
+++ b/circuitpython/lib/protomatter/src/core.h
@@ -0,0 +1,274 @@
+/*!
+ * @file core.h
+ *
+ * Part of Adafruit's Protomatter library for HUB75-style RGB LED matrices.
+ *
+ * Adafruit invests time and resources providing this open source code,
+ * please support Adafruit and open-source hardware by purchasing
+ * products from Adafruit!
+ *
+ * Written by Phil "Paint Your Dragon" Burgess and Jeff Epler for
+ * Adafruit Industries, with contributions from the open source community.
+ *
+ * BSD license, all text here must be included in any redistribution.
+ *
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+
+/** Status type returned by some functions (_PM_init() and _PM_begin()
+ among them). No explicit values are assigned, so the enumerators take
+ the C defaults 0, 1, 2, 3 in the order listed; PROTOMATTER_OK is zero. */
+typedef enum {
+ PROTOMATTER_OK, ///< Everything is good
+ PROTOMATTER_ERR_PINS, ///< Clock and/or data pins on different PORTs
+ PROTOMATTER_ERR_MALLOC, ///< Couldn't allocate memory for display
+ PROTOMATTER_ERR_ARG, ///< Bad input to function (e.g. a bad pointer)
+} ProtomatterStatus;
+
+/** Struct for matrix control lines NOT related to RGB data or clock, i.e.
+ latch, OE and address lines (the 'latch', 'oe' and 'addr' members of
+ Protomatter_core are of this type). RGB data and clock ("RGBC") are
+ handled differently as they have specific requirements (and might use a
+ toggle register if present). The data conversion functions need bitmasks
+ for RGB data but do NOT need the set or clear registers, so those items
+ are also declared as separate things in the core structure that follows. */
+typedef struct {
+ volatile void *setReg; ///< GPIO bit set register (untyped pointer)
+ volatile void *clearReg; ///< GPIO bit clear register (untyped pointer)
+ uint32_t bit; ///< GPIO bitmask for this pin
+ uint8_t pin; ///< Some unique ID, e.g. Arduino pin #
+} _PM_pin;
+
+/** Struct with info about an RGB matrix chain and lots of state and buffer
+ details for the library. Toggle-related items in this structure MUST be
+ declared even if the device lacks GPIO bit-toggle registers (i.e. don't
+ do an ifdef check around these). All hardware-specific details (including
+ the presence or lack of toggle registers) are isolated to a single
+ file -- arch.h -- which should ONLY be included by core.c, and ifdef'ing
+ them would result in differing representations of this structure which
+ must be shared between the library and calling code. (An alternative is
+ to put any toggle-specific stuff at the end of the struct with an ifdef
+ check, but that's just dirty pool and asking for trouble.) */
+typedef struct {
+ void *timer; ///< Arch-specific timer/counter info
+ void *setReg; ///< RGBC bit set register (cast to use)
+ void *clearReg; ///< RGBC bit clear register (cast to use)
+ void *toggleReg; ///< RGBC bit toggle register (cast to use)
+ uint8_t *rgbPins; ///< Array of RGB data pins (multiple of 6)
+ void *rgbMask; ///< PORT bit mask for each RGB pin
+ uint32_t clockMask; ///< PORT bit mask for RGB clock
+ uint32_t rgbAndClockMask; ///< PORT bit mask for RGB data + clock
+ volatile void *addrPortToggle; ///< See singleAddrPort below
+ void *screenData; ///< Per-bitplane RGB data for matrix
+ _PM_pin latch; ///< RGB data latch
+ _PM_pin oe; ///< Output enable (active low)
+ _PM_pin *addr; ///< Array of address pins
+ uint32_t bufferSize; ///< Bytes per matrix buffer
+ uint32_t bitZeroPeriod; ///< Bitplane 0 timer period
+ uint32_t minPeriod; ///< Plane 0 timer period for ~250Hz
+ volatile uint32_t frameCount; ///< For estimating refresh rate
+ uint16_t width; ///< Matrix chain width only, in bits
+ uint16_t chainBits; ///< Matrix chain width*tiling, in bits
+ uint8_t bytesPerElement; ///< Using 8, 16 or 32 bits of PORT?
+ uint8_t clockPin; ///< RGB clock pin identifier
+ uint8_t parallel; ///< Number of concurrent matrix outs
+ uint8_t numAddressLines; ///< Number of address line pins
+ uint8_t portOffset; ///< Active 8- or 16-bit pos. in PORT
+ uint8_t numPlanes; ///< Display bitplanes (1 to 6)
+ uint8_t numRowPairs; ///< Addressable row pairs
+ int8_t tile; ///< Vertical tiling repetitions (sign: see _PM_init() docs)
+ bool doubleBuffer; ///< 2X buffers for clean switchover
+ bool singleAddrPort; ///< If true, all addr lines on same PORT
+ volatile uint8_t activeBuffer; ///< Index of currently-displayed buf
+ volatile uint8_t plane; ///< Current bitplane (changes in ISR)
+ volatile uint8_t row; ///< Current scanline (changes in ISR)
+ volatile uint8_t prevRow; ///< Scanline from prior ISR
+ volatile bool swapBuffers; ///< If true, awaiting double-buf switch
+} Protomatter_core;
+
+// Protomatter core function prototypes. Environment-specific code (like the
+// Adafruit_Protomatter class for Arduino) calls on these underlying things,
+// and has to provide a few extras of its own (interrupt handlers and such).
+// User code shouldn't need to invoke any of them directly.
+
+/*!
+ @brief Initialize values in Protomatter_core structure.
+ @param core Pointer to Protomatter_core structure.
+ @param bitWidth Total width of RGB matrix chain, in pixels.
+ Usu. some multiple of 32, but maybe exceptions.
+ @param bitDepth Color "depth" in bitplanes, determines range of
+ shades of red, green and blue. e.g. passing 4
+ bits = 16 shades ea. R,G,B = 16x16x16 = 4096
+ colors.
+ @param rgbCount Number of "sets" of RGB data pins, each set
+ containing 6 pins (2 ea. R,G,B). Typically 1,
+ indicating a single matrix (or matrix chain).
+ In theory (but not yet extensively tested),
+ multiple sets of pins can be driven in parallel,
+ up to 5 on some devices (if the hardware design
+ provides all those bits on one PORT).
+ @param rgbList A uint8_t array of pins (values are platform-
+ dependent), 6X the prior rgbCount value,
+ corresponding to the 6 output color bits for a
+ matrix (or chain). Order is upper-half red, green,
+ blue, lower-half red, green, blue (repeat for each
+ add'l chain). All the RGB pins (plus the clock pin
+ below on some architectures) MUST be on the same
+ PORT register. It's recommended (but not required)
+ that all RGB pins (and clock depending on arch) be
+ within the same byte of a PORT (but do not need to
+ be sequential or contiguous within that byte) for
+ more efficient RAM utilization. For two concurrent
+ chains, same principle but 16-bit word.
+ @param addrCount Number of row address lines required of matrix.
+ Total pixel height is then 2 x 2^addrCount, e.g.
+ 32-pixel-tall matrices have 4 row address lines.
+ @param addrList A uint8_t array of pins (platform-dependent pin
+ numbering), one per row address line.
+ @param clockPin RGB clock pin (platform-dependent pin #).
+ @param latchPin RGB data latch pin (platform-dependent pin #).
+ @param oePin Output enable pin (platform-dependent pin #),
+ active low.
+ @param doubleBuffer If true, two matrix buffers are allocated,
+ so changing display contents doesn't introduce
+ artifacts mid-conversion. Requires ~2X RAM.
+ @param tile If multiple matrices are chained and stacked
+ vertically (rather than or in addition to
+ horizontally), the number of vertical tiles is
+ specified here. Positive values indicate a
+ "progressive" arrangement (always left-to-right),
+ negative for a "serpentine" arrangement (alternating
+ 180 degree orientation). Horizontal tiles are implied
+ in the 'bitWidth' argument.
+ @param timer Pointer to timer peripheral or timer-related
+ struct (architecture-dependent), or NULL to
+ use a default timer ID (also arch-dependent).
+ @return A ProtomatterStatus status, one of:
+ PROTOMATTER_OK if everything is good.
+ PROTOMATTER_ERR_PINS if data and/or clock pins are split across
+ different PORTs.
+ PROTOMATTER_ERR_MALLOC if insufficient RAM to allocate display
+ memory.
+ PROTOMATTER_ERR_ARG if a bad value (core or timer pointer) was
+ passed in.
+*/
+extern ProtomatterStatus _PM_init(Protomatter_core *core, uint16_t bitWidth,
+ uint8_t bitDepth, uint8_t rgbCount,
+ uint8_t *rgbList, uint8_t addrCount,
+ uint8_t *addrList, uint8_t clockPin,
+ uint8_t latchPin, uint8_t oePin,
+ bool doubleBuffer, int8_t tile, void *timer);
+
+/*!
+ @brief Allocate display buffers and populate additional elements of a
+ Protomatter matrix.
+ @param core Pointer to Protomatter_core structure.
+ @return A ProtomatterStatus status, one of:
+ PROTOMATTER_OK if everything is good.
+ PROTOMATTER_ERR_PINS if data and/or clock pins are split across
+ different PORTs.
+ PROTOMATTER_ERR_MALLOC if insufficient RAM to allocate display
+ memory.
+ PROTOMATTER_ERR_ARG if a bad value was passed in.
+*/
+extern ProtomatterStatus _PM_begin(Protomatter_core *core);
+
+/*!
+ @brief Disable (but do not deallocate) a Protomatter matrix. Disables
+ matrix by setting OE pin HIGH and writing all-zero data to
+ matrix shift registers, so it won't halt with lit LEDs.
+ @param core Pointer to Protomatter_core structure.
+*/
+extern void _PM_stop(Protomatter_core *core);
+
+/*!
+ @brief Start or restart a matrix. Initialize counters, configure and
+ start timer.
+ @param core Pointer to Protomatter_core structure.
+*/
+extern void _PM_resume(Protomatter_core *core);
+
+/*!
+ @brief Deallocate memory associated with Protomatter_core structure
+ (e.g. screen data, pin lists for data and rows). Does not
+ deallocate the structure itself.
+ @param core Pointer to Protomatter_core structure.
+*/
+extern void _PM_deallocate(Protomatter_core *core);
+
+/*!
+ @brief Matrix "row handler" that's called by the timer interrupt.
+ Handles row address lines and issuing data to matrix.
+ @param core Pointer to Protomatter_core structure.
+*/
+extern void _PM_row_handler(Protomatter_core *core);
+
+/*!
+ @brief Returns current value of frame counter and resets its value to
+ zero. Two calls to this, timed one second apart (or use math with
+ other intervals), can be used to get a rough frames-per-second
+ value for the matrix (since this is difficult to estimate
+ beforehand).
+ @param core Pointer to Protomatter_core structure.
+ @return Frame count since previous call to function, as a uint32_t.
+*/
+extern uint32_t _PM_getFrameCount(Protomatter_core *core);
+
+/*!
+ @brief Start (or restart) a timer/counter peripheral.
+ @param tptr Pointer to timer/counter peripheral OR a struct
+ encapsulating information about a timer/counter
+ periph (architecture-dependent).
+ @param period Timer 'top' / rollover value.
+*/
+extern void _PM_timerStart(void *tptr, uint32_t period);
+
+/*!
+ @brief Stop timer/counter peripheral.
+ @param tptr Pointer to timer/counter peripheral OR a struct
+ encapsulating information about a timer/counter
+ periph (architecture-dependent).
+ @return Counter value when timer was stopped.
+*/
+extern uint32_t _PM_timerStop(void *tptr);
+
+/*!
+ @brief Query a timer/counter peripheral's current count.
+ @param tptr Pointer to timer/counter peripheral OR a struct
+ encapsulating information about a timer/counter
+ periph (architecture-dependent).
+ @return Counter value.
+*/
+extern uint32_t _PM_timerGetCount(void *tptr);
+
+/*!
+ @brief Pauses until the next vertical blank to avoid 'tearing' animation
+ (if display is double-buffered). If single-buffered, has no effect.
+ @param core Pointer to Protomatter_core structure.
+*/
+extern void _PM_swapbuffer_maybe(Protomatter_core *core);
+
+#if defined(ARDUINO) || defined(CIRCUITPY)
+
+/*!
+ @brief Converts image data from GFX16 canvas to the matrix's weird
+ internal format.
+ @param core Pointer to Protomatter_core structure.
+ @param source Pointer to source image data (see Adafruit_GFX 16-bit
+ canvas type for format).
+ @param width Width of canvas in pixels, as this may be different than
+ the matrix pixel width due to row padding.
+*/
+extern void _PM_convert_565(Protomatter_core *core, uint16_t *source,
+ uint16_t width);
+
+#endif // END ARDUINO || CIRCUITPY
+
+#ifdef __cplusplus
+} // extern "C"
+#endif