diff --git a/README.md b/README.md new file mode 100644 index 0000000..226a5f5 --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +# ShiftPWM-Redux +Arduino Library for software PWM with shift registers + +This branch optimizes the SPI sending code. + +The new code is about 30% faster than the original. You can use the extra time to either have more foreground load, or to increase the PWM frequency. + + diff --git a/ShiftPWM.h b/ShiftPWM.h index b3d3cd5..68dbfd4 100644 --- a/ShiftPWM.h +++ b/ShiftPWM.h @@ -70,16 +70,99 @@ extern const bool ShiftPWM_balanceLoad; #endif // The macro below uses 3 instructions per pin to generate the byte to transfer with SPI -// Retreive duty cycle setting from memory (ldd, 2 clockcycles) +// Retrieve duty cycle setting from memory (ldd, 2 clockcycles) // Compare with the counter (cp, 1 clockcycle) --> result is stored in carry // Use the rotate over carry right to shift the compare result into the byte. (1 clockcycle). -#define add_one_pin_to_byte(sendbyte, counter, ledPtr) \ -{ \ - unsigned char pwmval=*ledPtr; \ - asm volatile ("cp %0, %1" : /* No outputs */ : "r" (counter), "r" (pwmval): ); \ - asm volatile ("ror %0" : "+r" (sendbyte) : "r" (sendbyte) : ); \ -} +// len is the number of 8-bit bytes to send out the SPI +// buf points to the last byte of a buffer of values to compare and send. The last byte in the buffer is sent first. +// counter is threshold. Each byte in the buff is compared to count and if it is greater, then a 1 bit is sent. + +static __attribute__ ((always_inline)) void send_spi_bytes( unsigned len, unsigned char counter, const unsigned char *buf, const unsigned char imask, const unsigned char step ) +{ + register unsigned char sendchar; + + asm volatile ( + + "LOOP_LEN_%=: \n\t" + + // Shift PWM Bit #0 + "ld __tmp_reg__, -%a[buf] \n\t" + "cp %[counter], __tmp_reg__ \n\t" + "ror %[sendchar] \r\n" + + // Shift PWM Bit #1 + "ld __tmp_reg__, -%a[buf] \n\t" + "cp %[counter], __tmp_reg__ \n\t" + "ror %[sendchar] \r\n" + + // Shift PWM Bit #2 + "ld __tmp_reg__, -%a[buf] \n\t" + "cp %[counter], __tmp_reg__ \n\t" + "ror %[sendchar] \r\n" + + // Shift PWM Bit #3 + "ld __tmp_reg__, -%a[buf] \n\t" + "cp %[counter], __tmp_reg__ \n\t" + "ror %[sendchar] \r\n" + + // Shift PWM Bit #4 + "ld __tmp_reg__, -%a[buf] \n\t" + "cp %[counter], __tmp_reg__ \n\t" + "ror %[sendchar] \r\n" + + // Shift PWM Bit #5 + "ld __tmp_reg__, -%a[buf] \n\t" + "cp %[counter], __tmp_reg__ \n\t" + "ror %[sendchar] \r\n" + + // Shift PWM Bit #6 + "ld __tmp_reg__, -%a[buf] \n\t" + "cp %[counter], __tmp_reg__ \n\t" + "ror %[sendchar] \r\n" + + // Shift PWM Bit #7 + "ld __tmp_reg__, -%a[buf] \n\t" + "cp %[counter], __tmp_reg__ \n\t" + "ror %[sendchar] \n\t" + + // Potentially invert bits + "eor %[sendchar],%[imask] \n\t" + + // Check that previous send completed + "WAIT_SPI_%=: \n\t" + "in __tmp_reg__,%[spsr] \n\t" + "sbrs __tmp_reg__,%[spif] \n\t" + "rjmp WAIT_SPI_%= \n\t" + + // Send new byte + "out %[spdr], %[sendchar] \n\t" + + // repeat until all shift registers are filled + "sbiw %[len], 1 \n\t" + "brne LOOP_LEN_%= \n\t" + + : // Outputs: + [sendchar] "=&r" (sendchar) // working register + + : // Inputs: + [buf] "e" (buf), // pointer to buffer + [len] "r" (len), // length of buffer + [counter] "r" (counter), // current threshold + [imask] "r" (imask), // XORed to final byte before sent to potentially invert bits + [step] "r" (step), // increment counter by this much for each byte sent for load balancing + + // Constants + [spdr] "I" (_SFR_IO_ADDR(SPDR)), // SPI data register + [spsr] "I" (_SFR_IO_ADDR(SPSR)), // SPI status register + [spif] "I" (SPIF) // SPI interrupt flag (send complete) (should really be a constraint for 0-7 value) + + : // Clobbers + "cc" // special name that indicates that flags may have been clobbered + + ); + +} // The inline function below uses normal output pins to send one bit to the SPI port. // This function is used in the noSPI mode and is useful if you need the SPI port for something else. // It is a lot 2.5x slower than the SPI version. @@ -102,7 +185,7 @@ static inline void ShiftPWM_handleInterrupt(void){ // Look up which bit of which output register corresponds to the pin. // This should be constant, so the compiler can optimize this code away and use sbi and cbi instructions // The compiler only knows this if this function is compiled in the same file as the pin setting. - // That is the reason the full funcion is in the header file, instead of only the prototype. + // That is the reason the full function is in the header file, instead of only the prototype. // If this function is defined in cpp files of the library, it is compiled seperately from the main file. // The compiler does not recognize the pins/ports as constant and sbi and cbi instructions cannot be used. @@ -128,29 +211,24 @@ static inline void ShiftPWM_handleInterrupt(void){ #ifndef SHIFTPWM_NOSPI //Use SPI to send out all bits - SPDR = 0; // write bogus bit to the SPI, because in the loop there is a receive before send. - for(unsigned char i = ShiftPWM.m_amountOfRegisters; i>0;--i){ // do a whole shift register at once. This unrolls the loop for extra speed - unsigned char sendbyte; // no need to initialize, all bits are replaced - if(ShiftPWM_balanceLoad){ - counter +=8; // distribute the load by using a shifted counter per shift register - } - add_one_pin_to_byte(sendbyte, counter, --ledPtr); - add_one_pin_to_byte(sendbyte, counter, --ledPtr); - add_one_pin_to_byte(sendbyte, counter, --ledPtr); - add_one_pin_to_byte(sendbyte, counter, --ledPtr); - - add_one_pin_to_byte(sendbyte, counter, --ledPtr); - add_one_pin_to_byte(sendbyte, counter, --ledPtr); - add_one_pin_to_byte(sendbyte, counter, --ledPtr); - add_one_pin_to_byte(sendbyte, counter, --ledPtr); - - while (!(SPSR & _BV(SPIF))); // wait for last send to finish and retreive answer. Retreive must be done, otherwise the SPI will not work. - if(ShiftPWM_invertOutputs){ - sendbyte = ~sendbyte; // Invert the byte if needed. - } - SPDR = sendbyte; // Send the byte to the SPI - } + + // write bogus byte to the SPI because the SPI hardware only signals "transmit complete" rather than "transmitter idle". + // This byte will harmlessly be shifted off the edge of the final shift register + SPDR = 0; + + // If we are inverting outputs, then XORing the output byte with 0xff will flip all bits. XORing with 0x00 does nothing (except waste a cycle). + // TODO: Invert the bits when they are put into the buffer so we only do it once rather than on every INT + const unsigned char imask = ShiftPWM_invertOutputs? 0xff: 0x00; + + // This value gets added to the counter after each byte is sent to avoid having all loads turning on and off at the same time. + // TODO: Is there any use case where you would *not* want to balance the output? If not, get rid of the option and calculate the best step value to most evenly distribute load over all outputs + const unsigned char step = ShiftPWM_balanceLoad ? 8 : 0; + + send_spi_bytes(ShiftPWM.m_amountOfRegisters,counter,ledPtr, imask, step); + while (!(SPSR & _BV(SPIF))); // wait for last send to complete. + + #else //Use port manipulation to send out all bits for(unsigned char i = ShiftPWM.m_amountOfRegisters; i>0;--i){ // do one shift register at a time. This unrolls the loop for extra speed @@ -170,7 +248,8 @@ static inline void ShiftPWM_handleInterrupt(void){ // Write shift register latch clock high bitSet(*latchPort, latchBit); - + + // TODO: spin backwards to save one cycle on compare if(ShiftPWM.m_counter