diff --git a/multi/stm32l4-multi/libmulti/include/libmulti/libdma.h b/multi/stm32l4-multi/libmulti/include/libmulti/libdma.h index 9e455de0..fd19d3b9 100644 --- a/multi/stm32l4-multi/libmulti/include/libmulti/libdma.h +++ b/multi/stm32l4-multi/libmulti/include/libmulti/libdma.h @@ -25,19 +25,57 @@ enum { dma_per2mem = 0, dma_mem2per }; -enum { dma_spi = 0, dma_uart, dma_tim_upd }; +enum libdma_peripheral_type { dma_spi = 0, dma_uart, dma_tim_upd }; -enum { dma_ht = (1 << 0), dma_tc = (1 << 1) }; +enum libdma_interrupt_flags { dma_ht = (1 << 0), dma_tc = (1 << 1) }; -enum {dma_priorityLow = 0, dma_priorityMedium, dma_priorityHigh, dma_priorityVeryHigh }; +enum libdma_transfer_priority {dma_priorityLow = 0, dma_priorityMedium, dma_priorityHigh, dma_priorityVeryHigh }; enum { dma_modeNormal = 0, dma_modeNoBlock }; /* clang-format on */ +typedef void libdma_callback_t(void *arg, int type); + + +#define LIBXPDMA_ACQUIRE_FORCE_HPDMA (1 << 0) /* Force acquire channels on a HPDMA controller */ +#define LIBXPDMA_ACQUIRE_FORCE_GPDMA (1 << 1) /* Force acquire channels on a GPDMA controller */ +#define LIBXPDMA_ACQUIRE_2D_PER2MEM (1 << 2) /* Acquire a 2D DMA channel for per2mem direction */ +#define LIBXPDMA_ACQUIRE_2D_MEM2PER (1 << 3) /* Acquire a 2D DMA channel for mem2per direction */ + + +#define LIBXPDMA_TRANSFORM_ALIGNR0 (0 << 1) /* Right-align data, zero-extend if necessary */ +#define LIBXPDMA_TRANSFORM_ALIGNRS (1 << 1) /* Right-align data, sign-extend if necessary */ +#define LIBXPDMA_TRANSFORM_PACK (2 << 1) /* Treat data as packed and output at destination width */ +#define LIBXPDMA_TRANSFORM_ALIGNL (3 << 1) /* Left-align data */ +#define LIBXPDMA_TRANSFORM_SSWAPB (1 << 3) /* Swap middle bytes in word in source (e.g. 0xAABBCCDD => 0xAACCBBDD) */ +#define LIBXPDMA_TRANSFORM_SWAPB (1 << 4) /* Swap bytes in destination (e.g. 0xAABBCCDD => 0xDDCCBBAA) */ +#define LIBXPDMA_TRANSFORM_SWAPH (1 << 5) /* Swap half-words in destination (e.g. 0xAABBCCDD => 0xCCDDBBAA)*/ +#define LIBXPDMA_TRANSFORM_SWAPW (1 << 6) /* Swap words in destination (for 64-bit destinations only) */ + + +typedef struct { + void *buf; /* Pointer to buffer in memory */ + size_t bufSize; /* Size of buffer */ + uint8_t elSize_log; /* log2 of size of each element in bytes */ + uint8_t burstSize; /* Number of elements in transfer burst */ + uint8_t increment; /* Increment buffer pointer after each burst (1 - true, 0 - false) */ + uint8_t isCached; /* Buffer is in cached memory */ + uint16_t transform; /* Bitfield of LIBXPDMA_TRANSFORM_* flags */ +} libdma_transfer_buffer_t; + + +typedef struct { + void *addr; /* Pointer to peripheral's memory */ + uint8_t elSize_log; /* log2 of size of each element in bytes */ + uint8_t burstSize; /* Number of elements in transfer burst */ + uint8_t increment; /* Increment peripheral pointer after each burst (1 - true, 0 - false) */ +} libdma_peripheral_config_t; + + /* NOTE: Synchronization on an individual channel must be done externally by the user. */ @@ -72,7 +110,7 @@ int libdma_rxAsync(const struct libdma_per *per, void *rxMAddr, size_t len, vola /* Receive infinite rx into circular buffer. Fn is called at each Transfer Complete and Half Transfer interrupt. 
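/*
 * A minimal sketch of a Transfer Complete / Half Transfer callback for the circular receive
 * path described here. It assumes the usual circular-buffer semantics (HT after the first half
 * is filled, TC after the second) and that `type` carries the dma_ht/dma_tc bits defined above;
 * the uartExample_* names are illustrative, not part of this library. The callback is invoked
 * from the DMA interrupt path, so the real consumer should stay short and interrupt-safe.
 */
typedef struct {
	uint8_t *rxBuf; /* Circular buffer handed to libdma_infiniteRxAsync() */
	size_t rxLen;   /* Total length of that buffer */
} uartExample_ctx_t;

/* Hypothetical consumer, e.g. copies the chunk into a software FIFO */
void uartExample_consume(const uint8_t *data, size_t len);

static void uartExample_rxCallback(void *arg, int type)
{
	uartExample_ctx_t *ctx = (uartExample_ctx_t *)arg;

	if ((type & dma_ht) != 0) {
		/* First half of the circular buffer is ready */
		uartExample_consume(ctx->rxBuf, ctx->rxLen / 2);
	}

	if ((type & dma_tc) != 0) {
		/* Second half of the circular buffer is ready */
		uartExample_consume(ctx->rxBuf + (ctx->rxLen / 2), ctx->rxLen / 2);
	}
}

/* Possible call site: libdma_infiniteRxAsync(per, ctx->rxBuf, ctx->rxLen, uartExample_rxCallback, ctx); */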
*/ -int libdma_infiniteRxAsync(const struct libdma_per *per, void *rxMAddr, size_t len, void fn(void *arg, int type), void *arg); +int libdma_infiniteRxAsync(const struct libdma_per *per, void *rxMAddr, size_t len, libdma_callback_t *fn, void *arg); /* UTILITY FUNCTIONS */ @@ -88,4 +126,88 @@ int libdma_acquirePeripheral(int per, unsigned int num, const struct libdma_per int libdma_init(void); +/* Reserve DMA channels for the given peripheral. + * * per - one of `enum libdma_peripheral_type` + * * num - number of peripheral + * * flags - bitfield of `LIBXPDMA_ACQUIRE_*` + * * perP - pointer to where the peripheral's handle will be stored. + * Returns -EINVAL on invalid combination of per/num, -EBUSY if requested channels are not available + */ +int libxpdma_acquirePeripheral(int per, unsigned int num, uint32_t flags, const struct libdma_per **perP); + + +/* Configure DMA channel for use + * * per - peripheral's handle + * * dir - direction of transfer + * * priority - priority of transfers on the channel, one of `libdma_transfer_priority`. + * * cond - pointer to conditional that will be signaled upon interrupt. + * If NULL, waiting for transfer condition can be handled by the driver. + * Functions `libxpdma_startTransferWithFlag` and `libxpdma_waitForTransaction` will be usable. + * Otherwise, the user will be responsible for synchronization. + * `libxpdma_startTransferWithFlag` and `libxpdma_waitForTransaction` cannot be used. + */ +int libxpdma_configureChannel(const struct libdma_per *per, int dir, int priority, handle_t *cond); + + +/* Configure peripheral side of the transfer */ +int libxpdma_configurePeripheral(const struct libdma_per *per, int dir, const libdma_peripheral_config_t *cfg); + + +/* Configure memory side of the transfer + * * per - peripheral's handle + * * dir - direction of transfer + * * isCircular - create a circular transfer + * * buffers - pointer to array of buffers + * * n_buffers - number of elements in `buffers` array + */ +int libxpdma_configureMemory(const struct libdma_per *per, int dir, int isCircular, const libdma_transfer_buffer_t *buffers, size_t n_buffers); + + +/* Start transaction, notify of progress using a callback + * * per - peripheral's handle + * * dir - direction of transfer + * * intrFlags - bitfield of `enum libdma_interrupt_flags` + * * cb - callback function + * * cb_arg - argument for callback function + */ +int libxpdma_startTransferWithCallback(const struct libdma_per *per, int dir, int intrFlags, libdma_callback_t *cb, void *cb_arg); + + +/* Start transaction, notify of progress using a flag in memory + * * per - peripheral's handle + * * dir - direction of transfer + * * doneFlag - pointer to flag in memory. Will be set to 0 upon entry, != 0 when transfer is complete. + */ +int libxpdma_startTransferWithFlag(const struct libdma_per *per, int dir, volatile int *doneFlag); + + +/* Wait for transaction to finish. Can wait on both transfer directions at once. + * If either flag pointer is NULL, the function won't wait on transfer in that direction. + * * per - peripheral's handle + * * flagMem2Per - pointer to flag in memory for `dma_per2mem` direction. Can be NULL. + * * flagPer2Mem - pointer to flag in memory for `dma_mem2per` direction. Can be NULL. + * * timeout - ms to wait for transfer to finish or 0 to wait indefinitely + */ +int libxpdma_waitForTransaction(const struct libdma_per *per, volatile int *flagMem2Per, volatile int *flagPer2Mem, time_t timeout); + + +/* Cancel a transfer in progress and reset the channel. 
+ * Can be used to stop infinite transfer or to clean up in case of an error. + */ +int libxpdma_cancelTransfer(const struct libdma_per *per, int dir); + + +/* Returns size of data remaining in the current buffer. + * NOTE: During circular transfers, once a buffer is finished the next one will be loaded. + * For this reason, once transfer complete interrupt occurs this function will return the size of the buffer instead of 0. + */ +ssize_t libxpdma_bufferRemaining(const struct libdma_per *per, int dir); + + +/* Allocate DMA-capable (uncached) memory. + * For now to we assume DMA buffers will be allocated once at the start of the program as needed + * and for this reason no corresponding `libdma_free` function exists. + */ +void *libdma_malloc(size_t size); + #endif diff --git a/multi/stm32l4-multi/libmulti/libgpdma.c b/multi/stm32l4-multi/libmulti/libgpdma.c index 92153c4f..9dc63b2b 100644 --- a/multi/stm32l4-multi/libmulti/libgpdma.c +++ b/multi/stm32l4-multi/libmulti/libgpdma.c @@ -13,21 +13,23 @@ #include #include #include +#include #include +#include +#include #include "../common.h" #include "../stm32l4-multi.h" #include "libmulti/libdma.h" +#define DMA_SYSPAGE_MAP_NAME "dmamem" + #define DMA_CTRL_GPDMA1 0 #define DMA_CTRL_HPDMA1 1 #define DMA_NUM_CONTROLLERS 2 /* Channels 12..15 are capable of 2D transfers, others can only do linear transfers */ -#define DMA_NUM_CHANNELS 16 -/* Length of transfer in bytes from which a transfer is considered "long" and - * will be carried out with interrupts (otherwise polling will be used). - * TODO: Value needs to be tested and optimized empirically. */ -#define DMA_LONG_TRANSFER_THRESHOLD 24 +#define DMA_NUM_CHANNELS 16 +#define DMA_2D_CAPABLE_MASK ((1 << 16) - (1 << 12)) enum dma_reqs { @@ -206,22 +208,10 @@ enum xpdma_cx_regs { }; -/* Destination Wider than Source */ -enum xpdma_padalign_dws { - xpdma_padalign_dws_right_zero = 0x0, /* Source right-aligned, zero-extended (0xAB => 0x00AB) */ - xpdma_padalign_dws_right_sign = 0x1, /* Source right-aligned, sign-extended (0xAB => 0xFFAB) */ - xpdma_padalign_dws_pack = 0x2, /* Source read into FIFO, output little-endian at destination width (0xAB, 0xCD => 0xCDAB) */ -}; - -/* Source Wider than Destination */ -enum xpdma_padalign_swd { - xpdma_padalign_swd_right = 0x0, /* Right-aligned (truncated on the left) (0xABCD => 0xCD) */ - xpdma_padalign_swd_left = 0x1, /* Left-aligned (truncated on the right) (0xABCD => 0xAB) */ - xpdma_padalign_swd_unpack = 0x2, /* Source read into FIFO, output little-endian at destination width (0xABCD => 0xCD, 0xAB) */ -}; - -#define XPDMA_CXTR1_SINC (1 << 3) -#define XPDMA_CXTR1_DINC (1 << 19) +/* CxTR1 register is slightly weird - the upper 16 bits mostly refer to the destination and lower 16 bits mostly refer to the source. + * However, bits 29:26 and 13:10 refer to the transfer as a whole (byte exchange, padding and alignment). 
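/*
 * A minimal end-to-end sketch of the libxpdma_* API declared in libdma.h above: a one-shot,
 * full-duplex transfer for an SPI-class peripheral. Everything here is illustrative - the
 * instance number, the EXAMPLE_SPI_*DR placeholder addresses and the buffer length are
 * assumptions, and error handling is reduced to a single check.
 */
#include <stddef.h>
#include <stdint.h>

#include "libmulti/libdma.h"

#define EXAMPLE_SPI_TXDR ((void *)0) /* Placeholder: real SPI TX data register address */
#define EXAMPLE_SPI_RXDR ((void *)0) /* Placeholder: real SPI RX data register address */
#define EXAMPLE_XFER_LEN 64

static int example_spiFullDuplex(void)
{
	const struct libdma_per *per;
	volatile int txDone, rxDone;

	libdma_init();

	/* Reserve channels for instance 0; LIBXPDMA_ACQUIRE_* flags could force a controller or 2D channels */
	if (libxpdma_acquirePeripheral(dma_spi, 0, 0, &per) < 0) {
		return -1;
	}

	/* cond == NULL: the driver handles waiting, so the *WithFlag/waitForTransaction path can be used */
	libxpdma_configureChannel(per, dma_mem2per, dma_priorityMedium, NULL);
	libxpdma_configureChannel(per, dma_per2mem, dma_priorityMedium, NULL);

	/* Peripheral side: byte-wide, non-incrementing accesses to the data registers */
	libdma_peripheral_config_t txCfg = { .addr = EXAMPLE_SPI_TXDR, .elSize_log = 0, .burstSize = 1, .increment = 0 };
	libdma_peripheral_config_t rxCfg = { .addr = EXAMPLE_SPI_RXDR, .elSize_log = 0, .burstSize = 1, .increment = 0 };
	libxpdma_configurePeripheral(per, dma_mem2per, &txCfg);
	libxpdma_configurePeripheral(per, dma_per2mem, &rxCfg);

	/* Memory side: one linear buffer per direction taken from the uncached DMA map, so isCached = 0.
	 * A regular (cached) buffer would set isCached = 1 and let the driver do the cache maintenance. */
	uint8_t *txBuf = libdma_malloc(EXAMPLE_XFER_LEN);
	uint8_t *rxBuf = libdma_malloc(EXAMPLE_XFER_LEN);
	libdma_transfer_buffer_t txDesc = { .buf = txBuf, .bufSize = EXAMPLE_XFER_LEN, .elSize_log = 0, .burstSize = 1, .increment = 1, .isCached = 0, .transform = 0 };
	libdma_transfer_buffer_t rxDesc = { .buf = rxBuf, .bufSize = EXAMPLE_XFER_LEN, .elSize_log = 0, .burstSize = 1, .increment = 1, .isCached = 0, .transform = 0 };
	libxpdma_configureMemory(per, dma_mem2per, 0, &txDesc, 1);
	libxpdma_configureMemory(per, dma_per2mem, 0, &rxDesc, 1);

	/* Start RX before TX, then block until both directions are done (timeout 0 = wait indefinitely) */
	libxpdma_startTransferWithFlag(per, dma_per2mem, &rxDone);
	libxpdma_startTransferWithFlag(per, dma_mem2per, &txDone);

	return libxpdma_waitForTransaction(per, &txDone, &rxDone, 0);
}

/* In a real driver the acquire/configureChannel/configurePeripheral steps would typically run once at init
 * (as libspi_init() does further below), with only configureMemory and the start/wait calls repeated per transfer. */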
*/ +#define XPDMA_CXTR1_INDIVIDUAL_BITS 0xc3ffUL +#define XPDMA_CXTR1_COMMON_BITS 0x3c003c00UL #define XPDMA_CXTR2_TCEM_BLOCK (0 << 30) /* Generate TC event on block completion, HT on half block */ #define XPDMA_CXTR2_TCEM_RBLOCK (1 << 30) /* Generate TC event on repeated block completion, HT on half repeated block */ @@ -248,15 +238,20 @@ enum xpdma_padalign_swd { #define XPDMA_ERRFLAGS (XPDMA_USEF | XPDMA_ULEF | XPDMA_DTEF) #define XPDMA_IDLEF (1 << 0) /* Channel idle */ -#define DMA_USE_TC_IRQ (1 << 0) -#define DMA_USE_HT_IRQ (1 << 1) -#define DMA_CIRCULAR (1 << 2) -#define DMA_OVERRIDE_SINC_OFF (1 << 3) -#define DMA_OVERRIDE_DINC_OFF (1 << 4) +#define XPDMA_CXLLR_UT1 (1 << 31) +#define XPDMA_CXLLR_UT2 (1 << 30) +#define XPDMA_CXLLR_UB1 (1 << 29) +#define XPDMA_CXLLR_USA (1 << 28) +#define XPDMA_CXLLR_UDA (1 << 27) +#define XPDMA_CXLLR_UT3 (1 << 26) /* 2D DMA only */ +#define XPDMA_CXLLR_UB2 (1 << 25) /* 2D DMA only */ +#define XPDMA_CXLLR_ULL (1 << 16) + +#define XPDMA_LISTBUF_SIZE 16 /* Maximum size of linked list in memory in words */ struct libdma_perSetup { - uint8_t dma; /* One of DMA_CTRL_* */ + uint8_t defDMA; /* One of DMA_CTRL_* */ uint8_t requests[dma_mem2per + 1]; /* One of dma_req_* */ uint8_t portPer; /* DMA controller port to use for peripheral communication */ uint8_t portMem; /* DMA controller port to use for memory communication */ @@ -266,6 +261,7 @@ struct libdma_perSetup { struct libdma_per { const struct libdma_perSetup *setup; + uint8_t dma; /* Controller selected for the given peripheral */ uint8_t chns[dma_mem2per + 1]; /* Channel used for a given direction. Value of DMA_NUM_CHANNELS signifies channel is not in use. */ }; @@ -277,26 +273,39 @@ struct libdma_per { */ static const struct libdma_perSetup libdma_persUart[] = { - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_usart1_rx, dma_req_usart1_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_usart2_rx, dma_req_usart2_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_usart3_rx, dma_req_usart3_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_uart4_rx, dma_req_uart4_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_uart5_rx, dma_req_uart5_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_usart6_rx, dma_req_usart6_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_uart7_rx, dma_req_uart7_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_uart8_rx, dma_req_uart8_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_uart9_rx, dma_req_uart9_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_usart10_rx, dma_req_usart10_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [usart1] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_usart1_rx, dma_req_usart1_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [usart2] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_usart2_rx, dma_req_usart2_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [usart3] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_usart3_rx, dma_req_usart3_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [uart4] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_uart4_rx, dma_req_uart4_tx }, .portPer = 1, .portMem = 0, .valid 
= 1 }, + [uart5] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_uart5_rx, dma_req_uart5_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [usart6] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_usart6_rx, dma_req_usart6_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [uart7] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_uart7_rx, dma_req_uart7_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [uart8] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_uart8_rx, dma_req_uart8_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [uart9] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_uart9_rx, dma_req_uart9_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [usart10] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_usart10_rx, dma_req_usart10_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, }; static const struct libdma_perSetup libdma_persSpi[] = { - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_spi1_rx, dma_req_spi1_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_spi2_rx, dma_req_spi2_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_spi3_rx, dma_req_spi3_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_spi4_rx, dma_req_spi4_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_spi5_rx, dma_req_spi5_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, - { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_spi6_rx, dma_req_spi6_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [spi1] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_spi1_rx, dma_req_spi1_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [spi2] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_spi2_rx, dma_req_spi2_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [spi3] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_spi3_rx, dma_req_spi3_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [spi4] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_spi4_rx, dma_req_spi4_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [spi5] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_spi5_rx, dma_req_spi5_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, + [spi6] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_spi6_rx, dma_req_spi6_tx }, .portPer = 1, .portMem = 0, .valid = 1 }, +}; + + +static const struct libdma_perSetup libdma_persTimUpd[] = { + [pwm_tim1] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim1_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, + [pwm_tim2] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim2_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, + [pwm_tim3] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim3_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, + [pwm_tim4] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim4_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, + [pwm_tim5] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim5_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, + [pwm_tim8] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim8_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, + [pwm_tim15] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim15_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, + [pwm_tim16] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim16_upd }, .portPer = 1, .portMem = 0, 
.valid = 1 }, + [pwm_tim17] = { .defDMA = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim17_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, }; @@ -309,17 +318,16 @@ typedef struct { uint32_t llr; } libdma_chn_setup_t; -static const struct libdma_perSetup libdma_persTimUpd[] = { - [pwm_tim1] = { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim1_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, - [pwm_tim2] = { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim2_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, - [pwm_tim3] = { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim3_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, - [pwm_tim4] = { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim4_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, - [pwm_tim5] = { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim5_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, - [pwm_tim8] = { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim8_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, - [pwm_tim15] = { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim15_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, - [pwm_tim16] = { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim16_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, - [pwm_tim17] = { .dma = DMA_CTRL_GPDMA1, .requests = { dma_req_invalid, dma_req_tim17_upd }, .portPer = 1, .portMem = 0, .valid = 1 }, -}; + +/* Type of buffer used for making linked lists, will be allocated in non-cached memory */ +typedef uint32_t libdma_listbufs_t[DMA_NUM_CONTROLLERS][DMA_NUM_CHANNELS][XPDMA_LISTBUF_SIZE]; + + +#define MAX_BUFS_PER_TRANSACTION 8 +typedef struct { + const void *buf; + ssize_t dirSize; /* 0 => don't perform operation (end of list), < 0 => perform per2mem, > 0 => perform mem2per */ +} libdma_cacheOp_t; static const struct dma_setup { @@ -344,18 +352,32 @@ static struct dma_ctrl { struct dma_channel { handle_t irqLock; handle_t cond; + handle_t intr; libdma_chn_setup_t cx; - void (*cb)(void *arg, int type); + libdma_callback_t *cb; void *cb_arg; + libdma_cacheOp_t cacheOps[MAX_BUFS_PER_TRANSACTION]; uint8_t priority; + /* libxpdma_* API functions configure the memory data size and increment at a different point than + * older libdma_* functions, so we need to store them in the driver */ + struct { + uint8_t msize; + uint8_t minc; + } oldAPI; } chns[DMA_NUM_CHANNELS]; - struct libdma_per pers[DMA_NUM_CHANNELS]; - uint32_t chanFree; /* Bitmap of free channels */ + struct libdma_per pers[DMA_NUM_CHANNELS]; /* Each peripheral takes at least 1 channel, so we can have at most DMA_NUM_CHANNELS peripherals */ + uint32_t chanFree; /* Bitmap of free channels */ handle_t takenLock; } dma_ctrl[DMA_NUM_CONTROLLERS]; static struct { bool initialized; + libdma_listbufs_t *listbufs; + void *dmaMemPtr; + size_t dmaMemSz; + handle_t dmaAllocMutex; + void *dmaAllocPtr; + size_t dmaAllocSz; } dma_common; @@ -368,6 +390,41 @@ static inline bool libdma_hasChannelFinished(volatile uint32_t *chnBase) } +/* Utility function for verifying arguments and getting the DMA channel */ +static bool libxpdma_getDmaAndChannel(const struct libdma_per *per, int dir, int *dma_out, int *chn_out) +{ + if (per == NULL) { + return false; + } + + if ((dir != dma_per2mem) && (dir != dma_mem2per)) { + return false; + } + + *dma_out = per->dma; + *chn_out = per->chns[dir]; + + if (*chn_out == DMA_NUM_CHANNELS) { + /* Channel for this direction not allocated */ 
+ return false; + } + + return true; +} + + +/* Check if a buffer is inside the DMA-capable memory range */ +static bool libxpdma_isInsideDMAMemory(const void *addr, size_t sz) +{ + uintptr_t addr_start = (uintptr_t)addr; + uintptr_t addr_end = (uintptr_t)addr + sz; + bool lowerBound = addr_start >= (uintptr_t)dma_common.dmaMemPtr; + bool noOverflow = addr_end >= addr_start; + bool upperBound = addr_end <= ((uintptr_t)dma_common.dmaMemPtr + dma_common.dmaMemSz); + return lowerBound && noOverflow && upperBound; +} + + static inline bool libdma_irqToChannel(int irq, unsigned int *dma, unsigned int *chn) { if ((irq >= gpdma1_ch0_irq) && (irq <= gpdma1_ch15_irq)) { @@ -404,28 +461,87 @@ static inline volatile uint32_t *libdma_channelBase(int dma, unsigned int chn) } -static void libdma_cacheOpMem2Per(void *addr, size_t sz) +static void libdma_cacheOpMem2Per(const void *addr, size_t sz) { platformctl_t pctl; pctl.action = pctl_set; pctl.type = pctl_cleanDCache; - pctl.opDCache.addr = addr; + pctl.opDCache.addr = (void *)addr; pctl.opDCache.sz = sz; platformctl(&pctl); } -static void libdma_cacheOpPer2Mem(void *addr, size_t sz) +static void libdma_cacheOpPer2Mem(const void *addr, size_t sz) { platformctl_t pctl; pctl.action = pctl_set; pctl.type = pctl_cleanInvalDCache; - pctl.opDCache.addr = addr; + pctl.opDCache.addr = (void *)addr; pctl.opDCache.sz = sz; platformctl(&pctl); } +static void libxpdma_performCacheOps(int dma, int chn) +{ + libdma_cacheOp_t *ops = dma_ctrl[dma].chns[chn].cacheOps; + for (size_t i = 0; i < MAX_BUFS_PER_TRANSACTION; i++) { + if (ops[i].dirSize < 0) { + libdma_cacheOpPer2Mem(ops[i].buf, -ops[i].dirSize); + } + else if (ops[i].dirSize == 0) { + break; + } + else { + libdma_cacheOpMem2Per(ops[i].buf, ops[i].dirSize); + } + } +} + + +static int libxpdma_addCacheOp(int dma, int chn, const void *buf, size_t size, int dir) +{ + if (size > INT32_MAX) { + return -EINVAL; + } + + libdma_cacheOp_t *ops = dma_ctrl[dma].chns[chn].cacheOps; + ssize_t dirSize = (dir == dma_per2mem) ? 
-((ssize_t)size) : size; + for (size_t i = 0; i < MAX_BUFS_PER_TRANSACTION; i++) { + if (ops[i].buf == buf && ops[i].dirSize == dirSize) { + return EOK; + } + + if (ops[i].dirSize == 0) { + ops[i].buf = buf; + ops[i].dirSize = dirSize; + return EOK; + } + } + + return -ENOMEM; +} + + +static void libxpdma_clearCacheOps(int dma, int chn) +{ + memset(dma_ctrl[dma].chns[chn].cacheOps, 0, sizeof(dma_ctrl[dma].chns[chn].cacheOps)); +} + + +static void libdma_setLBAR(int dma, int chn, void *addr) +{ + platformctl_t pctl; + pctl.action = pctl_set; + pctl.type = pctl_dmaLinkBaseAddr; + pctl.dmaLinkBaseAddr.dev = dma_setup[dma].pctl; + pctl.dmaLinkBaseAddr.channel = chn; + pctl.dmaLinkBaseAddr.addr = ((uint32_t)addr) & 0xffff0000; + platformctl(&pctl); +} + + static int libdma_irqHandler(unsigned int n, void *arg) { unsigned int dma, chn; @@ -472,15 +588,61 @@ static void libdma_transferOnceCallback(void *cb_arg, int type) } -static void libdma_setTransferOnceCallback(int dma, int chn, volatile int *doneFlag) +/* Cancel transfer if a transfer is in progress and reset interrupt-related variables */ +static void libdma_cleanupTransfer(int dma, int chn) { - *doneFlag = 0; - dma_ctrl[dma].chns[chn].cb = libdma_transferOnceCallback; - dma_ctrl[dma].chns[chn].cb_arg = (void *)doneFlag; + volatile uint32_t *chnBase = libdma_channelBase(dma, chn); + + dataBarier(); + /* Disable DMA interrupts */ + *(chnBase + xpdma_cxcr) &= ~XPDMA_ALLFLAGS; + dataBarier(); + if (*(chnBase + xpdma_cxcr) & 1) { + /* Channel is still enabled; need to suspend and reset it before it can be used again */ + *(chnBase + xpdma_cxcr) |= (1 << 2); /* Set SUSP flag */ + dataBarier(); + while ((*(chnBase + xpdma_cxcr) & (1 << 2)) == 0) { + /* Wait for hardware to suspend the channel */ + } + + *(chnBase + xpdma_cxcr) |= (1 << 1); /* Set RESET flag */ + dataBarier(); + } + + dma_ctrl[dma].chns[chn].cb = NULL; } -int libdma_acquirePeripheral(int per, unsigned int num, const struct libdma_per **perP) +static int libxpdma_findChannel(uint8_t request, int dir, uint32_t *chanFreePtr, uint32_t flags) +{ + if (request == dma_req_invalid) { + return DMA_NUM_CHANNELS; + } + + uint32_t chanFreeMasked = *chanFreePtr; + bool mask2D = false; + if ((dir == dma_per2mem) && ((flags & LIBXPDMA_ACQUIRE_2D_PER2MEM) != 0)) { + mask2D = true; + } + else if ((dir == dma_mem2per) && ((flags & LIBXPDMA_ACQUIRE_2D_MEM2PER) != 0)) { + mask2D = true; + } + + if (mask2D) { + chanFreeMasked &= DMA_2D_CAPABLE_MASK | (~((1u << DMA_NUM_CHANNELS) - 1)); + } + + int chn = __builtin_ctz(chanFreeMasked); + if (chn >= DMA_NUM_CHANNELS) { + return -EBUSY; + } + + *chanFreePtr &= ~(1u << chn); + return chn; +} + + +int libxpdma_acquirePeripheral(int per, unsigned int num, uint32_t flags, const struct libdma_per **perP) { const struct libdma_perSetup *setups; size_t setupsSize; @@ -505,7 +667,21 @@ int libdma_acquirePeripheral(int per, unsigned int num, const struct libdma_per } const struct libdma_perSetup *setup = &setups[num]; - struct dma_ctrl *ctrl = &dma_ctrl[setup->dma]; + uint32_t dma = setup->defDMA; + if ((flags & (LIBXPDMA_ACQUIRE_FORCE_HPDMA | LIBXPDMA_ACQUIRE_FORCE_GPDMA)) != 0) { + bool findHPDMA = (flags & LIBXPDMA_ACQUIRE_FORCE_HPDMA) != 0; + for (dma = 0; dma < DMA_NUM_CONTROLLERS; dma++) { + if (dma_setup[dma].isHPDMA == findHPDMA) { + break; + } + } + + if (dma >= DMA_NUM_CONTROLLERS) { + return -EINVAL; + } + } + + struct dma_ctrl *ctrl = &dma_ctrl[dma]; mutexLock(ctrl->takenLock); struct libdma_per *p = NULL; @@ -524,24 +700,19 @@ int 
libdma_acquirePeripheral(int per, unsigned int num, const struct libdma_per /* Find enough unused channels necessary for peripheral */ uint32_t chanFree = ctrl->chanFree; - for (int i = dma_per2mem; i <= dma_mem2per; i++) { - if (setup->requests[i] == dma_req_invalid) { - p->chns[i] = DMA_NUM_CHANNELS; - continue; - } - - uint8_t chn = __builtin_ctz(chanFree); - if (chn >= DMA_NUM_CHANNELS) { + for (int dir = dma_per2mem; dir <= dma_mem2per; dir++) { + int acqResult = libxpdma_findChannel(setup->requests[dir], dir, &chanFree, flags); + if (acqResult < 0) { mutexUnlock(ctrl->takenLock); - return -EBUSY; + return acqResult; } - chanFree &= ~(1u << chn); - p->chns[i] = chn; + p->chns[dir] = acqResult; } ctrl->chanFree = chanFree; p->setup = setup; + p->dma = dma; mutexUnlock(ctrl->takenLock); *perP = p; @@ -549,379 +720,741 @@ int libdma_acquirePeripheral(int per, unsigned int num, const struct libdma_per } -static void libdma_configureChannel( - int dma, - int chn, - int request, - int dir, - int priority, - void *paddr, - int sSize, - int dSize, - int sInc, - int dInc, - int sPort, - int dPort) +int libxpdma_configureChannel(const struct libdma_per *per, int dir, int priority, handle_t *cond) { + int dma, chn; + if (!libxpdma_getDmaAndChannel(per, dir, &dma, &chn)) { + return -EINVAL; + } + struct dma_channel *sChn = &dma_ctrl[dma].chns[chn]; - sChn->priority = priority & 0x3; - sChn->cx.tr1 = - /* Destination: */ - (1u << 31) | /* secure transfer */ - ((dPort & 0x1) << 30) | - (0u << 27) | /* no half-word exchange */ - (0u << 26) | /* no byte exchange */ - (0u << 20) | /* burst size = 1 data */ - ((dInc & 0x1) << 19) | - ((dSize & 0x3) << 16) | - /* Source: */ + sChn->priority = priority; + if (sChn->intr == 0) { + handle_t interruptCond = (cond == NULL) ? sChn->cond : *cond; + interrupt(libdma_channelToIRQ(dma, chn), libdma_irqHandler, NULL, interruptCond, &sChn->intr); + } + + sChn->cx.br1 &= 0xffff; /* Zero out upper bits of CxBR1 */ + + return EOK; +} + + +int libxpdma_configurePeripheral(const struct libdma_per *per, int dir, const libdma_peripheral_config_t *cfg) +{ + int dma, chn; + if (!libxpdma_getDmaAndChannel(per, dir, &dma, &chn)) { + return -EINVAL; + } + + if (cfg->elSize_log > 2) { + return -EINVAL; + } + + if ((cfg->burstSize == 0) || (cfg->burstSize > 64)) { + return -EINVAL; + } + + uint32_t shift = (dir == dma_mem2per) ? 16 : 0; + struct dma_channel *sChn = &dma_ctrl[dma].chns[chn]; + /* The other half of the register will be configured by libxpdma_configureMemory */ + sChn->cx.tr1 &= ~(XPDMA_CXTR1_INDIVIDUAL_BITS << shift); + uint32_t cxtr1_temp = (1u << 15) | /* secure transfer */ - ((sPort & 0x1) << 14) | - (0u << 13) | /* no byte exchange */ - (0u << 11) | /* padding/alignment - always right-aligned */ - (0u << 4) | /* burst size = 1 data */ - ((sInc & 0x1) << 3) | - ((sSize & 0x3) << 0); - uint32_t dir_bit = (dir == dma_per2mem) ? XPDMA_CXTR2_PER2MEM : XPDMA_CXTR2_MEM2PER; - sChn->cx.tr2 = + ((per->setup->portPer & 1) << 14) | + (((cfg->burstSize - 1) & 0x3f) << 4) | + ((cfg->increment != 0) ? (1 << 3) : 0) | + ((cfg->elSize_log & 0x3) << 0); + sChn->cx.tr1 |= cxtr1_temp << shift; + + /* TCEM bits will be configured by libxpdma_configureMemory */ + sChn->cx.tr2 &= XPDMA_CXTR2_TCEM_MASK; + uint32_t dir_bit = (dir == dma_mem2per) ? 
XPDMA_CXTR2_MEM2PER : XPDMA_CXTR2_PER2MEM; + sChn->cx.tr2 |= XPDMA_CXTR2_TCEM_BLOCK | XPDMA_CXTR2_PF_NORMAL | XPDMA_CXTR2_PER_BURST | dir_bit | - (request & 0xff); - if (dir == dma_per2mem) { - sChn->cx.sar = (uint32_t)paddr; + (per->setup->requests[dir] & 0xff); + if (dir == dma_mem2per) { + sChn->cx.dar = (uint32_t)cfg->addr; } else { - sChn->cx.dar = (uint32_t)paddr; + sChn->cx.sar = (uint32_t)cfg->addr; } + + return EOK; } -int libdma_configurePeripheral( +static bool libxpdma_setTransform(uint16_t flags, uint32_t *val) +{ + uint8_t ssize = (*val) & 0x3; + uint8_t dsize = (*val >> 16) & 0x3; + uint8_t pamRequest = flags & 0x6; + *val &= ~0x1C003800; + if (ssize < dsize) { + switch (pamRequest) { + case LIBXPDMA_TRANSFORM_ALIGNR0: *val |= (0 << 11); break; + case LIBXPDMA_TRANSFORM_ALIGNRS: *val |= (1 << 11); break; + case LIBXPDMA_TRANSFORM_PACK: *val |= (2 << 11); break; + case LIBXPDMA_TRANSFORM_ALIGNL: return false; + default: return false; /* Should never happen */ + } + } + else if (ssize > dsize) { + switch (pamRequest) { + case LIBXPDMA_TRANSFORM_ALIGNR0: *val |= (0 << 11); break; + case LIBXPDMA_TRANSFORM_ALIGNRS: *val |= (0 << 11); break; + case LIBXPDMA_TRANSFORM_PACK: *val |= (2 << 11); break; + case LIBXPDMA_TRANSFORM_ALIGNL: *val |= (1 << 11); break; + default: return false; /* Should never happen */ + } + } + + *val |= ((flags & LIBXPDMA_TRANSFORM_SSWAPB) != 0) ? (1 << 13) : 0; + *val |= ((flags & LIBXPDMA_TRANSFORM_SWAPB) != 0) ? (1 << 26) : 0; + *val |= ((flags & LIBXPDMA_TRANSFORM_SWAPH) != 0) ? (1 << 27) : 0; + *val |= ((flags & LIBXPDMA_TRANSFORM_SWAPW) != 0) ? (1 << 28) : 0; + return true; +} + + +static ssize_t libxpdma_configureBuffer( + int dma, + int chn, const struct libdma_per *per, + const libdma_transfer_buffer_t *buf, int dir, - int priority, - void *paddr, - int msize, - int psize, - int minc, - int pinc, - handle_t *cond) + libdma_chn_setup_t *setup, + uint32_t *changeMask_out) { - const struct libdma_perSetup *s = per->setup; - int chn = per->chns[dir]; - if (chn == DMA_NUM_CHANNELS) { + if ((buf->bufSize == 0) || (buf->bufSize > DMA_MAX_LEN)) { return -EINVAL; } - handle_t interruptCond = (cond == NULL) ? dma_ctrl[s->dma].chns[chn].cond : *cond; - interrupt(libdma_channelToIRQ(s->dma, chn), libdma_irqHandler, NULL, interruptCond, NULL); - if (dir == dma_per2mem) { - libdma_configureChannel(s->dma, chn, s->requests[dma_per2mem], dir, priority, paddr, psize, msize, pinc, minc, s->portPer, s->portMem); + const uint8_t max_elSize_log = dma_setup[dma].isHPDMA ? 3 : 2; + if (buf->elSize_log > max_elSize_log) { + return -EINVAL; } - else if (dir == dma_mem2per) { - libdma_configureChannel(s->dma, chn, s->requests[dma_mem2per], dir, priority, paddr, msize, psize, minc, pinc, s->portMem, s->portPer); + + if ((buf->burstSize == 0) || (buf->burstSize > 64)) { + return -EINVAL; } - else { + + uint32_t changeMask = 0; + ssize_t n_changes = 0; + uint32_t shift = (dir == dma_mem2per) ? 0 : 16; + /* The other half of the register will be configured by libxpdma_configurePeripheral */ + uint32_t tr1_new = setup->tr1; + tr1_new &= ~(XPDMA_CXTR1_INDIVIDUAL_BITS << shift); + uint32_t tr1_bits = + (1u << 15) | /* secure transfer */ + ((per->setup->portMem & 1) << 14) | + ((buf->burstSize - 1) << 4) | + ((buf->increment != 0) ? 
(1 << 3) : 0) | + (buf->elSize_log & 0x3); + tr1_new |= tr1_bits << shift; + if (!libxpdma_setTransform(buf->transform, &tr1_new)) { return -EINVAL; } - return EOK; + if (tr1_new != setup->tr1) { + setup->tr1 = tr1_new; + changeMask |= XPDMA_CXLLR_UT1; + n_changes++; + } + + if ((setup->br1 & 0xffff) != buf->bufSize) { + setup->br1 = (setup->br1 & 0xffff0000) | (buf->bufSize & 0xffff); + changeMask |= XPDMA_CXLLR_UB1; + n_changes++; + } + + if (dir == dma_mem2per) { + setup->sar = (uint32_t)buf->buf; + changeMask |= XPDMA_CXLLR_USA; + n_changes++; + } + else { + setup->dar = (uint32_t)buf->buf; + changeMask |= XPDMA_CXLLR_UDA; + n_changes++; + }; + + *changeMask_out = changeMask; + if (buf->isCached != 0) { + int ret = libxpdma_addCacheOp(dma, chn, buf->buf, buf->bufSize, dir); + if (ret < 0) { + return ret; + } + } + + return n_changes; } -static void libdma_prepareTransfer(int dma, int chn, void *maddr, size_t len, int flags) +static inline uint32_t *libxpdma_getListbuf(int dma, int chn) { - volatile uint32_t *chnBase = libdma_channelBase(dma, chn); + return (*dma_common.listbufs)[dma][chn]; +} - dataBarier(); - struct dma_channel *sChn = &dma_ctrl[dma].chns[chn]; - uint32_t cxtr1 = sChn->cx.tr1; - if ((flags & DMA_OVERRIDE_SINC_OFF) != 0) { - cxtr1 &= ~XPDMA_CXTR1_SINC; +static int libxpdma_updateLL(int dma, int chn, libdma_chn_setup_t *setup, ssize_t n_changes, uint32_t changeMask, uint32_t **this_ll, size_t *listbuf_offset) +{ + if (n_changes < 0) { + return n_changes; } - if ((flags & DMA_OVERRIDE_DINC_OFF) != 0) { - cxtr1 &= ~XPDMA_CXTR1_DINC; + changeMask |= XPDMA_CXLLR_ULL; + n_changes++; + size_t i = *listbuf_offset; + if ((i + (size_t)n_changes) > XPDMA_LISTBUF_SIZE) { + return -ENOMEM; } - if ((sChn->cx.tr2 & XPDMA_CXTR2_MEM2PER) != 0) { - sChn->cx.sar = (uint32_t)maddr; + uint32_t *listbuf = libxpdma_getListbuf(dma, chn); + **this_ll = changeMask | (((uint32_t)&listbuf[i]) & 0xffff); + if ((changeMask & XPDMA_CXLLR_UT1) != 0) { + listbuf[i] = setup->tr1; + i++; } - else { - sChn->cx.dar = (uint32_t)maddr; + + if ((changeMask & XPDMA_CXLLR_UB1) != 0) { + listbuf[i] = setup->br1; + i++; } - sChn->cx.tr2 &= ~XPDMA_CXTR2_TCEM_MASK; - sChn->cx.tr2 |= ((flags & DMA_CIRCULAR) != 0) ? XPDMA_CXTR2_TCEM_EACH_LL : XPDMA_CXTR2_TCEM_LAST_LL; - sChn->cx.br1 = len; - sChn->cx.llr = 0; /* Link register == 0 (no list in memory) */ + if ((changeMask & XPDMA_CXLLR_USA) != 0) { + listbuf[i] = setup->sar; + i++; + } - *(chnBase + xpdma_cxlbar) = 0; /* Linked-list base address (unused) */ - *(chnBase + xpdma_cxfcr) = XPDMA_ALLFLAGS; - *(chnBase + xpdma_cxtr1) = cxtr1; - *(chnBase + xpdma_cxtr2) = sChn->cx.tr2; - *(chnBase + xpdma_cxbr1) = sChn->cx.br1; - *(chnBase + xpdma_cxsar) = sChn->cx.sar; - *(chnBase + xpdma_cxdar) = sChn->cx.dar; - *(chnBase + xpdma_cxllr) = sChn->cx.llr; + if ((changeMask & XPDMA_CXLLR_UDA) != 0) { + listbuf[i] = setup->dar; + i++; + } - uint32_t v = *(chnBase + xpdma_cxcr); - v &= ~(0x3 << 22); - v |= (uint32_t)(sChn->priority) << 22; - v &= ~(XPDMA_TCF | XPDMA_HTF); - v |= ((flags & DMA_USE_TC_IRQ) != 0) ? XPDMA_TCF : 0; - v |= ((flags & DMA_USE_HT_IRQ) != 0) ? XPDMA_HTF : 0; - *(chnBase + xpdma_cxcr) = v; - dataBarier(); - *(chnBase + xpdma_cxcr) = v | 1; - dataBarier(); + if ((changeMask & XPDMA_CXLLR_ULL) != 0) { + /* If this is the last element, CxLLR value of 0 will finish transfer. + * Otherwise, this value of 0 will be replaced later. 
*/ + listbuf[i] = 0; + *this_ll = &listbuf[i]; + i++; + } + + *listbuf_offset = i; + return EOK; } -static void libdma_cleanupTransfer(int dma, int chn) +int libxpdma_configureMemory(const struct libdma_per *per, int dir, int isCircular, const libdma_transfer_buffer_t *buffers, size_t n_buffers) { - volatile uint32_t *chnBase = libdma_channelBase(dma, chn); + int dma, chn; + ssize_t ret; + if (!libxpdma_getDmaAndChannel(per, dir, &dma, &chn)) { + return -EINVAL; + } - dataBarier(); - /* Disable DMA interrupts */ - *(chnBase + xpdma_cxcr) &= ~XPDMA_ALLFLAGS; - dataBarier(); - if (*(chnBase + xpdma_cxcr) & 1) { - /* Channel is still enabled; need to suspend and reset it before it can be used again */ - *(chnBase + xpdma_cxcr) |= (1 << 2); /* Set SUSP flag */ - dataBarier(); - while ((*(chnBase + xpdma_cxcr) & (1 << 2)) == 0) { - /* Wait for hardware to suspend the channel */ + if ((buffers == NULL) || (n_buffers == 0)) { + return -EINVAL; + } + + libdma_chn_setup_t lastSetup = dma_ctrl[dma].chns[chn].cx; + lastSetup.tr2 &= ~XPDMA_CXTR2_TCEM_MASK; + lastSetup.tr2 |= (isCircular != 0) ? XPDMA_CXTR2_TCEM_EACH_LL : XPDMA_CXTR2_TCEM_LAST_LL; + + libxpdma_clearCacheOps(dma, chn); + uint32_t unused; + ret = libxpdma_configureBuffer(dma, chn, per, &buffers[0], dir, &lastSetup, &unused); + if (ret < 0) { + return ret; + } + + lastSetup.llr = 0; + dma_ctrl[dma].chns[chn].cx = lastSetup; + + uint32_t *this_ll = &dma_ctrl[dma].chns[chn].cx.llr; + size_t listbuf_offset = 0; + uint32_t changeMask; + for (size_t i = 1; i < n_buffers; i++) { + /* Add linked list element */ + ret = libxpdma_configureBuffer(dma, chn, per, &buffers[i], dir, &lastSetup, &changeMask); + ret = libxpdma_updateLL(dma, chn, &lastSetup, ret, changeMask, &this_ll, &listbuf_offset); + if (ret < 0) { + return ret; } + } - *(chnBase + xpdma_cxcr) |= (1 << 1); /* Set RESET flag */ - dataBarier(); + if (isCircular != 0) { + ret = libxpdma_configureBuffer(dma, chn, per, &buffers[0], dir, &lastSetup, &changeMask); + ret = libxpdma_updateLL(dma, chn, &lastSetup, ret, changeMask, &this_ll, &listbuf_offset); + if (n_buffers > 1) { + /* Loop back to first element in memory */ + *this_ll = dma_ctrl[dma].chns[chn].cx.llr; + } + else { + /* Optimization - if there is only one buffer, we can skip updating LLR - + * updating it would only set it to the same value it had before. 
*/ + dma_ctrl[dma].chns[chn].cx.llr &= ~XPDMA_CXLLR_ULL; + } + + if (ret < 0) { + return ret; + } } - dma_ctrl[dma].chns[chn].cb = NULL; + return EOK; } -static int libdma_transferTimeout(int dma, int chn, void *maddr, size_t len, int dir, int mode, int flags, time_t timeout) +static int libxpdma_startTransaction(const struct libdma_per *per, int dir, int intrFlags, libdma_callback_t *cb, void *cb_arg) { - time_t now, end, condTimeout; - volatile uint32_t *chnBase = libdma_channelBase(dma, chn); - volatile int done = 0; + int dma, chn; + if (!libxpdma_getDmaAndChannel(per, dir, &dma, &chn)) { + return -EINVAL; + } - libdma_setTransferOnceCallback(dma, chn, &done); - if (dir == dma_per2mem) { - libdma_cacheOpPer2Mem(maddr, len); + if ((cb != NULL) && (intrFlags == 0)) { + /* Interrupt callback given but no interrupt condition requested */ + return -EINVAL; } - else { - libdma_cacheOpMem2Per(maddr, len); + + volatile uint32_t *chn_base = libdma_channelBase(dma, chn); + /* Channel is currently enabled, cannot reconfigure it now */ + if ((*(chn_base + xpdma_cxcr) & 1) != 0) { + return -EBUSY; } - libdma_prepareTransfer(dma, chn, maddr, len, flags | DMA_USE_TC_IRQ); + libxpdma_performCacheOps(dma, chn); + struct dma_channel *sChn = &dma_ctrl[dma].chns[chn]; + sChn->cb = cb; + sChn->cb_arg = cb_arg; - condTimeout = timeout; - if (timeout != 0) { - gettime(&now, NULL); - end = now + timeout; - } + *(chn_base + xpdma_cxtr1) = dma_ctrl[dma].chns[chn].cx.tr1; + *(chn_base + xpdma_cxtr2) = dma_ctrl[dma].chns[chn].cx.tr2; + *(chn_base + xpdma_cxbr1) = dma_ctrl[dma].chns[chn].cx.br1; + *(chn_base + xpdma_cxsar) = dma_ctrl[dma].chns[chn].cx.sar; + *(chn_base + xpdma_cxdar) = dma_ctrl[dma].chns[chn].cx.dar; + *(chn_base + xpdma_cxllr) = dma_ctrl[dma].chns[chn].cx.llr; - mutexLock(dma_ctrl[dma].chns[chn].irqLock); - while (done == 0) { - condWait(dma_ctrl[dma].chns[chn].cond, dma_ctrl[dma].chns[chn].irqLock, condTimeout); - if (mode == dma_modeNoBlock) { - break; - } + uint32_t v = *(chn_base + xpdma_cxcr); + v &= ~(0x3 << 22); + v |= (sChn->priority & 0x3) << 22; + v &= ~(XPDMA_TCF | XPDMA_HTF); + v |= ((intrFlags & dma_tc) != 0) ? XPDMA_TCF : 0; + v |= ((intrFlags & dma_ht) != 0) ? 
XPDMA_HTF : 0; + /* Use port 1 to access linked lists (AHB on both HPDMA and GPDMA instances) */ + v |= 1 << 17; + *(chn_base + xpdma_cxcr) = v; + dataBarier(); + *(chn_base + xpdma_cxcr) = v | 1; + dataBarier(); + return EOK; +} + + +int libxpdma_startTransferWithCallback(const struct libdma_per *per, int dir, int intrFlags, libdma_callback_t *cb, void *cb_arg) +{ + return libxpdma_startTransaction(per, dir, intrFlags, cb, cb_arg); +} + + +int libxpdma_startTransferWithFlag(const struct libdma_per *per, int dir, volatile int *doneFlag) +{ + *doneFlag = 0; + return libxpdma_startTransaction(per, dir, dma_tc, libdma_transferOnceCallback, (void *)doneFlag); +} + + +static int libxpdma_waitForChannelIntr(int dma, int chn, volatile int *doneFlag, time_t timeout, time_t end) +{ + int ret = EOK; + struct dma_channel *sChn = &dma_ctrl[dma].chns[chn]; + time_t condTimeout = timeout; + mutexLock(sChn->irqLock); + while (*doneFlag == 0) { + condWait(sChn->cond, sChn->irqLock, condTimeout); if (timeout != 0) { + time_t now; gettime(&now, NULL); if (end <= now) { + ret = -ETIME; break; } + condTimeout = end - now; } } - mutexUnlock(dma_ctrl[dma].chns[chn].irqLock); - if (done == 1) { - return len; - } + mutexUnlock(sChn->irqLock); + /* Channel may have been cleaned up by interrupt, in which case this function call won't do anything */ libdma_cleanupTransfer(dma, chn); - - return len - (*(chnBase + xpdma_cxbr1) & 0xffff); + return ret; } -static int libdma_transferHelperInterrupts(int dma, int rxChn, int txChn, void *rxMAddr, const void *txMAddr, size_t len, int flags) +int libxpdma_waitForTransaction(const struct libdma_per *per, volatile int *flagMem2Per, volatile int *flagPer2Mem, time_t timeout) { - struct dma_channel *rxSChn = &dma_ctrl[dma].chns[rxChn]; - volatile uint32_t *txChnBase = libdma_channelBase(dma, txChn); - volatile int rxDone = 0; + /* Phoenix-RTOS doesn't allow us to wait on two conditionals at the same time, so we do a little trick: + * if we are asked to wait on two channels, we wait on the RX channel's conditional and active poll on the TX channel. + * For our use cases this will be OK (no wasted time) because TX channel will usually finish before RX channel, + * so after RX conditional is signalled we just check the TX channel once and exit. */ + + bool doM2P = flagMem2Per != NULL; + bool doP2M = flagPer2Mem != NULL; + + volatile int *flags[2]; + int dmas[2], chns[2]; + if (doM2P != doP2M) { + /* Waiting on 1 channel */ + int dir = doM2P ? dma_mem2per : dma_per2mem; + flags[0] = doM2P ? flagMem2Per : flagPer2Mem; + if (!libxpdma_getDmaAndChannel(per, dir, &dmas[0], &chns[0])) { + return -EINVAL; + } - if (rxMAddr == NULL) { - return libdma_transferTimeout(dma, txChn, (void *)txMAddr, len, dma_mem2per, dma_modeNormal, flags, 0); + flags[1] = NULL; } + else if (doP2M) { + /* Waiting on 2 channels */ + flags[0] = flagPer2Mem; + if (!libxpdma_getDmaAndChannel(per, dma_per2mem, &dmas[0], &chns[0])) { + return -EINVAL; + } - libdma_setTransferOnceCallback(dma, rxChn, &rxDone); - libdma_cacheOpMem2Per((void *)txMAddr, len); - libdma_cacheOpPer2Mem(rxMAddr, len); + flags[1] = flagMem2Per; + if (!libxpdma_getDmaAndChannel(per, dma_mem2per, &dmas[1], &chns[1])) { + return -EINVAL; + } + } + else { + return EOK; + } - /* Wait on RX transfer using an interrupt + condWait and wait on TX channel by polling. - * Because RX transfer will usually finish last, there will be no long wait on the TX channel. 
- * This is for two reasons: we avoid unnecessary interrupt handling and condSignal() - * and also Phoenix-RTOS doesn't allow waiting on two conditionals at once. */ - libdma_prepareTransfer(dma, rxChn, rxMAddr, len, flags | DMA_USE_TC_IRQ); - libdma_prepareTransfer(dma, txChn, (void *)txMAddr, len, flags); + time_t end = 0; + if (timeout != 0) { + time_t now; + gettime(&now, NULL); + end = now + timeout; + } - mutexLock(rxSChn->irqLock); - while (rxDone == 0) { - condWait(rxSChn->cond, rxSChn->irqLock, 0); + int ret = libxpdma_waitForChannelIntr(dmas[0], chns[0], flags[0], timeout, end); + if ((ret == EOK) && (flags[1] != NULL)) { + /* Both RX and TX channels given - we already waited for RX, now wait for TX */ + volatile uint32_t *chnBase = libdma_channelBase(dmas[1], chns[1]); + while (!libdma_hasChannelFinished(chnBase)) { + /* Wait for the other channel to finish */ + if (timeout != 0) { + time_t now; + gettime(&now, NULL); + if (end <= now) { + ret = -ETIME; + break; + } + } + } } - mutexUnlock(rxSChn->irqLock); - while (libdma_hasChannelFinished(txChnBase) == 0) { - /* Active-wait for TX channel */ + return ret; +} + + +int libxpdma_cancelTransfer(const struct libdma_per *per, int dir) +{ + int dma, chn; + if (!libxpdma_getDmaAndChannel(per, dir, &dma, &chn)) { + return -EINVAL; } - /* RX channel was cleaned up by IRQ */ - libdma_cleanupTransfer(dma, txChn); + libdma_cleanupTransfer(dma, chn); + return EOK; +} + - return len; +ssize_t libxpdma_bufferRemaining(const struct libdma_per *per, int dir) +{ + int dma, chn; + if (!libxpdma_getDmaAndChannel(per, dir, &dma, &chn)) { + return -EINVAL; + } + + volatile uint32_t *chnBase = libdma_channelBase(dma, chn); + /* Only bottom 16 bits contain data. */ + return *(chnBase + xpdma_cxbr1) & 0xffff; } -static int libdma_transferHelperNoInterrupts(int dma, int rxChn, int txChn, void *rxMAddr, const void *txMAddr, size_t len, int flags) +/* Get memory that is DMA capable (non-cached). + * We can't do this using mmap(), because this is a NOMMU target so the MAP_UNCACHED flag does nothing. + * Instead, we check the system's memory maps if there's a map with a pre-defined name (`DMA_SYSPAGE_MAP_NAME`). 
+ */ +static int libdma_getDmaMemory(void) { - volatile uint32_t *rxChnBase; - volatile uint32_t *txChnBase = libdma_channelBase(dma, txChn); + void *dmaMemPtr = NULL; + size_t dmaMemSz = 0; + meminfo_t mi; + mi.page.mapsz = -1; + mi.entry.kmapsz = -1; + mi.entry.mapsz = -1; + mi.maps.mapsz = 0; + mi.maps.map = NULL; + meminfo(&mi); + + mi.maps.map = malloc(mi.maps.mapsz * sizeof(mapinfo_t)); + if (mi.maps.map == NULL) { + return -ENOMEM; + } - if (rxMAddr == NULL) { - rxChnBase = NULL; + meminfo(&mi); + for (int i = 0; i < mi.maps.mapsz; i++) { + if (strcmp(mi.maps.map[i].name, DMA_SYSPAGE_MAP_NAME) == 0) { + dmaMemPtr = (void *)mi.maps.map[i].vstart; + dmaMemSz = mi.maps.map[i].vend - mi.maps.map[i].vstart; + break; + } } - else { - rxChnBase = libdma_channelBase(dma, rxChn); - libdma_cacheOpPer2Mem(rxMAddr, len); - libdma_prepareTransfer(dma, rxChn, rxMAddr, len, flags); + + free(mi.maps.map); + if (dmaMemPtr == NULL) { + return -ENODEV; } - libdma_cacheOpMem2Per((void *)txMAddr, len); - libdma_prepareTransfer(dma, txChn, (void *)txMAddr, len, flags); + dma_common.dmaMemPtr = dmaMemPtr; + dma_common.dmaMemSz = dmaMemSz; + return EOK; +} - while (!(libdma_hasChannelFinished(rxChnBase) && libdma_hasChannelFinished(txChnBase))) { - /* Wait for transfer to finish */ - } - if (rxChnBase != NULL) { - libdma_cleanupTransfer(dma, rxChn); +void *libdma_malloc(size_t size) +{ + mutexLock(dma_common.dmaAllocMutex); + if (size > dma_common.dmaAllocSz) { + mutexUnlock(dma_common.dmaAllocMutex); + return NULL; } - libdma_cleanupTransfer(dma, txChn); - - return len; + void *ret = dma_common.dmaAllocPtr; + dma_common.dmaAllocSz -= size; + dma_common.dmaAllocPtr += size; + mutexUnlock(dma_common.dmaAllocMutex); + return ret; } -int libdma_transfer(const struct libdma_per *per, void *rxMAddr, const void *txMAddr, size_t len) +/* Below are functions for compatibility with old DMA API */ + + +int libdma_acquirePeripheral(int per, unsigned int num, const struct libdma_per **perP) { - uint32_t dummyBuf = 0; - int dma = per->setup->dma; - int rxChn = per->chns[dma_per2mem]; - int txChn = per->chns[dma_mem2per]; + return libxpdma_acquirePeripheral(per, num, 0, perP); +} + - if ((len > DMA_MAX_LEN) || (rxChn == DMA_NUM_CHANNELS) || (txChn == DMA_NUM_CHANNELS)) { +int libdma_configurePeripheral( + const struct libdma_per *per, + int dir, + int priority, + void *paddr, + int msize, + int psize, + int minc, + int pinc, + handle_t *cond) +{ + int dma, chn; + if (!libxpdma_getDmaAndChannel(per, dir, &dma, &chn)) { return -EINVAL; } - int flags = 0; - if (txMAddr == NULL) { - /* In case no tx buffer is provided, use a dummy value - * and configure DMA not to increment the memory address. 
*/ - txMAddr = &dummyBuf; - flags |= DMA_OVERRIDE_SINC_OFF; + int ret = libxpdma_configureChannel(per, dir, priority, cond); + if (ret < 0) { + return ret; } - if (len > DMA_LONG_TRANSFER_THRESHOLD) { - return libdma_transferHelperInterrupts(dma, rxChn, txChn, rxMAddr, txMAddr, len, flags); - } - else { - return libdma_transferHelperNoInterrupts(dma, rxChn, txChn, rxMAddr, txMAddr, len, flags); + libdma_peripheral_config_t perCfg = { + .addr = paddr, + .elSize_log = psize, + .burstSize = 1, + .increment = pinc, + }; + ret = libxpdma_configurePeripheral(per, dir, &perCfg); + if (ret < 0) { + return ret; } + + dma_ctrl[dma].chns[chn].oldAPI.minc = minc; + dma_ctrl[dma].chns[chn].oldAPI.msize = msize; + + return EOK; } -int libdma_tx(const struct libdma_per *per, const void *txMAddr, size_t len, int mode, time_t timeout) +static int libdma_transferInternal(const struct libdma_per *per, int dir, void *addr, size_t len, int mode, time_t timeout, volatile int *doneFlagPtr) { - int dma = per->setup->dma; - int txChn = per->chns[dma_mem2per]; - - if ((len > DMA_MAX_LEN) || (txChn == DMA_NUM_CHANNELS)) { + int ret; + int dma, chn; + if (!libxpdma_getDmaAndChannel(per, dir, &dma, &chn)) { return -EINVAL; } - return libdma_transferTimeout(dma, txChn, (void *)txMAddr, len, dma_mem2per, mode, 0, timeout); -} + libdma_transfer_buffer_t buf = { + .buf = addr, + .bufSize = len, + .elSize_log = dma_ctrl[dma].chns[chn].oldAPI.msize, + .burstSize = 1, + .increment = dma_ctrl[dma].chns[chn].oldAPI.minc, + /* Because the older APIs don't have a way to state if the buffer is cacheable, + * use a heuristic - if it's not in DMA memory, treat it as cacheable. */ + .isCached = libxpdma_isInsideDMAMemory(addr, len) ? 0 : 1, + }; + ret = libxpdma_configureMemory(per, dir, 0, &buf, 1); + if (ret < 0) { + return ret; + } + volatile int doneFlag; + bool doWait = doneFlagPtr == NULL; + if (doWait) { + doneFlagPtr = &doneFlag; + } -int libdma_rx(const struct libdma_per *per, void *rxMAddr, size_t len, int mode, time_t timeout) -{ - int dma = per->setup->dma; - int rxChn = per->chns[dma_per2mem]; + ret = libxpdma_startTransferWithFlag(per, dir, doneFlagPtr); + if (ret < 0) { + return ret; + } - if ((len > DMA_MAX_LEN) || (rxChn == DMA_NUM_CHANNELS)) { - return -EINVAL; + if (!doWait) { + return 0; } - return libdma_transferTimeout(dma, rxChn, rxMAddr, len, mode, dma_per2mem, 0, timeout); + + if (dir == dma_per2mem) { + ret = libxpdma_waitForTransaction(per, NULL, &doneFlag, timeout); + } + else { + ret = libxpdma_waitForTransaction(per, &doneFlag, NULL, timeout); + } + + if (mode == dma_modeNoBlock) { + if (doneFlag != 0) { + return len; + } + + return len - (*(libdma_channelBase(dma, chn) + xpdma_cxbr1) & 0xffff); + } + + return (ret < 0) ? 
ret : len; } -static int libdma_transferAsync(const struct libdma_per *per, void *maddr, int dir, size_t len, volatile int *doneFlag) +int libdma_transfer(const struct libdma_per *per, void *rxMAddr, const void *txMAddr, size_t len) { - int dma = per->setup->dma; - int chn = per->chns[dir]; - - if ((len > DMA_MAX_LEN) || (chn == DMA_NUM_CHANNELS)) { + int ret; + int txDma, txChn; + if (!libxpdma_getDmaAndChannel(per, dma_mem2per, &txDma, &txChn)) { return -EINVAL; } - libdma_setTransferOnceCallback(dma, chn, doneFlag); - if (dir == dma_per2mem) { - libdma_cacheOpPer2Mem(maddr, len); + libdma_transfer_buffer_t buf; + if (txMAddr == NULL) { + static uint32_t dummy; + buf.buf = &dummy; + buf.bufSize = len; + buf.elSize_log = 0; + buf.burstSize = 1; + buf.increment = 0; + buf.isCached = 0; /* We don't care what is transmitted, so we can skip cache operations */ } else { - libdma_cacheOpMem2Per(maddr, len); + buf.buf = (void *)txMAddr; + buf.bufSize = len; + buf.elSize_log = dma_ctrl[txDma].chns[txChn].oldAPI.msize; + buf.burstSize = 1; + buf.increment = dma_ctrl[txDma].chns[txChn].oldAPI.minc; + buf.isCached = libxpdma_isInsideDMAMemory(txMAddr, len) ? 0 : 1; } - libdma_prepareTransfer(dma, chn, maddr, len, DMA_USE_TC_IRQ); + ret = libxpdma_configureMemory(per, dma_mem2per, 0, &buf, 1); + if (ret < 0) { + return ret; + } - return 0; + volatile int txDoneFlag, rxDoneFlag; + if (rxMAddr != NULL) { + ret = libdma_transferInternal(per, dma_per2mem, rxMAddr, len, dma_modeNormal, 0, &rxDoneFlag); + if (ret < 0) { + return ret; + } + } + + ret = libxpdma_startTransferWithFlag(per, dma_mem2per, &txDoneFlag); + if (ret < 0) { + return ret; + } + + return libxpdma_waitForTransaction(per, &txDoneFlag, (rxMAddr != NULL) ? &rxDoneFlag : NULL, 0); +} + + +int libdma_tx(const struct libdma_per *per, const void *txMAddr, size_t len, int mode, time_t timeout) +{ + return libdma_transferInternal(per, dma_mem2per, (void *)txMAddr, len, mode, timeout, NULL); +} + + +int libdma_rx(const struct libdma_per *per, void *rxMAddr, size_t len, int mode, time_t timeout) +{ + return libdma_transferInternal(per, dma_per2mem, rxMAddr, len, mode, timeout, NULL); } int libdma_txAsync(const struct libdma_per *per, const void *txMAddr, size_t len, volatile int *doneFlag) { - return libdma_transferAsync(per, (void *)txMAddr, dma_mem2per, len, doneFlag); + return libdma_transferInternal(per, dma_mem2per, (void *)txMAddr, len, dma_modeNormal, 0, doneFlag); } int libdma_rxAsync(const struct libdma_per *per, void *rxMAddr, size_t len, volatile int *doneFlag) { - return libdma_transferAsync(per, rxMAddr, dma_per2mem, len, doneFlag); + return libdma_transferInternal(per, dma_per2mem, rxMAddr, len, dma_modeNormal, 0, doneFlag); } int libdma_infiniteRxAsync(const struct libdma_per *per, void *rxMAddr, size_t len, void fn(void *arg, int type), void *arg) { - return -ENOSYS; + if ((rxMAddr == NULL) || (len == 0)) { + return libxpdma_cancelTransfer(per, dma_per2mem); + } + + int dma, chn; + if (!libxpdma_getDmaAndChannel(per, dma_per2mem, &dma, &chn)) { + return -EINVAL; + } + + libdma_transfer_buffer_t buf = { + .buf = rxMAddr, + .bufSize = len, + .elSize_log = dma_ctrl[dma].chns[chn].oldAPI.msize, + .burstSize = 1, + .increment = dma_ctrl[dma].chns[chn].oldAPI.minc, + .isCached = libxpdma_isInsideDMAMemory(rxMAddr, len) ? 
0 : 1, + }; + + int ret = libxpdma_configureMemory(per, dma_per2mem, 1, &buf, 1); + if (ret < 0) { + return ret; + } + + return libxpdma_startTransferWithCallback(per, dma_per2mem, dma_tc | dma_ht, fn, arg); } uint16_t libdma_leftToRx(const struct libdma_per *per) { - volatile uint32_t *chnBase = libdma_channelBase(per->setup->dma, per->chns[dma_per2mem]); - /* Only bottom 16 bits contain data. */ - return *(chnBase + xpdma_cxbr1) & 0xffff; + ssize_t ret = libxpdma_bufferRemaining(per, dma_per2mem); + return (ret < 0) ? 0 : (ret & 0xffff); } @@ -931,6 +1464,22 @@ int libdma_init(void) return EOK; } + int ret; + ret = libdma_getDmaMemory(); + if (ret < 0) { + return ret; + } + + if (dma_common.dmaMemSz < sizeof(*dma_common.listbufs)) { + return -ENOMEM; + } + + dma_common.listbufs = dma_common.dmaMemPtr; + + mutexCreate(&dma_common.dmaAllocMutex); + dma_common.dmaAllocPtr = dma_common.dmaMemPtr + sizeof(*dma_common.listbufs); + dma_common.dmaAllocSz = dma_common.dmaMemSz - sizeof(*dma_common.listbufs); + platformctl_t pctl; pctl.action = pctl_set; pctl.type = pctl_dmaPermissions; @@ -947,6 +1496,9 @@ int libdma_init(void) platformctl(&pctl); condCreate(&dma_ctrl[dma].chns[chn].cond); mutexCreate(&dma_ctrl[dma].chns[chn].irqLock); + /* Set base address for channel's linked list buffer */ + libdma_setLBAR(dma, chn, libxpdma_getListbuf(dma, chn)); + *(libdma_channelBase(dma, chn) + xpdma_cxcr) = (1 << 1); /* Reset channel */ } for (size_t per = 0; per < (sizeof(dma_ctrl[dma].pers) / sizeof(dma_ctrl[dma].pers[0])); per++) { diff --git a/multi/stm32l4-multi/libmulti/libspi_n6.c b/multi/stm32l4-multi/libmulti/libspi_n6.c index 7d23f620..c726909a 100644 --- a/multi/stm32l4-multi/libmulti/libspi_n6.c +++ b/multi/stm32l4-multi/libmulti/libspi_n6.c @@ -73,16 +73,83 @@ static void libspi_suspendController(libspi_ctx_t *ctx) } -static int libspi_transactionDMA(libspi_ctx_t *ctx, size_t ignoreBytes, unsigned char *ibuff, const unsigned char *obuff, size_t bufflen) +static int libspi_transactionDMA(libspi_ctx_t *ctx, const uint8_t *preamble, size_t preambleLen, unsigned char *ibuff, const unsigned char *obuff, size_t bufflen) { - int ret; - *(ctx->base + spi_cr1) |= 1 << 9; - ret = libdma_transfer(ctx->per, ibuff, obuff, bufflen); + int ret = 0; + libdma_transfer_buffer_t bufs[2]; + /* Set parameters that will be the same for all buffers */ + bufs[0].elSize_log = 0; + bufs[0].burstSize = 1; + bufs[0].transform = 0; + bufs[1].elSize_log = 0; + bufs[1].burstSize = 1; + bufs[1].transform = 0; + + static uint8_t rxDummy = 0, txDummy = 0; + volatile int rxDone, txDone; + volatile int *rxDonePtr = NULL, *txDonePtr = &txDone; + /* If we don't want to receive, we can just not activate RX DMA */ + if (ibuff != NULL) { + rxDonePtr = &rxDone; + size_t i = 0; + if (preambleLen != 0) { + bufs[i].buf = &rxDummy; + bufs[i].bufSize = preambleLen; + bufs[i].increment = 0; + bufs[i].isCached = 0; + i++; + } + + bufs[i].buf = ibuff; + bufs[i].bufSize = bufflen; + bufs[i].increment = 1; + bufs[i].isCached = 1; + i++; + + ret = (ret < 0) ? ret : libxpdma_configureMemory(ctx->per, dma_per2mem, 0, bufs, i); + ret = (ret < 0) ? 
+	size_t i = 0;
+	if (preambleLen != 0) {
+		bufs[i].buf = (void *)preamble;
+		bufs[i].bufSize = preambleLen;
+		bufs[i].increment = 1;
+		bufs[i].isCached = 1;
+		i++;
+	}
+
+	if (obuff != NULL) {
+		bufs[i].buf = (void *)obuff;
+		bufs[i].bufSize = bufflen;
+		bufs[i].increment = 1;
+		bufs[i].isCached = 1;
+		i++;
+	}
+	else {
+		bufs[i].buf = &txDummy;
+		bufs[i].bufSize = bufflen;
+		bufs[i].increment = 0;
+		bufs[i].isCached = 0;
+		i++;
+	}
+
+	ret = (ret < 0) ? ret : libxpdma_configureMemory(ctx->per, dma_mem2per, 0, bufs, i);
+	ret = (ret < 0) ? ret : libxpdma_startTransferWithFlag(ctx->per, dma_mem2per, txDonePtr);
+
+	if (ret >= 0) {
+		*(ctx->base + spi_cr1) |= 1 << 9;
+		ret = libxpdma_waitForTransaction(ctx->per, txDonePtr, rxDonePtr, 0);
+		libspi_suspendController(ctx);
+	}
+	else {
+		libxpdma_cancelTransfer(ctx->per, dma_mem2per);
+		libxpdma_cancelTransfer(ctx->per, dma_per2mem);
+	}
 
-	libspi_suspendController(ctx);
 	*(ctx->base + spi_cr1) &= ~1;
 	dataBarier();
 
-	return ret;
+	return (ret < 0) ? ret : bufflen;
 }
@@ -95,7 +162,6 @@ static inline uint8_t libspi_getByteOrZero(const uint8_t *buff, size_t i)
 int libspi_transaction(libspi_ctx_t *ctx, int dir, unsigned char cmd, unsigned int addr, unsigned char flags, unsigned char *ibuff, const unsigned char *obuff, size_t bufflen)
 {
 	unsigned int addrsz = (flags >> SPI_ADDRSHIFT) & SPI_ADDRMASK;
-	unsigned int ignoreBytes = 0;
 	/* Ensure buffers are set to NULL if they are not used */
 	if (dir == spi_dir_read) {
 		obuff = NULL;
@@ -108,48 +174,54 @@ int libspi_transaction(libspi_ctx_t *ctx, int dir, unsigned char cmd, unsigned i
 	*(ctx->base + spi_cr1) |= 1;
 	dataBarier();
 
-	/* TX/RX data register need to be accessed as 8-bit */
-	volatile uint8_t *txData = (volatile uint8_t *)(ctx->base + spi_txdr);
-	volatile uint8_t *rxData = (volatile uint8_t *)(ctx->base + spi_rxdr);
-
+	uint8_t preamble[6];
+	unsigned int preambleLen = 0;
 	/* Send command, address and dummy bytes if requested.
 	 * They can be sent all at once because FIFO is large enough (at least 8 bytes) */
 	if ((flags & spi_cmd) != 0) {
-		*txData = cmd;
-		ignoreBytes += 1;
+		preamble[preambleLen] = cmd;
+		preambleLen += 1;
 	}
 
 	if (addrsz > 0) {
-		ignoreBytes += addrsz;
 		if ((flags & spi_addrlsb) == 0) {
 			uint32_t addr_swapped = bswap_32((uint32_t)addr);
 			addr = addr_swapped >> ((4 - addrsz) * 8);
 		}
 
 		for (unsigned int i = 0; i < addrsz; i++) {
-			*txData = addr & 0xff;
+			preamble[preambleLen] = addr & 0xff;
+			preambleLen += 1;
 			addr >>= 8;
 		}
 	}
 
 	if ((flags & spi_dummy) != 0) {
-		ignoreBytes += 1;
-		*txData = 0;
+		preamble[preambleLen] = 0;
+		preambleLen += 1;
 	}
 
 	/* TODO: to support transactions with preamble, multi-buffer DMA transactions will be necessary */
-	if ((ctx->per != NULL) && (ignoreBytes == 0)) {
-		return libspi_transactionDMA(ctx, ignoreBytes, ibuff, obuff, bufflen);
+	if (ctx->per != NULL) {
+		return libspi_transactionDMA(ctx, preamble, preambleLen, ibuff, obuff, bufflen);
+	}
+
+	/* TX/RX data registers need to be accessed as 8-bit */
+	volatile uint8_t *txData = (volatile uint8_t *)(ctx->base + spi_txdr);
+	volatile uint8_t *rxData = (volatile uint8_t *)(ctx->base + spi_rxdr);
+
+	for (size_t i = 0; i < preambleLen; i++) {
+		*txData = preamble[i];
 	}
 
 	uint32_t fifoSize = spiInfo[ctx->spiNum - spi1].fifoSize;
-	size_t transactionLength = bufflen + ignoreBytes;
+	size_t transactionLength = bufflen + preambleLen;
 	size_t rx_i = 0;
 	size_t tx_i = 0;
 	bool overflow = false;
 
 	/* Fill up TX FIFO to the limit */
-	while (((tx_i + ignoreBytes) < fifoSize) && (tx_i < bufflen)) {
+	while (((tx_i + preambleLen) < fifoSize) && (tx_i < bufflen)) {
 		*txData = libspi_getByteOrZero(obuff, tx_i);
 		tx_i++;
 	}
@@ -167,8 +239,8 @@ int libspi_transaction(libspi_ctx_t *ctx, int dir, unsigned char cmd, unsigned i
 		}
 
 		if ((status & SR_RXNE) != 0) {
-			if ((ibuff != NULL) && (rx_i >= ignoreBytes)) {
-				ibuff[rx_i - ignoreBytes] = *rxData;
+			if ((ibuff != NULL) && (rx_i >= preambleLen)) {
+				ibuff[rx_i - preambleLen] = *rxData;
 			}
 			else {
 				(void)*rxData;
@@ -286,13 +358,21 @@ int libspi_init(libspi_ctx_t *ctx, unsigned int spi, int useDma)
 	if (useDma != 0) {
 		libdma_init();
 
-		int err = libdma_acquirePeripheral(dma_spi, spi - spi1, &ctx->per);
+		int err = libxpdma_acquirePeripheral(dma_spi, spi - spi1, 0, &ctx->per);
+		err = (err < 0) ? err : libxpdma_configureChannel(ctx->per, dma_mem2per, dma_priorityMedium, NULL);
+		err = (err < 0) ? err : libxpdma_configureChannel(ctx->per, dma_per2mem, dma_priorityMedium, NULL);
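+		/* cond == NULL: the library handles completion signalling itself, so the flag-based
+		 * libxpdma_startTransferWithFlag()/libxpdma_waitForTransaction() helpers can be used on these channels. */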
+		libdma_peripheral_config_t cfg = {
+			.addr = (void *)(ctx->base + spi_txdr),
+			.elSize_log = 0,
+			.burstSize = 1,
+			.increment = 0,
+		};
+		err = (err < 0) ? err : libxpdma_configurePeripheral(ctx->per, dma_mem2per, &cfg);
+		cfg.addr = (void *)(ctx->base + spi_rxdr);
+		err = (err < 0) ? err : libxpdma_configurePeripheral(ctx->per, dma_per2mem, &cfg);
 		if (err < 0) {
 			return err;
 		}
-
-		libdma_configurePeripheral(ctx->per, dma_mem2per, dma_priorityMedium, (void *)(ctx->base + spi_txdr), 0x0, 0x0, 0x1, 0x0, NULL);
-		libdma_configurePeripheral(ctx->per, dma_per2mem, dma_priorityMedium, (void *)(ctx->base + spi_rxdr), 0x0, 0x0, 0x1, 0x0, NULL);
 	}
 	else {
 		ctx->per = NULL;
diff --git a/multi/stm32l4-multi/pwm_n6.c b/multi/stm32l4-multi/pwm_n6.c
index 8dddef1c..0c2f2200 100644
--- a/multi/stm32l4-multi/pwm_n6.c
+++ b/multi/stm32l4-multi/pwm_n6.c
@@ -179,7 +179,6 @@ static const struct {
 static struct {
 	pwm_tim_data_t timer[pwm_tim_count];
 #if USE_DSHOT_DMA
-	uint32_t dmaTransferBuffer[MAX_DSHOT_DMA + 2];
 	handle_t dmalock;
 #endif
 } pwm_common;
@@ -639,6 +638,34 @@ int pwm_get(pwm_tim_id_t timer, pwm_ch_id_t chn, uint32_t *top, uint32_t *compar
 	return EOK;
 }
 
+static int pwm_initDMAChannel(pwm_tim_id_t timer, pwm_ch_id_t chn)
+{
+	int res = libxpdma_acquirePeripheral(dma_tim_upd, timer, 0, &(pwm_common.timer[timer].dma_per[chn]));
+	if ((res < 0) || (pwm_common.timer[timer].dma_per[chn] == NULL)) {
+		return res;
+	}
+
+	/* You're supposed to control DMA-TIM transfers via the TIM_DCR and TIM_DMAR registers
+	 * but that failed in testing, so we just give DMA the destination address directly. */
+	volatile uint32_t *destination = pwm_setup.timer[timer].base + PWM_CCR_REG(chn);
+	res = (res < 0) ? res : libxpdma_configureChannel(pwm_common.timer[timer].dma_per[chn], dma_mem2per, dma_priorityVeryHigh, NULL);
+	libdma_peripheral_config_t cfg = {
+		.addr = (void *)destination,
+		.elSize_log = (((PWM_TIM_32BIT >> timer) & 1) != 0) ? 2 : 1,
+		.burstSize = 1,
+		.increment = 0,
+	};
+
+	res = (res < 0) ? res : libxpdma_configurePeripheral(pwm_common.timer[timer].dma_per[chn], dma_mem2per, &cfg);
+	if (res < 0) {
+		return res;
+	}
+
+	/* Enable TIM DMA request */
+	*(pwm_setup.timer[timer].base + tim_dier) |= (1 << 8);
+	return EOK;
+}
+
 int pwm_setBitSequence(pwm_tim_id_t timer, pwm_ch_id_t chn, void *data, uint32_t nbits, uint8_t datasize, int flags)
 {
 	int res;
@@ -665,66 +692,52 @@ int pwm_setBitSequence(pwm_tim_id_t timer, pwm_ch_id_t chn, void *data, uint32_t
 	}
 
 #if USE_DSHOT_DMA
-	/* Limit number of bits so we don't overflow the buffer */
-	if (nbits > MAX_DSHOT_DMA) {
-		return -EINVAL;
-	}
-
 	mutexLock(pwm_common.dmalock);
-
-	for (unsigned int i = 0; i < nbits; i++) {
-		pwm_common.dmaTransferBuffer[i] = pwm_getUserCompareVal(data, i, datasize);
-	}
-
-	if (pwm_common.timer[timer].dma_per[chn] == NULL) {
-		int ret = libdma_acquirePeripheral(dma_tim_upd, timer, &(pwm_common.timer[timer].dma_per[chn]));
-		if ((ret < 0) || (pwm_common.timer[timer].dma_per[chn] == NULL)) {
-			mutexUnlock(pwm_common.dmalock);
-			return ret;
-		}
-
-		int pSize = (((PWM_TIM_32BIT >> timer) & 1) != 0) ? 2 : 1;
-		/* You're supposed to set control DMA-TIM transfers via TIM_DCR and TIM_DMAR registers
-		 * but that failed in testing, so we just give DMA the destination address directly. */
-		volatile uint32_t *destination = pwm_setup.timer[timer].base + PWM_CCR_REG(chn);
-		ret = libdma_configurePeripheral(
-			pwm_common.timer[timer].dma_per[chn],
-			dma_mem2per,
-			dma_priorityVeryHigh,
-			(void *)(destination),
-			2,
-			pSize,
-			1,
-			0,
-			NULL);
-
-		if (ret < 0) {
-			mutexUnlock(pwm_common.dmalock);
-			return ret;
-		}
-
-		/* Enable TIM DMA request */
-		*(pwm_setup.timer[timer].base + tim_dier) |= (1 << 8);
-	}
-
-	/* Add two "idle" values at the end of the DMA buffer.
-	 * One value is necessary because after DMA writes the last compare value, `libdma_tx` exits and user may
-	 * disable the timer immediately without waiting for the final cycle to finish.
-	 * Another value is added because compare values are "delayed" - a write to the CCR register after an update event
-	 * will not take effect immediately, but during the next cycle.
-	 * Without those two "idle" values, the last value would not be transmitted and second-to-last value
-	 * would not be transmitted completely.
-	 * TODO: this could be fixed by waiting two timer cycles after DMA finishes - but this may not be enough depending
-	 * on how fast `libdma_tx` returns after DMA is finished.
-	 */
-	pwm_common.dmaTransferBuffer[nbits] = 0;
-	pwm_common.dmaTransferBuffer[nbits + 1] = 0;
-
+	const struct libdma_per *per = pwm_common.timer[timer].dma_per[chn];
+	if (per == NULL) {
+		res = (res < 0) ? res : pwm_initDMAChannel(timer, chn);
+		per = pwm_common.timer[timer].dma_per[chn];
+	}
+
+	static const uint8_t zero_value = 0;
+	const uint8_t datasize_log = (datasize == 1) ? 0 : ((datasize == 2) ? 1 : 2);
+	libdma_transfer_buffer_t bufs[2] = {
+		{
+			.buf = data,
+			.bufSize = nbits << datasize_log,
+			.elSize_log = datasize_log,
+			.burstSize = 1,
+			.increment = 1,
+			.isCached = 1,
+			.transform = LIBXPDMA_TRANSFORM_ALIGNR0,
+		},
+		/* Add two "idle" values at the end of the DMA buffer.
+		 * One value is necessary because after DMA writes the last compare value, `libxpdma_waitForTransaction` returns and the user may
+		 * disable the timer immediately without waiting for the final cycle to finish.
+		 * Another value is added because compare values are "delayed" - a write to the CCR register after an update event
+		 * will not take effect immediately, but during the next cycle.
+		 * Without those two "idle" values, the last value would not be transmitted and second-to-last value
+		 * would not be transmitted completely.
+		 * TODO: this could be fixed by waiting two timer cycles after DMA finishes - but this may not be enough depending
+		 * on how fast `libxpdma_waitForTransaction` returns after DMA is finished.
+		 */
+		{
+			.buf = (void *)&zero_value,
+			.bufSize = 2,
+			.elSize_log = 0,
+			.burstSize = 1,
+			.increment = 0,
+			.isCached = 0,
+			.transform = LIBXPDMA_TRANSFORM_ALIGNR0,
+		},
+	};
+	res = (res < 0) ? res : libxpdma_configureMemory(per, dma_mem2per, 0, bufs, 2);
 
 	/* Start the PWM channel. Set first duty cycle to 0 - idle state. It will be output until DMA takes over. */
-	pwm_set(timer, chn, 0);
+	res = (res < 0) ? res : pwm_set(timer, chn, 0);
+	volatile int done = 0;
+	res = (res < 0) ? res : libxpdma_startTransferWithFlag(per, dma_mem2per, &done);
 
 	/* TODO: we can calculate a timeout here */
-	libdma_tx(pwm_common.timer[timer].dma_per[chn], pwm_common.dmaTransferBuffer, (nbits + 2) * 4, 0, 0);
-	/* TODO: we may need to wait for the last cycle to finish */
+	res = (res < 0) ? res : libxpdma_waitForTransaction(per, &done, NULL, 0);
 
 	mutexUnlock(pwm_common.dmalock);
 #else
 	uint16_t firstCompare;
@@ -755,7 +768,7 @@ int pwm_setBitSequence(pwm_tim_id_t timer, pwm_ch_id_t chn, void *data, uint32_t
 	mutexUnlock(irq->uevlock);
 #endif
 
-	return EOK;
+	return res;
 }
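
For reference, a minimal sketch of how the libxpdma API added by this patch is meant to be driven for a simple one-shot mem2per transfer, mirroring the flow used by the SPI and PWM drivers above. This is not part of the patch: the wrapper function example_txOnce, its parameters, the include path and the dma_spi/priority choices are illustrative assumptions only.

#include <stddef.h>
#include <stdint.h>
#include <libmulti/libdma.h> /* assumed include path for the header modified above */

/* Sketch only: send len bytes to a byte-wide peripheral data register and wait for completion. */
static int example_txOnce(volatile uint32_t *txdr, const uint8_t *payload, size_t len)
{
	const struct libdma_per *per;
	volatile int done;

	/* Reserve channels for SPI instance 0; cond == NULL lets the library handle completion. */
	int err = libxpdma_acquirePeripheral(dma_spi, 0, 0, &per);
	err = (err < 0) ? err : libxpdma_configureChannel(per, dma_mem2per, dma_priorityMedium, NULL);

	/* Peripheral side: 1-byte elements written to a fixed register address. */
	libdma_peripheral_config_t cfg = {
		.addr = (void *)txdr,
		.elSize_log = 0,
		.burstSize = 1,
		.increment = 0,
	};
	err = (err < 0) ? err : libxpdma_configurePeripheral(per, dma_mem2per, &cfg);

	/* Memory side: one linear (non-circular) buffer, incremented byte by byte. */
	libdma_transfer_buffer_t buf = {
		.buf = (void *)payload,
		.bufSize = len,
		.elSize_log = 0,
		.burstSize = 1,
		.increment = 1,
		.isCached = 1,
		.transform = 0,
	};
	err = (err < 0) ? err : libxpdma_configureMemory(per, dma_mem2per, 0, &buf, 1);

	/* Arm the transfer and block until the completion flag is set (timeout 0 = wait indefinitely). */
	err = (err < 0) ? err : libxpdma_startTransferWithFlag(per, dma_mem2per, &done);
	err = (err < 0) ? err : libxpdma_waitForTransaction(per, &done, NULL, 0);
	return err;
}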