DMAC

From Vita Development Wiki
Jump to navigation Jump to search

Direct Memory Access Controller

The DMA controllers can be used to perform memory copy operations in the background without CPU overhead. Some DMACs can also be used to perform hardware-accelerated cryptography / hashing / RNG. On ARM, the SceKernelDmacMgr module is responsible for commanding the DMACs.

MMIO Interface

/**
 * @name Command Flags
 *
 * Modifier bits that are ORed with a SCE_DMAC_CMD_OP_* command to build the
 * value of a channel's cmd register. Each flag is written as a bit shift so
 * the field it belongs to is visible; the resulting hex value is given in the
 * trailing comment.
 */
/** @{ */
#define SCE_DMAC_CMD_USE_EXTERNAL_KEY     (1 << 7)  //!< 0x00000080 - Use the channel's external key instead of the key ring

/* SCE_DMAC_CMD_KEYSIZE_*: 2-bit field at bits 8~9 */
#define SCE_DMAC_CMD_KEYSIZE_64BIT        (0 << 8)  //!< 0x00000000
#define SCE_DMAC_CMD_KEYSIZE_128BIT       (1 << 8)  //!< 0x00000100
#define SCE_DMAC_CMD_KEYSIZE_192BIT       (2 << 8)  //!< 0x00000200
#define SCE_DMAC_CMD_KEYSIZE_256BIT       (3 << 8)  //!< 0x00000300

/* SCE_DMAC_CMD_HASH_*: 2-bit field at bits 10~11 */
#define SCE_DMAC_CMD_HASH_DIGEST          (0 << 10) //!< 0x00000000
#define SCE_DMAC_CMD_HASH_INIT            (1 << 10) //!< 0x00000400
#define SCE_DMAC_CMD_HASH_RESULT          (2 << 10) //!< 0x00000800
#define SCE_DMAC_CMD_HASH_UPDATE          (3 << 10) //!< 0x00000C00

#define SCE_DMAC_CMD_INTERRUPT            (1 << 12) //!< 0x00001000 - Fire an interrupt after completion of the tag or job

/* L2 cache coherency enables: one bit each at bits 24~27 */
#define SCE_DMAC_CMD_COHERENT_SRC         (1 << 24) //!< 0x01000000 - L2 Cache Coherent pTag->src accesses
#define SCE_DMAC_CMD_COHERENT_DST         (1 << 25) //!< 0x02000000 - L2 Cache Coherent pTag->dst accesses
#define SCE_DMAC_CMD_COHERENT_IV_READ     (1 << 26) //!< 0x04000000 - L2 Cache Coherent pTag->iv reads
#define SCE_DMAC_CMD_COHERENT_IV_WRITE    (1 << 27) //!< 0x08000000 - L2 Cache Coherent pTag->iv writes

#define SCE_DMAC_CMD_KEYRING_DST          (1 << 28) //!< 0x10000000 - Destination is a key ring slot instead of a physical address
/** @} */

/**
 * @name Block size fields
 *
 * Split of a 32-bit block size value into a source half (low 16 bits) and a
 * destination half (high 16 bits). Presumably this describes the block_size
 * register - to be confirmed.
 */
/** @{ */
#define SCE_DMAC_BLOCKSIZE_SRC_SHIFT (0)
#define SCE_DMAC_BLOCKSIZE_DST_SHIFT (16)

#define SCE_DMAC_BLOCKSIZE_SRC_MASK  (0xFFFF << SCE_DMAC_BLOCKSIZE_SRC_SHIFT)  //!< 0x0000FFFF
#define SCE_DMAC_BLOCKSIZE_DST_MASK  (0xFFFFu << SCE_DMAC_BLOCKSIZE_DST_SHIFT) //!< 0xFFFF0000 (unsigned: does not fit in a 32-bit int)
/** @} */

/**
 * @name Status register flags
 *
 * Bits of the channel stat register. The hex value of each flag is given in
 * the trailing comment.
 */
/** @{ */
#define SCE_DMAC_STAT_BUSY                  (1 << 0)  //!< 0x00000001 - Channel is busy
#define SCE_DMAC_STAT_ABORTED               (1 << 1)  //!< 0x00000002 - Set when a DMAC error occurs

/* Error detail flags: bits 16~20 */
#define SCE_DMAC_STAT_READ_ERROR            (1 << 16) //!< 0x00010000
#define SCE_DMAC_STAT_WRITE_ERROR           (1 << 17) //!< 0x00020000
#define SCE_DMAC_STAT_ILLEGAL_CONFIG_ERROR  (1 << 18) //!< 0x00040000
#define SCE_DMAC_STAT_TAG_ERROR             (1 << 19) //!< 0x00080000
#define SCE_DMAC_STAT_ZERO_BYTE_ERROR       (1 << 20) //!< 0x00100000
/** @} */

/**
 * @name Coherency mask fields
 *
 * This field, together with the SCE_DMAC_CMD_COHERENT_{SRC/DST} command flags, controls cache coherency behavior for the src and dst. \n
 * It is made of 9-bit subfields whose actual encoding is still unknown. \n
 * Observed values: \n
 * DmacMemcpy - 0x3E7F3 (src: 0x1F3 dst: 0x1F3) \n
 * DmacMemset - 0x3E600 (src: 0x000 dst: 0x1F3) \n
 * SblDmac5   - 0x3FFFF (src: 0x1FF dst: 0x1FF) \n
 */
/** @{ */
#define SCE_DMAC_COHERENCY_MSK_SRC_SHIFT    (0)
#define SCE_DMAC_COHERENCY_MSK_DST_SHIFT    (9)
#define SCE_DMAC_COHERENCY_MSK_UNK_SHIFT    (18)

#define SCE_DMAC_COHERENCY_MSK_SRC_MASK     (0x1FF << SCE_DMAC_COHERENCY_MSK_SRC_SHIFT) //!< 0x000001FF
#define SCE_DMAC_COHERENCY_MSK_DST_MASK     (0x1FF << SCE_DMAC_COHERENCY_MSK_DST_SHIFT) //!< 0x0003FE00
#define SCE_DMAC_COHERENCY_MSK_UNK_MASK     (0x1FF << SCE_DMAC_COHERENCY_MSK_UNK_SHIFT) //!< 0x07FC0000 - This mask is set internally by DmacMgr. It likely affects the coherency of the DMA tag reads

#define SCE_DMAC_COHERENCY_MSK_SRC_DST_MASK (SCE_DMAC_COHERENCY_MSK_SRC_MASK | SCE_DMAC_COHERENCY_MSK_DST_MASK) //!< 0x0003FFFF
/** @} */

/**
 * @name IV Coherency mask fields
 *
 * This field, together with the SCE_DMAC_CMD_COHERENT_IV_{READ/WRITE} command flags, controls cache coherency behavior for the iv. \n
 * It is made of two 9-bit subfields whose actual encoding is still unknown. \n
 * Observed values: \n
 * SblDmac5   - 0x1FF01FF (read: 0x1FF write: 0x1FF) \n
 */
/** @{ */
#define SCE_DMAC_IV_COHERENCY_MSK_READ_SHIFT  (0)
#define SCE_DMAC_IV_COHERENCY_MSK_WRITE_SHIFT (16)

#define SCE_DMAC_IV_COHERENCY_MSK_READ_MASK   (0x1FF << SCE_DMAC_IV_COHERENCY_MSK_READ_SHIFT)  //!< 0x000001FF
#define SCE_DMAC_IV_COHERENCY_MSK_WRITE_MASK  (0x1FF << SCE_DMAC_IV_COHERENCY_MSK_WRITE_SHIFT) //!< 0x01FF0000

#define SCE_DMAC_IV_COHERENCY_MSK_RW_MASK     (SCE_DMAC_IV_COHERENCY_MSK_READ_MASK | SCE_DMAC_IV_COHERENCY_MSK_WRITE_MASK) //!< 0x01FF01FF
/** @} */

/**
 * @name Interrupt status
 *
 * Flag bits for the interrupt status (judging by the names, the channel
 * intr_status register - to be confirmed).
 */
/** @{ */
#define SCE_DMAC_INTR_STATUS_COMPLETE (1 << 0) //!< 0x00000001
#define SCE_DMAC_INTR_STATUS_ERROR    (1 << 1) //!< 0x00000002
/** @} */

/**
 * MMIO register block of a single DMAC channel (0x80 bytes).
 *
 * The offset comments in front of each member assume that SceUIntPAddr,
 * SceSize and SceUInt32 are all 4 bytes wide (their definitions are not part
 * of this file - TODO confirm).
 */
typedef struct _SceDmacChannelReg { // Channel MMIO interface - Size is 0x80 bytes
/*00*/ SceUIntPAddr src;             //!< Source physical address
/*04*/ union {
           SceUIntPAddr dst;         //!< Destination physical address
           SceUInt32 dst_keyring_id; //!< Destination key ring slot ID (requires SCE_DMAC_CMD_KEYRING_DST flag)
       };
/*08*/ SceSize len;                  //!< Length of the transfer (presumably in bytes - TODO confirm)
/*0C*/ SceUInt32 cmd;                //!< Command register - consists of a command (SCE_DMAC_CMD_OP_*) ORed with command flags (SCE_DMAC_CMD_*)
/*10*/ SceUInt32 src_keyring_id;     //!< Source key ring slot ID (for certain operations)
/*14*/ union {
           SceUIntPAddr iv;          //!< IV for cryptographic operations (updated by certain operations)
           SceUIntPAddr sha_ctx;     //!< SHA CTX (state + length) for cryptographic operations (updated by certain operations)
       };
/*18*/ SceSize block_size;           //!< Presumably split into src/dst halves as described by SCE_DMAC_BLOCKSIZE_* - TODO confirm
/*1C*/ SceUInt32 ctrl;               //!< Main control register - write 1 to start the programmed transfer, 0 to cancel the current one
/*20*/ SceUIntPAddr chain_start;     //!< Used to start chained DMA operations - write the physical address of the first DMA tag
/*24*/ SceUInt32 stat;               //!< Status register - see SCE_DMAC_STAT_*
/*28*/ SceUInt32 intr_status;        //!< Interrupt status - write back the value to clear interrupts
/*2C*/ SceUInt32 coherency_mask;     //!< See SCE_DMAC_COHERENCY_MSK_*
/*30*/ SceUInt32 iv_coherency_mask;  //!< See SCE_DMAC_IV_COHERENCY_MSK_*
/*34*/ SceUInt32 set_ch;             //!< 32-bit value used for SET command
/*38*/ unsigned int unused1;
/*3C*/ SceUInt32 trng_val;           //!< Current value of the TRNG (only available on DMAC5 & Bigmac)
/*40*/ SceUIntPAddr err_src;         //!< Source parameter of operation that caused a DMAC error
/*44*/ SceUIntPAddr err_dst;         //!< Destination parameter of operation that caused a DMAC error
/*48*/ SceSize err_len;              //!< Length parameter of operation that caused a DMAC error
/*4C*/ SceUIntPAddr current_tag;     //!< Physical address of the DMA tag being executed (for chained operation)
/*50*/ unsigned int unused3[12];     // Pads the channel block up to 0x80 bytes
} SceDmacChannelReg;

The DMAC interfaces are mapped at the following locations:

Device Physical address Interrupt IDs Notes
DMAC0 0xE3000000 0x70~0x71 Linked to Main and Center Xbar.

Faster for LPDDR2 copies.

DMAC1 0xE3010000 0x72~0x73
DMAC2 0xE5000000 0x74~0x75 Linked to Video Xbar, might be linked to GPU Xbar.

Faster for CDRAM copies.

DMAC3 0xE5010000 0x76~0x77
DMAC4 0xE0400000 0x78~0x7B Has 16 channels instead of the usual 2.
DMAC5 0xE0410000 0x7C~0x7D Has a key ring.
DMAC6 0xE50C0000 0x7E~0x7F Only available on TOOL Kermit.
Bigmac 0xE0050000 None? Not visible by ARM NS. Has a key ring.

Standard DMACs

DMAC0, DMAC1, DMAC2, DMAC3 and DMAC6 fall under this category.

These DMACs have two channels and can only be used for SET and COPY operations - all other commands do not operate properly.

Each channel delivers interrupts with its own ID: for example, DMAC0 Channel 0 delivers interrupts via ID 0x70 and Channel 1 via ID 0x71.

/**
 * MMIO register map of a standard (two-channel) DMAC, padded to 0x1000 bytes.
 *
 * The two channel blocks come first, followed by controller-wide registers.
 */
typedef struct _SceDmacReg {
/*000*/ SceDmacChannelReg ch0;                 //!< Channel 0 registers
/*080*/ SceDmacChannelReg ch1;                 //!< Channel 1 registers
/*100*/ SceUInt32 unk100; // Set by SMC #0x168
/*104*/ SceUInt32 unk104; // Set by SMC #0x169
/*108*/ unsigned char unused1[0x8];
/*110*/ SceUIntPAddr bus_error_address;        // NOTE(review): presumably the address of the last faulting bus access - confirm
/*114*/ SceUInt32 bus_error_attribute;
/*118*/ SceUIntPAddr secure_bus_error_address; // NOTE(review): presumably the Secure-state counterpart of bus_error_address - confirm
/*11C*/ SceUInt32 secure_bus_error_attribute;
/*120*/ unsigned char unused2[0x1000 - 0x120]; // Pads the register map up to 0x1000 bytes
} SceDmacReg;

DMAC4

Mostly identical to standard DMACs except it has 16 channels; for this reason, the MMIO interface is different.

Interrupts are also shared between groups of 4 channels: for example, ID 0x78 corresponds to channels 0~3, ID 0x79 to channels 4~7, and so on.

/**
 * Pair of 32-bit interrupt gate registers (8 bytes, assuming a 4-byte
 * SceUInt32). DMAC4 has one such pair per group of 4 channels.
 * The meaning of both registers is still uncertain.
 */
typedef struct _Dmac4GateReg {
    SceUInt32 activeIntr; //!< Active interrupts bitmask?
    SceUInt32 intrMask;   //!< Valid channels for interrupt?
} Dmac4GateReg;

/**
 * MMIO register map of DMAC4 (16 channels), padded to 0x1000 bytes.
 *
 * The 16 channel blocks come first, then the same controller-wide registers
 * as the two-channel DMACs (here at 0x800 instead of 0x100), then the
 * interrupt gates.
 *
 * The typedef is named SceDmac4Reg to match the struct tag: the former name,
 * SceDmacReg, collided with the register map of the standard DMACs.
 */
typedef struct _SceDmac4Reg {
/*000*/ SceDmacChannelReg ch0;
/*080*/ SceDmacChannelReg ch1;
/*100*/ SceDmacChannelReg ch2;
/*180*/ SceDmacChannelReg ch3;
/*200*/ SceDmacChannelReg ch4;
/*280*/ SceDmacChannelReg ch5;
/*300*/ SceDmacChannelReg ch6;
/*380*/ SceDmacChannelReg ch7;
/*400*/ SceDmacChannelReg ch8;
/*480*/ SceDmacChannelReg ch9;
/*500*/ SceDmacChannelReg ch10;
/*580*/ SceDmacChannelReg ch11;
/*600*/ SceDmacChannelReg ch12;
/*680*/ SceDmacChannelReg ch13;
/*700*/ SceDmacChannelReg ch14;
/*780*/ SceDmacChannelReg ch15;
/*800*/ SceUInt32 unk100; // Set by SMC #0x168 (member name kept from the two-channel layout; the offset here is 0x800)
/*804*/ SceUInt32 unk104; // Set by SMC #0x169 (member name kept from the two-channel layout; the offset here is 0x804)
/*808*/ unsigned char unused1[0x8];
/*810*/ SceUIntPAddr bus_error_address;
/*814*/ SceUInt32 bus_error_attribute;
/*818*/ SceUIntPAddr secure_bus_error_address;
/*81C*/ SceUInt32 secure_bus_error_attribute;
/*820*/ unsigned char unused2[0x900 - 0x820];
/*900*/ Dmac4GateReg gate0; //!< Interrupt 0x78 - channels 0~3
/*908*/ Dmac4GateReg gate1; //!< Interrupt 0x79 - channels 4~7
/*910*/ Dmac4GateReg gate2; //!< Interrupt 0x7A - channels 8~11
/*918*/ Dmac4GateReg gate3; //!< Interrupt 0x7B - channels 12~15
/*920*/ Dmac4GateReg unk_gate0[4]; //!< Unknown gate controls. Configured by SceKernelIntrMgr in TrustZone
/*940*/ unsigned char unused3[0x1000 - 0x940]; // Pads the register map up to 0x1000 bytes
} SceDmac4Reg;

DMAC5

DMAC5 can perform all cryptographic, hashing and RNG operations in addition to the ones provided by other DMACs.

/**
 * Storage for one DMAC5 external key register bank.
 */
typedef SceUInt32 Dmac5Key[16]; //!< Size is 0x40 bytes, but only up to 256-bit keys are supported

/**
 * MMIO register map of DMAC5, padded to 0x1000 bytes.
 *
 * Same two-channel layout as the standard DMACs up to offset 0x120, followed
 * by one external key bank per channel. Each key bank is 0x40 bytes and the
 * two banks are 0x80 bytes apart.
 */
typedef struct SceDmac5Reg {
/*000*/ SceDmacChannelReg ch0;
/*080*/ SceDmacChannelReg ch1;
/*100*/ SceUInt32 unk100; // Set by SMC #0x168
/*104*/ SceUInt32 unk104; // Set by SMC #0x169
/*108*/ unsigned char unused1[0x8];
/*110*/ SceUIntPAddr bus_error_address;
/*114*/ SceUInt32 bus_error_attribute;
/*118*/ SceUIntPAddr secure_bus_error_address;
/*11C*/ SceUInt32 secure_bus_error_attribute;
/*120*/ unsigned char unused2[0x200 - 0x120];
/*200*/ Dmac5Key ch0Key; //!< Channel 0 external key (Used as src instead of keyring if SCE_DMAC_CMD_USE_EXTERNAL_KEY bit is set)
/*240*/ unsigned char unused3[0x40];
/*280*/ Dmac5Key ch1Key; //!< Channel 1 external key (presumably used under the same condition as ch0Key - TODO confirm)
/*2C0*/ unsigned char unused4[0x1000 - 0x2C0]; // Pads the register map up to 0x1000 bytes
} SceDmac5Reg;

DMAC5 Key Ring

A keyring is located on the same bus as DMAC5 and can be used for some operations. This device is located at physical address 0xE04E0000.

The DMAC5 key ring holds 32 keys of 256 bits each and can be accessed by ARM and CMeP. Configuration registers control which slots can be read by ARM in Non-Secure state ?and which slots can be used by DMAC5 for operations?.

/**
 * MMIO register map of the DMAC5 key ring: 32 key slots of 256 bits each,
 * followed by the access configuration registers.
 *
 * The slots are declared as 8 x 32-bit words (0x20 bytes) each, so that the
 * 32 slots span 0x400 bytes and the configuration registers really sit at
 * 0x400/0x404. Declaring them as Dmac5Key (0x40 bytes each) would push those
 * registers to 0x800 and make the structure larger than 0x1000 bytes.
 */
typedef struct _SceSblDMAC5DmacKR { // Size is 0x1000 bytes at physical address 0xE04E0000.
/*000*/ SceUInt32 slot[32][8];       //!< Key slots: slot[X] is the 256-bit (0x20 bytes) key of slot X
/*400*/ SceUInt32 non_secure_access; //!< Bit X, when set to 1, allows access to slot X from Non-Secure state. This register is only accessible in Secure state.
/*404*/ SceUInt32 unk404;            //!< ?Bit X enables Dmac5 access to slot X if 1?. ?This register is only accessible in Secure state?.
/*408*/ unsigned char unused[0x1000 - 0x408]; // Pads the register map up to 0x1000 bytes
} SceSblDMAC5DmacKR;

The key ring configuration is set during secure boot (by SKBL?). On boot (coldboot? reset?), non_secure_access defaults to 0x200000FF, which indicates that slots 0x00~0x07 and 0x1D are accessible by the Non-Secure kernel; the unk404 register is set to 0xFFFFFFFF.

Key ring slot ID Details
0x00~0x07 Can be modified from ARM.
0x08~0x1A ?
0x1B non-secure kernel encryption key.
0x1C Related to Removable Media Authentication (https://pastebin.com/5vuehLr7). This key is set by rmauth_sm.
0x1D Can be modified from ARM.
0x1E~0x1F Related to Magic Gate Key Manager. These keys are set by mgkm_sm.

Bigmac

Bigmac is only accessible by CMeP (?)

Bigmac doesn't support DES.

Bigmac Key Ring

See Cmep Key Ring Base.

Usage

DMA controllers can be programmed via the MMIO interface for a one-shot operation, or controlled by so-called DMA tag lists for more complex operations.

While DMACs can ensure coherency at L2 cache level, they are not MMU-aware - all addresses provided to the DMACs must be physical addresses.

One-shot operation

Simple DMA transfers can be performed using only the DMACs' MMIO interface.

  1. Ensure the DMA channel is inactive
    • Read the stat register and check bit 0 (SCE_DMAC_STAT_BUSY)
    • You can cancel the current DMA transfer by writing 0 to the ctrl register
  2. Prepare the new transfer by writing to the configuration registers
    • For example, to perform a SET command, fill the dst, len, cmd and set_ch registers appropriately
  3. Start the transfer
    • Write 1 to the ctrl register
  4. Wait for the transfer to complete
    • If using interrupts, read the interrupt status register and write the value back to clear the interrupt
    • Unrelated work may be carried out in parallel with the DMA transfer

Example code in C:

/**
 * Example: one-shot COPY between two physically contiguous buffers.
 *
 * Programs a single DMAC channel through its MMIO registers and busy-waits
 * until the channel is no longer busy. No error reporting is performed.
 *
 * @param dst Destination physical address
 * @param src Source physical address
 * @param len Value written to the channel's len register
 */
void dmac_phycont_memcpy(SceUIntPAddr dst, SceUIntPAddr src, SceSize len) {
    volatile SceDmacChannelReg* chan = /* ...obtain this yourself... */;

    // Step 1 - the channel has to be idle before it is reprogrammed
    if ((chan->stat & SCE_DMAC_STAT_BUSY) != 0) {
        // Optional - cancel the transfer that is currently in flight...
        chan->ctrl = 0;

        // ...then spin until the channel is no longer busy
        do {
            /* busy-wait */
        } while ((chan->stat & SCE_DMAC_STAT_BUSY) != 0);
    }

    // Step 2 - describe the new transfer
    chan->src = src;
    chan->dst = dst;
    chan->len = len;
    chan->cmd = SCE_DMAC_CMD_OP_COPY;

    // Step 3 - kick off the transfer
    chan->ctrl = 1;

    // Step 4 - poll the status register until the transfer is over
    for (;;) {
        if ((chan->stat & SCE_DMAC_STAT_BUSY) == 0) {
            break;
        }
    }
}

Chained operation

Complex DMA transfers can be performed using DMA tag lists instead of the MMIO interface.

As an example, the sceKernelDmacMemcpy routine performs a copy from one virtually contiguous buffer to another. However, the buffers may be physically fragmented and DMACs are not MMU-aware; thus the copy may have to be performed in multiple rounds.

To this end, a DMA tag list may be written to memory first then sent to the DMAC which will execute all the tags in the list one after another. The DMA tag structure contains all the data required for an operation along with a link to another tag to allow chaining.

#define SCE_DMAC_TAG_CHAIN_END ((SceUIntPAddr)0xFFFFFFFF) //!< Pseudo-physical address indicating there is no other DMA tag in a chain

/**
 * One DMA tag: the description of a single operation in a chained transfer.
 *
 * The first seven members have the same names and order as the first seven
 * registers of SceDmacChannelReg; next_tag links to the following tag.
 * Tags are located by physical address.
 */
typedef struct _SceDmaTag {
    SceUIntPAddr src;              //!< Source physical address
    union {
        SceUIntPAddr dst;          //!< Destination physical address
        SceUInt32 dst_keyring_id;  //not sure if this is allowed
    };
    SceSize len;                   //!< Length of the operation
    SceUInt32 cmd;                 //!< Command ORed with command flags
    SceUInt32 src_keyring_id;      //!< Source key ring slot ID
    SceUIntPAddr iv;               //!< Physical address of the IV (accessed via pTag->iv, see SCE_DMAC_CMD_COHERENT_IV_*)
    SceSize block_size;
    SceUIntPAddr next_tag; //!< Physical address of next DMA tag in chain, or SCE_DMAC_TAG_CHAIN_END.
} SceDmaTag;

To start a chained transfer, write the physical address of the first DMA tag in a chain to the chain_start register.

Interrupts

An interrupt is delivered when a command with the SCE_DMAC_CMD_INTERRUPT flag (bit 0x1000) set completes, or when an error occurs.

Error handling

When a DMAC error occurs, ?an interrupt is triggered? and bit SCE_DMAC_STAT_ABORTED is set in the stat register, and further information about the error can be found in this same register. If a chained operation was ongoing, the current_tag register will contain the physical address of the DMA tag that was being executed.

Supported commands

N.B. This is the list of all existing commands. See the description of each DMAC to know which commands it supports.

Also see here.

/*
 * Command opcodes, grouped by family.
 *
 * Pattern visible in the values below (observation only, not a documented
 * encoding): bits 0~2 select the operation (1 = encrypt, 2 = decrypt,
 * 3 = hash), bits 3~4 select the mode or variant (ECB/CBC/CTR, or
 * SHA1/SHA224/SHA256), bit 5 (0x20) is set on the HMAC variants and
 * bit 6 (0x40) on the DES variants.
 */

/* Memory and RNG operations */
#define SCE_DMAC_CMD_OP_COPY              (0x00000000)
#define SCE_DMAC_CMD_OP_RNG               (0x00000004)
#define SCE_DMAC_CMD_OP_SET               (0x0000000C)

/* AES */
#define SCE_DMAC_CMD_OP_ENCRYPT_AES_ECB   (0x00000001)
#define SCE_DMAC_CMD_OP_DECRYPT_AES_ECB   (0x00000002)
#define SCE_DMAC_CMD_OP_ENCRYPT_AES_CBC   (0x00000009)
#define SCE_DMAC_CMD_OP_DECRYPT_AES_CBC   (0x0000000A)
#define SCE_DMAC_CMD_OP_ENCRYPT_AES_CTR   (0x00000011)
#define SCE_DMAC_CMD_OP_DECRYPT_AES_CTR   (0x00000012)

/* DES */
#define SCE_DMAC_CMD_OP_ENCRYPT_DES_ECB   (0x00000041)
#define SCE_DMAC_CMD_OP_DECRYPT_DES_ECB   (0x00000042)
#define SCE_DMAC_CMD_OP_ENCRYPT_DES_CBC   (0x00000049)
#define SCE_DMAC_CMD_OP_DECRYPT_DES_CBC   (0x0000004A)

/* Hashes */
#define SCE_DMAC_CMD_OP_HASH_SHA1         (0x00000003)
#define SCE_DMAC_CMD_OP_HASH_SHA224       (0x0000000B)
#define SCE_DMAC_CMD_OP_HASH_SHA256       (0x00000013)

/* HMACs */
#define SCE_DMAC_CMD_OP_HMAC_SHA1         (0x00000023)
#define SCE_DMAC_CMD_OP_HMAC_SHA224       (0x0000002B)
#define SCE_DMAC_CMD_OP_HMAC_SHA256       (0x00000033)