ARZL

From Vita Development Wiki
Revision as of 14:16, 25 April 2023 by CreepNT (talk | contribs) (→‎Old filter: Add missing return)
Jump to navigation Jump to search

LZRA (ARZL in big-endian) is a compression and encoding format used on PS Vita. It is used for example to store files used by the SKBL like the NSKBL and some Tzs modules. It is also used on GIM texture data used by /sce_sys/right/right.suprx.

Naming

It must be part of the "LZ" algorithms. See LZ algorithms overview. It might be similar to:

There is a typo in the SKBL function names, where it is spelled ARLZ instead of ARZL.

Header

ARZL header is simply the string "ARZL" (41 52 5A 4C).

Encoding

To encode data into ARZL: 1) Apply ARM filter. See #ARM Filter. 2) ARZL encode

Decoding

To decode ARZL data: 1) ARZL decode. See SKBL#sceArlzDecode. 2) Remove ARM filter. See #ARM Filter and SKBL#sceArlzArmFilter.

ARM Filter

ARZL encoded/decoded data is not the raw data but filtered data. An ARM filter is applied to it to make compression more efficient, not to obfuscate it.

Although there are three versions of the ARM filter, the basic operation is the same. The filter just swaps bits and applies some deterministic changes that use information from the offset.

Old filter

In firmwares 0.920-1.06, the ARM filter is different and operates on byte-sized rather than word-sized operands. The "new" filter algorithm has been used since firmware 1.50.

Use the following algorithm for filtering e.g. NSKBL from these firmwares:

//SKBL uses ver == 0
/*
 * Byte-oriented ARM filter used by the old firmwares.
 *
 * Walks buf in 2-byte steps looking for a 4-byte group whose second byte has
 * its top five bits equal to 11110 ((b & 0xF8) == 0xF0) and whose fourth byte
 * has its top five bits equal to 11111 ((b & 0xF8) == 0xF8).
 * NOTE(review): this looks like a little-endian Thumb BL instruction pair --
 * confirm.
 *
 * For every match, the 22-bit field spread over the four bytes is extracted,
 * doubled, adjusted by the position of the group (offset + 4 is subtracted
 * when ver == 0 and added otherwise), and bits 22..1 of the result are stored
 * back in the same layout. The two 5-bit marker patterns are rewritten
 * unchanged. After a match the scan advances by 4 bytes instead of 2.
 *
 * buf  - buffer that is filtered in place.
 * size - number of bytes in buf.
 * ver  - 0 selects the subtracting variant, anything else the adding variant.
 *
 * Returns 0 when size < 4, otherwise the byte offset at which the scan
 * stopped.
 *
 * NOTE(review): the LAB_xxxx label names and the iVar/uVar locals look like
 * decompiler output -- the control flow is kept exactly as is.
 */
SceInt32 sceArlzArmFilter_old(ScePVoid buf, SceUInt32 size, SceInt32 ver) {
    SceUInt8* base = (SceUInt8*)buf;

    /* Too short to hold even one 4-byte group. */
    if (size < 4) {
        return 0;
    }

    /* sz is the current scan offset in bytes. */
    SceInt32 sz = 0;
    if (ver == 0) {
        /* First group (offset 0) is tested here; iVar4/uVar2 are set up the
         * way the loop body at LAB_5002DB5C expects them. */
        int iVar4 = 1;
        unsigned uVar2 = base[1];
        
        if ((uVar2 & 0xF8) == 0xF0)
            goto LAB_5002DB5C;

LAB_5002DB34:
        /* No match at sz: advance by 2 bytes, and continue only if a full
         * 4-byte group still fits at the new offset (old sz + 6 <= size). */
        uVar2 = sz + 6;
        sz += 2;
        if (uVar2 <= size) {
            while (1) {
                /* Second byte of the group must be 11110xxx. */
                iVar4 = sz + 1;
                uVar2 = (SceUInt32)base[iVar4];
                if ((uVar2 & 0xF8) != 0xF0)
                    break;

                /* NOTE(review): a label directly followed by a declaration is
                 * only valid from C23 on; older compilers may reject it. */
LAB_5002DB5C:
                /* Fourth byte of the group must be 11111xxx. */
                int iVar3 = sz + 3;
                if ((base[iVar3] & 0xF8) != 0xF8)
                    break;

                /* Gather the 22-bit field: bits 0..7 from byte 2, bits 8..10
                 * from byte 3, bits 11..18 from byte 0, bits 19..21 from
                 * byte 1; double it and subtract (sz + 4). */
                int iVar1 = sz + 2;
                uVar2 = (-sz - 4) + ((SceUInt32)base[iVar1] | 
                                     ((SceUInt32)base[sz] << 11) |
                                     ((uVar2 & 7) << 19) |
                                     ((base[iVar3] & 7) << 8)) * 2;

                /* Evaluates to 0xF0 | bits 22..20 of uVar2. */
                base[iVar4] = ~((SceUInt8)~(SceUInt8)((((uVar2 << 9) >> 0x1d) << 0x1c) >> 0x18) >> 4);
                /* End-of-buffer check value, taken before sz is advanced. */
                unsigned uVar5 = sz+8;
                /* Bits 19..12 of uVar2. */
                base[sz] = (SceUInt8)(uVar2 >> 12);
                sz += 4;
                /* Evaluates to 0xF8 | bits 11..9 of uVar2. */
                base[iVar3] = ~((SceUInt8)~(SceUInt8)(((uVar2 >> 9) << 0x1d) >> 0x18) >> 5);
                /* Bits 8..1 of uVar2. */
                base[iVar1] = (SceUInt8)(uVar2 >> 1);
                /* Stop when no further 4-byte group fits after this one. */
                if (size < uVar5) {
                    return sz;
                }
            }
            /* A marker byte did not match: step 2 bytes and retry. */
            goto LAB_5002DB34;
        }
        return sz;
    } else {
        /* iVar4 is the offset used for the loop condition: the group offset
         * when nothing matched, group offset + 2 after a match. */
        int iVar4;
        do {
            iVar4 = sz;
            if ((base[sz + 1] & 0xF8) == 0xF0 && (base[sz + 3] & 0xF8) == 0xF8) {
                iVar4 = sz + 2;
                /* Same field extraction as the ver == 0 path, but (sz + 4)
                 * is added instead of subtracted. */
                unsigned uVar2 = sz + 4 +
                    ((SceUInt32)base[iVar4] | 
                     ((SceUInt32)base[sz] << 11) |
                     ((base[sz + 1] & 7) << 19) |
                     ((base[sz + 3] & 7) << 8)
                    ) * 2;

                /* 0xF0 | bits 22..20 of uVar2. */
                base[sz + 1] =  ~((SceUInt8)~(SceUInt8)((((uVar2 << 9) >> 29) << 28) >> 24) >> 4);
                /* Bits 19..12 of uVar2. */
                base[sz] = (SceUInt8)(uVar2 >> 12);
                /* 0xF8 | bits 11..9 of uVar2. */
                base[sz + 3] = ~((SceUInt8)~(SceUInt8)(((uVar2 >> 9) << 29) >> 24) >> 5);
                /* Bits 8..1 of uVar2. */
                base[sz + 2] = (SceUInt8)(uVar2 >> 1);
            }
            /* Advance 2 bytes without a match, 4 bytes after a match. */
            sz = iVar4 + 2;
        } while ((iVar4 + 6) <= size);
        return sz;
    }
}

Version 0

/*
 * Removes the version 0 ARM filter from a buffer, in place.
 *
 * The buffer is scanned as little-endian 32-bit words. A word matches when
 * its low halfword has the top five bits 11110 and its high halfword has the
 * top five bits 11111 ((word & 0xF800F800) == 0xF800F000). For a match, the
 * 22-bit field made of the two 11-bit halfword payloads (low halfword holds
 * the upper 11 bits) has half of the byte offset just past the word
 * subtracted from it and is stored back in the same layout.
 *
 * When a word does not match but its high halfword starts with 11110, the
 * scan steps back two bytes so that this halfword is re-examined as the
 * first half of the next word.
 *
 * buffer - data to filter in place; a null pointer is treated as empty.
 * len    - number of bytes in buffer; fewer than 4 bytes leaves it untouched.
 *
 * Returns the byte offset at which scanning stopped. Fewer than four bytes
 * remain past that offset, so they are never read or written.
 */
int arzl_arm_filter_remove(unsigned char *buffer, int len) {
  int pos = 0;

  if (!buffer) {
    return 0;
  }

  /* Only touch a word when all four of its bytes lie inside the buffer. */
  while (len - pos >= 4) {
    unsigned char *p = &buffer[pos];

    /* Byte-wise little-endian load: safe at any alignment and byte order. */
    uint32_t data = (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
                    ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
    pos += 4;

    if ((data & 0xF800F800u) == 0xF800F000u) {
      /* Low halfword carries bits 21..11, high halfword bits 10..0. */
      uint32_t field = ((data >> 16) & 0x7FFu) | ((data & 0x7FFu) << 11);

      /* Offset is measured in halfwords from the end of this word;
       * the subtraction wraps modulo 2^32 and is masked on store. */
      field -= (uint32_t)pos >> 1;

      uint32_t first = 0xF000u | ((field >> 11) & 0x7FFu);
      uint32_t second = 0xF800u | (field & 0x7FFu);
      p[0] = (unsigned char)(first & 0xFFu);
      p[1] = (unsigned char)(first >> 8);
      p[2] = (unsigned char)(second & 0xFFu);
      p[3] = (unsigned char)(second >> 8);
    } else if ((data >> 27) == 30u) {
      /* High halfword starts with 11110: re-scan it as a first halfword. */
      pos -= 2;
    }
  }

  return pos;
}

Version 1

ARM filter version 1 is the same as version 0 except that the offset information is added instead of subtracted.

/* Rebuild the 22-bit field from the two halfword payloads, then ADD half of
 * the byte distance between buf and the start of the buffer. */
data = (((data >> 16) & 0xFFC007FF) | ((data & 0x7FF) << 11)) + ((buf - buffer) >> 1);

Version 2

ARM filter version 2 is the same as version 0, with one additional operation that swaps two nibbles under certain conditions. The condition was found through a learning process and may be overfitted.

/* Extra branch of the per-word if/else chain: taken when the masked bits of
 * the word equal 0x0000F2C0. */
else if ((data & 0x8000FBF0) == 0x0000F2C0) {
  /* Swap the nibble in bits 3..0 with the nibble in bits 27..24; every other
   * bit is kept. */
  data = (data & 0xF0FFFFF0) | ((data & 0xF) << 24) | ((data >> 24) & 0xF);
  /* Store the result over the word that was just read (buf is already past it). */
  *((uint32_t *)buf - 1) = data;
}

Tools

TODO