digitalmars.D.ldc - struggling with inline assembler
I'm trying to use Intel's AES instruction set for AES encryption. The following piece of code works well with DMD2 but won't compile with LDC. ldc2 tells me this: Basic Block in function '_D4main48__T21AES_128_KEY_EXPANSIONVAyaa7_656e6372797074Z21AES_128_KEY_EXPANSIONFNaNbNiNexPhPhZv' does not have terminator! label %endentry LLVM ERROR: Broken function found, compilation aborted! Flow control in the asm block might be the problem. By the way, how could I access arrays (ubyte[]) instead of pointers in inline assembly? Couldn't find a single piece of documentation... I would prefer to avoid the use of 'naked' assembler functions. I tried to do it as in biguintx86.d but was confused by the calling conventions. Registers are used in reverse order compared to the C calling convention, aren't they? ****************** module main; import std.stdio; import core.cpuid; void main(string[] args) { assert(sse2 && aes, "hardware does not support sse2 and aes!"); // test vectors immutable ubyte[16] plaintext = cast(const ubyte[])x"6bc1bee22e409f96e93d7e117393172a"; immutable ubyte[16] ciphertext = cast(const ubyte[])x"3ad77bb40d7a3660a89ecaf32466ef97"; immutable ubyte[16] userKey = cast(const ubyte[])x"2b7e151628aed2a6abf7158809cf4f3c"; ubyte[16*11] keySchedule; // buffer for key schedule AES_128_KEY_EXPANSION!"encrypt"(userKey.ptr, keySchedule.ptr); // initialize encryption key schedule ubyte[16] buffer; AES_128_ENCRYPT(keySchedule.ptr, plaintext.ptr, buffer.ptr); // encrypt one 128 bit block assert(buffer == ciphertext, "aes encryption failed"); writeln("200 OK"); } /// AES128 11 round encryption /// Params: /// key = 11*16 byte key schedule /// plain = 16 bytes plaintext /// ciphertext = at least 16 bytes output buffer void AES_128_ENCRYPT(in ubyte* key, in ubyte* plain, ubyte* ciphertext) in { //assert(key.length == 16*ROUNDS, "invalid key size"); //assert(plain.length == 16, "invalid input block size"); //assert(ciphertext.length >= 16, "output buffer too small"); } body { 
asm { mov RDX, key; // pointer to key schedule // load key into XMM0-XMM10 lddqu XMM0, [RDX+0x00]; lddqu XMM1, [RDX+0x10]; lddqu XMM2, [RDX+0x20]; lddqu XMM3, [RDX+0x30]; lddqu XMM4, [RDX+0x40]; lddqu XMM5, [RDX+0x50]; lddqu XMM6, [RDX+0x60]; lddqu XMM7, [RDX+0x70]; lddqu XMM8, [RDX+0x80]; lddqu XMM9, [RDX+0x90]; lddqu XMM10, [RDX+0xA0]; // load plaintext into XMM15 mov RDX, plain; // pointer to plaintext movdqu XMM15, [RDX]; // read plaintext block // AES-128 encryption sequence. // The data block is in XMM15. // Registers XMM0–XMM10 hold the round keys(from 0 to 10 in this order). // In the end, XMM15 holds the encryption result. pxor XMM15, XMM0; // Whitening step (Round 0) aesenc XMM15, XMM1; // Round 1 aesenc XMM15, XMM2; // Round 2 aesenc XMM15, XMM3; // Round 3 aesenc XMM15, XMM4; // Round 4 aesenc XMM15, XMM5; // Round 5 aesenc XMM15, XMM6; // Round 6 aesenc XMM15, XMM7; // Round 7 aesenc XMM15, XMM8; // Round 8 aesenc XMM15, XMM9; // Round 9 aesenclast XMM15, XMM10; // Round 10 mov RDX, ciphertext; // pointer to output buffer movdqu [RDX], XMM15; // write processed data to buffer } } /// /// Expand a 128 bit user key into 11 round keys /// /// source: http://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf, Figure 19. AES-128 Key Expansion: Outlined Code Example /// /// Params: /// /// decrypt = generate decryption key if set to true. 
default: false /// /// userKey = the AES key as given by the user /// key = 11 round keys /// /// enum ROUNDS = 11; @trusted public void AES_128_KEY_EXPANSION(string mode = "encrypt")(in ubyte* userKey, ubyte* key) nothrow @nogc if(mode == "encrypt" || mode == "decrypt") in { //assertHardwareSupport(); //assert(userKey.length == 16, "invalid key size"); //assert(key.length == ROUNDS*16, "invalid key schedule size"); } body { asm { mov RDX, userKey; // pointer to user key movdqu XMM1, [RDX]; // read user key xor RCX, RCX; // set index to 0 mov RDX, key; // pointer to working key movdqu [RDX+RCX], XMM1; add RCX, 0x10; // increment by 16 bytes aeskeygenassist XMM2, XMM1, 0x01; call aes_128_assist; aeskeygenassist XMM2, XMM1, 0x02; call aes_128_assist; aeskeygenassist XMM2, XMM1, 0x04; call aes_128_assist; aeskeygenassist XMM2, XMM1, 0x08; call aes_128_assist; aeskeygenassist XMM2, XMM1, 0x10; call aes_128_assist; aeskeygenassist XMM2, XMM1, 0x20; call aes_128_assist; aeskeygenassist XMM2, XMM1, 0x40; call aes_128_assist; aeskeygenassist XMM2, XMM1, 0x80; call aes_128_assist; aeskeygenassist XMM2, XMM1, 0x1b; call aes_128_assist; aeskeygenassist XMM2, XMM1, 0x36; call aes_128_assist; } static if(mode == "decrypt") { asm { // generate inverse key call aesimc128; } } asm { jmp END; aes_128_assist: pshufd XMM2, XMM2, 0xff; //vpslldq XMM3, XMM1, 0x4; // vpslldq requires AVX, pslldq requires only SSE2 movdqu XMM3, XMM1; pslldq XMM3, 0x4; pxor XMM1, XMM3; //vpslldq XMM3, XMM1, 0x4; movdqu XMM3, XMM1; pslldq XMM3, 0x4; pxor XMM1, XMM3; //vpslldq XMM3, XMM1, 0x4; movdqu XMM3, XMM1; pslldq XMM3, 0x4; pxor XMM1, XMM3; pxor XMM1, XMM2; mov RDX, key; // pointer to working key movdqu [RDX+RCX], XMM1; // store result in keySchedule add RCX, 0x10; // increment index by 16 bytes ret; // end of key_expansion_128 // // do aesimc for all except the first and the last round key // aesimc128: mov RDX, key; // pointer to key output buffer add RDX, 0x10; // dont modify first key mov RCX, 
ROUNDS-2; // set counter to number of rounds - 2 LOOP: movdqu XMM1, [RDX]; // load aesimc XMM1, XMM1; // invert movdqu [RDX], XMM1; // store add RDX, 0x10; // increment pointer loop LOOP; // loop rounds-2 times ret; // end aesimc128 END: ; } }
Apr 09 2015
On Thursday, 9 April 2015 at 15:31:34 UTC, salsa wrote:
> I'm trying to use Intel's AES instruction set for AES encryption. The following piece of code works well with DMD2 but won't compile with LDC. ldc2 tells me this: Basic Block in function '_D4main48__T21AES_128_KEY_EXPANSIONVAyaa7_656e6372797074Z21AES_128_KEY_EXPANSIONFNaNbNiNexPhPhZv' does not have terminator! label %endentry LLVM ERROR: Broken function found, compilation aborted! Flow control in the asm block might be the problem.

Hi salsa! The function has several asm { .. } blocks. Jumping between these blocks is not supported by ldc. A possible workaround could be to load the target address into a register and do an indirect call. You could also write a mixin for the aes_128_assist and aesimc128 subroutines and replace the calls with the assembler text. Regards, Kai
Apr 09 2015
Thanks! Replaced the 'static if' with a simple assembler branch.
Apr 09 2015