diff -urN linux-2.4.28/Documentation/Configure.help linux-2.4.28-loop-AES-v3.0b/Documentation/Configure.help --- linux-2.4.28/Documentation/Configure.help Sun Dec 12 12:06:27 2004 +++ linux-2.4.28-loop-AES-v3.0b/Documentation/Configure.help Sun Feb 6 18:45:39 2005 @@ -620,6 +620,21 @@ If unsure, say N. +AES encrypted loop device support +CONFIG_BLK_DEV_LOOP_AES + If you want to use AES encryption algorithm to encrypt loop devices, + say Y here. If you don't know what to do here, say N. + +loop encryption key scrubbing support +CONFIG_BLK_DEV_LOOP_KEYSCRUB + Loop encryption key scrubbing moves and inverts key bits in + kernel RAM so that the thin oxide which forms the storage + capacitor dielectric of DRAM cells is not permitted to develop + detectable property. For more info, see Peter Gutmann's paper: + http://www.cs.auckland.ac.nz/~pgut001/pubs/secure_del.html + + Paranoid tinfoil hat crowd say Y here, everyone else say N. + ATA/IDE/MFM/RLL support CONFIG_IDE If you say Y here, your kernel will be able to manage low cost mass diff -urN linux-2.4.28/drivers/block/Config.in linux-2.4.28-loop-AES-v3.0b/drivers/block/Config.in --- linux-2.4.28/drivers/block/Config.in Sat Jul 31 18:45:19 2004 +++ linux-2.4.28-loop-AES-v3.0b/drivers/block/Config.in Sun Feb 6 18:45:39 2005 @@ -42,6 +42,10 @@ dep_tristate 'Promise SATA SX8 support' CONFIG_BLK_DEV_SX8 $CONFIG_PCI tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP +if [ "$CONFIG_BLK_DEV_LOOP" != "n" ]; then + bool ' AES encrypted loop device support' CONFIG_BLK_DEV_LOOP_AES + bool ' loop encryption key scrubbing support' CONFIG_BLK_DEV_LOOP_KEYSCRUB +fi dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET tristate 'RAM disk support' CONFIG_BLK_DEV_RAM diff -urN linux-2.4.28/drivers/block/loop.c linux-2.4.28-loop-AES-v3.0b/drivers/block/loop.c --- linux-2.4.28/drivers/block/loop.c Sat Sep 13 07:57:22 2003 +++ linux-2.4.28-loop-AES-v3.0b/drivers/block/loop.c Sun Feb 6 18:45:39 2005 @@ -2,7 +2,7 @@ * 
linux/drivers/block/loop.c * * Written by Theodore Ts'o, 3/29/93 - * + * * Copyright 1993 by Theodore Ts'o. Redistribution of this file is * permitted under the GNU General Public License. * @@ -21,12 +21,12 @@ * Loadable modules and other fixes by AK, 1998 * * Make real block number available to downstream transfer functions, enables - * CBC (and relatives) mode encryption requiring unique IVs per data block. + * CBC (and relatives) mode encryption requiring unique IVs per data block. * Reed H. Petty, rhp@draper.net * * Maximum number of loop devices now dynamic via max_loop module parameter. * Russell Kroll 19990701 - * + * * Maximum number of loop devices when compiled-in now selectable by passing * max_loop=<1-255> to the kernel on boot. * Erik I. Bolsų, , Oct 31, 1999 @@ -39,20 +39,30 @@ * Support up to 256 loop devices * Heinz Mauelshagen , Feb 2002 * - * Still To Fix: - * - Advisory locking is ignored here. - * - Should use an own CAP_* category instead of CAP_SYS_ADMIN + * AES transfer added. IV is now passed as (512 byte) sector number. + * Jari Ruusu, May 18 2001 + * + * External encryption module locking bug fixed. + * Ingo Rohloff , June 21 2001 + * + * Make device backed loop work with swap (pre-allocated buffers + queue rewrite). + * Jari Ruusu, September 2 2001 * - * WARNING/FIXME: - * - The block number as IV passing to low level transfer functions is broken: - * it passes the underlying device's block number instead of the - * offset. This makes it change for a given block when the file is - * moved/restored/copied and also doesn't work over NFS. - * AV, Feb 12, 2000: we pass the logical block number now. It fixes the - * problem above. Encryption modules that used to rely on the old scheme - * should just call ->i_mapping->bmap() to calculate the physical block - * number. - */ + * File backed code now uses file->f_op->read/write. Based on Andrew Morton's idea. 
+ * Jari Ruusu, May 23 2002 + * + * Backported struct loop_info64 ioctls from 2.6 kernels (64 bit offsets and + * 64 bit sizelimits). Added support for removing offset from IV computations. + * Jari Ruusu, September 21 2003 + * + * Added support for MD5 IV computation and multi-key operation. + * Jari Ruusu, October 8 2003 + * + * + * Still To Fix: + * - Advisory locking is ignored here. + * - Should use an own CAP_* category instead of CAP_SYS_ADMIN + */ #include #include @@ -71,10 +81,14 @@ #include #include #include +#include #include +#include #include +#include "../misc/aes.h" +#include "../misc/md5.h" #define MAJOR_NR LOOP_MAJOR @@ -82,21 +96,31 @@ static struct loop_device *loop_dev; static int *loop_sizes; static int *loop_blksizes; +static int *loop_hardsizes; static devfs_handle_t devfs_handle; /* For the directory */ +#if defined(__x86_64__) && defined(CONFIG_IA32_EMULATION) +# include +# define IOCTL32_COMPATIBLE_PTR ((void*)sys_ioctl) +#endif +#if (defined(__sparc__) || defined(__sparc64__)) && defined(CONFIG_SPARC32_COMPAT) + extern int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int, unsigned int, unsigned long, struct file *)); + extern int unregister_ioctl32_conversion(unsigned int cmd); + extern int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); +# define IOCTL32_COMPATIBLE_PTR ((void*)sys_ioctl) +#endif + /* * Transfer functions */ static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf, char *loop_buf, int size, int real_block) { - if (raw_buf != loop_buf) { - if (cmd == READ) - memcpy(loop_buf, raw_buf, size); - else - memcpy(raw_buf, loop_buf, size); - } + /* this code is only called from file backed loop */ + /* and that code expects this function to be no-op */ + if (current->need_resched) + {set_current_state(TASK_RUNNING);schedule();} return 0; } @@ -118,12 +142,13 @@ keysize = lo->lo_encrypt_key_size; for (i = 0; i < size; i++) *out++ = *in++ ^ key[(i & 511) % keysize]; + if 
(current->need_resched) + {set_current_state(TASK_RUNNING);schedule();} return 0; } static int none_status(struct loop_device *lo, struct loop_info *info) { - lo->lo_flags |= LO_FLAGS_BH_REMAP; return 0; } @@ -134,336 +159,949 @@ return 0; } -struct loop_func_table none_funcs = { +struct loop_func_table none_funcs = { number: LO_CRYPT_NONE, transfer: transfer_none, init: none_status, }; -struct loop_func_table xor_funcs = { +struct loop_func_table xor_funcs = { number: LO_CRYPT_XOR, transfer: transfer_xor, - init: xor_status + init: xor_status, }; -/* xfer_funcs[0] is special - its release function is never called */ -struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { - &none_funcs, - &xor_funcs +#if CONFIG_BLK_DEV_LOOP_AES +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB +# define KEY_ALLOC_COUNT 128 +#else +# define KEY_ALLOC_COUNT 64 +#endif + +typedef struct { + aes_context *keyPtr[KEY_ALLOC_COUNT]; + unsigned keyMask; +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + u_int32_t *partialMD5; + u_int32_t partialMD5buf[8]; + rwlock_t rwlock; + unsigned reversed; + unsigned blocked; + struct timer_list timer; +#else + u_int32_t partialMD5[4]; +#endif +} AESmultiKey; + +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB +static void keyScrubWork(AESmultiKey *m) +{ + aes_context *a0, *a1; + u_int32_t *p; + int x, y, z; + + z = m->keyMask + 1; + for(x = 0; x < z; x++) { + a0 = m->keyPtr[x]; + a1 = m->keyPtr[x + z]; + memcpy(a1, a0, sizeof(aes_context)); + m->keyPtr[x] = a1; + m->keyPtr[x + z] = a0; + p = (u_int32_t *) a0; + y = sizeof(aes_context) / sizeof(u_int32_t); + while(y > 0) { + *p ^= 0xFFFFFFFF; + p++; + y--; + } + } + + x = m->reversed; /* x is 0 or 4 */ + m->reversed ^= 4; + y = m->reversed; /* y is 4 or 0 */ + p = &m->partialMD5buf[x]; + memcpy(&m->partialMD5buf[y], p, 16); + m->partialMD5 = &m->partialMD5buf[y]; + p[0] ^= 0xFFFFFFFF; + p[1] ^= 0xFFFFFFFF; + p[2] ^= 0xFFFFFFFF; + p[3] ^= 0xFFFFFFFF; + + /* try to flush dirty cache data to RAM */ +#if defined(CONFIG_X86_64) || (defined(CONFIG_X86) && 
!defined(CONFIG_M386) && !defined(CONFIG_CPU_386)) + __asm__ __volatile__ ("wbinvd": : :"memory"); +#else + mb(); +#endif +} + +/* called only from loop thread process context */ +static void keyScrubThreadFn(AESmultiKey *m) +{ + write_lock(&m->rwlock); + if(!m->blocked) keyScrubWork(m); + write_unlock(&m->rwlock); +} + +static void keyScrubTimerInit(struct loop_device *lo) +{ + AESmultiKey *m; + unsigned long expire; + static void keyScrubTimerFn(unsigned long); + + m = (AESmultiKey *)lo->key_data; + expire = jiffies + HZ; + init_timer(&m->timer); + m->timer.expires = expire; + m->timer.data = (unsigned long)lo; + m->timer.function = keyScrubTimerFn; + add_timer(&m->timer); +} + +/* called only from timer handler context */ +static void keyScrubTimerFn(unsigned long d) +{ + struct loop_device *lo = (struct loop_device *)d; + extern void loop_add_keyscrub_fn(struct loop_device *, void (*)(void *), void *); + + /* rw lock needs process context, so make loop thread do scrubbing */ + loop_add_keyscrub_fn(lo, (void (*)(void*))keyScrubThreadFn, lo->key_data); + /* start timer again */ + keyScrubTimerInit(lo); +} +#endif + +static AESmultiKey *allocMultiKey(void) +{ + AESmultiKey *m; + aes_context *a; + int x = 0, n; + + m = (AESmultiKey *) kmalloc(sizeof(AESmultiKey), GFP_KERNEL); + if(!m) return 0; + memset(m, 0, sizeof(AESmultiKey)); +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + m->partialMD5 = &m->partialMD5buf[0]; + rwlock_init(&m->rwlock); + init_timer(&m->timer); + again: +#endif + + n = PAGE_SIZE / sizeof(aes_context); + if(!n) n = 1; + + a = (aes_context *) kmalloc(sizeof(aes_context) * n, GFP_KERNEL); + if(!a) { +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + if(x) kfree(m->keyPtr[0]); +#endif + kfree(m); + return 0; + } + + while((x < KEY_ALLOC_COUNT) && n) { + m->keyPtr[x] = a; + a++; + x++; + n--; + } +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + if(x < 2) goto again; +#endif + return m; +} + +static void clearAndFreeMultiKey(AESmultiKey *m) +{ + aes_context *a; + int x, n; + +#if 
CONFIG_BLK_DEV_LOOP_KEYSCRUB + /* stop scrub timer. loop thread was killed earlier */ + del_timer_sync(&m->timer); + /* make sure allocated keys are in original order */ + if(m->reversed) keyScrubWork(m); +#endif + n = PAGE_SIZE / sizeof(aes_context); + if(!n) n = 1; + + x = 0; + while(x < KEY_ALLOC_COUNT) { + a = m->keyPtr[x]; + if(!a) break; + memset(a, 0, sizeof(aes_context) * n); + kfree(a); + x += n; + } + + memset(m, 0, sizeof(AESmultiKey)); + kfree(m); +} + +static int multiKeySetup(struct loop_device *lo, unsigned char *k, int version3) +{ + AESmultiKey *m; + aes_context *a; + int x, y, n, err = 0; + union { + u_int32_t w[16]; + unsigned char b[64]; + } un; + + if(lo->lo_key_owner != current->uid && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + m = (AESmultiKey *)lo->key_data; + if(!m) return -ENXIO; + +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + /* temporarily prevent loop thread from messing with keys */ + write_lock(&m->rwlock); + m->blocked = 1; + /* make sure allocated keys are in original order */ + if(m->reversed) keyScrubWork(m); + write_unlock(&m->rwlock); +#endif + n = PAGE_SIZE / sizeof(aes_context); + if(!n) n = 1; + + x = 0; + while(x < KEY_ALLOC_COUNT) { + if(!m->keyPtr[x]) { + a = (aes_context *) kmalloc(sizeof(aes_context) * n, GFP_KERNEL); + if(!a) { + err = -ENOMEM; + goto error_out; + } + y = x; + while((y < (x + n)) && (y < KEY_ALLOC_COUNT)) { + m->keyPtr[y] = a; + a++; + y++; + } + } +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + if(x >= 64) { + x++; + continue; + } +#endif + if(copy_from_user(&un.b[0], k, 32)) { + err = -EFAULT; + goto error_out; + } + aes_set_key(m->keyPtr[x], &un.b[0], lo->lo_encrypt_key_size, 0); + k += 32; + x++; + } + + m->partialMD5[0] = 0x67452301; + m->partialMD5[1] = 0xefcdab89; + m->partialMD5[2] = 0x98badcfe; + m->partialMD5[3] = 0x10325476; + if(version3) { + /* only first 128 bits of iv-key is used */ + if(copy_from_user(&un.b[0], k, 16)) { + err = -EFAULT; + goto error_out; + } +#if defined(__BIG_ENDIAN) + un.w[0] = 
cpu_to_le32(un.w[0]); + un.w[1] = cpu_to_le32(un.w[1]); + un.w[2] = cpu_to_le32(un.w[2]); + un.w[3] = cpu_to_le32(un.w[3]); +#endif + memset(&un.b[16], 0, 48); + md5_transform_CPUbyteorder(&m->partialMD5[0], &un.w[0]); + lo->lo_flags |= 0x080000; /* multi-key-v3 (info exported to user space) */ + } + + m->keyMask = 0x3F; /* range 0...63 */ + lo->lo_flags |= 0x100000; /* multi-key (info exported to user space) */ + memset(&un.b[0], 0, 32); +error_out: +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + /* re-enable loop thread key scrubbing */ + write_lock(&m->rwlock); + m->blocked = 0; + write_unlock(&m->rwlock); +#endif + return err; +} + +void loop_compute_sector_iv(int devSect, u_int32_t *ivout) +{ + ivout[0] = cpu_to_le32(devSect); + ivout[3] = ivout[2] = ivout[1] = 0; +} + +void loop_compute_md5_iv_v3(int devSect, u_int32_t *ivout, u_int32_t *data) +{ + int x; +#if defined(__BIG_ENDIAN) + int y, e; +#endif + u_int32_t buf[16]; + +#if defined(__BIG_ENDIAN) + y = 7; + e = 16; + do { + if (!y) { + e = 12; + /* md5_transform_CPUbyteorder wants data in CPU byte order */ + /* devSect is already in CPU byte order -- no need to convert */ + /* 32 bits of sector number + 24 zero bits */ + buf[12] = devSect; + buf[13] = 0x80000000; + /* 4024 bits == 31 * 128 bit plaintext blocks + 56 bits of sector number */ + buf[14] = 4024; + buf[15] = 0; + } + x = 0; + do { + buf[x ] = cpu_to_le32(data[0]); + buf[x + 1] = cpu_to_le32(data[1]); + buf[x + 2] = cpu_to_le32(data[2]); + buf[x + 3] = cpu_to_le32(data[3]); + x += 4; + data += 4; + } while (x < e); + md5_transform_CPUbyteorder(&ivout[0], &buf[0]); + } while (--y >= 0); + ivout[0] = cpu_to_le32(ivout[0]); + ivout[1] = cpu_to_le32(ivout[1]); + ivout[2] = cpu_to_le32(ivout[2]); + ivout[3] = cpu_to_le32(ivout[3]); +#else + x = 6; + do { + md5_transform_CPUbyteorder(&ivout[0], data); + data += 16; + } while (--x >= 0); + memcpy(buf, data, 48); + /* md5_transform_CPUbyteorder wants data in CPU byte order */ + /* devSect is already in CPU byte 
order -- no need to convert */ + /* 32 bits of sector number + 24 zero bits */ + buf[12] = devSect; + buf[13] = 0x80000000; + /* 4024 bits == 31 * 128 bit plaintext blocks + 56 bits of sector number */ + buf[14] = 4024; + buf[15] = 0; + md5_transform_CPUbyteorder(&ivout[0], &buf[0]); +#endif +} + +/* this function exists for compatibility with old external cipher modules */ +void loop_compute_md5_iv(int devSect, u_int32_t *ivout, u_int32_t *data) +{ + ivout[0] = 0x67452301; + ivout[1] = 0xefcdab89; + ivout[2] = 0x98badcfe; + ivout[3] = 0x10325476; + loop_compute_md5_iv_v3(devSect, ivout, data); +} + +/* Some external modules do not know if md5_transform_CPUbyteorder() */ +/* is asmlinkage or not, so here is C language wrapper for them. */ +void md5_transform_CPUbyteorder_C(u_int32_t *hash, u_int32_t const *in) +{ + md5_transform_CPUbyteorder(hash, in); +} + +static int transfer_aes(struct loop_device *lo, int cmd, char *raw_buf, + char *loop_buf, int size, int devSect) +{ + aes_context *a; + AESmultiKey *m; + int x; + unsigned y; + u_int32_t iv[8]; + + if(!size || (size & 511)) { + return -EINVAL; + } + m = (AESmultiKey *)lo->key_data; + y = m->keyMask; + if(cmd == READ) { + while(size) { +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + read_lock(&m->rwlock); +#endif + a = m->keyPtr[((unsigned)devSect) & y]; + if(y) { + memcpy(&iv[0], raw_buf, 16); + raw_buf += 16; + loop_buf += 16; + } else { + loop_compute_sector_iv(devSect, &iv[0]); + } + x = 15; + do { + memcpy(&iv[4], raw_buf, 16); + aes_decrypt(a, raw_buf, loop_buf); + *((u_int32_t *)(&loop_buf[ 0])) ^= iv[0]; + *((u_int32_t *)(&loop_buf[ 4])) ^= iv[1]; + *((u_int32_t *)(&loop_buf[ 8])) ^= iv[2]; + *((u_int32_t *)(&loop_buf[12])) ^= iv[3]; + if(y && !x) { + raw_buf -= 496; + loop_buf -= 496; + memcpy(&iv[4], &m->partialMD5[0], 16); + loop_compute_md5_iv_v3(devSect, &iv[4], (u_int32_t *)(&loop_buf[16])); + } else { + raw_buf += 16; + loop_buf += 16; + memcpy(&iv[0], raw_buf, 16); + } + aes_decrypt(a, raw_buf, loop_buf); + 
*((u_int32_t *)(&loop_buf[ 0])) ^= iv[4]; + *((u_int32_t *)(&loop_buf[ 4])) ^= iv[5]; + *((u_int32_t *)(&loop_buf[ 8])) ^= iv[6]; + *((u_int32_t *)(&loop_buf[12])) ^= iv[7]; + if(y && !x) { + raw_buf += 512; + loop_buf += 512; + } else { + raw_buf += 16; + loop_buf += 16; + } + } while(--x >= 0); +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + read_unlock(&m->rwlock); +#endif + if(current->need_resched) {set_current_state(TASK_RUNNING);schedule();} + size -= 512; + devSect++; + } + } else { + while(size) { +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + read_lock(&m->rwlock); +#endif + a = m->keyPtr[((unsigned)devSect) & y]; + if(y) { + /* on 2.4 and later kernels, real raw_buf is not doing */ + /* any writes now so it can be used as temp buffer */ + memcpy(raw_buf, loop_buf, 512); + memcpy(&iv[0], &m->partialMD5[0], 16); + loop_compute_md5_iv_v3(devSect, &iv[0], (u_int32_t *)(&raw_buf[16])); + x = 15; + do { + iv[0] ^= *((u_int32_t *)(&raw_buf[ 0])); + iv[1] ^= *((u_int32_t *)(&raw_buf[ 4])); + iv[2] ^= *((u_int32_t *)(&raw_buf[ 8])); + iv[3] ^= *((u_int32_t *)(&raw_buf[12])); + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf); + memcpy(&iv[0], raw_buf, 16); + raw_buf += 16; + iv[0] ^= *((u_int32_t *)(&raw_buf[ 0])); + iv[1] ^= *((u_int32_t *)(&raw_buf[ 4])); + iv[2] ^= *((u_int32_t *)(&raw_buf[ 8])); + iv[3] ^= *((u_int32_t *)(&raw_buf[12])); + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf); + memcpy(&iv[0], raw_buf, 16); + raw_buf += 16; + } while(--x >= 0); + loop_buf += 512; + } else { + loop_compute_sector_iv(devSect, &iv[0]); + x = 15; + do { + iv[0] ^= *((u_int32_t *)(&loop_buf[ 0])); + iv[1] ^= *((u_int32_t *)(&loop_buf[ 4])); + iv[2] ^= *((u_int32_t *)(&loop_buf[ 8])); + iv[3] ^= *((u_int32_t *)(&loop_buf[12])); + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf); + memcpy(&iv[0], raw_buf, 16); + loop_buf += 16; + raw_buf += 16; + iv[0] ^= *((u_int32_t *)(&loop_buf[ 0])); + iv[1] ^= *((u_int32_t *)(&loop_buf[ 4])); + iv[2] ^= *((u_int32_t *)(&loop_buf[ 8])); + iv[3] ^= 
*((u_int32_t *)(&loop_buf[12])); + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf); + memcpy(&iv[0], raw_buf, 16); + loop_buf += 16; + raw_buf += 16; + } while(--x >= 0); + } +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + read_unlock(&m->rwlock); +#endif + if(current->need_resched) {set_current_state(TASK_RUNNING);schedule();} + size -= 512; + devSect++; + } + } + return(0); +} + +static int keySetup_aes(struct loop_device *lo, struct loop_info *info) +{ + AESmultiKey *m; + union { + u_int32_t w[8]; /* needed for 4 byte alignment for b[] */ + unsigned char b[32]; + } un; + + lo->key_data = m = allocMultiKey(); + if(!m) return(-ENOMEM); + memcpy(&un.b[0], &info->lo_encrypt_key[0], 32); + aes_set_key(m->keyPtr[0], &un.b[0], info->lo_encrypt_key_size, 0); + memset(&info->lo_encrypt_key[0], 0, sizeof(info->lo_encrypt_key)); + memset(&un.b[0], 0, 32); +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + keyScrubTimerInit(lo); +#endif + return(0); +} + +static int keyClean_aes(struct loop_device *lo) +{ + if(lo->key_data) { + clearAndFreeMultiKey((AESmultiKey *)lo->key_data); + lo->key_data = 0; + } + return(0); +} + +static int handleIoctl_aes(struct loop_device *lo, int cmd, unsigned long arg) +{ + int err; + + switch (cmd) { + case LOOP_MULTI_KEY_SETUP: + err = multiKeySetup(lo, (unsigned char *)arg, 0); + break; + case LOOP_MULTI_KEY_SETUP_V3: + err = multiKeySetup(lo, (unsigned char *)arg, 1); + break; + default: + err = -EINVAL; + } + return err; +} + +static struct loop_func_table funcs_aes = { + number: 16, /* 16 == AES */ + transfer: transfer_aes, + init: keySetup_aes, + release: keyClean_aes, + ioctl: handleIoctl_aes }; -#define MAX_DISK_SIZE 1024*1024*1024 +EXPORT_SYMBOL(loop_compute_sector_iv); +EXPORT_SYMBOL(loop_compute_md5_iv_v3); +EXPORT_SYMBOL(loop_compute_md5_iv); +EXPORT_SYMBOL(md5_transform_CPUbyteorder_C); +#endif /* CONFIG_BLK_DEV_LOOP_AES */ -static int compute_loop_size(struct loop_device *lo, struct dentry * lo_dentry, kdev_t lodev) -{ - if 
(S_ISREG(lo_dentry->d_inode->i_mode)) - return (lo_dentry->d_inode->i_size - lo->lo_offset) >> BLOCK_SIZE_BITS; - if (blk_size[MAJOR(lodev)]) - return blk_size[MAJOR(lodev)][MINOR(lodev)] - - (lo->lo_offset >> BLOCK_SIZE_BITS); - return MAX_DISK_SIZE; -} +/* xfer_funcs[0] is special - its release function is never called */ +struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { + &none_funcs, + &xor_funcs, +#if CONFIG_BLK_DEV_LOOP_AES + [LO_CRYPT_AES] = &funcs_aes, +#endif +}; -static void figure_loop_size(struct loop_device *lo) -{ - loop_sizes[lo->lo_number] = compute_loop_size(lo, - lo->lo_backing_file->f_dentry, - lo->lo_device); +/* + * First number of 'lo_prealloc' is the default number of RAM pages + * to pre-allocate for each device backed loop. Every (configured) + * device backed loop pre-allocates this amount of RAM pages unless + * later 'lo_prealloc' numbers provide an override. 'lo_prealloc' + * overrides are defined in pairs: loop_index,number_of_pages + */ +static int lo_prealloc[9] = { 125, 999, 0, 999, 0, 999, 0, 999, 0 }; +#define LO_PREALLOC_MIN 4 /* minimum user defined pre-allocated RAM pages */ +#define LO_PREALLOC_MAX 512 /* maximum user defined pre-allocated RAM pages */ + +#ifdef MODULE +MODULE_PARM(lo_prealloc, "1-9i"); +MODULE_PARM_DESC(lo_prealloc, "Number of pre-allocated pages [,index,pages]..."); +#else +static int __init lo_prealloc_setup(char *str) +{ + int x, y, z; + + for (x = 0; x < (sizeof(lo_prealloc) / sizeof(int)); x++) { + z = get_option(&str, &y); + if (z > 0) + lo_prealloc[x] = y; + if (z < 2) + break; + } + return 1; } +__setup("lo_prealloc=", lo_prealloc_setup); +#endif -static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +/* + * This is loop helper thread nice value in range + * from 0 (low priority) to -20 (high priority). 
+ */ +#if defined(DEF_NICE) && defined(DEF_COUNTER) +static int lo_nice = -20; /* old scheduler default */ +#else +static int lo_nice = -1; /* O(1) scheduler default */ +#endif + +#ifdef MODULE +MODULE_PARM(lo_nice, "1i"); +MODULE_PARM_DESC(lo_nice, "Loop thread scheduler nice (0 ... -20)"); +#else +static int __init lo_nice_setup(char *str) { - struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ - struct address_space *mapping = file->f_dentry->d_inode->i_mapping; - struct address_space_operations *aops = mapping->a_ops; - struct page *page; - char *kaddr, *data; - unsigned long index; - unsigned size, offset; - int len; - - down(&mapping->host->i_sem); - index = pos >> PAGE_CACHE_SHIFT; - offset = pos & (PAGE_CACHE_SIZE - 1); - len = bh->b_size; - data = bh->b_data; - while (len > 0) { - int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize; - int transfer_result; - - size = PAGE_CACHE_SIZE - offset; - if (size > len) - size = len; - - page = grab_cache_page(mapping, index); - if (!page) - goto fail; - kaddr = kmap(page); - if (aops->prepare_write(file, page, offset, offset+size)) - goto unlock; - flush_dcache_page(page); - transfer_result = lo_do_transfer(lo, WRITE, kaddr + offset, data, size, IV); - if (transfer_result) { - /* - * The transfer failed, but we still write the data to - * keep prepare/commit calls balanced. 
- */ - printk(KERN_ERR "loop: transfer error block %ld\n", index); - memset(kaddr + offset, 0, size); - } - if (aops->commit_write(file, page, offset, offset+size)) - goto unlock; - if (transfer_result) - goto unlock; - kunmap(page); - data += size; - len -= size; - offset = 0; - index++; - pos += size; - UnlockPage(page); - page_cache_release(page); - } - up(&mapping->host->i_sem); - return 0; + int y; -unlock: - kunmap(page); - UnlockPage(page); - page_cache_release(page); -fail: - up(&mapping->host->i_sem); - return -1; + if (get_option(&str, &y) == 1) + lo_nice = y; + return 1; } +__setup("lo_nice=", lo_nice_setup); +#endif -struct lo_read_data { - struct loop_device *lo; - char *data; - int bsize; -}; +typedef struct { + struct buffer_head **q0; + struct buffer_head **q1; + struct buffer_head **q2; + int x0; + int x1; + int x2; +} que_look_up_table; -static int lo_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) +static void loop_prealloc_cleanup(struct loop_device *lo) { - char *kaddr; - unsigned long count = desc->count; - struct lo_read_data *p = (struct lo_read_data*)desc->buf; - struct loop_device *lo = p->lo; - int IV = page->index * (PAGE_CACHE_SIZE/p->bsize) + offset/p->bsize; - - if (size > count) - size = count; + struct buffer_head *bh; - kaddr = kmap(page); - if (lo_do_transfer(lo, READ, kaddr + offset, p->data, size, IV)) { - size = 0; - printk(KERN_ERR "loop: transfer error block %ld\n",page->index); - desc->error = -EINVAL; + while ((bh = lo->lo_bh_free)) { + __free_page(bh->b_page); + lo->lo_bh_free = bh->b_reqnext; + bh->b_reqnext = NULL; + kmem_cache_free(bh_cachep, bh); } - kunmap(page); - - desc->count = count - size; - desc->written += size; - p->data += size; - return size; -} - -static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) -{ - struct lo_read_data cookie; - read_descriptor_t desc; - struct file *file; - - cookie.lo = lo; - cookie.data = 
bh->b_data; - cookie.bsize = bsize; - desc.written = 0; - desc.count = bh->b_size; - desc.buf = (char*)&cookie; - desc.error = 0; - spin_lock_irq(&lo->lo_lock); - file = lo->lo_backing_file; - spin_unlock_irq(&lo->lo_lock); - do_generic_file_read(file, &pos, &desc, lo_read_actor); - return desc.error; } -static inline int loop_get_bs(struct loop_device *lo) +static int loop_prealloc_init(struct loop_device *lo, int y) { - int bs = 0; + struct buffer_head *bh; + int x; - if (blksize_size[MAJOR(lo->lo_device)]) - bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)]; - if (!bs) - bs = BLOCK_SIZE; + if(!y) { + y = lo_prealloc[0]; + for (x = 1; x < (sizeof(lo_prealloc) / sizeof(int)); x += 2) { + if (lo_prealloc[x + 1] && (lo->lo_number == lo_prealloc[x])) { + y = lo_prealloc[x + 1]; + break; + } + } + } + lo->lo_bh_flsh = (y * 3) / 4; - return bs; + for (x = 0; x < y; x++) { + bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL); + if (!bh) { + loop_prealloc_cleanup(lo); + return 1; + } + bh->b_page = alloc_page(GFP_KERNEL); + if (!bh->b_page) { + bh->b_reqnext = NULL; + kmem_cache_free(bh_cachep, bh); + loop_prealloc_cleanup(lo); + return 1; + } + bh->b_reqnext = lo->lo_bh_free; + lo->lo_bh_free = bh; + } + return 0; } -static inline unsigned long loop_get_iv(struct loop_device *lo, - unsigned long sector) +static void loop_add_queue_last(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q) { - int bs = loop_get_bs(lo); - unsigned long offset, IV; + unsigned long flags; - IV = sector / (bs >> 9) + lo->lo_offset / bs; - offset = ((sector % (bs >> 9)) << 9) + lo->lo_offset % bs; - if (offset >= bs) - IV++; + spin_lock_irqsave(&lo->lo_lock, flags); + if (*q) { + bh->b_reqnext = (*q)->b_reqnext; + (*q)->b_reqnext = bh; + } else { + bh->b_reqnext = bh; + } + *q = bh; + spin_unlock_irqrestore(&lo->lo_lock, flags); - return IV; + if (waitqueue_active(&lo->lo_bh_wait)) + wake_up_interruptible(&lo->lo_bh_wait); } -static int do_bh_filebacked(struct 
loop_device *lo, struct buffer_head *bh, int rw) +static void loop_add_queue_first(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q) { - loff_t pos; - int ret; - - pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset; - - if (rw == WRITE) - ret = lo_send(lo, bh, loop_get_bs(lo), pos); - else - ret = lo_receive(lo, bh, loop_get_bs(lo), pos); - - return ret; + spin_lock_irq(&lo->lo_lock); + if (*q) { + bh->b_reqnext = (*q)->b_reqnext; + (*q)->b_reqnext = bh; + } else { + bh->b_reqnext = bh; + *q = bh; + } + spin_unlock_irq(&lo->lo_lock); } -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate); -static void loop_put_buffer(struct buffer_head *bh) +static struct buffer_head *loop_get_bh(struct loop_device *lo, int *list_nr, + que_look_up_table *qt) { - /* - * check b_end_io, may just be a remapped bh and not an allocated one - */ - if (bh && bh->b_end_io == loop_end_io_transfer) { - __free_page(bh->b_page); - kmem_cache_free(bh_cachep, bh); + struct buffer_head *bh = NULL, *last; + + spin_lock_irq(&lo->lo_lock); + if ((last = *qt->q0)) { + bh = last->b_reqnext; + if (bh == last) + *qt->q0 = NULL; + else + last->b_reqnext = bh->b_reqnext; + bh->b_reqnext = NULL; + *list_nr = qt->x0; + } else if ((last = *qt->q1)) { + bh = last->b_reqnext; + if (bh == last) + *qt->q1 = NULL; + else + last->b_reqnext = bh->b_reqnext; + bh->b_reqnext = NULL; + *list_nr = qt->x1; + } else if ((last = *qt->q2)) { + bh = last->b_reqnext; + if (bh == last) + *qt->q2 = NULL; + else + last->b_reqnext = bh->b_reqnext; + bh->b_reqnext = NULL; + *list_nr = qt->x2; } + spin_unlock_irq(&lo->lo_lock); + return bh; } -/* - * Add buffer_head to back of pending list - */ -static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh) +static void loop_put_buffer(struct loop_device *lo, struct buffer_head *b) { unsigned long flags; + int wk; spin_lock_irqsave(&lo->lo_lock, flags); - if (lo->lo_bhtail) { - lo->lo_bhtail->b_reqnext = bh; - lo->lo_bhtail = bh; 
- } else - lo->lo_bh = lo->lo_bhtail = bh; + b->b_reqnext = lo->lo_bh_free; + lo->lo_bh_free = b; + wk = lo->lo_bh_need; spin_unlock_irqrestore(&lo->lo_lock, flags); - up(&lo->lo_bh_mutex); + if (wk && waitqueue_active(&lo->lo_bh_wait)) + wake_up_interruptible(&lo->lo_bh_wait); } -/* - * Grab first pending buffer - */ -static struct buffer_head *loop_get_bh(struct loop_device *lo) +static void loop_end_io_transfer_wr(struct buffer_head *bh, int uptodate) { - struct buffer_head *bh; - - spin_lock_irq(&lo->lo_lock); - if ((bh = lo->lo_bh)) { - if (bh == lo->lo_bhtail) - lo->lo_bhtail = NULL; - lo->lo_bh = bh->b_reqnext; - bh->b_reqnext = NULL; - } - spin_unlock_irq(&lo->lo_lock); + struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)]; + struct buffer_head *rbh = bh->b_private; - return bh; + rbh->b_reqnext = NULL; + rbh->b_end_io(rbh, uptodate); + loop_put_buffer(lo, bh); + if (atomic_dec_and_test(&lo->lo_pending)) + wake_up_interruptible(&lo->lo_bh_wait); } -/* - * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE - * and lo->transfer stuff has already been done. 
if not, it was a READ - * so queue it for the loop thread and let it do the transfer out of - * b_end_io context (we don't want to do decrypt of a page with irqs - * disabled) - */ -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate) +static void loop_end_io_transfer_rd(struct buffer_head *bh, int uptodate) { struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)]; - if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) { - struct buffer_head *rbh = bh->b_private; - - rbh->b_end_io(rbh, uptodate); - if (atomic_dec_and_test(&lo->lo_pending)) - up(&lo->lo_bh_mutex); - loop_put_buffer(bh); - } else - loop_add_bh(lo, bh); + if (!uptodate) + loop_end_io_transfer_wr(bh, uptodate); + else + loop_add_queue_last(lo, bh, &lo->lo_bh_que0); } static struct buffer_head *loop_get_buffer(struct loop_device *lo, - struct buffer_head *rbh) + struct buffer_head *rbh, int from_thread, int rw) { struct buffer_head *bh; + struct page *p; + unsigned long flags; - /* - * for xfer_funcs that can operate on the same bh, do that - */ - if (lo->lo_flags & LO_FLAGS_BH_REMAP) { - bh = rbh; - goto out_bh; + spin_lock_irqsave(&lo->lo_lock, flags); + bh = lo->lo_bh_free; + if (bh) { + lo->lo_bh_free = bh->b_reqnext; + if (from_thread) + lo->lo_bh_need = 0; + } else { + if (from_thread) + lo->lo_bh_need = 1; } + spin_unlock_irqrestore(&lo->lo_lock, flags); + if (!bh) + return (struct buffer_head *)0; - do { - bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO); - if (bh) - break; - - run_task_queue(&tq_disk); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); - } while (1); - memset(bh, 0, sizeof(*bh)); + p = bh->b_page; + memset(bh, 0, sizeof(struct buffer_head)); + bh->b_page = p; + bh->b_private = rbh; bh->b_size = rbh->b_size; bh->b_dev = rbh->b_rdev; + bh->b_rdev = lo->lo_device; bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock); + bh->b_data = page_address(bh->b_page); + bh->b_end_io = (rw == WRITE) ? 
loop_end_io_transfer_wr : loop_end_io_transfer_rd; + bh->b_rsector = rbh->b_rsector + lo->lo_offs_sec; + init_waitqueue_head(&bh->b_wait); + + return bh; +} + +static int figure_loop_size(struct loop_device *lo) +{ + loff_t size, offs; + unsigned int x; + int err = 0; + kdev_t lodev = lo->lo_device; + + offs = lo->lo_offset; + if (S_ISREG(lo->lo_backing_file->f_dentry->d_inode->i_mode)) { + size = lo->lo_backing_file->f_dentry->d_inode->i_size; + } else { + offs &= ~((loff_t)511); + if (blk_size[MAJOR(lodev)]) + size = (loff_t)(blk_size[MAJOR(lodev)][MINOR(lodev)]) << BLOCK_SIZE_BITS; + else + size = 1024*1024*1024; /* unknown size */ + } + if ((offs > 0) && (offs < size)) { + size -= offs; + } else { + if (offs) + err = -EINVAL; + lo->lo_offset = 0; + lo->lo_offs_sec = lo->lo_iv_remove = 0; + } + if ((lo->lo_sizelimit > 0) && (lo->lo_sizelimit <= size)) { + size = lo->lo_sizelimit; + } else { + if (lo->lo_sizelimit) + err = -EINVAL; + lo->lo_sizelimit = 0; + } + size >>= BLOCK_SIZE_BITS; /* - * easy way out, although it does waste some memory for < PAGE_SIZE - * blocks... 
if highmem bounce buffering can get away with it, - * so can we :-) + * Unfortunately, if we want to do I/O on the device, + * the number of 1024-byte blocks has to fit into unsigned int */ - do { - bh->b_page = alloc_page(GFP_NOIO); - if (bh->b_page) - break; + x = (unsigned int)size; + if ((loff_t)x != size) { + err = -EFBIG; + size = 0; + } - run_task_queue(&tq_disk); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); - } while (1); + loop_sizes[lo->lo_number] = size; + return err; +} - bh->b_data = page_address(bh->b_page); - bh->b_end_io = loop_end_io_transfer; - bh->b_private = rbh; - init_waitqueue_head(&bh->b_wait); +static int loop_file_io(struct file *file, char *buf, int size, loff_t *ppos, int w) +{ + mm_segment_t fs; + int x, y, z; -out_bh: - bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9); - spin_lock_irq(&lo->lo_lock); - bh->b_rdev = lo->lo_device; - spin_unlock_irq(&lo->lo_lock); + y = 0; + do { + z = size - y; + fs = get_fs(); + set_fs(get_ds()); + if (w) { + x = file->f_op->write(file, buf + y, z, ppos); + set_fs(fs); + } else { + x = file->f_op->read(file, buf + y, z, ppos); + set_fs(fs); + if (!x) + return 1; + } + if (x < 0) { + if ((x == -EAGAIN) || (x == -ENOMEM) || (x == -ERESTART) || (x == -EINTR)) { + run_task_queue(&tq_disk); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 2); + continue; + } + return 1; + } + y += x; + } while (y < size); + return 0; +} - return bh; +static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw) +{ + loff_t pos; + struct file *file = lo->lo_backing_file; + char *data, *buf; + unsigned int size, len; + unsigned long IV; + + pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset; + buf = page_address(lo->lo_bh_free->b_page); + len = bh->b_size; + data = bh_kmap(bh); + IV = bh->b_rsector; + if (!lo->lo_iv_remove) + IV += lo->lo_offs_sec; + while (len > 0) { + if (lo->lo_encrypt_type == LO_CRYPT_NONE) { + /* this code relies that NONE transfer is a no-op 
*/ + buf = data; + } + size = PAGE_SIZE; + if (size > len) + size = len; + if (rw == WRITE) { + if (lo_do_transfer(lo, WRITE, buf, data, size, IV)) { + printk(KERN_ERR "loop%d: write transfer error, sector %lu\n", lo->lo_number, IV); + goto kunmap_and_out; + } + if (loop_file_io(file, buf, size, &pos, 1)) { + printk(KERN_ERR "loop%d: write i/o error, sector %lu\n", lo->lo_number, IV); + goto kunmap_and_out; + } + } else { + if (loop_file_io(file, buf, size, &pos, 0)) { + printk(KERN_ERR "loop%d: read i/o error, sector %lu\n", lo->lo_number, IV); + goto kunmap_and_out; + } + if (lo_do_transfer(lo, READ, buf, data, size, IV)) { + printk(KERN_ERR "loop%d: read transfer error, sector %lu\n", lo->lo_number, IV); + goto kunmap_and_out; + } + flush_dcache_page(bh->b_page); + } + data += size; + len -= size; + IV += size >> 9; + } + bh_kunmap(bh); + return 0; + +kunmap_and_out: + bh_kunmap(bh); + return 1; } static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh) { - struct buffer_head *bh = NULL; + struct buffer_head *bh; struct loop_device *lo; - unsigned long IV; + char *md; + set_current_state(TASK_RUNNING); if (!buffer_locked(rbh)) BUG(); @@ -483,45 +1121,55 @@ } else if (rw == READA) { rw = READ; } else if (rw != READ) { - printk(KERN_ERR "loop: unknown command (%d)\n", rw); + printk(KERN_ERR "loop%d: unknown command (%d)\n", lo->lo_number, rw); goto err; } - rbh = blk_queue_bounce(q, rw, rbh); - /* * file backed, queue for loop_thread to handle */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - /* - * rbh locked at this point, noone else should clear - * the dirty flag - */ - if (rw == WRITE) - set_bit(BH_Dirty, &rbh->b_state); - loop_add_bh(lo, rbh); + loop_add_queue_last(lo, rbh, (rw == WRITE) ? 
&lo->lo_bh_que1 : &lo->lo_bh_que0); return 0; } /* - * piggy old buffer on original, and submit for I/O + * device backed, just remap rdev & rsector for NONE transfer */ - bh = loop_get_buffer(lo, rbh); - IV = loop_get_iv(lo, rbh->b_rsector); + if (lo->lo_encrypt_type == LO_CRYPT_NONE) { + rbh->b_rsector += lo->lo_offs_sec; + rbh->b_rdev = lo->lo_device; + generic_make_request(rw, rbh); + if (atomic_dec_and_test(&lo->lo_pending)) + wake_up_interruptible(&lo->lo_bh_wait); + return 0; + } + + /* + * device backed, start reads and writes now if buffer available + */ + bh = loop_get_buffer(lo, rbh, 0, rw); + if (!bh) { + /* just queue request and let thread handle alloc later */ + loop_add_queue_last(lo, rbh, (rw == WRITE) ? &lo->lo_bh_que1 : &lo->lo_bh_que2); + return 0; + } if (rw == WRITE) { - set_bit(BH_Dirty, &bh->b_state); - if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data, - bh->b_size, IV)) + int trv; + md = bh_kmap(rbh); + trv = lo_do_transfer(lo, WRITE, bh->b_data, md, bh->b_size, bh->b_rsector - lo->lo_iv_remove); + bh_kunmap(rbh); + if (trv) { + loop_put_buffer(lo, bh); goto err; + } } - generic_make_request(rw, bh); return 0; err: if (atomic_dec_and_test(&lo->lo_pending)) - up(&lo->lo_bh_mutex); - loop_put_buffer(bh); + wake_up_interruptible(&lo->lo_bh_wait); out: buffer_IO_error(rbh); return 0; @@ -530,30 +1178,6 @@ goto out; } -static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh) -{ - int ret; - - /* - * For block backed loop, we know this is a READ - */ - if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state); - - ret = do_bh_filebacked(lo, bh, rw); - bh->b_end_io(bh, !ret); - } else { - struct buffer_head *rbh = bh->b_private; - unsigned long IV = loop_get_iv(lo, rbh->b_rsector); - - ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data, - bh->b_size, IV); - - rbh->b_end_io(rbh, !ret); - loop_put_buffer(bh); - } -} - /* * worker thread that handles reads/writes to file backed 
loop devices, * to avoid blocking in our make_request_fn. it also does loop decrypting @@ -563,8 +1187,20 @@ static int loop_thread(void *data) { struct loop_device *lo = data; - struct buffer_head *bh; + struct buffer_head *bh, *xbh; + int x, rw, qi = 0, flushcnt = 0; + wait_queue_t waitq; + que_look_up_table qt[4] = { + { &lo->lo_bh_que0, &lo->lo_bh_que1, &lo->lo_bh_que2, 0, 1, 2 }, + { &lo->lo_bh_que2, &lo->lo_bh_que0, &lo->lo_bh_que1, 2, 0, 1 }, + { &lo->lo_bh_que0, &lo->lo_bh_que2, &lo->lo_bh_que1, 0, 2, 1 }, + { &lo->lo_bh_que1, &lo->lo_bh_que0, &lo->lo_bh_que2, 1, 0, 2 } + }; + char *md; + static const struct rlimit loop_rlim_defaults[RLIM_NLIMITS] = INIT_RLIMITS; + init_waitqueue_entry(&waitq, current); + memcpy(&current->rlim[0], &loop_rlim_defaults[0], sizeof(current->rlim)); daemonize(); exit_files(current); reparent_to_init(); @@ -576,12 +1212,30 @@ flush_signals(current); spin_unlock_irq(&current->sigmask_lock); + if (lo_nice > 0) + lo_nice = 0; + if (lo_nice < -20) + lo_nice = -20; +#if defined(DEF_NICE) && defined(DEF_COUNTER) + /* old scheduler syntax */ + current->policy = SCHED_OTHER; + current->nice = lo_nice; +#else + /* O(1) scheduler syntax */ + set_user_nice(current, lo_nice); +#endif + spin_lock_irq(&lo->lo_lock); lo->lo_state = Lo_bound; atomic_inc(&lo->lo_pending); spin_unlock_irq(&lo->lo_lock); current->flags |= PF_NOIO; +#if defined(PF_NOFREEZE) + current->flags |= PF_NOFREEZE; +#elif defined(PF_IOTHREAD) + current->flags |= PF_IOTHREAD; +#endif /* * up sem, we are running @@ -589,23 +1243,120 @@ up(&lo->lo_sem); for (;;) { - down_interruptible(&lo->lo_bh_mutex); + add_wait_queue(&lo->lo_bh_wait, &waitq); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (!atomic_read(&lo->lo_pending)) + break; + + x = 0; + spin_lock_irq(&lo->lo_lock); +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + if(lo->lo_keyscrub_fn) x = 1; +#endif + if (lo->lo_bh_que0) { + x = 1; + } else if (lo->lo_bh_que1 || lo->lo_bh_que2) { + /* file backed works too because lo->lo_bh_need == 
0 */ + if (lo->lo_bh_free || !lo->lo_bh_need) + x = 1; + } + spin_unlock_irq(&lo->lo_lock); + if (x) + break; + + schedule(); + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&lo->lo_bh_wait, &waitq); + +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + if(lo->lo_keyscrub_fn) { + (*lo->lo_keyscrub_fn)(lo->lo_keyscrub_ptr); + lo->lo_keyscrub_fn = 0; + } +#endif /* - * could be upped because of tear-down, not because of + * could be woken because of tear-down, not because of * pending work */ if (!atomic_read(&lo->lo_pending)) break; - bh = loop_get_bh(lo); - if (!bh) { - printk("loop: missing bh\n"); + /* + * read queues using alternating order to prevent starvation + */ + bh = loop_get_bh(lo, &x, &qt[++qi & 3]); + if (!bh) + continue; + + /* + * x list tag usage(buffer-allocated) + * --- -------------- ----------------------- + * 0 lo->lo_bh_que0 dev-read(y) / file-read + * 1 lo->lo_bh_que1 dev-write(n) / file-write + * 2 lo->lo_bh_que2 dev-read(n) + */ + rw = (x == 1) ? WRITE : READ; + if ((x >= 1) && !(lo->lo_flags & LO_FLAGS_DO_BMAP)) { + /* loop_make_request didn't allocate a buffer, do that now */ + xbh = loop_get_buffer(lo, bh, 1, rw); + if (!xbh) { + run_task_queue(&tq_disk); + flushcnt = 0; + loop_add_queue_first(lo, bh, (rw == WRITE) ? 
&lo->lo_bh_que1 : &lo->lo_bh_que2); + /* lo->lo_bh_need should be 1 now, go back to sleep */ + continue; + } + if (rw == WRITE) { + int trv; + md = bh_kmap(bh); + trv = lo_do_transfer(lo, WRITE, xbh->b_data, md, xbh->b_size, xbh->b_rsector - lo->lo_iv_remove); + bh_kunmap(bh); + if (trv) { + loop_put_buffer(lo, xbh); + buffer_IO_error(bh); + atomic_dec(&lo->lo_pending); + continue; + } + } + generic_make_request(rw, xbh); + + /* start I/O if there are no more requests lacking buffers */ + x = 0; + spin_lock_irq(&lo->lo_lock); + if (!lo->lo_bh_que1 && !lo->lo_bh_que2) + x = 1; + spin_unlock_irq(&lo->lo_lock); + if (x || (++flushcnt >= lo->lo_bh_flsh)) { + run_task_queue(&tq_disk); + flushcnt = 0; + } + + /* request not completely processed yet */ continue; } - loop_handle_bh(lo, bh); + if (lo->lo_flags & LO_FLAGS_DO_BMAP) { + /* request is for file backed device */ + x = do_bh_filebacked(lo, bh, rw); + bh->b_reqnext = NULL; + bh->b_end_io(bh, !x); + } else { + /* device backed read has completed, do decrypt now */ + xbh = bh->b_private; + /* must not use bh->b_rsector as IV, as it may be modified by LVM at this point */ + /* instead, recompute IV from original request */ + md = bh_kmap(xbh); + x = lo_do_transfer(lo, READ, bh->b_data, md, bh->b_size, xbh->b_rsector + lo->lo_offs_sec - lo->lo_iv_remove); + flush_dcache_page(xbh->b_page); + bh_kunmap(xbh); + xbh->b_reqnext = NULL; + xbh->b_end_io(xbh, !x); + loop_put_buffer(lo, bh); + } /* - * upped both for pending work and tear-down, lo_pending + * woken both for pending work and tear-down, lo_pending * will hit zero then */ if (atomic_dec_and_test(&lo->lo_pending)) @@ -616,15 +1367,34 @@ return 0; } +static void loop_set_softblksz(struct loop_device *lo, kdev_t dev) +{ + int bs = 0, x; + + if (blksize_size[MAJOR(lo->lo_device)]) + bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)]; + if (!bs) + bs = BLOCK_SIZE; + if (lo->lo_flags & LO_FLAGS_DO_BMAP) { + x = loop_sizes[lo->lo_number]; + if ((bs == 8192) && 
(x & 7)) + bs = 4096; + if ((bs == 4096) && (x & 3)) + bs = 2048; + if ((bs == 2048) && (x & 1)) + bs = 1024; + } + set_blocksize(dev, bs); +} + static int loop_set_fd(struct loop_device *lo, struct file *lo_file, kdev_t dev, unsigned int arg) { struct file *file; struct inode *inode; kdev_t lo_device; - int lo_flags = 0; + int lo_flags = 0, hardsz = 512; int error; - int bs; MOD_INC_USE_COUNT; @@ -643,33 +1413,49 @@ if (!(file->f_mode & FMODE_WRITE)) lo_flags |= LO_FLAGS_READ_ONLY; +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + lo->lo_keyscrub_fn = 0; +#endif + lo->lo_offset = lo->lo_sizelimit = 0; + lo->lo_offs_sec = lo->lo_iv_remove = 0; + lo->lo_bh_free = lo->lo_bh_que2 = lo->lo_bh_que1 = lo->lo_bh_que0 = NULL; + lo->lo_bh_need = lo->lo_bh_flsh = 0; + init_waitqueue_head(&lo->lo_bh_wait); if (S_ISBLK(inode->i_mode)) { lo_device = inode->i_rdev; if (lo_device == dev) { error = -EBUSY; goto out_putf; } + if (loop_prealloc_init(lo, 0)) { + error = -ENOMEM; + goto out_putf; + } + hardsz = get_hardsect_size(lo_device); } else if (S_ISREG(inode->i_mode)) { - struct address_space_operations *aops = inode->i_mapping->a_ops; /* * If we can't read - sorry. If we only can't write - well, * it's going to be read-only. 
*/ - if (!aops->readpage) + if (!file->f_op || !file->f_op->read) goto out_putf; - if (!aops->prepare_write || !aops->commit_write) + if (!file->f_op->write) lo_flags |= LO_FLAGS_READ_ONLY; lo_device = inode->i_dev; lo_flags |= LO_FLAGS_DO_BMAP; + if (loop_prealloc_init(lo, 1)) { + error = -ENOMEM; + goto out_putf; + } error = 0; } else goto out_putf; get_file(file); - if (IS_RDONLY (inode) || is_read_only(lo_device) + if ((S_ISREG(inode->i_mode) && IS_RDONLY(inode)) || is_read_only(lo_device) || !(lo_file->f_mode & FMODE_WRITE)) lo_flags |= LO_FLAGS_READ_ONLY; @@ -677,28 +1463,40 @@ lo->lo_device = lo_device; lo->lo_flags = lo_flags; + if(lo_flags & LO_FLAGS_READ_ONLY) + lo->lo_flags |= 0x200000; /* export to user space */ lo->lo_backing_file = file; lo->transfer = NULL; lo->ioctl = NULL; - figure_loop_size(lo); - lo->old_gfp_mask = inode->i_mapping->gfp_mask; - inode->i_mapping->gfp_mask &= ~(__GFP_IO|__GFP_FS); - - bs = 0; - if (blksize_size[MAJOR(lo_device)]) - bs = blksize_size[MAJOR(lo_device)][MINOR(lo_device)]; - if (!bs) - bs = BLOCK_SIZE; + if (figure_loop_size(lo)) { + error = -EFBIG; + goto out_cleanup; + } - set_blocksize(dev, bs); + if (lo_flags & LO_FLAGS_DO_BMAP) { + lo->old_gfp_mask = inode->i_mapping->gfp_mask; + inode->i_mapping->gfp_mask &= ~(__GFP_IO|__GFP_FS); + inode->i_mapping->gfp_mask |= __GFP_HIGH; + } else { + lo->old_gfp_mask = -1; + } - lo->lo_bh = lo->lo_bhtail = NULL; - kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); - down(&lo->lo_sem); + loop_hardsizes[MINOR(dev)] = hardsz; + loop_set_softblksz(lo, dev); + error = kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + if(error < 0) + goto out_mapping; + down(&lo->lo_sem); fput(file); return 0; + out_mapping: + if(lo->old_gfp_mask != -1) + inode->i_mapping->gfp_mask = lo->old_gfp_mask; + out_cleanup: + loop_prealloc_cleanup(lo); + fput(file); out_putf: fput(file); out: @@ -708,13 +1506,14 @@ static int loop_release_xfer(struct loop_device 
*lo) { - int err = 0; + int err = 0; if (lo->lo_encrypt_type) { - struct loop_func_table *xfer= xfer_funcs[lo->lo_encrypt_type]; + struct loop_func_table *xfer= xfer_funcs[lo->lo_encrypt_type]; + lo->transfer = NULL; if (xfer && xfer->release) - err = xfer->release(lo); + err = xfer->release(lo); if (xfer && xfer->unlock) - xfer->unlock(lo); + xfer->unlock(lo); lo->lo_encrypt_type = 0; } return err; @@ -722,19 +1521,19 @@ static int loop_init_xfer(struct loop_device *lo, int type,struct loop_info *i) { - int err = 0; + int err = 0; if (type) { - struct loop_func_table *xfer = xfer_funcs[type]; + struct loop_func_table *xfer = xfer_funcs[type]; if (xfer->init) err = xfer->init(lo, i); - if (!err) { + if (!err) { lo->lo_encrypt_type = type; if (xfer->lock) xfer->lock(lo); } } return err; -} +} static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) { @@ -751,11 +1550,12 @@ spin_lock_irq(&lo->lo_lock); lo->lo_state = Lo_rundown; if (atomic_dec_and_test(&lo->lo_pending)) - up(&lo->lo_bh_mutex); + wake_up_interruptible(&lo->lo_bh_wait); spin_unlock_irq(&lo->lo_lock); down(&lo->lo_sem); + loop_prealloc_cleanup(lo); lo->lo_backing_file = NULL; loop_release_xfer(lo); @@ -763,87 +1563,219 @@ lo->ioctl = NULL; lo->lo_device = 0; lo->lo_encrypt_type = 0; - lo->lo_offset = 0; +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + lo->lo_keyscrub_fn = 0; +#endif + lo->lo_offset = lo->lo_sizelimit = 0; + lo->lo_offs_sec = lo->lo_iv_remove = 0; lo->lo_encrypt_key_size = 0; lo->lo_flags = 0; memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); memset(lo->lo_name, 0, LO_NAME_SIZE); + memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); loop_sizes[lo->lo_number] = 0; invalidate_bdev(bdev, 0); - filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp; + if (gfp != -1) + filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp; lo->lo_state = Lo_unbound; fput(filp); MOD_DEC_USE_COUNT; return 0; } -static int loop_set_status(struct loop_device *lo, struct loop_info *arg) +static void +loop_info64_from_old(const 
struct loop_info *info, struct loop_info64 *info64) +{ + memset(info64, 0, sizeof(*info64)); + info64->lo_number = info->lo_number; + info64->lo_device = info->lo_device; + info64->lo_inode = info->lo_inode; + info64->lo_rdevice = info->lo_rdevice; + info64->lo_offset = info->lo_offset; + info64->lo_encrypt_type = info->lo_encrypt_type; + info64->lo_encrypt_key_size = info->lo_encrypt_key_size; + info64->lo_flags = info->lo_flags; + info64->lo_init[0] = info->lo_init[0]; + info64->lo_init[1] = info->lo_init[1]; + if (info->lo_encrypt_type == 18) /* LO_CRYPT_CRYPTOAPI */ + memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE); + else + memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE); + memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE); +} + +static int +loop_info64_to_old(struct loop_info64 *info64, struct loop_info *info) +{ + memset(info, 0, sizeof(*info)); + info->lo_number = info64->lo_number; + info->lo_device = info64->lo_device; + info->lo_inode = info64->lo_inode; + info->lo_rdevice = info64->lo_rdevice; + info->lo_offset = info64->lo_offset; + info->lo_encrypt_type = info64->lo_encrypt_type; + info->lo_encrypt_key_size = info64->lo_encrypt_key_size; + info->lo_flags = info64->lo_flags; + info->lo_init[0] = info64->lo_init[0]; + info->lo_init[1] = info64->lo_init[1]; + if (info->lo_encrypt_type == 18) /* LO_CRYPT_CRYPTOAPI */ + memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE); + else + memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE); + memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); + + /* error in case values were truncated */ + if (info->lo_device != info64->lo_device || + info->lo_rdevice != info64->lo_rdevice || + info->lo_inode != info64->lo_inode || + info->lo_offset != info64->lo_offset || + info64->lo_sizelimit) + return -EOVERFLOW; + + return 0; +} + +static int loop_set_status(struct loop_device *lo, kdev_t dev, struct loop_info64 *info, struct loop_info *oldinfo) { - struct 
loop_info info; int err; unsigned int type; - if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid && + if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid && !capable(CAP_SYS_ADMIN)) return -EPERM; if (lo->lo_state != Lo_bound) return -ENXIO; - if (copy_from_user(&info, arg, sizeof (struct loop_info))) - return -EFAULT; - if ((unsigned int) info.lo_encrypt_key_size > LO_KEY_SIZE) + if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) return -EINVAL; - type = info.lo_encrypt_type; + type = info->lo_encrypt_type; if (type >= MAX_LO_CRYPT || xfer_funcs[type] == NULL) return -EINVAL; - if (type == LO_CRYPT_XOR && info.lo_encrypt_key_size == 0) + if (type == LO_CRYPT_XOR && info->lo_encrypt_key_size == 0) return -EINVAL; err = loop_release_xfer(lo); - if (!err) - err = loop_init_xfer(lo, type, &info); if (err) return err; - lo->lo_offset = info.lo_offset; - strncpy(lo->lo_name, info.lo_name, LO_NAME_SIZE); + if ((loff_t)info->lo_offset < 0) { + /* negative offset == remove offset from IV computations */ + lo->lo_offset = -(info->lo_offset); + lo->lo_iv_remove = lo->lo_offset >> 9; + } else { + /* positive offset == include offset in IV computations */ + lo->lo_offset = info->lo_offset; + lo->lo_iv_remove = 0; + } + lo->lo_offs_sec = lo->lo_offset >> 9; + lo->lo_sizelimit = info->lo_sizelimit; + err = figure_loop_size(lo); + if (err) + return err; + loop_set_softblksz(lo, dev); + /* transfer init function for 2.4 kernels takes old style struct */ + err = loop_init_xfer(lo, type, oldinfo); + /* copy key -- just in case transfer init func modified it */ + memcpy(info->lo_encrypt_key, oldinfo->lo_encrypt_key, sizeof(info->lo_encrypt_key)); + if (err) + return err; + + strncpy(lo->lo_name, info->lo_file_name, LO_NAME_SIZE); + strncpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); lo->transfer = xfer_funcs[type]->transfer; lo->ioctl = xfer_funcs[type]->ioctl; - lo->lo_encrypt_key_size = info.lo_encrypt_key_size; - lo->lo_init[0] = 
info.lo_init[0]; - lo->lo_init[1] = info.lo_init[1]; - if (info.lo_encrypt_key_size) { - memcpy(lo->lo_encrypt_key, info.lo_encrypt_key, - info.lo_encrypt_key_size); - lo->lo_key_owner = current->uid; - } - figure_loop_size(lo); + lo->lo_encrypt_key_size = info->lo_encrypt_key_size; + lo->lo_init[0] = info->lo_init[0]; + lo->lo_init[1] = info->lo_init[1]; + if (info->lo_encrypt_key_size) { + memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, + info->lo_encrypt_key_size); + lo->lo_key_owner = current->uid; + } + return 0; } -static int loop_get_status(struct loop_device *lo, struct loop_info *arg) +static int loop_get_status(struct loop_device *lo, struct loop_info64 *info) { - struct loop_info info; struct file *file = lo->lo_backing_file; if (lo->lo_state != Lo_bound) return -ENXIO; - if (!arg) - return -EINVAL; - memset(&info, 0, sizeof(info)); - info.lo_number = lo->lo_number; - info.lo_device = kdev_t_to_nr(file->f_dentry->d_inode->i_dev); - info.lo_inode = file->f_dentry->d_inode->i_ino; - info.lo_rdevice = kdev_t_to_nr(lo->lo_device); - info.lo_offset = lo->lo_offset; - info.lo_flags = lo->lo_flags; - strncpy(info.lo_name, lo->lo_name, LO_NAME_SIZE); - info.lo_encrypt_type = lo->lo_encrypt_type; + memset(info, 0, sizeof(*info)); + info->lo_number = lo->lo_number; + info->lo_device = kdev_t_to_nr(file->f_dentry->d_inode->i_dev); + info->lo_inode = file->f_dentry->d_inode->i_ino; + info->lo_rdevice = kdev_t_to_nr(lo->lo_device); + info->lo_offset = lo->lo_iv_remove ? 
-(lo->lo_offset) : lo->lo_offset; + info->lo_sizelimit = lo->lo_sizelimit; + info->lo_flags = lo->lo_flags; + strncpy(info->lo_file_name, lo->lo_name, LO_NAME_SIZE); + strncpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE); + info->lo_encrypt_type = lo->lo_encrypt_type; if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) { - info.lo_encrypt_key_size = lo->lo_encrypt_key_size; - memcpy(info.lo_encrypt_key, lo->lo_encrypt_key, + info->lo_encrypt_key_size = lo->lo_encrypt_key_size; + memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, lo->lo_encrypt_key_size); + info->lo_init[0] = lo->lo_init[0]; + info->lo_init[1] = lo->lo_init[1]; } - return copy_to_user(arg, &info, sizeof(info)) ? -EFAULT : 0; + return 0; +} + +static int +loop_set_status_n(struct loop_device *lo, kdev_t dev, void *arg, int n) +{ + struct loop_info info; + struct loop_info64 info64; + int err; + + if (n) { + if (copy_from_user(&info64, arg, sizeof (struct loop_info64))) + return -EFAULT; + /* truncation errors can be ignored here as transfer init func only wants key bits */ + loop_info64_to_old(&info64, &info); + } else { + if (copy_from_user(&info, arg, sizeof (struct loop_info))) + return -EFAULT; + loop_info64_from_old(&info, &info64); + } + err = loop_set_status(lo, dev, &info64, &info); + memset(&info.lo_encrypt_key[0], 0, sizeof(info.lo_encrypt_key)); + memset(&info64.lo_encrypt_key[0], 0, sizeof(info64.lo_encrypt_key)); + return err; +} + +static int +loop_get_status_old(struct loop_device *lo, struct loop_info *arg) { + struct loop_info info; + struct loop_info64 info64; + int err = 0; + + if (!arg) + err = -EINVAL; + if (!err) + err = loop_get_status(lo, &info64); + if (!err) + err = loop_info64_to_old(&info64, &info); + if (!err && copy_to_user(arg, &info, sizeof(info))) + err = -EFAULT; + + return err; +} + +static int +loop_get_status64(struct loop_device *lo, struct loop_info64 *arg) { + struct loop_info64 info64; + int err = 0; + + if (!arg) + err = -EINVAL; + if (!err) + err = 
loop_get_status(lo, &info64); + if (!err && copy_to_user(arg, &info64, sizeof(info64))) + err = -EFAULT; + + return err; } static int lo_ioctl(struct inode * inode, struct file * file, @@ -872,10 +1804,16 @@ err = loop_clr_fd(lo, inode->i_bdev); break; case LOOP_SET_STATUS: - err = loop_set_status(lo, (struct loop_info *) arg); + err = loop_set_status_n(lo, inode->i_rdev, (void *) arg, 0); break; case LOOP_GET_STATUS: - err = loop_get_status(lo, (struct loop_info *) arg); + err = loop_get_status_old(lo, (struct loop_info *) arg); + break; + case LOOP_SET_STATUS64: + err = loop_set_status_n(lo, inode->i_rdev, (void *) arg, 1); + break; + case LOOP_GET_STATUS64: + err = loop_get_status64(lo, (struct loop_info64 *) arg); break; case BLKGETSIZE: if (lo->lo_state != Lo_bound) { @@ -894,6 +1832,8 @@ case BLKBSZGET: case BLKBSZSET: case BLKSSZGET: + case BLKROGET: + case BLKROSET: err = blk_ioctl(inode->i_rdev, cmd, arg); break; default: @@ -906,7 +1846,7 @@ static int lo_open(struct inode *inode, struct file *file) { struct loop_device *lo; - int dev, type; + int dev; if (!inode) return -EINVAL; @@ -921,10 +1861,6 @@ lo = &loop_dev[dev]; MOD_INC_USE_COUNT; down(&lo->lo_ctl_mutex); - - type = lo->lo_encrypt_type; - if (type && xfer_funcs[type] && xfer_funcs[type]->lock) - xfer_funcs[type]->lock(lo); lo->lo_refcnt++; up(&lo->lo_ctl_mutex); return 0; @@ -933,7 +1869,7 @@ static int lo_release(struct inode *inode, struct file *file) { struct loop_device *lo; - int dev, type; + int dev; if (!inode) return 0; @@ -948,11 +1884,7 @@ lo = &loop_dev[dev]; down(&lo->lo_ctl_mutex); - type = lo->lo_encrypt_type; --lo->lo_refcnt; - if (xfer_funcs[type] && xfer_funcs[type]->unlock) - xfer_funcs[type]->unlock(lo); - up(&lo->lo_ctl_mutex); MOD_DEC_USE_COUNT; return 0; @@ -974,34 +1906,32 @@ int loop_register_transfer(struct loop_func_table *funcs) { - if ((unsigned)funcs->number > MAX_LO_CRYPT || xfer_funcs[funcs->number]) + if ((unsigned)funcs->number >= MAX_LO_CRYPT || 
xfer_funcs[funcs->number]) return -EINVAL; xfer_funcs[funcs->number] = funcs; - return 0; + return 0; } int loop_unregister_transfer(int number) { - struct loop_device *lo; + struct loop_device *lo; if ((unsigned)number >= MAX_LO_CRYPT) - return -EINVAL; - for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) { + return -EINVAL; + for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) { int type = lo->lo_encrypt_type; - if (type == number) { - xfer_funcs[type]->release(lo); - lo->transfer = NULL; - lo->lo_encrypt_type = 0; + if (type == number) { + loop_release_xfer(lo); } } - xfer_funcs[number] = NULL; - return 0; + xfer_funcs[number] = NULL; + return 0; } EXPORT_SYMBOL(loop_register_transfer); EXPORT_SYMBOL(loop_unregister_transfer); -int __init loop_init(void) +int __init loop_init(void) { int i; @@ -1017,10 +1947,9 @@ return -EIO; } - loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL); if (!loop_dev) - return -ENOMEM; + goto out_dev; loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_sizes) @@ -1030,6 +1959,10 @@ if (!loop_blksizes) goto out_blksizes; + loop_hardsizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); + if (!loop_hardsizes) + goto out_hardsizes; + blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request); for (i = 0; i < max_loop; i++) { @@ -1037,45 +1970,86 @@ memset(lo, 0, sizeof(struct loop_device)); init_MUTEX(&lo->lo_ctl_mutex); init_MUTEX_LOCKED(&lo->lo_sem); - init_MUTEX_LOCKED(&lo->lo_bh_mutex); lo->lo_number = i; spin_lock_init(&lo->lo_lock); } memset(loop_sizes, 0, max_loop * sizeof(int)); memset(loop_blksizes, 0, max_loop * sizeof(int)); + memset(loop_hardsizes, 0, max_loop * sizeof(int)); blk_size[MAJOR_NR] = loop_sizes; blksize_size[MAJOR_NR] = loop_blksizes; + hardsect_size[MAJOR_NR] = loop_hardsizes; for (i = 0; i < max_loop; i++) register_disk(NULL, MKDEV(MAJOR_NR, i), 1, &lo_fops, 0); + for (i = 0; i < (sizeof(lo_prealloc) / sizeof(int)); i += 2) { + if (!lo_prealloc[i]) + 
continue; + if (lo_prealloc[i] < LO_PREALLOC_MIN) + lo_prealloc[i] = LO_PREALLOC_MIN; + if (lo_prealloc[i] > LO_PREALLOC_MAX) + lo_prealloc[i] = LO_PREALLOC_MAX; + } + +#if defined(IOCTL32_COMPATIBLE_PTR) + lock_kernel(); + register_ioctl32_conversion(LOOP_SET_STATUS64, IOCTL32_COMPATIBLE_PTR); + register_ioctl32_conversion(LOOP_GET_STATUS64, IOCTL32_COMPATIBLE_PTR); + register_ioctl32_conversion(LOOP_MULTI_KEY_SETUP, IOCTL32_COMPATIBLE_PTR); + register_ioctl32_conversion(LOOP_MULTI_KEY_SETUP_V3, IOCTL32_COMPATIBLE_PTR); + unlock_kernel(); +#endif + devfs_handle = devfs_mk_dir(NULL, "loop", NULL); devfs_register_series(devfs_handle, "%u", max_loop, DEVFS_FL_DEFAULT, MAJOR_NR, 0, S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, &lo_fops, NULL); +#if CONFIG_BLK_DEV_LOOP_AES +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB + printk(KERN_INFO "loop: AES key scrubbing enabled\n"); +#endif +#endif printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop); return 0; +out_hardsizes: + kfree(loop_blksizes); out_blksizes: kfree(loop_sizes); out_sizes: kfree(loop_dev); +out_dev: if (devfs_unregister_blkdev(MAJOR_NR, "loop")) printk(KERN_WARNING "loop: cannot unregister blkdev\n"); printk(KERN_ERR "loop: ran out of memory\n"); return -ENOMEM; } -void loop_exit(void) +void loop_exit(void) { devfs_unregister(devfs_handle); if (devfs_unregister_blkdev(MAJOR_NR, "loop")) printk(KERN_WARNING "loop: cannot unregister blkdev\n"); + + blk_size[MAJOR_NR] = 0; + blksize_size[MAJOR_NR] = 0; + hardsect_size[MAJOR_NR] = 0; kfree(loop_dev); kfree(loop_sizes); kfree(loop_blksizes); + kfree(loop_hardsizes); + +#if defined(IOCTL32_COMPATIBLE_PTR) + lock_kernel(); + unregister_ioctl32_conversion(LOOP_SET_STATUS64); + unregister_ioctl32_conversion(LOOP_GET_STATUS64); + unregister_ioctl32_conversion(LOOP_MULTI_KEY_SETUP); + unregister_ioctl32_conversion(LOOP_MULTI_KEY_SETUP_V3); + unlock_kernel(); +#endif } module_init(loop_init); @@ -1089,4 +2063,15 @@ } __setup("max_loop=", max_loop_setup); +#endif + +#if 
CONFIG_BLK_DEV_LOOP_KEYSCRUB +void loop_add_keyscrub_fn(struct loop_device *lo, void (*fn)(void *), void *ptr) +{ + lo->lo_keyscrub_ptr = ptr; + wmb(); + lo->lo_keyscrub_fn = fn; + wake_up_interruptible(&lo->lo_bh_wait); +} +EXPORT_SYMBOL(loop_add_keyscrub_fn); #endif diff -urN linux-2.4.28/drivers/misc/Makefile linux-2.4.28-loop-AES-v3.0b/drivers/misc/Makefile --- linux-2.4.28/drivers/misc/Makefile Sat Dec 1 18:27:13 2001 +++ linux-2.4.28-loop-AES-v3.0b/drivers/misc/Makefile Sun Feb 6 18:45:39 2005 @@ -9,7 +9,34 @@ # parent makes.. # +.S.o: + $(CC) $(AFLAGS) $(AFLAGS_$@) -c $< -o $*.o + O_TARGET := misc.o + +ifeq ($(CONFIG_BLK_DEV_LOOP_AES),y) +AES_X86_ASM=n +ifeq ($(CONFIG_X86),y) +ifneq ($(CONFIG_X86_64),y) + AES_X86_ASM=y +endif +endif +ifeq ($(AES_X86_ASM),y) + export-objs += crypto-ksym.o + obj-y += aes-x86.o md5-x86.o crypto-ksym.o + AFLAGS_aes-x86.o := -DUSE_UNDERLINE=1 +else +ifeq ($(CONFIG_X86_64),y) + export-objs += crypto-ksym.o + obj-y += aes-amd64.o md5-amd64.o crypto-ksym.o + AFLAGS_aes-amd64.o := -DUSE_UNDERLINE=1 +else + export-objs += crypto-ksym.o + obj-y += aes.o md5.o crypto-ksym.o + CFLAGS_aes.o := -DDATA_ALWAYS_ALIGNED=1 +endif +endif +endif include $(TOPDIR)/Rules.make diff -urN linux-2.4.28/drivers/misc/aes-amd64.S linux-2.4.28-loop-AES-v3.0b/drivers/misc/aes-amd64.S --- linux-2.4.28/drivers/misc/aes-amd64.S Thu Jan 1 01:00:00 1970 +++ linux-2.4.28-loop-AES-v3.0b/drivers/misc/aes-amd64.S Sun Feb 6 18:45:39 2005 @@ -0,0 +1,893 @@ +// +// Copyright (c) 2001, Dr Brian Gladman , Worcester, UK. +// All rights reserved. +// +// TERMS +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted subject to the following conditions: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The copyright holder's name must not be used to endorse or promote +// any products derived from this software without his specific prior +// written permission. +// +// This software is provided 'as is' with no express or implied warranties +// of correctness or fitness for purpose. + +// Modified by Jari Ruusu, December 24 2001 +// - Converted syntax to GNU CPP/assembler syntax +// - C programming interface converted back to "old" API +// - Minor portability cleanups and speed optimizations + +// Modified by Jari Ruusu, April 11 2002 +// - Added above copyright and terms to resulting object code so that +// binary distributions can avoid legal trouble + +// Modified by Jari Ruusu, June 12 2004 +// - Converted 32 bit x86 code to 64 bit AMD64 code +// - Re-wrote encrypt and decrypt code from scratch + +// An AES (Rijndael) implementation for the AMD64. This version only +// implements the standard AES block length (128 bits, 16 bytes). This code +// does not preserve the rax, rcx, rdx, rsi, rdi or r8-r11 registers or the +// arithmetic status flags. However, the rbx, rbp and r12-r15 registers are +// preserved across calls. 
+ +// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f) +// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[]) +// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[]) + +#if defined(USE_UNDERLINE) +# define aes_set_key _aes_set_key +# define aes_encrypt _aes_encrypt +# define aes_decrypt _aes_decrypt +#endif +#if !defined(ALIGN64BYTES) +# define ALIGN64BYTES 64 +#endif + + .file "aes-amd64.S" + .globl aes_set_key + .globl aes_encrypt + .globl aes_decrypt + + .section .rodata +copyright: + .ascii " \000" + .ascii "Copyright (c) 2001, Dr Brian Gladman , Worcester, UK.\000" + .ascii "All rights reserved.\000" + .ascii " \000" + .ascii "TERMS\000" + .ascii " \000" + .ascii " Redistribution and use in source and binary forms, with or without\000" + .ascii " modification, are permitted subject to the following conditions:\000" + .ascii " \000" + .ascii " 1. Redistributions of source code must retain the above copyright\000" + .ascii " notice, this list of conditions and the following disclaimer.\000" + .ascii " \000" + .ascii " 2. Redistributions in binary form must reproduce the above copyright\000" + .ascii " notice, this list of conditions and the following disclaimer in the\000" + .ascii " documentation and/or other materials provided with the distribution.\000" + .ascii " \000" + .ascii " 3. 
The copyright holder's name must not be used to endorse or promote\000" + .ascii " any products derived from this software without his specific prior\000" + .ascii " written permission.\000" + .ascii " \000" + .ascii " This software is provided 'as is' with no express or implied warranties\000" + .ascii " of correctness or fitness for purpose.\000" + .ascii " \000" + +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) + +// offsets in context structure + +#define nkey 0 // key length, size 4 +#define nrnd 4 // number of rounds, size 4 +#define ekey 8 // encryption key schedule base address, size 256 +#define dkey 264 // decryption key schedule base address, size 256 + +// This macro performs a forward encryption cycle. It is entered with +// the first previous round column values in I1E, I2E, I3E and I4E and +// exits with the final values OU1, OU2, OU3 and OU4 registers. + +#define fwd_rnd(p1,p2,I1E,I1B,I1H,I2E,I2B,I2H,I3E,I3B,I3R,I4E,I4B,I4R,OU1,OU2,OU3,OU4) \ + movl p2(%rbp),OU1 ;\ + movl p2+4(%rbp),OU2 ;\ + movl p2+8(%rbp),OU3 ;\ + movl p2+12(%rbp),OU4 ;\ + movzbl I1B,%edi ;\ + movzbl I2B,%esi ;\ + movzbl I3B,%r8d ;\ + movzbl I4B,%r13d ;\ + shrl $8,I3E ;\ + shrl $8,I4E ;\ + xorl p1(,%rdi,4),OU1 ;\ + xorl p1(,%rsi,4),OU2 ;\ + xorl p1(,%r8,4),OU3 ;\ + xorl p1(,%r13,4),OU4 ;\ + movzbl I2H,%esi ;\ + movzbl I3B,%r8d ;\ + movzbl I4B,%r13d ;\ + movzbl I1H,%edi ;\ + shrl $8,I3E ;\ + shrl $8,I4E ;\ + xorl p1+tlen(,%rsi,4),OU1 ;\ + xorl p1+tlen(,%r8,4),OU2 ;\ + xorl p1+tlen(,%r13,4),OU3 ;\ + xorl p1+tlen(,%rdi,4),OU4 ;\ + shrl $16,I1E ;\ + shrl $16,I2E ;\ + movzbl I3B,%r8d ;\ + movzbl I4B,%r13d ;\ + movzbl I1B,%edi ;\ + movzbl I2B,%esi ;\ + xorl p1+2*tlen(,%r8,4),OU1 ;\ + xorl p1+2*tlen(,%r13,4),OU2 ;\ + xorl p1+2*tlen(,%rdi,4),OU3 ;\ + xorl p1+2*tlen(,%rsi,4),OU4 ;\ + shrl $8,I4E ;\ + movzbl I1H,%edi ;\ + movzbl I2H,%esi ;\ + shrl $8,I3E ;\ + xorl p1+3*tlen(,I4R,4),OU1 ;\ + xorl p1+3*tlen(,%rdi,4),OU2 ;\ + xorl p1+3*tlen(,%rsi,4),OU3 ;\ + xorl 
p1+3*tlen(,I3R,4),OU4 + +// This macro performs an inverse encryption cycle. It is entered with +// the first previous round column values in I1E, I2E, I3E and I4E and +// exits with the final values OU1, OU2, OU3 and OU4 registers. + +#define inv_rnd(p1,p2,I1E,I1B,I1R,I2E,I2B,I2R,I3E,I3B,I3H,I4E,I4B,I4H,OU1,OU2,OU3,OU4) \ + movl p2+12(%rbp),OU4 ;\ + movl p2+8(%rbp),OU3 ;\ + movl p2+4(%rbp),OU2 ;\ + movl p2(%rbp),OU1 ;\ + movzbl I4B,%edi ;\ + movzbl I3B,%esi ;\ + movzbl I2B,%r8d ;\ + movzbl I1B,%r13d ;\ + shrl $8,I2E ;\ + shrl $8,I1E ;\ + xorl p1(,%rdi,4),OU4 ;\ + xorl p1(,%rsi,4),OU3 ;\ + xorl p1(,%r8,4),OU2 ;\ + xorl p1(,%r13,4),OU1 ;\ + movzbl I3H,%esi ;\ + movzbl I2B,%r8d ;\ + movzbl I1B,%r13d ;\ + movzbl I4H,%edi ;\ + shrl $8,I2E ;\ + shrl $8,I1E ;\ + xorl p1+tlen(,%rsi,4),OU4 ;\ + xorl p1+tlen(,%r8,4),OU3 ;\ + xorl p1+tlen(,%r13,4),OU2 ;\ + xorl p1+tlen(,%rdi,4),OU1 ;\ + shrl $16,I4E ;\ + shrl $16,I3E ;\ + movzbl I2B,%r8d ;\ + movzbl I1B,%r13d ;\ + movzbl I4B,%edi ;\ + movzbl I3B,%esi ;\ + xorl p1+2*tlen(,%r8,4),OU4 ;\ + xorl p1+2*tlen(,%r13,4),OU3 ;\ + xorl p1+2*tlen(,%rdi,4),OU2 ;\ + xorl p1+2*tlen(,%rsi,4),OU1 ;\ + shrl $8,I1E ;\ + movzbl I4H,%edi ;\ + movzbl I3H,%esi ;\ + shrl $8,I2E ;\ + xorl p1+3*tlen(,I1R,4),OU4 ;\ + xorl p1+3*tlen(,%rdi,4),OU3 ;\ + xorl p1+3*tlen(,%rsi,4),OU2 ;\ + xorl p1+3*tlen(,I2R,4),OU1 + +// AES (Rijndael) Encryption Subroutine + +// rdi = pointer to AES context +// rsi = pointer to input plaintext bytes +// rdx = pointer to output ciphertext bytes + + .text + .align ALIGN64BYTES +aes_encrypt: + movl (%rsi),%eax // read in plaintext + movl 4(%rsi),%ecx + movl 8(%rsi),%r10d + movl 12(%rsi),%r11d + + pushq %rbp + leaq ekey+16(%rdi),%rbp // encryption key pointer + movq %rdx,%r9 // pointer to out block + movl nrnd(%rdi),%edx // number of rounds + pushq %rbx + pushq %r13 + pushq %r14 + pushq %r15 + + xorl -16(%rbp),%eax // xor in first round key + xorl -12(%rbp),%ecx + xorl -8(%rbp),%r10d + xorl -4(%rbp),%r11d + + subl $10,%edx + je 
aes_15 + addq $32,%rbp + subl $2,%edx + je aes_13 + addq $32,%rbp + + fwd_rnd(aes_ft_tab,-64,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d) + fwd_rnd(aes_ft_tab,-48,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d) + jmp aes_13 + .align ALIGN64BYTES +aes_13: fwd_rnd(aes_ft_tab,-32,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d) + fwd_rnd(aes_ft_tab,-16,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d) + jmp aes_15 + .align ALIGN64BYTES +aes_15: fwd_rnd(aes_ft_tab,0, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d) + fwd_rnd(aes_ft_tab,16, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d) + fwd_rnd(aes_ft_tab,32, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d) + fwd_rnd(aes_ft_tab,48, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d) + fwd_rnd(aes_ft_tab,64, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d) + fwd_rnd(aes_ft_tab,80, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d) + fwd_rnd(aes_ft_tab,96, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d) + fwd_rnd(aes_ft_tab,112,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d) + fwd_rnd(aes_ft_tab,128,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d) + fwd_rnd(aes_fl_tab,144,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d) + + popq %r15 + popq %r14 + popq %r13 + popq %rbx + popq %rbp + + movl %eax,(%r9) // move final values to the output array. 
+ movl %ecx,4(%r9) + movl %r10d,8(%r9) + movl %r11d,12(%r9) + ret + +// AES (Rijndael) Decryption Subroutine + +// rdi = pointer to AES context +// rsi = pointer to input ciphertext bytes +// rdx = pointer to output plaintext bytes + + .align ALIGN64BYTES +aes_decrypt: + movl 12(%rsi),%eax // read in ciphertext + movl 8(%rsi),%ecx + movl 4(%rsi),%r10d + movl (%rsi),%r11d + + pushq %rbp + leaq dkey+16(%rdi),%rbp // decryption key pointer + movq %rdx,%r9 // pointer to out block + movl nrnd(%rdi),%edx // number of rounds + pushq %rbx + pushq %r13 + pushq %r14 + pushq %r15 + + xorl -4(%rbp),%eax // xor in first round key + xorl -8(%rbp),%ecx + xorl -12(%rbp),%r10d + xorl -16(%rbp),%r11d + + subl $10,%edx + je aes_25 + addq $32,%rbp + subl $2,%edx + je aes_23 + addq $32,%rbp + + inv_rnd(aes_it_tab,-64,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx) + inv_rnd(aes_it_tab,-48,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax) + jmp aes_23 + .align ALIGN64BYTES +aes_23: inv_rnd(aes_it_tab,-32,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx) + inv_rnd(aes_it_tab,-16,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax) + jmp aes_25 + .align ALIGN64BYTES +aes_25: inv_rnd(aes_it_tab,0, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx) + inv_rnd(aes_it_tab,16, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax) + inv_rnd(aes_it_tab,32, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx) + inv_rnd(aes_it_tab,48, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax) + inv_rnd(aes_it_tab,64, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx) + inv_rnd(aes_it_tab,80, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax) + inv_rnd(aes_it_tab,96, 
%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx) + inv_rnd(aes_it_tab,112,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax) + inv_rnd(aes_it_tab,128,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx) + inv_rnd(aes_il_tab,144,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax) + + popq %r15 + popq %r14 + popq %r13 + popq %rbx + popq %rbp + + movl %eax,12(%r9) // move final values to the output array. + movl %ecx,8(%r9) + movl %r10d,4(%r9) + movl %r11d,(%r9) + ret + +// AES (Rijndael) Key Schedule Subroutine + +// This macro performs a column mixing operation on an input 32-bit +// word to give a 32-bit result. It uses each of the 4 bytes in the +// the input column to index 4 different tables of 256 32-bit words +// that are xored together to form the output value. + +#define mix_col(p1) \ + movzbl %bl,%ecx ;\ + movl p1(,%rcx,4),%eax ;\ + movzbl %bh,%ecx ;\ + ror $16,%ebx ;\ + xorl p1+tlen(,%rcx,4),%eax ;\ + movzbl %bl,%ecx ;\ + xorl p1+2*tlen(,%rcx,4),%eax ;\ + movzbl %bh,%ecx ;\ + xorl p1+3*tlen(,%rcx,4),%eax + +// Key Schedule Macros + +#define ksc4(p1) \ + rol $24,%ebx ;\ + mix_col(aes_fl_tab) ;\ + ror $8,%ebx ;\ + xorl 4*p1+aes_rcon_tab,%eax ;\ + xorl %eax,%esi ;\ + xorl %esi,%ebp ;\ + movl %esi,16*p1(%rdi) ;\ + movl %ebp,16*p1+4(%rdi) ;\ + xorl %ebp,%edx ;\ + xorl %edx,%ebx ;\ + movl %edx,16*p1+8(%rdi) ;\ + movl %ebx,16*p1+12(%rdi) + +#define ksc6(p1) \ + rol $24,%ebx ;\ + mix_col(aes_fl_tab) ;\ + ror $8,%ebx ;\ + xorl 4*p1+aes_rcon_tab,%eax ;\ + xorl 24*p1-24(%rdi),%eax ;\ + movl %eax,24*p1(%rdi) ;\ + xorl 24*p1-20(%rdi),%eax ;\ + movl %eax,24*p1+4(%rdi) ;\ + xorl %eax,%esi ;\ + xorl %esi,%ebp ;\ + movl %esi,24*p1+8(%rdi) ;\ + movl %ebp,24*p1+12(%rdi) ;\ + xorl %ebp,%edx ;\ + xorl %edx,%ebx ;\ + movl %edx,24*p1+16(%rdi) ;\ + movl %ebx,24*p1+20(%rdi) + +#define ksc8(p1) \ + rol $24,%ebx ;\ + mix_col(aes_fl_tab) ;\ + ror $8,%ebx ;\ + 
xorl 4*p1+aes_rcon_tab,%eax ;\ + xorl 32*p1-32(%rdi),%eax ;\ + movl %eax,32*p1(%rdi) ;\ + xorl 32*p1-28(%rdi),%eax ;\ + movl %eax,32*p1+4(%rdi) ;\ + xorl 32*p1-24(%rdi),%eax ;\ + movl %eax,32*p1+8(%rdi) ;\ + xorl 32*p1-20(%rdi),%eax ;\ + movl %eax,32*p1+12(%rdi) ;\ + pushq %rbx ;\ + movl %eax,%ebx ;\ + mix_col(aes_fl_tab) ;\ + popq %rbx ;\ + xorl %eax,%esi ;\ + xorl %esi,%ebp ;\ + movl %esi,32*p1+16(%rdi) ;\ + movl %ebp,32*p1+20(%rdi) ;\ + xorl %ebp,%edx ;\ + xorl %edx,%ebx ;\ + movl %edx,32*p1+24(%rdi) ;\ + movl %ebx,32*p1+28(%rdi) + +// rdi = pointer to AES context +// rsi = pointer to key bytes +// rdx = key length, bytes or bits +// rcx = ed_flag, 1=encrypt only, 0=both encrypt and decrypt + + .align ALIGN64BYTES +aes_set_key: + pushfq + pushq %rbp + pushq %rbx + + movq %rcx,%r11 // ed_flg + movq %rdx,%rcx // key length + movq %rdi,%r10 // AES context + + cmpl $128,%ecx + jb aes_30 + shrl $3,%ecx +aes_30: cmpl $32,%ecx + je aes_32 + cmpl $24,%ecx + je aes_32 + movl $16,%ecx +aes_32: shrl $2,%ecx + movl %ecx,nkey(%r10) + leaq 6(%rcx),%rax // 10/12/14 for 4/6/8 32-bit key length + movl %eax,nrnd(%r10) + leaq ekey(%r10),%rdi // key position in AES context + cld + movl %ecx,%eax // save key length in eax + rep ; movsl // words in the key schedule + movl -4(%rsi),%ebx // put some values in registers + movl -8(%rsi),%edx // to allow faster code + movl -12(%rsi),%ebp + movl -16(%rsi),%esi + + cmpl $4,%eax // jump on key size + je aes_36 + cmpl $6,%eax + je aes_35 + + ksc8(0) + ksc8(1) + ksc8(2) + ksc8(3) + ksc8(4) + ksc8(5) + ksc8(6) + jmp aes_37 +aes_35: ksc6(0) + ksc6(1) + ksc6(2) + ksc6(3) + ksc6(4) + ksc6(5) + ksc6(6) + ksc6(7) + jmp aes_37 +aes_36: ksc4(0) + ksc4(1) + ksc4(2) + ksc4(3) + ksc4(4) + ksc4(5) + ksc4(6) + ksc4(7) + ksc4(8) + ksc4(9) +aes_37: cmpl $0,%r11d // ed_flg + jne aes_39 + +// compile decryption key schedule from encryption schedule - reverse +// order and do mix_column operation on round keys except first and last + + movl nrnd(%r10),%eax // 
kt = cx->d_key + nc * cx->Nrnd + shl $2,%rax + leaq dkey(%r10,%rax,4),%rdi + leaq ekey(%r10),%rsi // kf = cx->e_key + + movsq // copy first round key (unmodified) + movsq + subq $32,%rdi + movl $1,%r9d +aes_38: // do mix column on each column of + lodsl // each round key + movl %eax,%ebx + mix_col(aes_im_tab) + stosl + lodsl + movl %eax,%ebx + mix_col(aes_im_tab) + stosl + lodsl + movl %eax,%ebx + mix_col(aes_im_tab) + stosl + lodsl + movl %eax,%ebx + mix_col(aes_im_tab) + stosl + subq $32,%rdi + + incl %r9d + cmpl nrnd(%r10),%r9d + jb aes_38 + + movsq // copy last round key (unmodified) + movsq +aes_39: popq %rbx + popq %rbp + popfq + ret + + +// finite field multiplies by {02}, {04} and {08} + +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) + +// finite field multiplies required in table generation + +#define f3(x) (f2(x) ^ x) +#define f9(x) (f8(x) ^ x) +#define fb(x) (f8(x) ^ f2(x) ^ x) +#define fd(x) (f8(x) ^ f4(x) ^ x) +#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +// These defines generate the forward table entries + +#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x)) +#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x)) +#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x) +#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x) + +// These defines generate the inverse table entries + +#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x)) +#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x)) +#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x)) +#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x)) + +// These defines generate entries for the last round tables + +#define w0(x) (x) +#define w1(x) (x << 8) +#define w2(x) (x << 16) +#define w3(x) (x << 24) + +// macro to generate inverse mix column tables (needed for the key schedule) + 
+#define im_data0(p1) \ + .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\ + .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\ + .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\ + .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f) +#define im_data1(p1) \ + .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\ + .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\ + .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\ + .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f) +#define im_data2(p1) \ + .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\ + .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\ + .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\ + .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f) +#define im_data3(p1) \ + .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\ + .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\ + .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\ + .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f) +#define im_data4(p1) \ + .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\ + .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\ + .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\ + .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f) +#define im_data5(p1) \ + .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\ + .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\ + .long 
p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\ + .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf) +#define im_data6(p1) \ + .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\ + .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\ + .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\ + .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf) +#define im_data7(p1) \ + .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\ + .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\ + .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\ + .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff) + +// S-box data - 256 entries + +#define sb_data0(p1) \ + .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\ + .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\ + .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\ + .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0) +#define sb_data1(p1) \ + .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\ + .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\ + .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\ + .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75) +#define sb_data2(p1) \ + .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\ + .long p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\ + .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\ + .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf) +#define sb_data3(p1) \ + .long 
p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\ + .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\ + .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\ + .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2) +#define sb_data4(p1) \ + .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\ + .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\ + .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\ + .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb) +#define sb_data5(p1) \ + .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\ + .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\ + .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\ + .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08) +#define sb_data6(p1) \ + .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\ + .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\ + .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\ + .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e) +#define sb_data7(p1) \ + .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\ + .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\ + .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\ + .long p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16) + +// Inverse S-box data - 256 entries + +#define ib_data0(p1) \ + .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\ + .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\ + .long 
p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\ + .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb) +#define ib_data1(p1) \ + .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\ + .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\ + .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\ + .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25) +#define ib_data2(p1) \ + .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\ + .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\ + .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\ + .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84) +#define ib_data3(p1) \ + .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\ + .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\ + .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\ + .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b) +#define ib_data4(p1) \ + .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\ + .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\ + .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\ + .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e) +#define ib_data5(p1) \ + .long p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\ + .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\ + .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\ + .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4) +#define ib_data6(p1) \ + .long 
p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\ + .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\ + .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\ + .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef) +#define ib_data7(p1) \ + .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\ + .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\ + .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\ + .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d) + +// The rcon_table (needed for the key schedule) +// +// Here is original Dr Brian Gladman's source code: +// _rcon_tab: +// %assign x 1 +// %rep 29 +// dd x +// %assign x f2(x) +// %endrep +// +// Here is precomputed output (it's more portable this way): + + .section .rodata + .align ALIGN64BYTES +aes_rcon_tab: + .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80 + .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f + .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4 + .long 0xb3,0x7d,0xfa,0xef,0xc5 + +// The forward xor tables + + .align ALIGN64BYTES +aes_ft_tab: + sb_data0(u0) + sb_data1(u0) + sb_data2(u0) + sb_data3(u0) + sb_data4(u0) + sb_data5(u0) + sb_data6(u0) + sb_data7(u0) + + sb_data0(u1) + sb_data1(u1) + sb_data2(u1) + sb_data3(u1) + sb_data4(u1) + sb_data5(u1) + sb_data6(u1) + sb_data7(u1) + + sb_data0(u2) + sb_data1(u2) + sb_data2(u2) + sb_data3(u2) + sb_data4(u2) + sb_data5(u2) + sb_data6(u2) + sb_data7(u2) + + sb_data0(u3) + sb_data1(u3) + sb_data2(u3) + sb_data3(u3) + sb_data4(u3) + sb_data5(u3) + sb_data6(u3) + sb_data7(u3) + + .align ALIGN64BYTES +aes_fl_tab: + sb_data0(w0) + sb_data1(w0) + sb_data2(w0) + sb_data3(w0) + sb_data4(w0) + sb_data5(w0) + sb_data6(w0) + sb_data7(w0) + + sb_data0(w1) + sb_data1(w1) + sb_data2(w1) + sb_data3(w1) + sb_data4(w1) + sb_data5(w1) + sb_data6(w1) + sb_data7(w1) + 
+ sb_data0(w2) + sb_data1(w2) + sb_data2(w2) + sb_data3(w2) + sb_data4(w2) + sb_data5(w2) + sb_data6(w2) + sb_data7(w2) + + sb_data0(w3) + sb_data1(w3) + sb_data2(w3) + sb_data3(w3) + sb_data4(w3) + sb_data5(w3) + sb_data6(w3) + sb_data7(w3) + +// The inverse xor tables + + .align ALIGN64BYTES +aes_it_tab: + ib_data0(v0) + ib_data1(v0) + ib_data2(v0) + ib_data3(v0) + ib_data4(v0) + ib_data5(v0) + ib_data6(v0) + ib_data7(v0) + + ib_data0(v1) + ib_data1(v1) + ib_data2(v1) + ib_data3(v1) + ib_data4(v1) + ib_data5(v1) + ib_data6(v1) + ib_data7(v1) + + ib_data0(v2) + ib_data1(v2) + ib_data2(v2) + ib_data3(v2) + ib_data4(v2) + ib_data5(v2) + ib_data6(v2) + ib_data7(v2) + + ib_data0(v3) + ib_data1(v3) + ib_data2(v3) + ib_data3(v3) + ib_data4(v3) + ib_data5(v3) + ib_data6(v3) + ib_data7(v3) + + .align ALIGN64BYTES +aes_il_tab: + ib_data0(w0) + ib_data1(w0) + ib_data2(w0) + ib_data3(w0) + ib_data4(w0) + ib_data5(w0) + ib_data6(w0) + ib_data7(w0) + + ib_data0(w1) + ib_data1(w1) + ib_data2(w1) + ib_data3(w1) + ib_data4(w1) + ib_data5(w1) + ib_data6(w1) + ib_data7(w1) + + ib_data0(w2) + ib_data1(w2) + ib_data2(w2) + ib_data3(w2) + ib_data4(w2) + ib_data5(w2) + ib_data6(w2) + ib_data7(w2) + + ib_data0(w3) + ib_data1(w3) + ib_data2(w3) + ib_data3(w3) + ib_data4(w3) + ib_data5(w3) + ib_data6(w3) + ib_data7(w3) + +// The inverse mix column tables + + .align ALIGN64BYTES +aes_im_tab: + im_data0(v0) + im_data1(v0) + im_data2(v0) + im_data3(v0) + im_data4(v0) + im_data5(v0) + im_data6(v0) + im_data7(v0) + + im_data0(v1) + im_data1(v1) + im_data2(v1) + im_data3(v1) + im_data4(v1) + im_data5(v1) + im_data6(v1) + im_data7(v1) + + im_data0(v2) + im_data1(v2) + im_data2(v2) + im_data3(v2) + im_data4(v2) + im_data5(v2) + im_data6(v2) + im_data7(v2) + + im_data0(v3) + im_data1(v3) + im_data2(v3) + im_data3(v3) + im_data4(v3) + im_data5(v3) + im_data6(v3) + im_data7(v3) diff -urN linux-2.4.28/drivers/misc/aes-x86.S linux-2.4.28-loop-AES-v3.0b/drivers/misc/aes-x86.S --- 
linux-2.4.28/drivers/misc/aes-x86.S Thu Jan 1 01:00:00 1970 +++ linux-2.4.28-loop-AES-v3.0b/drivers/misc/aes-x86.S Sun Feb 6 18:45:39 2005 @@ -0,0 +1,922 @@ +// +// Copyright (c) 2001, Dr Brian Gladman , Worcester, UK. +// All rights reserved. +// +// TERMS +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted subject to the following conditions: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The copyright holder's name must not be used to endorse or promote +// any products derived from this software without his specific prior +// written permission. +// +// This software is provided 'as is' with no express or implied warranties +// of correctness or fitness for purpose. + +// Modified by Jari Ruusu, December 24 2001 +// - Converted syntax to GNU CPP/assembler syntax +// - C programming interface converted back to "old" API +// - Minor portability cleanups and speed optimizations + +// Modified by Jari Ruusu, April 11 2002 +// - Added above copyright and terms to resulting object code so that +// binary distributions can avoid legal trouble + +// An AES (Rijndael) implementation for x86 compatible processors. This +// version uses i386 instruction set but instruction scheduling is optimized +// for Pentium-2. This version only implements the standard AES block length +// (128 bits, 16 bytes). This code does not preserve the eax, ecx or edx +// registers or the arithmetic status flags. However, the ebx, esi, edi, and +// ebp registers are preserved across calls. 
+ +// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f) +// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[]) +// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[]) + +#if defined(USE_UNDERLINE) +# define aes_set_key _aes_set_key +# define aes_encrypt _aes_encrypt +# define aes_decrypt _aes_decrypt +#endif +#if !defined(ALIGN32BYTES) +# define ALIGN32BYTES 32 +#endif + + .file "aes-x86.S" + .globl aes_set_key + .globl aes_encrypt + .globl aes_decrypt + + .text +copyright: + .ascii " \000" + .ascii "Copyright (c) 2001, Dr Brian Gladman , Worcester, UK.\000" + .ascii "All rights reserved.\000" + .ascii " \000" + .ascii "TERMS\000" + .ascii " \000" + .ascii " Redistribution and use in source and binary forms, with or without\000" + .ascii " modification, are permitted subject to the following conditions:\000" + .ascii " \000" + .ascii " 1. Redistributions of source code must retain the above copyright\000" + .ascii " notice, this list of conditions and the following disclaimer.\000" + .ascii " \000" + .ascii " 2. Redistributions in binary form must reproduce the above copyright\000" + .ascii " notice, this list of conditions and the following disclaimer in the\000" + .ascii " documentation and/or other materials provided with the distribution.\000" + .ascii " \000" + .ascii " 3. 
The copyright holder's name must not be used to endorse or promote\000" + .ascii " any products derived from this software without his specific prior\000" + .ascii " written permission.\000" + .ascii " \000" + .ascii " This software is provided 'as is' with no express or implied warranties\000" + .ascii " of correctness or fitness for purpose.\000" + .ascii " \000" + +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) + +// offsets to parameters with one register pushed onto stack + +#define ctx 8 // AES context structure +#define in_blk 12 // input byte array address parameter +#define out_blk 16 // output byte array address parameter + +// offsets in context structure + +#define nkey 0 // key length, size 4 +#define nrnd 4 // number of rounds, size 4 +#define ekey 8 // encryption key schedule base address, size 256 +#define dkey 264 // decryption key schedule base address, size 256 + +// This macro performs a forward encryption cycle. It is entered with +// the first previous round column values in %eax, %ebx, %esi and %edi and +// exits with the final values in the same registers. 
+ +#define fwd_rnd(p1,p2) \ + mov %ebx,(%esp) ;\ + movzbl %al,%edx ;\ + mov %eax,%ecx ;\ + mov p2(%ebp),%eax ;\ + mov %edi,4(%esp) ;\ + mov p2+12(%ebp),%edi ;\ + xor p1(,%edx,4),%eax ;\ + movzbl %ch,%edx ;\ + shr $16,%ecx ;\ + mov p2+4(%ebp),%ebx ;\ + xor p1+tlen(,%edx,4),%edi ;\ + movzbl %cl,%edx ;\ + movzbl %ch,%ecx ;\ + xor p1+3*tlen(,%ecx,4),%ebx ;\ + mov %esi,%ecx ;\ + mov p1+2*tlen(,%edx,4),%esi ;\ + movzbl %cl,%edx ;\ + xor p1(,%edx,4),%esi ;\ + movzbl %ch,%edx ;\ + shr $16,%ecx ;\ + xor p1+tlen(,%edx,4),%ebx ;\ + movzbl %cl,%edx ;\ + movzbl %ch,%ecx ;\ + xor p1+2*tlen(,%edx,4),%eax ;\ + mov (%esp),%edx ;\ + xor p1+3*tlen(,%ecx,4),%edi ;\ + movzbl %dl,%ecx ;\ + xor p2+8(%ebp),%esi ;\ + xor p1(,%ecx,4),%ebx ;\ + movzbl %dh,%ecx ;\ + shr $16,%edx ;\ + xor p1+tlen(,%ecx,4),%eax ;\ + movzbl %dl,%ecx ;\ + movzbl %dh,%edx ;\ + xor p1+2*tlen(,%ecx,4),%edi ;\ + mov 4(%esp),%ecx ;\ + xor p1+3*tlen(,%edx,4),%esi ;\ + movzbl %cl,%edx ;\ + xor p1(,%edx,4),%edi ;\ + movzbl %ch,%edx ;\ + shr $16,%ecx ;\ + xor p1+tlen(,%edx,4),%esi ;\ + movzbl %cl,%edx ;\ + movzbl %ch,%ecx ;\ + xor p1+2*tlen(,%edx,4),%ebx ;\ + xor p1+3*tlen(,%ecx,4),%eax + +// This macro performs an inverse encryption cycle. It is entered with +// the first previous round column values in %eax, %ebx, %esi and %edi and +// exits with the final values in the same registers. 
+ +#define inv_rnd(p1,p2) \ + movzbl %al,%edx ;\ + mov %ebx,(%esp) ;\ + mov %eax,%ecx ;\ + mov p2(%ebp),%eax ;\ + mov %edi,4(%esp) ;\ + mov p2+4(%ebp),%ebx ;\ + xor p1(,%edx,4),%eax ;\ + movzbl %ch,%edx ;\ + shr $16,%ecx ;\ + mov p2+12(%ebp),%edi ;\ + xor p1+tlen(,%edx,4),%ebx ;\ + movzbl %cl,%edx ;\ + movzbl %ch,%ecx ;\ + xor p1+3*tlen(,%ecx,4),%edi ;\ + mov %esi,%ecx ;\ + mov p1+2*tlen(,%edx,4),%esi ;\ + movzbl %cl,%edx ;\ + xor p1(,%edx,4),%esi ;\ + movzbl %ch,%edx ;\ + shr $16,%ecx ;\ + xor p1+tlen(,%edx,4),%edi ;\ + movzbl %cl,%edx ;\ + movzbl %ch,%ecx ;\ + xor p1+2*tlen(,%edx,4),%eax ;\ + mov (%esp),%edx ;\ + xor p1+3*tlen(,%ecx,4),%ebx ;\ + movzbl %dl,%ecx ;\ + xor p2+8(%ebp),%esi ;\ + xor p1(,%ecx,4),%ebx ;\ + movzbl %dh,%ecx ;\ + shr $16,%edx ;\ + xor p1+tlen(,%ecx,4),%esi ;\ + movzbl %dl,%ecx ;\ + movzbl %dh,%edx ;\ + xor p1+2*tlen(,%ecx,4),%edi ;\ + mov 4(%esp),%ecx ;\ + xor p1+3*tlen(,%edx,4),%eax ;\ + movzbl %cl,%edx ;\ + xor p1(,%edx,4),%edi ;\ + movzbl %ch,%edx ;\ + shr $16,%ecx ;\ + xor p1+tlen(,%edx,4),%eax ;\ + movzbl %cl,%edx ;\ + movzbl %ch,%ecx ;\ + xor p1+2*tlen(,%edx,4),%ebx ;\ + xor p1+3*tlen(,%ecx,4),%esi + +// AES (Rijndael) Encryption Subroutine + + .text + .align ALIGN32BYTES +aes_encrypt: + push %ebp + mov ctx(%esp),%ebp // pointer to context + mov in_blk(%esp),%ecx + push %ebx + push %esi + push %edi + mov nrnd(%ebp),%edx // number of rounds + lea ekey+16(%ebp),%ebp // key pointer + +// input four columns and xor in first round key + + mov (%ecx),%eax + mov 4(%ecx),%ebx + mov 8(%ecx),%esi + mov 12(%ecx),%edi + xor -16(%ebp),%eax + xor -12(%ebp),%ebx + xor -8(%ebp),%esi + xor -4(%ebp),%edi + + sub $8,%esp // space for register saves on stack + + sub $10,%edx + je aes_15 + add $32,%ebp + sub $2,%edx + je aes_13 + add $32,%ebp + + fwd_rnd(aes_ft_tab,-64) // 14 rounds for 256-bit key + fwd_rnd(aes_ft_tab,-48) +aes_13: fwd_rnd(aes_ft_tab,-32) // 12 rounds for 192-bit key + fwd_rnd(aes_ft_tab,-16) +aes_15: fwd_rnd(aes_ft_tab,0) // 10 rounds 
for 128-bit key + fwd_rnd(aes_ft_tab,16) + fwd_rnd(aes_ft_tab,32) + fwd_rnd(aes_ft_tab,48) + fwd_rnd(aes_ft_tab,64) + fwd_rnd(aes_ft_tab,80) + fwd_rnd(aes_ft_tab,96) + fwd_rnd(aes_ft_tab,112) + fwd_rnd(aes_ft_tab,128) + fwd_rnd(aes_fl_tab,144) // last round uses a different table + +// move final values to the output array. + + mov out_blk+20(%esp),%ebp + add $8,%esp + mov %eax,(%ebp) + mov %ebx,4(%ebp) + mov %esi,8(%ebp) + mov %edi,12(%ebp) + pop %edi + pop %esi + pop %ebx + pop %ebp + ret + + +// AES (Rijndael) Decryption Subroutine + + .align ALIGN32BYTES +aes_decrypt: + push %ebp + mov ctx(%esp),%ebp // pointer to context + mov in_blk(%esp),%ecx + push %ebx + push %esi + push %edi + mov nrnd(%ebp),%edx // number of rounds + lea dkey+16(%ebp),%ebp // key pointer + +// input four columns and xor in first round key + + mov (%ecx),%eax + mov 4(%ecx),%ebx + mov 8(%ecx),%esi + mov 12(%ecx),%edi + xor -16(%ebp),%eax + xor -12(%ebp),%ebx + xor -8(%ebp),%esi + xor -4(%ebp),%edi + + sub $8,%esp // space for register saves on stack + + sub $10,%edx + je aes_25 + add $32,%ebp + sub $2,%edx + je aes_23 + add $32,%ebp + + inv_rnd(aes_it_tab,-64) // 14 rounds for 256-bit key + inv_rnd(aes_it_tab,-48) +aes_23: inv_rnd(aes_it_tab,-32) // 12 rounds for 192-bit key + inv_rnd(aes_it_tab,-16) +aes_25: inv_rnd(aes_it_tab,0) // 10 rounds for 128-bit key + inv_rnd(aes_it_tab,16) + inv_rnd(aes_it_tab,32) + inv_rnd(aes_it_tab,48) + inv_rnd(aes_it_tab,64) + inv_rnd(aes_it_tab,80) + inv_rnd(aes_it_tab,96) + inv_rnd(aes_it_tab,112) + inv_rnd(aes_it_tab,128) + inv_rnd(aes_il_tab,144) // last round uses a different table + +// move final values to the output array. 
+ + mov out_blk+20(%esp),%ebp + add $8,%esp + mov %eax,(%ebp) + mov %ebx,4(%ebp) + mov %esi,8(%ebp) + mov %edi,12(%ebp) + pop %edi + pop %esi + pop %ebx + pop %ebp + ret + +// AES (Rijndael) Key Schedule Subroutine + +// input/output parameters + +#define aes_cx 12 // AES context +#define in_key 16 // key input array address +#define key_ln 20 // key length, bytes (16,24,32) or bits (128,192,256) +#define ed_flg 24 // 0=create both encr/decr keys, 1=create encr key only + +// offsets for locals + +#define cnt -4 +#define slen 8 + +// This macro performs a column mixing operation on an input 32-bit +// word to give a 32-bit result. It uses each of the 4 bytes in the +// input column to index 4 different tables of 256 32-bit words +// that are xored together to form the output value. Input: %ebx, output: %eax; clobbers %ecx and leaves %ebx rotated by 16 bits. + +#define mix_col(p1) \ + movzbl %bl,%ecx ;\ + mov p1(,%ecx,4),%eax ;\ + movzbl %bh,%ecx ;\ + ror $16,%ebx ;\ + xor p1+tlen(,%ecx,4),%eax ;\ + movzbl %bl,%ecx ;\ + xor p1+2*tlen(,%ecx,4),%eax ;\ + movzbl %bh,%ecx ;\ + xor p1+3*tlen(,%ecx,4),%eax + +// Key Schedule Macros (each takes the step index p1, which selects the aes_rcon_tab entry and the store offset from %edi) + +#define ksc4(p1) \ + rol $24,%ebx ;\ + mix_col(aes_fl_tab) ;\ + ror $8,%ebx ;\ + xor 4*p1+aes_rcon_tab,%eax ;\ + xor %eax,%esi ;\ + xor %esi,%ebp ;\ + mov %esi,16*p1(%edi) ;\ + mov %ebp,16*p1+4(%edi) ;\ + xor %ebp,%edx ;\ + xor %edx,%ebx ;\ + mov %edx,16*p1+8(%edi) ;\ + mov %ebx,16*p1+12(%edi) + +#define ksc6(p1) \ + rol $24,%ebx ;\ + mix_col(aes_fl_tab) ;\ + ror $8,%ebx ;\ + xor 4*p1+aes_rcon_tab,%eax ;\ + xor 24*p1-24(%edi),%eax ;\ + mov %eax,24*p1(%edi) ;\ + xor 24*p1-20(%edi),%eax ;\ + mov %eax,24*p1+4(%edi) ;\ + xor %eax,%esi ;\ + xor %esi,%ebp ;\ + mov %esi,24*p1+8(%edi) ;\ + mov %ebp,24*p1+12(%edi) ;\ + xor %ebp,%edx ;\ + xor %edx,%ebx ;\ + mov %edx,24*p1+16(%edi) ;\ + mov %ebx,24*p1+20(%edi) + +#define ksc8(p1) \ + rol $24,%ebx ;\ + mix_col(aes_fl_tab) ;\ + ror $8,%ebx ;\ + xor 4*p1+aes_rcon_tab,%eax ;\ + xor 32*p1-32(%edi),%eax ;\ + mov %eax,32*p1(%edi) ;\ + xor 32*p1-28(%edi),%eax ;\ + mov 
%eax,32*p1+4(%edi) ;\ + xor 32*p1-24(%edi),%eax ;\ + mov %eax,32*p1+8(%edi) ;\ + xor 32*p1-20(%edi),%eax ;\ + mov %eax,32*p1+12(%edi) ;\ + push %ebx ;\ + mov %eax,%ebx ;\ + mix_col(aes_fl_tab) ;\ + pop %ebx ;\ + xor %eax,%esi ;\ + xor %esi,%ebp ;\ + mov %esi,32*p1+16(%edi) ;\ + mov %ebp,32*p1+20(%edi) ;\ + xor %ebp,%edx ;\ + xor %edx,%ebx ;\ + mov %edx,32*p1+24(%edi) ;\ + mov %ebx,32*p1+28(%edi) + + .align ALIGN32BYTES +aes_set_key: + pushfl + push %ebp + mov %esp,%ebp + sub $slen,%esp + push %ebx + push %esi + push %edi + + mov aes_cx(%ebp),%edx // edx -> AES context + + mov key_ln(%ebp),%ecx // key length + cmpl $128,%ecx + jb aes_30 + shr $3,%ecx +aes_30: cmpl $32,%ecx + je aes_32 + cmpl $24,%ecx + je aes_32 + mov $16,%ecx +aes_32: shr $2,%ecx + mov %ecx,nkey(%edx) + + lea 6(%ecx),%eax // 10/12/14 for 4/6/8 32-bit key length + mov %eax,nrnd(%edx) + + mov in_key(%ebp),%esi // key input array + lea ekey(%edx),%edi // key position in AES context + cld + push %ebp + mov %ecx,%eax // save key length in eax + rep ; movsl // words in the key schedule + mov -4(%esi),%ebx // put some values in registers + mov -8(%esi),%edx // to allow faster code + mov -12(%esi),%ebp + mov -16(%esi),%esi + + cmpl $4,%eax // jump on key size + je aes_36 + cmpl $6,%eax + je aes_35 + + ksc8(0) + ksc8(1) + ksc8(2) + ksc8(3) + ksc8(4) + ksc8(5) + ksc8(6) + jmp aes_37 +aes_35: ksc6(0) + ksc6(1) + ksc6(2) + ksc6(3) + ksc6(4) + ksc6(5) + ksc6(6) + ksc6(7) + jmp aes_37 +aes_36: ksc4(0) + ksc4(1) + ksc4(2) + ksc4(3) + ksc4(4) + ksc4(5) + ksc4(6) + ksc4(7) + ksc4(8) + ksc4(9) +aes_37: pop %ebp + mov aes_cx(%ebp),%edx // edx -> AES context + cmpl $0,ed_flg(%ebp) + jne aes_39 + +// compile decryption key schedule from encryption schedule - reverse +// order and do mix_column operation on round keys except first and last + + mov nrnd(%edx),%eax // kt = cx->d_key + nc * cx->Nrnd + shl $2,%eax + lea dkey(%edx,%eax,4),%edi + lea ekey(%edx),%esi // kf = cx->e_key + + movsl // copy first round key 
(unmodified) + movsl + movsl + movsl + sub $32,%edi + movl $1,cnt(%ebp) +aes_38: // do mix column on each column of + lodsl // each round key + mov %eax,%ebx + mix_col(aes_im_tab) + stosl + lodsl + mov %eax,%ebx + mix_col(aes_im_tab) + stosl + lodsl + mov %eax,%ebx + mix_col(aes_im_tab) + stosl + lodsl + mov %eax,%ebx + mix_col(aes_im_tab) + stosl + sub $32,%edi + + incl cnt(%ebp) + mov cnt(%ebp),%eax + cmp nrnd(%edx),%eax + jb aes_38 + + movsl // copy last round key (unmodified) + movsl + movsl + movsl +aes_39: pop %edi + pop %esi + pop %ebx + mov %ebp,%esp + pop %ebp + popfl + ret + + +// finite field multiplies by {02}, {04} and {08} + +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) + +// finite field multiplies required in table generation + +#define f3(x) (f2(x) ^ x) +#define f9(x) (f8(x) ^ x) +#define fb(x) (f8(x) ^ f2(x) ^ x) +#define fd(x) (f8(x) ^ f4(x) ^ x) +#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +// These defines generate the forward table entries + +#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x)) +#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x)) +#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x) +#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x) + +// These defines generate the inverse table entries + +#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x)) +#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x)) +#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x)) +#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x)) + +// These defines generate entries for the last round tables + +#define w0(x) (x) +#define w1(x) (x << 8) +#define w2(x) (x << 16) +#define w3(x) (x << 24) + +// macro to generate inverse mix column tables (needed for the key schedule) + +#define im_data0(p1) \ + .long 
p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\ + .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\ + .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\ + .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f) +#define im_data1(p1) \ + .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\ + .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\ + .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\ + .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f) +#define im_data2(p1) \ + .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\ + .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\ + .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\ + .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f) +#define im_data3(p1) \ + .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\ + .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\ + .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\ + .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f) +#define im_data4(p1) \ + .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\ + .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\ + .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\ + .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f) +#define im_data5(p1) \ + .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\ + .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\ + .long 
p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\ + .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf) +#define im_data6(p1) \ + .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\ + .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\ + .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\ + .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf) +#define im_data7(p1) \ + .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\ + .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\ + .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\ + .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff) + +// S-box data - 256 entries + +#define sb_data0(p1) \ + .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\ + .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\ + .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\ + .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0) +#define sb_data1(p1) \ + .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\ + .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\ + .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\ + .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75) +#define sb_data2(p1) \ + .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\ + .long p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\ + .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\ + .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf) +#define sb_data3(p1) \ + .long 
p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\ + .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\ + .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\ + .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2) +#define sb_data4(p1) \ + .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\ + .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\ + .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\ + .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb) +#define sb_data5(p1) \ + .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\ + .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\ + .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\ + .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08) +#define sb_data6(p1) \ + .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\ + .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\ + .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\ + .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e) +#define sb_data7(p1) \ + .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\ + .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\ + .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\ + .long p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16) + +// Inverse S-box data - 256 entries + +#define ib_data0(p1) \ + .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\ + .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\ + .long 
p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\ + .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb) +#define ib_data1(p1) \ + .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\ + .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\ + .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\ + .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25) +#define ib_data2(p1) \ + .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\ + .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\ + .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\ + .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84) +#define ib_data3(p1) \ + .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\ + .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\ + .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\ + .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b) +#define ib_data4(p1) \ + .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\ + .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\ + .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\ + .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e) +#define ib_data5(p1) \ + .long p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\ + .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\ + .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\ + .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4) +#define ib_data6(p1) \ + .long 
p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\ + .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\ + .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\ + .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef) +#define ib_data7(p1) \ + .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\ + .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\ + .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\ + .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d) + +// The rcon_table (needed for the key schedule) +// +// Here is original Dr Brian Gladman's source code: +// _rcon_tab: +// %assign x 1 +// %rep 29 +// dd x +// %assign x f2(x) +// %endrep +// +// Here is precomputed output (it's more portable this way): + + .align ALIGN32BYTES +aes_rcon_tab: + .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80 + .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f + .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4 + .long 0xb3,0x7d,0xfa,0xef,0xc5 + +// The forward xor tables + + .align ALIGN32BYTES +aes_ft_tab: + sb_data0(u0) + sb_data1(u0) + sb_data2(u0) + sb_data3(u0) + sb_data4(u0) + sb_data5(u0) + sb_data6(u0) + sb_data7(u0) + + sb_data0(u1) + sb_data1(u1) + sb_data2(u1) + sb_data3(u1) + sb_data4(u1) + sb_data5(u1) + sb_data6(u1) + sb_data7(u1) + + sb_data0(u2) + sb_data1(u2) + sb_data2(u2) + sb_data3(u2) + sb_data4(u2) + sb_data5(u2) + sb_data6(u2) + sb_data7(u2) + + sb_data0(u3) + sb_data1(u3) + sb_data2(u3) + sb_data3(u3) + sb_data4(u3) + sb_data5(u3) + sb_data6(u3) + sb_data7(u3) + + .align ALIGN32BYTES +aes_fl_tab: + sb_data0(w0) + sb_data1(w0) + sb_data2(w0) + sb_data3(w0) + sb_data4(w0) + sb_data5(w0) + sb_data6(w0) + sb_data7(w0) + + sb_data0(w1) + sb_data1(w1) + sb_data2(w1) + sb_data3(w1) + sb_data4(w1) + sb_data5(w1) + sb_data6(w1) + sb_data7(w1) + + sb_data0(w2) + 
sb_data1(w2) + sb_data2(w2) + sb_data3(w2) + sb_data4(w2) + sb_data5(w2) + sb_data6(w2) + sb_data7(w2) + + sb_data0(w3) + sb_data1(w3) + sb_data2(w3) + sb_data3(w3) + sb_data4(w3) + sb_data5(w3) + sb_data6(w3) + sb_data7(w3) + +// The inverse xor tables + + .align ALIGN32BYTES +aes_it_tab: + ib_data0(v0) + ib_data1(v0) + ib_data2(v0) + ib_data3(v0) + ib_data4(v0) + ib_data5(v0) + ib_data6(v0) + ib_data7(v0) + + ib_data0(v1) + ib_data1(v1) + ib_data2(v1) + ib_data3(v1) + ib_data4(v1) + ib_data5(v1) + ib_data6(v1) + ib_data7(v1) + + ib_data0(v2) + ib_data1(v2) + ib_data2(v2) + ib_data3(v2) + ib_data4(v2) + ib_data5(v2) + ib_data6(v2) + ib_data7(v2) + + ib_data0(v3) + ib_data1(v3) + ib_data2(v3) + ib_data3(v3) + ib_data4(v3) + ib_data5(v3) + ib_data6(v3) + ib_data7(v3) + + .align ALIGN32BYTES +aes_il_tab: + ib_data0(w0) + ib_data1(w0) + ib_data2(w0) + ib_data3(w0) + ib_data4(w0) + ib_data5(w0) + ib_data6(w0) + ib_data7(w0) + + ib_data0(w1) + ib_data1(w1) + ib_data2(w1) + ib_data3(w1) + ib_data4(w1) + ib_data5(w1) + ib_data6(w1) + ib_data7(w1) + + ib_data0(w2) + ib_data1(w2) + ib_data2(w2) + ib_data3(w2) + ib_data4(w2) + ib_data5(w2) + ib_data6(w2) + ib_data7(w2) + + ib_data0(w3) + ib_data1(w3) + ib_data2(w3) + ib_data3(w3) + ib_data4(w3) + ib_data5(w3) + ib_data6(w3) + ib_data7(w3) + +// The inverse mix column tables + + .align ALIGN32BYTES +aes_im_tab: + im_data0(v0) + im_data1(v0) + im_data2(v0) + im_data3(v0) + im_data4(v0) + im_data5(v0) + im_data6(v0) + im_data7(v0) + + im_data0(v1) + im_data1(v1) + im_data2(v1) + im_data3(v1) + im_data4(v1) + im_data5(v1) + im_data6(v1) + im_data7(v1) + + im_data0(v2) + im_data1(v2) + im_data2(v2) + im_data3(v2) + im_data4(v2) + im_data5(v2) + im_data6(v2) + im_data7(v2) + + im_data0(v3) + im_data1(v3) + im_data2(v3) + im_data3(v3) + im_data4(v3) + im_data5(v3) + im_data6(v3) + im_data7(v3) diff -urN linux-2.4.28/drivers/misc/aes.c linux-2.4.28-loop-AES-v3.0b/drivers/misc/aes.c --- linux-2.4.28/drivers/misc/aes.c Thu Jan 1 
01:00:00 1970 +++ linux-2.4.28-loop-AES-v3.0b/drivers/misc/aes.c Sun Feb 6 18:45:39 2005 @@ -0,0 +1,1479 @@ +// I retain copyright in this code but I encourage its free use provided +// that I don't carry any responsibility for the results. I am especially +// happy to see it used in free and open source software. If you do use +// it I would appreciate an acknowledgement of its origin in the code or +// the product that results and I would also appreciate knowing a little +// about the use to which it is being put. I am grateful to Frank Yellin +// for some ideas that are used in this implementation. +// +// Dr B. R. Gladman 6th April 2001. +// +// This is an implementation of the AES encryption algorithm (Rijndael) +// designed by Joan Daemen and Vincent Rijmen. This version is designed +// to provide both fixed and dynamic block and key lengths and can also +// run with either big or little endian internal byte order (see aes.h). +// It inputs block and key lengths in bytes with the legal values being +// 16, 24 and 32. + +/* + * Modified by Jari Ruusu, May 1 2001 + * - Fixed some compile warnings, code was ok but gcc warned anyway. + * - Changed basic types: byte -> unsigned char, word -> u_int32_t + * - Major name space cleanup: Names visible to outside now begin + * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c + * - Removed C++ and DLL support as part of name space cleanup. + * - Eliminated unnecessary recomputation of tables. (actual bug fix) + * - Merged precomputed constant tables to aes.c file. + * - Removed data alignment restrictions for portability reasons. + * - Made block and key lengths accept bit count (128/192/256) + * as well as byte count (16/24/32). + * - Removed all error checks. This change also eliminated the need + * to preinitialize the context struct to zero. + * - Removed some totally unused constants. + */ +/* + * Modified by Jari Ruusu, April 21 2004 + * - Added back code that avoids byte swaps on big endian boxes. 
+ */ + +#include "aes.h" + +// CONFIGURATION OPTIONS (see also aes.h) +// +// 1. Define UNROLL for full loop unrolling in encryption and decryption. +// 2. Define PARTIAL_UNROLL to unroll two loops in encryption and decryption. +// 3. Define FIXED_TABLES for compiled rather than dynamic tables. +// 4. Define FF_TABLES to use tables for field multiplies and inverses. +// Do not enable this without understanding stack space requirements. +// 5. Define ARRAYS to use arrays to hold the local state block. If this +// is not defined, individually declared 32-bit words are used. +// 6. Define FAST_VARIABLE if a high speed variable block implementation +// is needed (essentially three separate fixed block size code sequences) +// 7. Define either ONE_TABLE or FOUR_TABLES for a fast table driven +// version using 1 table (2 kbytes of table space) or 4 tables (8 +// kbytes of table space) for higher speed. +// 8. Define either ONE_LR_TABLE or FOUR_LR_TABLES for a further speed +// increase by using tables for the last rounds but with more table +// space (2 or 8 kbytes extra). +// 9. If neither ONE_TABLE nor FOUR_TABLES is defined, a compact but +// slower version is provided. +// 10. If fast decryption key scheduling is needed define ONE_IM_TABLE +// or FOUR_IM_TABLES for higher speed (2 or 8 kbytes extra). 
+ +#define UNROLL +//#define PARTIAL_UNROLL + +#define FIXED_TABLES +//#define FF_TABLES +//#define ARRAYS +#define FAST_VARIABLE + +//#define ONE_TABLE +#define FOUR_TABLES + +//#define ONE_LR_TABLE +#define FOUR_LR_TABLES + +//#define ONE_IM_TABLE +#define FOUR_IM_TABLES + +#if defined(UNROLL) && defined (PARTIAL_UNROLL) +#error both UNROLL and PARTIAL_UNROLL are defined +#endif + +#if defined(ONE_TABLE) && defined (FOUR_TABLES) +#error both ONE_TABLE and FOUR_TABLES are defined +#endif + +#if defined(ONE_LR_TABLE) && defined (FOUR_LR_TABLES) +#error both ONE_LR_TABLE and FOUR_LR_TABLES are defined +#endif + +#if defined(ONE_IM_TABLE) && defined (FOUR_IM_TABLES) +#error both ONE_IM_TABLE and FOUR_IM_TABLES are defined +#endif + +#if defined(AES_BLOCK_SIZE) && AES_BLOCK_SIZE != 16 && AES_BLOCK_SIZE != 24 && AES_BLOCK_SIZE != 32 +#error an illegal block size has been specified +#endif + +/* INTERNAL_BYTE_ORDER: 0=unknown, 1=little endian, 2=big endian */ +#if defined(INTERNAL_BYTE_ORDER) +#elif defined(__i386__)||defined(__i386)||defined(__x86_64__)||defined(__x86_64)||defined(__amd64__)||defined(__amd64)||defined(__AMD64__)||defined(__AMD64) +# define INTERNAL_BYTE_ORDER 1 +# undef DATA_ALWAYS_ALIGNED +# define DATA_ALWAYS_ALIGNED 1 /* unaligned access is always ok */ +#elif defined(__ppc__)||defined(__ppc)||defined(__PPC__)||defined(__PPC)||defined(__powerpc__)||defined(__powerpc)||defined(__POWERPC__)||defined(__POWERPC)||defined(__PowerPC__)||defined(__PowerPC)||defined(__ppc64__)||defined(__ppc64)||defined(__PPC64__)||defined(__PPC64)||defined(__powerpc64__)||defined(__powerpc64)||defined(__s390__)||defined(__s390) +# define INTERNAL_BYTE_ORDER 2 +# undef DATA_ALWAYS_ALIGNED +# define DATA_ALWAYS_ALIGNED 1 /* unaligned access is always ok */ +#elif defined(__alpha__)||defined(__alpha)||defined(__ia64__)||defined(__ia64) +# define INTERNAL_BYTE_ORDER 1 +#elif 
defined(__hppa__)||defined(__hppa)||defined(__HPPA__)||defined(__HPPA)||defined(__parisc__)||defined(__parisc)||defined(__sparc__)||defined(__sparc)||defined(__sparc_v9__)||defined(__sparc_v9)||defined(__sparc64__)||defined(__sparc64)||defined(__mc68000__)||defined(__mc68000) +# define INTERNAL_BYTE_ORDER 2 +#elif defined(CONFIGURE_DETECTS_BYTE_ORDER) +# if WORDS_BIGENDIAN +# define INTERNAL_BYTE_ORDER 2 +# else +# define INTERNAL_BYTE_ORDER 1 +# endif +#elif defined(__linux__) && defined(__KERNEL__) +# include <asm/byteorder.h> +# if defined(__BIG_ENDIAN) +# define INTERNAL_BYTE_ORDER 2 +# else +# define INTERNAL_BYTE_ORDER 1 +# endif +#else +# include <sys/param.h> +# if (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN)) +# define INTERNAL_BYTE_ORDER 1 +# elif WORDS_BIGENDIAN || defined(__BIG_ENDIAN__) || (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN)) +# define INTERNAL_BYTE_ORDER 2 +# else +# define INTERNAL_BYTE_ORDER 0 +# endif +#endif + +#if defined(DATA_ALWAYS_ALIGNED) && (INTERNAL_BYTE_ORDER > 0) +# define word_in(x) *(u_int32_t*)(x) +# define word_out(x,v) *(u_int32_t*)(x) = (v) +#elif defined(__linux__) && defined(__KERNEL__) +# include <asm/unaligned.h> +# define word_in(x) get_unaligned((u_int32_t*)(x)) +# define word_out(x,v) put_unaligned((v),(u_int32_t*)(x)) +#else +/* unknown endianness and/or unable to handle unaligned data: words are read and written one byte at a time, least significant byte first */ +# undef INTERNAL_BYTE_ORDER +# define INTERNAL_BYTE_ORDER 1 +# define word_in(x) ((u_int32_t)(((unsigned char *)(x))[0])|((u_int32_t)(((unsigned char *)(x))[1])<<8)|((u_int32_t)(((unsigned char *)(x))[2])<<16)|((u_int32_t)(((unsigned char *)(x))[3])<<24)) +# define word_out(x,v) ((unsigned char *)(x))[0]=(v),((unsigned char *)(x))[1]=((v)>>8),((unsigned char *)(x))[2]=((v)>>16),((unsigned char *)(x))[3]=((v)>>24) +#endif + +// upr(x,n): 
rotates bytes within words by n positions, moving bytes +// to higher index positions with wrap around into low positions +// ups(x,n): moves bytes by n positions to higher index positions in +// words but without wrap around +// bval(x,n): extracts a byte from a word + +#if (INTERNAL_BYTE_ORDER < 2) +/* little endian: byte 0 is the least significant byte of a word */ +#define upr(x,n) (((x) << 8 * (n)) | ((x) >> (32 - 8 * (n)))) +#define ups(x,n) ((x) << 8 * (n)) +#define bval(x,n) ((unsigned char)((x) >> 8 * (n))) +#define bytes2word(b0, b1, b2, b3) \ + ((u_int32_t)(b3) << 24 | (u_int32_t)(b2) << 16 | (u_int32_t)(b1) << 8 | (b0)) +#else +/* big endian: byte 0 is the most significant byte of a word */ +#define upr(x,n) (((x) >> 8 * (n)) | ((x) << (32 - 8 * (n)))) +#define ups(x,n) ((x) >> 8 * (n)) +#define bval(x,n) ((unsigned char)((x) >> (24 - 8 * (n)))) +#define bytes2word(b0, b1, b2, b3) \ + ((u_int32_t)(b0) << 24 | (u_int32_t)(b1) << 16 | (u_int32_t)(b2) << 8 | (b3)) +#endif + +// Disable at least some poor combinations of options + +#if !defined(ONE_TABLE) && !defined(FOUR_TABLES) +#define FIXED_TABLES +#undef UNROLL +#undef ONE_LR_TABLE +#undef FOUR_LR_TABLES +#undef ONE_IM_TABLE +#undef FOUR_IM_TABLES +#elif !defined(FOUR_TABLES) +#ifdef FOUR_LR_TABLES +#undef FOUR_LR_TABLES +#define ONE_LR_TABLE +#endif +#ifdef FOUR_IM_TABLES +#undef FOUR_IM_TABLES +#define ONE_IM_TABLE +#endif +#elif !defined(AES_BLOCK_SIZE) +#if defined(UNROLL) +#define PARTIAL_UNROLL +#undef UNROLL +#endif +#endif + +// the finite field modular polynomial and elements + +#define ff_poly 0x011b +#define ff_hi 0x80 + +// multiply four bytes in GF(2^8) by 'x' {02} in parallel + +#define m1 0x80808080 +#define m2 0x7f7f7f7f +#define m3 0x0000001b +#define FFmulX(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * m3)) + +// The following defines provide alternative definitions of FFmulX that might +// give improved performance if a fast 32-bit multiply is not available. Note +// that a temporary variable u needs to be defined where FFmulX is used. 
+ +// #define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6)) +// #define m4 0x1b1b1b1b +// #define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4) + +// perform column mix operation on four bytes in parallel + +#define fwd_mcol(x) (f2 = FFmulX(x), f2 ^ upr(x ^ f2,3) ^ upr(x,2) ^ upr(x,1)) + +#if defined(FIXED_TABLES) + +// the S-Box table + +static const unsigned char s_box[256] = +{ + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, + 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, + 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, + 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, + 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, + 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, + 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, + 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, + 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, + 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, + 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, + 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, + 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, + 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, + 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, + 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, + 0x41, 
0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +}; + +// the inverse S-Box table + +static const unsigned char inv_s_box[256] = +{ + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, + 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, + 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, + 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, + 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, + 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, + 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, + 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, + 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, + 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, + 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, + 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, + 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d +}; + +// used to ensure table is generated in the right format +// depending on the internal byte order required + +#if (INTERNAL_BYTE_ORDER < 2) +/* little endian */ +#define w0(p) 0x000000##p +#else +/* big endian */ +#define w0(p) 0x##p##000000 +#endif + +// Number of 
elements required in this table for different +// block and key lengths is: +// +// Nk = 4 6 8 +// ---------- +// Nb = 4 | 10 8 7 +// 6 | 19 12 11 +// 8 | 29 19 14 +// +// this table can be a table of bytes if the key schedule +// code is adjusted accordingly + +static const u_int32_t rcon_tab[29] = +{ + w0(01), w0(02), w0(04), w0(08), + w0(10), w0(20), w0(40), w0(80), + w0(1b), w0(36), w0(6c), w0(d8), + w0(ab), w0(4d), w0(9a), w0(2f), + w0(5e), w0(bc), w0(63), w0(c6), + w0(97), w0(35), w0(6a), w0(d4), + w0(b3), w0(7d), w0(fa), w0(ef), + w0(c5) +}; + +#undef w0 + +// used to ensure table is generated in the right format +// depending on the internal byte order required + +#if (INTERNAL_BYTE_ORDER < 2) +/* little endian */ +#define r0(p,q,r,s) 0x##p##q##r##s +#define r1(p,q,r,s) 0x##q##r##s##p +#define r2(p,q,r,s) 0x##r##s##p##q +#define r3(p,q,r,s) 0x##s##p##q##r +#define w0(p) 0x000000##p +#define w1(p) 0x0000##p##00 +#define w2(p) 0x00##p##0000 +#define w3(p) 0x##p##000000 +#else +/* big endian */ +#define r0(p,q,r,s) 0x##s##r##q##p +#define r1(p,q,r,s) 0x##p##s##r##q +#define r2(p,q,r,s) 0x##q##p##s##r +#define r3(p,q,r,s) 0x##r##q##p##s +#define w0(p) 0x##p##000000 +#define w1(p) 0x00##p##0000 +#define w2(p) 0x0000##p##00 +#define w3(p) 0x000000##p +#endif + +#if defined(FIXED_TABLES) && (defined(ONE_TABLE) || defined(FOUR_TABLES)) + +// data for forward tables (other than last round) + +#define f_table \ + r(a5,63,63,c6), r(84,7c,7c,f8), r(99,77,77,ee), r(8d,7b,7b,f6),\ + r(0d,f2,f2,ff), r(bd,6b,6b,d6), r(b1,6f,6f,de), r(54,c5,c5,91),\ + r(50,30,30,60), r(03,01,01,02), r(a9,67,67,ce), r(7d,2b,2b,56),\ + r(19,fe,fe,e7), r(62,d7,d7,b5), r(e6,ab,ab,4d), r(9a,76,76,ec),\ + r(45,ca,ca,8f), r(9d,82,82,1f), r(40,c9,c9,89), r(87,7d,7d,fa),\ + r(15,fa,fa,ef), r(eb,59,59,b2), r(c9,47,47,8e), r(0b,f0,f0,fb),\ + r(ec,ad,ad,41), r(67,d4,d4,b3), r(fd,a2,a2,5f), r(ea,af,af,45),\ + r(bf,9c,9c,23), r(f7,a4,a4,53), r(96,72,72,e4), r(5b,c0,c0,9b),\ + r(c2,b7,b7,75), 
r(1c,fd,fd,e1), r(ae,93,93,3d), r(6a,26,26,4c),\ + r(5a,36,36,6c), r(41,3f,3f,7e), r(02,f7,f7,f5), r(4f,cc,cc,83),\ + r(5c,34,34,68), r(f4,a5,a5,51), r(34,e5,e5,d1), r(08,f1,f1,f9),\ + r(93,71,71,e2), r(73,d8,d8,ab), r(53,31,31,62), r(3f,15,15,2a),\ + r(0c,04,04,08), r(52,c7,c7,95), r(65,23,23,46), r(5e,c3,c3,9d),\ + r(28,18,18,30), r(a1,96,96,37), r(0f,05,05,0a), r(b5,9a,9a,2f),\ + r(09,07,07,0e), r(36,12,12,24), r(9b,80,80,1b), r(3d,e2,e2,df),\ + r(26,eb,eb,cd), r(69,27,27,4e), r(cd,b2,b2,7f), r(9f,75,75,ea),\ + r(1b,09,09,12), r(9e,83,83,1d), r(74,2c,2c,58), r(2e,1a,1a,34),\ + r(2d,1b,1b,36), r(b2,6e,6e,dc), r(ee,5a,5a,b4), r(fb,a0,a0,5b),\ + r(f6,52,52,a4), r(4d,3b,3b,76), r(61,d6,d6,b7), r(ce,b3,b3,7d),\ + r(7b,29,29,52), r(3e,e3,e3,dd), r(71,2f,2f,5e), r(97,84,84,13),\ + r(f5,53,53,a6), r(68,d1,d1,b9), r(00,00,00,00), r(2c,ed,ed,c1),\ + r(60,20,20,40), r(1f,fc,fc,e3), r(c8,b1,b1,79), r(ed,5b,5b,b6),\ + r(be,6a,6a,d4), r(46,cb,cb,8d), r(d9,be,be,67), r(4b,39,39,72),\ + r(de,4a,4a,94), r(d4,4c,4c,98), r(e8,58,58,b0), r(4a,cf,cf,85),\ + r(6b,d0,d0,bb), r(2a,ef,ef,c5), r(e5,aa,aa,4f), r(16,fb,fb,ed),\ + r(c5,43,43,86), r(d7,4d,4d,9a), r(55,33,33,66), r(94,85,85,11),\ + r(cf,45,45,8a), r(10,f9,f9,e9), r(06,02,02,04), r(81,7f,7f,fe),\ + r(f0,50,50,a0), r(44,3c,3c,78), r(ba,9f,9f,25), r(e3,a8,a8,4b),\ + r(f3,51,51,a2), r(fe,a3,a3,5d), r(c0,40,40,80), r(8a,8f,8f,05),\ + r(ad,92,92,3f), r(bc,9d,9d,21), r(48,38,38,70), r(04,f5,f5,f1),\ + r(df,bc,bc,63), r(c1,b6,b6,77), r(75,da,da,af), r(63,21,21,42),\ + r(30,10,10,20), r(1a,ff,ff,e5), r(0e,f3,f3,fd), r(6d,d2,d2,bf),\ + r(4c,cd,cd,81), r(14,0c,0c,18), r(35,13,13,26), r(2f,ec,ec,c3),\ + r(e1,5f,5f,be), r(a2,97,97,35), r(cc,44,44,88), r(39,17,17,2e),\ + r(57,c4,c4,93), r(f2,a7,a7,55), r(82,7e,7e,fc), r(47,3d,3d,7a),\ + r(ac,64,64,c8), r(e7,5d,5d,ba), r(2b,19,19,32), r(95,73,73,e6),\ + r(a0,60,60,c0), r(98,81,81,19), r(d1,4f,4f,9e), r(7f,dc,dc,a3),\ + r(66,22,22,44), r(7e,2a,2a,54), r(ab,90,90,3b), r(83,88,88,0b),\ + 
r(ca,46,46,8c), r(29,ee,ee,c7), r(d3,b8,b8,6b), r(3c,14,14,28),\ + r(79,de,de,a7), r(e2,5e,5e,bc), r(1d,0b,0b,16), r(76,db,db,ad),\ + r(3b,e0,e0,db), r(56,32,32,64), r(4e,3a,3a,74), r(1e,0a,0a,14),\ + r(db,49,49,92), r(0a,06,06,0c), r(6c,24,24,48), r(e4,5c,5c,b8),\ + r(5d,c2,c2,9f), r(6e,d3,d3,bd), r(ef,ac,ac,43), r(a6,62,62,c4),\ + r(a8,91,91,39), r(a4,95,95,31), r(37,e4,e4,d3), r(8b,79,79,f2),\ + r(32,e7,e7,d5), r(43,c8,c8,8b), r(59,37,37,6e), r(b7,6d,6d,da),\ + r(8c,8d,8d,01), r(64,d5,d5,b1), r(d2,4e,4e,9c), r(e0,a9,a9,49),\ + r(b4,6c,6c,d8), r(fa,56,56,ac), r(07,f4,f4,f3), r(25,ea,ea,cf),\ + r(af,65,65,ca), r(8e,7a,7a,f4), r(e9,ae,ae,47), r(18,08,08,10),\ + r(d5,ba,ba,6f), r(88,78,78,f0), r(6f,25,25,4a), r(72,2e,2e,5c),\ + r(24,1c,1c,38), r(f1,a6,a6,57), r(c7,b4,b4,73), r(51,c6,c6,97),\ + r(23,e8,e8,cb), r(7c,dd,dd,a1), r(9c,74,74,e8), r(21,1f,1f,3e),\ + r(dd,4b,4b,96), r(dc,bd,bd,61), r(86,8b,8b,0d), r(85,8a,8a,0f),\ + r(90,70,70,e0), r(42,3e,3e,7c), r(c4,b5,b5,71), r(aa,66,66,cc),\ + r(d8,48,48,90), r(05,03,03,06), r(01,f6,f6,f7), r(12,0e,0e,1c),\ + r(a3,61,61,c2), r(5f,35,35,6a), r(f9,57,57,ae), r(d0,b9,b9,69),\ + r(91,86,86,17), r(58,c1,c1,99), r(27,1d,1d,3a), r(b9,9e,9e,27),\ + r(38,e1,e1,d9), r(13,f8,f8,eb), r(b3,98,98,2b), r(33,11,11,22),\ + r(bb,69,69,d2), r(70,d9,d9,a9), r(89,8e,8e,07), r(a7,94,94,33),\ + r(b6,9b,9b,2d), r(22,1e,1e,3c), r(92,87,87,15), r(20,e9,e9,c9),\ + r(49,ce,ce,87), r(ff,55,55,aa), r(78,28,28,50), r(7a,df,df,a5),\ + r(8f,8c,8c,03), r(f8,a1,a1,59), r(80,89,89,09), r(17,0d,0d,1a),\ + r(da,bf,bf,65), r(31,e6,e6,d7), r(c6,42,42,84), r(b8,68,68,d0),\ + r(c3,41,41,82), r(b0,99,99,29), r(77,2d,2d,5a), r(11,0f,0f,1e),\ + r(cb,b0,b0,7b), r(fc,54,54,a8), r(d6,bb,bb,6d), r(3a,16,16,2c) + +// data for inverse tables (other than last round) + +#define i_table \ + r(50,a7,f4,51), r(53,65,41,7e), r(c3,a4,17,1a), r(96,5e,27,3a),\ + r(cb,6b,ab,3b), r(f1,45,9d,1f), r(ab,58,fa,ac), r(93,03,e3,4b),\ + r(55,fa,30,20), r(f6,6d,76,ad), r(91,76,cc,88), 
r(25,4c,02,f5),\ + r(fc,d7,e5,4f), r(d7,cb,2a,c5), r(80,44,35,26), r(8f,a3,62,b5),\ + r(49,5a,b1,de), r(67,1b,ba,25), r(98,0e,ea,45), r(e1,c0,fe,5d),\ + r(02,75,2f,c3), r(12,f0,4c,81), r(a3,97,46,8d), r(c6,f9,d3,6b),\ + r(e7,5f,8f,03), r(95,9c,92,15), r(eb,7a,6d,bf), r(da,59,52,95),\ + r(2d,83,be,d4), r(d3,21,74,58), r(29,69,e0,49), r(44,c8,c9,8e),\ + r(6a,89,c2,75), r(78,79,8e,f4), r(6b,3e,58,99), r(dd,71,b9,27),\ + r(b6,4f,e1,be), r(17,ad,88,f0), r(66,ac,20,c9), r(b4,3a,ce,7d),\ + r(18,4a,df,63), r(82,31,1a,e5), r(60,33,51,97), r(45,7f,53,62),\ + r(e0,77,64,b1), r(84,ae,6b,bb), r(1c,a0,81,fe), r(94,2b,08,f9),\ + r(58,68,48,70), r(19,fd,45,8f), r(87,6c,de,94), r(b7,f8,7b,52),\ + r(23,d3,73,ab), r(e2,02,4b,72), r(57,8f,1f,e3), r(2a,ab,55,66),\ + r(07,28,eb,b2), r(03,c2,b5,2f), r(9a,7b,c5,86), r(a5,08,37,d3),\ + r(f2,87,28,30), r(b2,a5,bf,23), r(ba,6a,03,02), r(5c,82,16,ed),\ + r(2b,1c,cf,8a), r(92,b4,79,a7), r(f0,f2,07,f3), r(a1,e2,69,4e),\ + r(cd,f4,da,65), r(d5,be,05,06), r(1f,62,34,d1), r(8a,fe,a6,c4),\ + r(9d,53,2e,34), r(a0,55,f3,a2), r(32,e1,8a,05), r(75,eb,f6,a4),\ + r(39,ec,83,0b), r(aa,ef,60,40), r(06,9f,71,5e), r(51,10,6e,bd),\ + r(f9,8a,21,3e), r(3d,06,dd,96), r(ae,05,3e,dd), r(46,bd,e6,4d),\ + r(b5,8d,54,91), r(05,5d,c4,71), r(6f,d4,06,04), r(ff,15,50,60),\ + r(24,fb,98,19), r(97,e9,bd,d6), r(cc,43,40,89), r(77,9e,d9,67),\ + r(bd,42,e8,b0), r(88,8b,89,07), r(38,5b,19,e7), r(db,ee,c8,79),\ + r(47,0a,7c,a1), r(e9,0f,42,7c), r(c9,1e,84,f8), r(00,00,00,00),\ + r(83,86,80,09), r(48,ed,2b,32), r(ac,70,11,1e), r(4e,72,5a,6c),\ + r(fb,ff,0e,fd), r(56,38,85,0f), r(1e,d5,ae,3d), r(27,39,2d,36),\ + r(64,d9,0f,0a), r(21,a6,5c,68), r(d1,54,5b,9b), r(3a,2e,36,24),\ + r(b1,67,0a,0c), r(0f,e7,57,93), r(d2,96,ee,b4), r(9e,91,9b,1b),\ + r(4f,c5,c0,80), r(a2,20,dc,61), r(69,4b,77,5a), r(16,1a,12,1c),\ + r(0a,ba,93,e2), r(e5,2a,a0,c0), r(43,e0,22,3c), r(1d,17,1b,12),\ + r(0b,0d,09,0e), r(ad,c7,8b,f2), r(b9,a8,b6,2d), r(c8,a9,1e,14),\ + r(85,19,f1,57), r(4c,07,75,af), 
r(bb,dd,99,ee), r(fd,60,7f,a3),\ + r(9f,26,01,f7), r(bc,f5,72,5c), r(c5,3b,66,44), r(34,7e,fb,5b),\ + r(76,29,43,8b), r(dc,c6,23,cb), r(68,fc,ed,b6), r(63,f1,e4,b8),\ + r(ca,dc,31,d7), r(10,85,63,42), r(40,22,97,13), r(20,11,c6,84),\ + r(7d,24,4a,85), r(f8,3d,bb,d2), r(11,32,f9,ae), r(6d,a1,29,c7),\ + r(4b,2f,9e,1d), r(f3,30,b2,dc), r(ec,52,86,0d), r(d0,e3,c1,77),\ + r(6c,16,b3,2b), r(99,b9,70,a9), r(fa,48,94,11), r(22,64,e9,47),\ + r(c4,8c,fc,a8), r(1a,3f,f0,a0), r(d8,2c,7d,56), r(ef,90,33,22),\ + r(c7,4e,49,87), r(c1,d1,38,d9), r(fe,a2,ca,8c), r(36,0b,d4,98),\ + r(cf,81,f5,a6), r(28,de,7a,a5), r(26,8e,b7,da), r(a4,bf,ad,3f),\ + r(e4,9d,3a,2c), r(0d,92,78,50), r(9b,cc,5f,6a), r(62,46,7e,54),\ + r(c2,13,8d,f6), r(e8,b8,d8,90), r(5e,f7,39,2e), r(f5,af,c3,82),\ + r(be,80,5d,9f), r(7c,93,d0,69), r(a9,2d,d5,6f), r(b3,12,25,cf),\ + r(3b,99,ac,c8), r(a7,7d,18,10), r(6e,63,9c,e8), r(7b,bb,3b,db),\ + r(09,78,26,cd), r(f4,18,59,6e), r(01,b7,9a,ec), r(a8,9a,4f,83),\ + r(65,6e,95,e6), r(7e,e6,ff,aa), r(08,cf,bc,21), r(e6,e8,15,ef),\ + r(d9,9b,e7,ba), r(ce,36,6f,4a), r(d4,09,9f,ea), r(d6,7c,b0,29),\ + r(af,b2,a4,31), r(31,23,3f,2a), r(30,94,a5,c6), r(c0,66,a2,35),\ + r(37,bc,4e,74), r(a6,ca,82,fc), r(b0,d0,90,e0), r(15,d8,a7,33),\ + r(4a,98,04,f1), r(f7,da,ec,41), r(0e,50,cd,7f), r(2f,f6,91,17),\ + r(8d,d6,4d,76), r(4d,b0,ef,43), r(54,4d,aa,cc), r(df,04,96,e4),\ + r(e3,b5,d1,9e), r(1b,88,6a,4c), r(b8,1f,2c,c1), r(7f,51,65,46),\ + r(04,ea,5e,9d), r(5d,35,8c,01), r(73,74,87,fa), r(2e,41,0b,fb),\ + r(5a,1d,67,b3), r(52,d2,db,92), r(33,56,10,e9), r(13,47,d6,6d),\ + r(8c,61,d7,9a), r(7a,0c,a1,37), r(8e,14,f8,59), r(89,3c,13,eb),\ + r(ee,27,a9,ce), r(35,c9,61,b7), r(ed,e5,1c,e1), r(3c,b1,47,7a),\ + r(59,df,d2,9c), r(3f,73,f2,55), r(79,ce,14,18), r(bf,37,c7,73),\ + r(ea,cd,f7,53), r(5b,aa,fd,5f), r(14,6f,3d,df), r(86,db,44,78),\ + r(81,f3,af,ca), r(3e,c4,68,b9), r(2c,34,24,38), r(5f,40,a3,c2),\ + r(72,c3,1d,16), r(0c,25,e2,bc), r(8b,49,3c,28), r(41,95,0d,ff),\ + r(71,01,a8,39), 
r(de,b3,0c,08), r(9c,e4,b4,d8), r(90,c1,56,64),\ + r(61,84,cb,7b), r(70,b6,32,d5), r(74,5c,6c,48), r(42,57,b8,d0) + +// generate the required tables in the desired endian format + +#undef r +#define r r0 + +#if defined(ONE_TABLE) +static const u_int32_t ft_tab[256] = + { f_table }; +#elif defined(FOUR_TABLES) +static const u_int32_t ft_tab[4][256] = +{ { f_table }, +#undef r +#define r r1 + { f_table }, +#undef r +#define r r2 + { f_table }, +#undef r +#define r r3 + { f_table } +}; +#endif + +#undef r +#define r r0 +#if defined(ONE_TABLE) +static const u_int32_t it_tab[256] = + { i_table }; +#elif defined(FOUR_TABLES) +static const u_int32_t it_tab[4][256] = +{ { i_table }, +#undef r +#define r r1 + { i_table }, +#undef r +#define r r2 + { i_table }, +#undef r +#define r r3 + { i_table } +}; +#endif + +#endif + +#if defined(FIXED_TABLES) && (defined(ONE_LR_TABLE) || defined(FOUR_LR_TABLES)) + +// data for inverse tables (last round) + +#define li_table \ + w(52), w(09), w(6a), w(d5), w(30), w(36), w(a5), w(38),\ + w(bf), w(40), w(a3), w(9e), w(81), w(f3), w(d7), w(fb),\ + w(7c), w(e3), w(39), w(82), w(9b), w(2f), w(ff), w(87),\ + w(34), w(8e), w(43), w(44), w(c4), w(de), w(e9), w(cb),\ + w(54), w(7b), w(94), w(32), w(a6), w(c2), w(23), w(3d),\ + w(ee), w(4c), w(95), w(0b), w(42), w(fa), w(c3), w(4e),\ + w(08), w(2e), w(a1), w(66), w(28), w(d9), w(24), w(b2),\ + w(76), w(5b), w(a2), w(49), w(6d), w(8b), w(d1), w(25),\ + w(72), w(f8), w(f6), w(64), w(86), w(68), w(98), w(16),\ + w(d4), w(a4), w(5c), w(cc), w(5d), w(65), w(b6), w(92),\ + w(6c), w(70), w(48), w(50), w(fd), w(ed), w(b9), w(da),\ + w(5e), w(15), w(46), w(57), w(a7), w(8d), w(9d), w(84),\ + w(90), w(d8), w(ab), w(00), w(8c), w(bc), w(d3), w(0a),\ + w(f7), w(e4), w(58), w(05), w(b8), w(b3), w(45), w(06),\ + w(d0), w(2c), w(1e), w(8f), w(ca), w(3f), w(0f), w(02),\ + w(c1), w(af), w(bd), w(03), w(01), w(13), w(8a), w(6b),\ + w(3a), w(91), w(11), w(41), w(4f), w(67), w(dc), w(ea),\ + w(97), w(f2), w(cf), 
w(ce), w(f0), w(b4), w(e6), w(73),\ + w(96), w(ac), w(74), w(22), w(e7), w(ad), w(35), w(85),\ + w(e2), w(f9), w(37), w(e8), w(1c), w(75), w(df), w(6e),\ + w(47), w(f1), w(1a), w(71), w(1d), w(29), w(c5), w(89),\ + w(6f), w(b7), w(62), w(0e), w(aa), w(18), w(be), w(1b),\ + w(fc), w(56), w(3e), w(4b), w(c6), w(d2), w(79), w(20),\ + w(9a), w(db), w(c0), w(fe), w(78), w(cd), w(5a), w(f4),\ + w(1f), w(dd), w(a8), w(33), w(88), w(07), w(c7), w(31),\ + w(b1), w(12), w(10), w(59), w(27), w(80), w(ec), w(5f),\ + w(60), w(51), w(7f), w(a9), w(19), w(b5), w(4a), w(0d),\ + w(2d), w(e5), w(7a), w(9f), w(93), w(c9), w(9c), w(ef),\ + w(a0), w(e0), w(3b), w(4d), w(ae), w(2a), w(f5), w(b0),\ + w(c8), w(eb), w(bb), w(3c), w(83), w(53), w(99), w(61),\ + w(17), w(2b), w(04), w(7e), w(ba), w(77), w(d6), w(26),\ + w(e1), w(69), w(14), w(63), w(55), w(21), w(0c), w(7d), + +// generate the required tables in the desired endian format + +#undef r +#define r(p,q,r,s) w0(q) +#if defined(ONE_LR_TABLE) +static const u_int32_t fl_tab[256] = + { f_table }; +#elif defined(FOUR_LR_TABLES) +static const u_int32_t fl_tab[4][256] = +{ { f_table }, +#undef r +#define r(p,q,r,s) w1(q) + { f_table }, +#undef r +#define r(p,q,r,s) w2(q) + { f_table }, +#undef r +#define r(p,q,r,s) w3(q) + { f_table } +}; +#endif + +#undef w +#define w w0 +#if defined(ONE_LR_TABLE) +static const u_int32_t il_tab[256] = + { li_table }; +#elif defined(FOUR_LR_TABLES) +static const u_int32_t il_tab[4][256] = +{ { li_table }, +#undef w +#define w w1 + { li_table }, +#undef w +#define w w2 + { li_table }, +#undef w +#define w w3 + { li_table } +}; +#endif + +#endif + +#if defined(FIXED_TABLES) && (defined(ONE_IM_TABLE) || defined(FOUR_IM_TABLES)) + +#define m_table \ + r(00,00,00,00), r(0b,0d,09,0e), r(16,1a,12,1c), r(1d,17,1b,12),\ + r(2c,34,24,38), r(27,39,2d,36), r(3a,2e,36,24), r(31,23,3f,2a),\ + r(58,68,48,70), r(53,65,41,7e), r(4e,72,5a,6c), r(45,7f,53,62),\ + r(74,5c,6c,48), r(7f,51,65,46), r(62,46,7e,54), 
r(69,4b,77,5a),\ + r(b0,d0,90,e0), r(bb,dd,99,ee), r(a6,ca,82,fc), r(ad,c7,8b,f2),\ + r(9c,e4,b4,d8), r(97,e9,bd,d6), r(8a,fe,a6,c4), r(81,f3,af,ca),\ + r(e8,b8,d8,90), r(e3,b5,d1,9e), r(fe,a2,ca,8c), r(f5,af,c3,82),\ + r(c4,8c,fc,a8), r(cf,81,f5,a6), r(d2,96,ee,b4), r(d9,9b,e7,ba),\ + r(7b,bb,3b,db), r(70,b6,32,d5), r(6d,a1,29,c7), r(66,ac,20,c9),\ + r(57,8f,1f,e3), r(5c,82,16,ed), r(41,95,0d,ff), r(4a,98,04,f1),\ + r(23,d3,73,ab), r(28,de,7a,a5), r(35,c9,61,b7), r(3e,c4,68,b9),\ + r(0f,e7,57,93), r(04,ea,5e,9d), r(19,fd,45,8f), r(12,f0,4c,81),\ + r(cb,6b,ab,3b), r(c0,66,a2,35), r(dd,71,b9,27), r(d6,7c,b0,29),\ + r(e7,5f,8f,03), r(ec,52,86,0d), r(f1,45,9d,1f), r(fa,48,94,11),\ + r(93,03,e3,4b), r(98,0e,ea,45), r(85,19,f1,57), r(8e,14,f8,59),\ + r(bf,37,c7,73), r(b4,3a,ce,7d), r(a9,2d,d5,6f), r(a2,20,dc,61),\ + r(f6,6d,76,ad), r(fd,60,7f,a3), r(e0,77,64,b1), r(eb,7a,6d,bf),\ + r(da,59,52,95), r(d1,54,5b,9b), r(cc,43,40,89), r(c7,4e,49,87),\ + r(ae,05,3e,dd), r(a5,08,37,d3), r(b8,1f,2c,c1), r(b3,12,25,cf),\ + r(82,31,1a,e5), r(89,3c,13,eb), r(94,2b,08,f9), r(9f,26,01,f7),\ + r(46,bd,e6,4d), r(4d,b0,ef,43), r(50,a7,f4,51), r(5b,aa,fd,5f),\ + r(6a,89,c2,75), r(61,84,cb,7b), r(7c,93,d0,69), r(77,9e,d9,67),\ + r(1e,d5,ae,3d), r(15,d8,a7,33), r(08,cf,bc,21), r(03,c2,b5,2f),\ + r(32,e1,8a,05), r(39,ec,83,0b), r(24,fb,98,19), r(2f,f6,91,17),\ + r(8d,d6,4d,76), r(86,db,44,78), r(9b,cc,5f,6a), r(90,c1,56,64),\ + r(a1,e2,69,4e), r(aa,ef,60,40), r(b7,f8,7b,52), r(bc,f5,72,5c),\ + r(d5,be,05,06), r(de,b3,0c,08), r(c3,a4,17,1a), r(c8,a9,1e,14),\ + r(f9,8a,21,3e), r(f2,87,28,30), r(ef,90,33,22), r(e4,9d,3a,2c),\ + r(3d,06,dd,96), r(36,0b,d4,98), r(2b,1c,cf,8a), r(20,11,c6,84),\ + r(11,32,f9,ae), r(1a,3f,f0,a0), r(07,28,eb,b2), r(0c,25,e2,bc),\ + r(65,6e,95,e6), r(6e,63,9c,e8), r(73,74,87,fa), r(78,79,8e,f4),\ + r(49,5a,b1,de), r(42,57,b8,d0), r(5f,40,a3,c2), r(54,4d,aa,cc),\ + r(f7,da,ec,41), r(fc,d7,e5,4f), r(e1,c0,fe,5d), r(ea,cd,f7,53),\ + r(db,ee,c8,79), r(d0,e3,c1,77), 
r(cd,f4,da,65), r(c6,f9,d3,6b),\ + r(af,b2,a4,31), r(a4,bf,ad,3f), r(b9,a8,b6,2d), r(b2,a5,bf,23),\ + r(83,86,80,09), r(88,8b,89,07), r(95,9c,92,15), r(9e,91,9b,1b),\ + r(47,0a,7c,a1), r(4c,07,75,af), r(51,10,6e,bd), r(5a,1d,67,b3),\ + r(6b,3e,58,99), r(60,33,51,97), r(7d,24,4a,85), r(76,29,43,8b),\ + r(1f,62,34,d1), r(14,6f,3d,df), r(09,78,26,cd), r(02,75,2f,c3),\ + r(33,56,10,e9), r(38,5b,19,e7), r(25,4c,02,f5), r(2e,41,0b,fb),\ + r(8c,61,d7,9a), r(87,6c,de,94), r(9a,7b,c5,86), r(91,76,cc,88),\ + r(a0,55,f3,a2), r(ab,58,fa,ac), r(b6,4f,e1,be), r(bd,42,e8,b0),\ + r(d4,09,9f,ea), r(df,04,96,e4), r(c2,13,8d,f6), r(c9,1e,84,f8),\ + r(f8,3d,bb,d2), r(f3,30,b2,dc), r(ee,27,a9,ce), r(e5,2a,a0,c0),\ + r(3c,b1,47,7a), r(37,bc,4e,74), r(2a,ab,55,66), r(21,a6,5c,68),\ + r(10,85,63,42), r(1b,88,6a,4c), r(06,9f,71,5e), r(0d,92,78,50),\ + r(64,d9,0f,0a), r(6f,d4,06,04), r(72,c3,1d,16), r(79,ce,14,18),\ + r(48,ed,2b,32), r(43,e0,22,3c), r(5e,f7,39,2e), r(55,fa,30,20),\ + r(01,b7,9a,ec), r(0a,ba,93,e2), r(17,ad,88,f0), r(1c,a0,81,fe),\ + r(2d,83,be,d4), r(26,8e,b7,da), r(3b,99,ac,c8), r(30,94,a5,c6),\ + r(59,df,d2,9c), r(52,d2,db,92), r(4f,c5,c0,80), r(44,c8,c9,8e),\ + r(75,eb,f6,a4), r(7e,e6,ff,aa), r(63,f1,e4,b8), r(68,fc,ed,b6),\ + r(b1,67,0a,0c), r(ba,6a,03,02), r(a7,7d,18,10), r(ac,70,11,1e),\ + r(9d,53,2e,34), r(96,5e,27,3a), r(8b,49,3c,28), r(80,44,35,26),\ + r(e9,0f,42,7c), r(e2,02,4b,72), r(ff,15,50,60), r(f4,18,59,6e),\ + r(c5,3b,66,44), r(ce,36,6f,4a), r(d3,21,74,58), r(d8,2c,7d,56),\ + r(7a,0c,a1,37), r(71,01,a8,39), r(6c,16,b3,2b), r(67,1b,ba,25),\ + r(56,38,85,0f), r(5d,35,8c,01), r(40,22,97,13), r(4b,2f,9e,1d),\ + r(22,64,e9,47), r(29,69,e0,49), r(34,7e,fb,5b), r(3f,73,f2,55),\ + r(0e,50,cd,7f), r(05,5d,c4,71), r(18,4a,df,63), r(13,47,d6,6d),\ + r(ca,dc,31,d7), r(c1,d1,38,d9), r(dc,c6,23,cb), r(d7,cb,2a,c5),\ + r(e6,e8,15,ef), r(ed,e5,1c,e1), r(f0,f2,07,f3), r(fb,ff,0e,fd),\ + r(92,b4,79,a7), r(99,b9,70,a9), r(84,ae,6b,bb), r(8f,a3,62,b5),\ + r(be,80,5d,9f), 
r(b5,8d,54,91), r(a8,9a,4f,83), r(a3,97,46,8d) + +#undef r +#define r r0 + +#if defined(ONE_IM_TABLE) +static const u_int32_t im_tab[256] = + { m_table }; +#elif defined(FOUR_IM_TABLES) +static const u_int32_t im_tab[4][256] = +{ { m_table }, +#undef r +#define r r1 + { m_table }, +#undef r +#define r r2 + { m_table }, +#undef r +#define r r3 + { m_table } +}; +#endif + +#endif + +#else + +static int tab_gen = 0; + +static unsigned char s_box[256]; // the S box +static unsigned char inv_s_box[256]; // the inverse S box +static u_int32_t rcon_tab[AES_RC_LENGTH]; // table of round constants + +#if defined(ONE_TABLE) +static u_int32_t ft_tab[256]; +static u_int32_t it_tab[256]; +#elif defined(FOUR_TABLES) +static u_int32_t ft_tab[4][256]; +static u_int32_t it_tab[4][256]; +#endif + +#if defined(ONE_LR_TABLE) +static u_int32_t fl_tab[256]; +static u_int32_t il_tab[256]; +#elif defined(FOUR_LR_TABLES) +static u_int32_t fl_tab[4][256]; +static u_int32_t il_tab[4][256]; +#endif + +#if defined(ONE_IM_TABLE) +static u_int32_t im_tab[256]; +#elif defined(FOUR_IM_TABLES) +static u_int32_t im_tab[4][256]; +#endif + +// Generate the tables for the dynamic table option + +#if !defined(FF_TABLES) + +// It will generally be sensible to use tables to compute finite +// field multiplies and inverses but where memory is scarce this +// code might sometimes be better. + +// return 2 ^ (n - 1) where n is the bit number of the highest bit +// set in x with x in the range 1 < x < 0x00000200.
This form is +// used so that locals within FFinv can be bytes rather than words + +static unsigned char hibit(const u_int32_t x) +{ unsigned char r = (unsigned char)((x >> 1) | (x >> 2)); + + r |= (r >> 2); + r |= (r >> 4); + return (r + 1) >> 1; +} + +// return the inverse of the finite field element x (x < 2, i.e. 0 or 1, is returned unchanged) + +static unsigned char FFinv(const unsigned char x) +{ unsigned char p1 = x, p2 = 0x1b, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0; + + if(x < 2) return x; + + for(;;) + { + if(!n1) return v1; + + while(n2 >= n1) + { + n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2); + } + + if(!n2) return v2; + + while(n1 >= n2) + { + n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1); + } + } +} + +// define the finite field multiplies required for Rijndael (by {02}, {03}, {09}, {0b}, {0d} and {0e}) + +#define FFmul02(x) ((((x) & 0x7f) << 1) ^ ((x) & 0x80 ? 0x1b : 0)) +#define FFmul03(x) ((x) ^ FFmul02(x)) +#define FFmul09(x) ((x) ^ FFmul02(FFmul02(FFmul02(x)))) +#define FFmul0b(x) ((x) ^ FFmul02((x) ^ FFmul02(FFmul02(x)))) +#define FFmul0d(x) ((x) ^ FFmul02(FFmul02((x) ^ FFmul02(x)))) +#define FFmul0e(x) FFmul02((x) ^ FFmul02((x) ^ FFmul02(x))) + +#else + +#define FFinv(x) ((x) ? pow[255 - log[x]]: 0) + +#define FFmul02(x) (x ? pow[log[x] + 0x19] : 0) +#define FFmul03(x) (x ? pow[log[x] + 0x01] : 0) +#define FFmul09(x) (x ? pow[log[x] + 0xc7] : 0) +#define FFmul0b(x) (x ? pow[log[x] + 0x68] : 0) +#define FFmul0d(x) (x ? pow[log[x] + 0xee] : 0) +#define FFmul0e(x) (x ?
pow[log[x] + 0xdf] : 0) + +#endif + +// The forward and inverse affine transformations used in the S-box (both macros assign to a variable named w that the caller must have in scope) + +#define fwd_affine(x) \ + (w = (u_int32_t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(unsigned char)(w^(w>>8))) + +#define inv_affine(x) \ + (w = (u_int32_t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(unsigned char)(w^(w>>8))) + +static void gen_tabs(void) +{ u_int32_t i, w; + +#if defined(FF_TABLES) + + unsigned char pow[512], log[256]; + + // log and power tables for GF(2^8) finite field with + // 0x011b as modular polynomial - the simplest primitive + // root is 0x03, used here to generate the tables + + i = 0; w = 1; + do + { + pow[i] = (unsigned char)w; + pow[i + 255] = (unsigned char)w; + log[w] = (unsigned char)i++; + w ^= (w << 1) ^ (w & ff_hi ? ff_poly : 0); + } + while (w != 1); + +#endif + + for(i = 0, w = 1; i < AES_RC_LENGTH; ++i) + { + rcon_tab[i] = bytes2word(w, 0, 0, 0); + w = (w << 1) ^ (w & ff_hi ? ff_poly : 0); + } + + for(i = 0; i < 256; ++i) + { unsigned char b; + + s_box[i] = b = fwd_affine(FFinv((unsigned char)i)); + + w = bytes2word(b, 0, 0, 0); +#if defined(ONE_LR_TABLE) + fl_tab[i] = w; +#elif defined(FOUR_LR_TABLES) + fl_tab[0][i] = w; + fl_tab[1][i] = upr(w,1); + fl_tab[2][i] = upr(w,2); + fl_tab[3][i] = upr(w,3); +#endif + w = bytes2word(FFmul02(b), b, b, FFmul03(b)); +#if defined(ONE_TABLE) + ft_tab[i] = w; +#elif defined(FOUR_TABLES) + ft_tab[0][i] = w; + ft_tab[1][i] = upr(w,1); + ft_tab[2][i] = upr(w,2); + ft_tab[3][i] = upr(w,3); +#endif + inv_s_box[i] = b = FFinv(inv_affine((unsigned char)i)); + + w = bytes2word(b, 0, 0, 0); +#if defined(ONE_LR_TABLE) + il_tab[i] = w; +#elif defined(FOUR_LR_TABLES) + il_tab[0][i] = w; + il_tab[1][i] = upr(w,1); + il_tab[2][i] = upr(w,2); + il_tab[3][i] = upr(w,3); +#endif + w = bytes2word(FFmul0e(b), FFmul09(b), FFmul0d(b), FFmul0b(b)); +#if defined(ONE_TABLE) + it_tab[i] = w; +#elif defined(FOUR_TABLES) + it_tab[0][i] = w; + it_tab[1][i] = upr(w,1); + it_tab[2][i] = upr(w,2); + it_tab[3][i] =
upr(w,3); +#endif +#if defined(ONE_IM_TABLE) + im_tab[b] = w; +#elif defined(FOUR_IM_TABLES) + im_tab[0][b] = w; + im_tab[1][b] = upr(w,1); + im_tab[2][b] = upr(w,2); + im_tab[3][b] = upr(w,3); +#endif + + } +} + +#endif + +#define no_table(x,box,vf,rf,c) bytes2word( \ + box[bval(vf(x,0,c),rf(0,c))], \ + box[bval(vf(x,1,c),rf(1,c))], \ + box[bval(vf(x,2,c),rf(2,c))], \ + box[bval(vf(x,3,c),rf(3,c))]) + +#define one_table(x,op,tab,vf,rf,c) \ + ( tab[bval(vf(x,0,c),rf(0,c))] \ + ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \ + ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \ + ^ op(tab[bval(vf(x,3,c),rf(3,c))],3)) + +#define four_tables(x,tab,vf,rf,c) \ + ( tab[0][bval(vf(x,0,c),rf(0,c))] \ + ^ tab[1][bval(vf(x,1,c),rf(1,c))] \ + ^ tab[2][bval(vf(x,2,c),rf(2,c))] \ + ^ tab[3][bval(vf(x,3,c),rf(3,c))]) + +#define vf1(x,r,c) (x) +#define rf1(r,c) (r) +#define rf2(r,c) ((r-c)&3) + +#if defined(FOUR_LR_TABLES) +#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c) +#elif defined(ONE_LR_TABLE) +#define ls_box(x,c) one_table(x,upr,fl_tab,vf1,rf2,c) +#else +#define ls_box(x,c) no_table(x,s_box,vf1,rf2,c) +#endif + +#if defined(FOUR_IM_TABLES) +#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0) +#elif defined(ONE_IM_TABLE) +#define inv_mcol(x) one_table(x,upr,im_tab,vf1,rf1,0) +#else +#define inv_mcol(x) \ + (f9 = (x),f2 = FFmulX(f9), f4 = FFmulX(f2), f8 = FFmulX(f4), f9 ^= f8, \ + f2 ^= f4 ^ f8 ^ upr(f2 ^ f9,3) ^ upr(f4 ^ f9,2) ^ upr(f9,1)) +#endif + +// Subroutine to set the block size (if variable); accepted values are 16, 24 +// and 32 (bytes) or 128, 192 and 256 (bits), anything else selects 4 columns.
+ +#if defined(AES_BLOCK_SIZE) +#define nc (AES_BLOCK_SIZE / 4) +#else +#define nc (cx->aes_Ncol) + +void aes_set_blk(aes_context *cx, int n_bytes) +{ +#if !defined(FIXED_TABLES) + if(!tab_gen) { gen_tabs(); tab_gen = 1; } +#endif + + switch(n_bytes) { + case 32: /* bytes */ + case 256: /* bits */ + nc = 8; + break; + case 24: /* bytes */ + case 192: /* bits */ + nc = 6; + break; + case 16: /* bytes */ + case 128: /* bits */ + default: + nc = 4; + break; + } +} + +#endif + +// Initialise the key schedule from the user supplied key. The key +// length may be given in bytes (16, 24 or 32) or in bits (128, 192 or +// 256), giving Nk values of 4, 6 and 8 respectively; any other value is +// handled as Nk = 4. aes_d_key is derived from aes_e_key only when f is 0. + +#define mx(t,f) (*t++ = inv_mcol(*f),f++) +#define cp(t,f) *t++ = *f++ + +#if AES_BLOCK_SIZE == 16 +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s) +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s) +#elif AES_BLOCK_SIZE == 24 +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \ + cp(d,s); cp(d,s) +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \ + mx(d,s); mx(d,s) +#elif AES_BLOCK_SIZE == 32 +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \ + cp(d,s); cp(d,s); cp(d,s); cp(d,s) +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \ + mx(d,s); mx(d,s); mx(d,s); mx(d,s) +#else + +#define cpy(d,s) \ +switch(nc) \ +{ case 8: cp(d,s); cp(d,s); \ + case 6: cp(d,s); cp(d,s); \ + case 4: cp(d,s); cp(d,s); \ + cp(d,s); cp(d,s); \ +} + +#define mix(d,s) \ +switch(nc) \ +{ case 8: mx(d,s); mx(d,s); \ + case 6: mx(d,s); mx(d,s); \ + case 4: mx(d,s); mx(d,s); \ + mx(d,s); mx(d,s); \ +} + +#endif + +void aes_set_key(aes_context *cx, const unsigned char in_key[], int n_bytes, const int f) +{ u_int32_t *kf, *kt, rci; + +#if !defined(FIXED_TABLES) + if(!tab_gen) { gen_tabs(); tab_gen = 1; } +#endif + + switch(n_bytes) { + case 32: /* bytes */ + case 256: /* bits */ + cx->aes_Nkey = 8; + break; + case 24: /* bytes */ + case 192: /* bits */ +
cx->aes_Nkey = 6; + break; + case 16: /* bytes */ + case 128: /* bits */ + default: + cx->aes_Nkey = 4; + break; + } + + cx->aes_Nrnd = (cx->aes_Nkey > nc ? cx->aes_Nkey : nc) + 6; + + cx->aes_e_key[0] = word_in(in_key ); + cx->aes_e_key[1] = word_in(in_key + 4); + cx->aes_e_key[2] = word_in(in_key + 8); + cx->aes_e_key[3] = word_in(in_key + 12); + + kf = cx->aes_e_key; + kt = kf + nc * (cx->aes_Nrnd + 1) - cx->aes_Nkey; + rci = 0; + + switch(cx->aes_Nkey) + { + case 4: do + { kf[4] = kf[0] ^ ls_box(kf[3],3) ^ rcon_tab[rci++]; + kf[5] = kf[1] ^ kf[4]; + kf[6] = kf[2] ^ kf[5]; + kf[7] = kf[3] ^ kf[6]; + kf += 4; + } + while(kf < kt); + break; + + case 6: cx->aes_e_key[4] = word_in(in_key + 16); + cx->aes_e_key[5] = word_in(in_key + 20); + do + { kf[ 6] = kf[0] ^ ls_box(kf[5],3) ^ rcon_tab[rci++]; + kf[ 7] = kf[1] ^ kf[ 6]; + kf[ 8] = kf[2] ^ kf[ 7]; + kf[ 9] = kf[3] ^ kf[ 8]; + kf[10] = kf[4] ^ kf[ 9]; + kf[11] = kf[5] ^ kf[10]; + kf += 6; + } + while(kf < kt); + break; + + case 8: cx->aes_e_key[4] = word_in(in_key + 16); + cx->aes_e_key[5] = word_in(in_key + 20); + cx->aes_e_key[6] = word_in(in_key + 24); + cx->aes_e_key[7] = word_in(in_key + 28); + do + { kf[ 8] = kf[0] ^ ls_box(kf[7],3) ^ rcon_tab[rci++]; + kf[ 9] = kf[1] ^ kf[ 8]; + kf[10] = kf[2] ^ kf[ 9]; + kf[11] = kf[3] ^ kf[10]; + kf[12] = kf[4] ^ ls_box(kf[11],0); + kf[13] = kf[5] ^ kf[12]; + kf[14] = kf[6] ^ kf[13]; + kf[15] = kf[7] ^ kf[14]; + kf += 8; + } + while (kf < kt); + break; + } + + if(!f) + { u_int32_t i; + + kt = cx->aes_d_key + nc * cx->aes_Nrnd; + kf = cx->aes_e_key; + + cpy(kt, kf); kt -= 2 * nc; + + for(i = 1; i < cx->aes_Nrnd; ++i) + { +#if defined(ONE_TABLE) || defined(FOUR_TABLES) +#if !defined(ONE_IM_TABLE) && !defined(FOUR_IM_TABLES) + u_int32_t f2, f4, f8, f9; +#endif + mix(kt, kf); +#else + cpy(kt, kf); +#endif + kt -= 2 * nc; + } + + cpy(kt, kf); + } +} + +// y = output word, x = input word, r = row, c = column +// for r = 0, 1, 2 and 3 = column accessed for row r + +#if
defined(ARRAYS) +#define s(x,c) x[c] +#else +#define s(x,c) x##c +#endif + +// I am grateful to Frank Yellin for the following constructions +// which, given the column (c) of the output state variable that +// is being computed, return the input state variables which are +// needed for each row (r) of the state + +// For the fixed block size options, compilers reduce these two +// expressions to fixed variable references. For variable block +// size code conditional clauses will sometimes be returned + +#define unused 77 // Sunset Strip + +#define fwd_var(x,r,c) \ + ( r==0 ? \ + ( c==0 ? s(x,0) \ + : c==1 ? s(x,1) \ + : c==2 ? s(x,2) \ + : c==3 ? s(x,3) \ + : c==4 ? s(x,4) \ + : c==5 ? s(x,5) \ + : c==6 ? s(x,6) \ + : s(x,7)) \ + : r==1 ? \ + ( c==0 ? s(x,1) \ + : c==1 ? s(x,2) \ + : c==2 ? s(x,3) \ + : c==3 ? nc==4 ? s(x,0) : s(x,4) \ + : c==4 ? s(x,5) \ + : c==5 ? nc==8 ? s(x,6) : s(x,0) \ + : c==6 ? s(x,7) \ + : s(x,0)) \ + : r==2 ? \ + ( c==0 ? nc==8 ? s(x,3) : s(x,2) \ + : c==1 ? nc==8 ? s(x,4) : s(x,3) \ + : c==2 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \ + : c==3 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \ + : c==4 ? nc==8 ? s(x,7) : s(x,0) \ + : c==5 ? nc==8 ? s(x,0) : s(x,1) \ + : c==6 ? s(x,1) \ + : s(x,2)) \ + : \ + ( c==0 ? nc==8 ? s(x,4) : s(x,3) \ + : c==1 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \ + : c==2 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \ + : c==3 ? nc==4 ? s(x,2) : nc==8 ? s(x,7) : s(x,0) \ + : c==4 ? nc==8 ? s(x,0) : s(x,1) \ + : c==5 ? nc==8 ? s(x,1) : s(x,2) \ + : c==6 ? s(x,2) \ + : s(x,3))) + +#define inv_var(x,r,c) \ + ( r==0 ? \ + ( c==0 ? s(x,0) \ + : c==1 ? s(x,1) \ + : c==2 ? s(x,2) \ + : c==3 ? s(x,3) \ + : c==4 ? s(x,4) \ + : c==5 ? s(x,5) \ + : c==6 ? s(x,6) \ + : s(x,7)) \ + : r==1 ? \ + ( c==0 ? nc==4 ? s(x,3) : nc==8 ? s(x,7) : s(x,5) \ + : c==1 ? s(x,0) \ + : c==2 ? s(x,1) \ + : c==3 ? s(x,2) \ + : c==4 ? s(x,3) \ + : c==5 ? s(x,4) \ + : c==6 ? s(x,5) \ + : s(x,6)) \ + : r==2 ? \ + ( c==0 ? nc==4 ? s(x,2) : nc==8 ? 
s(x,5) : s(x,4) \ + : c==1 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \ + : c==2 ? nc==8 ? s(x,7) : s(x,0) \ + : c==3 ? nc==8 ? s(x,0) : s(x,1) \ + : c==4 ? nc==8 ? s(x,1) : s(x,2) \ + : c==5 ? nc==8 ? s(x,2) : s(x,3) \ + : c==6 ? s(x,3) \ + : s(x,4)) \ + : \ + ( c==0 ? nc==4 ? s(x,1) : nc==8 ? s(x,4) : s(x,3) \ + : c==1 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \ + : c==2 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \ + : c==3 ? nc==8 ? s(x,7) : s(x,0) \ + : c==4 ? nc==8 ? s(x,0) : s(x,1) \ + : c==5 ? nc==8 ? s(x,1) : s(x,2) \ + : c==6 ? s(x,2) \ + : s(x,3))) + +#define si(y,x,k,c) s(y,c) = word_in(x + 4 * c) ^ k[c] +#define so(y,x,c) word_out(y + 4 * c, s(x,c)) + +#if defined(FOUR_TABLES) +#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,ft_tab,fwd_var,rf1,c) +#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,it_tab,inv_var,rf1,c) +#elif defined(ONE_TABLE) +#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,ft_tab,fwd_var,rf1,c) +#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,it_tab,inv_var,rf1,c) +#else +#define fwd_rnd(y,x,k,c) s(y,c) = fwd_mcol(no_table(x,s_box,fwd_var,rf1,c)) ^ (k)[c] +#define inv_rnd(y,x,k,c) s(y,c) = inv_mcol(no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]) +#endif + +#if defined(FOUR_LR_TABLES) +#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,fl_tab,fwd_var,rf1,c) +#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,il_tab,inv_var,rf1,c) +#elif defined(ONE_LR_TABLE) +#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,fl_tab,fwd_var,rf1,c) +#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,il_tab,inv_var,rf1,c) +#else +#define fwd_lrnd(y,x,k,c) s(y,c) = no_table(x,s_box,fwd_var,rf1,c) ^ (k)[c] +#define inv_lrnd(y,x,k,c) s(y,c) = no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c] +#endif + +#if AES_BLOCK_SIZE == 16 + +#if defined(ARRAYS) +#define locals(y,x) x[4],y[4] +#else +#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3 +// the following defines prevent the compiler requiring 
the declaration +// of generated but unused variables in the fwd_var and inv_var macros +#define b04 unused +#define b05 unused +#define b06 unused +#define b07 unused +#define b14 unused +#define b15 unused +#define b16 unused +#define b17 unused +#endif +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ + s(y,2) = s(x,2); s(y,3) = s(x,3); +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3) +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3) +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3) + +#elif AES_BLOCK_SIZE == 24 + +#if defined(ARRAYS) +#define locals(y,x) x[6],y[6] +#else +#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5, \ + y##0,y##1,y##2,y##3,y##4,y##5 +#define b06 unused +#define b07 unused +#define b16 unused +#define b17 unused +#endif +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ + s(y,2) = s(x,2); s(y,3) = s(x,3); \ + s(y,4) = s(x,4); s(y,5) = s(x,5); +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); \ + si(y,x,k,3); si(y,x,k,4); si(y,x,k,5) +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); \ + so(y,x,3); so(y,x,4); so(y,x,5) +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); \ + rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5) +#else + +#if defined(ARRAYS) +#define locals(y,x) x[8],y[8] +#else +#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5,x##6,x##7, \ + y##0,y##1,y##2,y##3,y##4,y##5,y##6,y##7 +#endif +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ + s(y,2) = s(x,2); s(y,3) = s(x,3); \ + s(y,4) = s(x,4); s(y,5) = s(x,5); \ + s(y,6) = s(x,6); s(y,7) = s(x,7); + +#if AES_BLOCK_SIZE == 32 + +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); \ + si(y,x,k,4); si(y,x,k,5); si(y,x,k,6); si(y,x,k,7) +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); \ + so(y,x,4); so(y,x,5); so(y,x,6); so(y,x,7) +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); \ + rm(y,x,k,4); 
rm(y,x,k,5); rm(y,x,k,6); rm(y,x,k,7) +#else + +#define state_in(y,x,k) \ +switch(nc) \ +{ case 8: si(y,x,k,7); si(y,x,k,6); \ + case 6: si(y,x,k,5); si(y,x,k,4); \ + case 4: si(y,x,k,3); si(y,x,k,2); \ + si(y,x,k,1); si(y,x,k,0); \ +} + +#define state_out(y,x) \ +switch(nc) \ +{ case 8: so(y,x,7); so(y,x,6); \ + case 6: so(y,x,5); so(y,x,4); \ + case 4: so(y,x,3); so(y,x,2); \ + so(y,x,1); so(y,x,0); \ +} + +#if defined(FAST_VARIABLE) + +#define round(rm,y,x,k) \ +switch(nc) \ +{ case 8: rm(y,x,k,7); rm(y,x,k,6); \ + rm(y,x,k,5); rm(y,x,k,4); \ + rm(y,x,k,3); rm(y,x,k,2); \ + rm(y,x,k,1); rm(y,x,k,0); \ + break; \ + case 6: rm(y,x,k,5); rm(y,x,k,4); \ + rm(y,x,k,3); rm(y,x,k,2); \ + rm(y,x,k,1); rm(y,x,k,0); \ + break; \ + case 4: rm(y,x,k,3); rm(y,x,k,2); \ + rm(y,x,k,1); rm(y,x,k,0); \ + break; \ +} +#else + +#define round(rm,y,x,k) \ +switch(nc) \ +{ case 8: rm(y,x,k,7); rm(y,x,k,6); \ + case 6: rm(y,x,k,5); rm(y,x,k,4); \ + case 4: rm(y,x,k,3); rm(y,x,k,2); \ + rm(y,x,k,1); rm(y,x,k,0); \ +} + +#endif + +#endif +#endif + +void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[]) +{ u_int32_t locals(b0, b1); + const u_int32_t *kp = cx->aes_e_key; + +#if !defined(ONE_TABLE) && !defined(FOUR_TABLES) + u_int32_t f2; +#endif + + state_in(b0, in_blk, kp); kp += nc; + +#if defined(UNROLL) + + switch(cx->aes_Nrnd) + { + case 14: round(fwd_rnd, b1, b0, kp ); + round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc; + case 12: round(fwd_rnd, b1, b0, kp ); + round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc; + case 10: round(fwd_rnd, b1, b0, kp ); + round(fwd_rnd, b0, b1, kp + nc); + round(fwd_rnd, b1, b0, kp + 2 * nc); + round(fwd_rnd, b0, b1, kp + 3 * nc); + round(fwd_rnd, b1, b0, kp + 4 * nc); + round(fwd_rnd, b0, b1, kp + 5 * nc); + round(fwd_rnd, b1, b0, kp + 6 * nc); + round(fwd_rnd, b0, b1, kp + 7 * nc); + round(fwd_rnd, b1, b0, kp + 8 * nc); + round(fwd_lrnd, b0, b1, kp + 9 * nc); + } + +#elif defined(PARTIAL_UNROLL) + { u_int32_t rnd; + 
+ for(rnd = 0; rnd < (cx->aes_Nrnd >> 1) - 1; ++rnd) + { + round(fwd_rnd, b1, b0, kp); + round(fwd_rnd, b0, b1, kp + nc); kp += 2 * nc; + } + + round(fwd_rnd, b1, b0, kp); + round(fwd_lrnd, b0, b1, kp + nc); + } +#else + { u_int32_t rnd; + + for(rnd = 0; rnd < cx->aes_Nrnd - 1; ++rnd) + { + round(fwd_rnd, b1, b0, kp); + l_copy(b0, b1); kp += nc; + } + + round(fwd_lrnd, b0, b1, kp); + } +#endif + + state_out(out_blk, b0); +} + +void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[]) +{ u_int32_t locals(b0, b1); + const u_int32_t *kp = cx->aes_d_key; + +#if !defined(ONE_TABLE) && !defined(FOUR_TABLES) + u_int32_t f2, f4, f8, f9; +#endif + + state_in(b0, in_blk, kp); kp += nc; + +#if defined(UNROLL) + + switch(cx->aes_Nrnd) + { + case 14: round(inv_rnd, b1, b0, kp ); + round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc; + case 12: round(inv_rnd, b1, b0, kp ); + round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc; + case 10: round(inv_rnd, b1, b0, kp ); + round(inv_rnd, b0, b1, kp + nc); + round(inv_rnd, b1, b0, kp + 2 * nc); + round(inv_rnd, b0, b1, kp + 3 * nc); + round(inv_rnd, b1, b0, kp + 4 * nc); + round(inv_rnd, b0, b1, kp + 5 * nc); + round(inv_rnd, b1, b0, kp + 6 * nc); + round(inv_rnd, b0, b1, kp + 7 * nc); + round(inv_rnd, b1, b0, kp + 8 * nc); + round(inv_lrnd, b0, b1, kp + 9 * nc); + } + +#elif defined(PARTIAL_UNROLL) + { u_int32_t rnd; + + for(rnd = 0; rnd < (cx->aes_Nrnd >> 1) - 1; ++rnd) + { + round(inv_rnd, b1, b0, kp); + round(inv_rnd, b0, b1, kp + nc); kp += 2 * nc; + } + + round(inv_rnd, b1, b0, kp); + round(inv_lrnd, b0, b1, kp + nc); + } +#else + { u_int32_t rnd; + + for(rnd = 0; rnd < cx->aes_Nrnd - 1; ++rnd) + { + round(inv_rnd, b1, b0, kp); + l_copy(b0, b1); kp += nc; + } + + round(inv_lrnd, b0, b1, kp); + } +#endif + + state_out(out_blk, b0); +} diff -urN linux-2.4.28/drivers/misc/aes.h linux-2.4.28-loop-AES-v3.0b/drivers/misc/aes.h --- linux-2.4.28/drivers/misc/aes.h Thu Jan 1 01:00:00 1970 +++ 
linux-2.4.28-loop-AES-v3.0b/drivers/misc/aes.h Sun Feb 6 18:45:39 2005 @@ -0,0 +1,113 @@ +// I retain copyright in this code but I encourage its free use provided +// that I don't carry any responsibility for the results. I am especially +// happy to see it used in free and open source software. If you do use +// it I would appreciate an acknowledgement of its origin in the code or +// the product that results and I would also appreciate knowing a little +// about the use to which it is being put. I am grateful to Frank Yellin +// for some ideas that are used in this implementation. +// +// Dr B. R. Gladman 6th April 2001. +// +// This is an implementation of the AES encryption algorithm (Rijndael) +// designed by Joan Daemen and Vincent Rijmen. This version is designed +// to provide both fixed and dynamic block and key lengths and can also +// run with either big or little endian internal byte order (see aes.h). +// It inputs block and key lengths in bytes with the legal values being +// 16, 24 and 32. + +/* + * Modified by Jari Ruusu, May 1 2001 + * - Fixed some compile warnings, code was ok but gcc warned anyway. + * - Changed basic types: byte -> unsigned char, word -> u_int32_t + * - Major name space cleanup: Names visible to outside now begin + * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c + * - Removed C++ and DLL support as part of name space cleanup. + * - Eliminated unnecessary recomputation of tables. (actual bug fix) + * - Merged precomputed constant tables to aes.c file. + * - Removed data alignment restrictions for portability reasons. + * - Made block and key lengths accept bit count (128/192/256) + * as well as byte count (16/24/32). + * - Removed all error checks. This change also eliminated the need + * to preinitialize the context struct to zero. + * - Removed some totally unused constants. 
+ */ + +#ifndef _AES_H +#define _AES_H + +#include +#include +#include +#include + +// CONFIGURATION OPTIONS (see also aes.c) +// +// Define AES_BLOCK_SIZE to set the cipher block size (16, 24 or 32) or +// leave this undefined for dynamically variable block size (this will +// result in much slower code). +// IMPORTANT NOTE: AES_BLOCK_SIZE is in BYTES (16, 24, 32 or undefined). If +// left undefined a slower version providing variable block length is compiled + +#define AES_BLOCK_SIZE 16 + +// The number of key schedule words for different block and key lengths +// allowing for method of computation which requires the length to be a +// multiple of the key length +// +// Nk = 4 6 8 +// ------------- +// Nb = 4 | 60 60 64 +// 6 | 96 90 96 +// 8 | 120 120 120 + +#if !defined(AES_BLOCK_SIZE) || (AES_BLOCK_SIZE == 32) +#define AES_KS_LENGTH 120 +#define AES_RC_LENGTH 29 +#else +#define AES_KS_LENGTH 4 * AES_BLOCK_SIZE +#define AES_RC_LENGTH (9 * AES_BLOCK_SIZE) / 8 - 8 +#endif + +typedef struct +{ + u_int32_t aes_Nkey; // the number of words in the key input block + u_int32_t aes_Nrnd; // the number of cipher rounds + u_int32_t aes_e_key[AES_KS_LENGTH]; // the encryption key schedule + u_int32_t aes_d_key[AES_KS_LENGTH]; // the decryption key schedule +#if !defined(AES_BLOCK_SIZE) + u_int32_t aes_Ncol; // the number of columns in the cipher state +#endif +} aes_context; + +// avoid global name conflict with mainline kernel +#define aes_set_key _aes_set_key +#define aes_encrypt _aes_encrypt +#define aes_decrypt _aes_decrypt + +// THE CIPHER INTERFACE + +#if !defined(AES_BLOCK_SIZE) +extern void aes_set_blk(aes_context *, const int); +#endif + +#if defined(CONFIG_X86) || defined(CONFIG_X86_64) + asmlinkage +#endif +extern void aes_set_key(aes_context *, const unsigned char [], const int, const int); + +#if defined(CONFIG_X86) || defined(CONFIG_X86_64) + asmlinkage +#endif +extern void aes_encrypt(const aes_context *, const unsigned char [], unsigned char []); + +#if 
defined(CONFIG_X86) || defined(CONFIG_X86_64) + asmlinkage +#endif +extern void aes_decrypt(const aes_context *, const unsigned char [], unsigned char []); + +// The block length inputs to aes_set_blk and aes_set_key are in numbers +// of bytes or bits. The calls to subroutines must be made in the above +// order but multiple calls can be made without repeating earlier calls +// if their parameters have not changed. + +#endif // _AES_H diff -urN linux-2.4.28/drivers/misc/crypto-ksym.c linux-2.4.28-loop-AES-v3.0b/drivers/misc/crypto-ksym.c --- linux-2.4.28/drivers/misc/crypto-ksym.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.28-loop-AES-v3.0b/drivers/misc/crypto-ksym.c Sun Feb 6 18:45:39 2005 @@ -0,0 +1,7 @@ +#include <linux/module.h> +#include "aes.h" +#include "md5.h" +EXPORT_SYMBOL_NOVERS(aes_set_key); +EXPORT_SYMBOL_NOVERS(aes_encrypt); +EXPORT_SYMBOL_NOVERS(aes_decrypt); +EXPORT_SYMBOL_NOVERS(md5_transform_CPUbyteorder); diff -urN linux-2.4.28/drivers/misc/md5-amd64.S linux-2.4.28-loop-AES-v3.0b/drivers/misc/md5-amd64.S --- linux-2.4.28/drivers/misc/md5-amd64.S Thu Jan 1 01:00:00 1970 +++ linux-2.4.28-loop-AES-v3.0b/drivers/misc/md5-amd64.S Sun Feb 6 18:45:39 2005 @@ -0,0 +1,200 @@ +// +// md5-amd64.S +// +// Written by Jari Ruusu, October 1 2003 +// +// Copyright 2003 by Jari Ruusu. +// Redistribution of this file is permitted under the GNU Public License. +// + +// Modified by Jari Ruusu, June 12 2004 +// - Converted 32 bit x86 code to 64 bit AMD64 code + +// A MD5 transform implementation for AMD64 compatible processors. +// This code does not preserve the rax, rcx, rdx, rsi, rdi or r8-r11 +// registers or the arithmetic status flags. However, the rbx, rbp and +// r12-r15 registers are preserved across calls. 
+ +// void md5_transform_CPUbyteorder(u_int32_t *hash, u_int32_t *in) + +#if defined(USE_UNDERLINE) +# define md5_transform_CPUbyteorder _md5_transform_CPUbyteorder +#endif +#if !defined(ALIGN64BYTES) +# define ALIGN64BYTES 64 +#endif + + .file "md5-amd64.S" + .globl md5_transform_CPUbyteorder + +// rdi = pointer to hash[4] array which is read and written +// rsi = pointer to in[16] array which is read only + + .text + .align ALIGN64BYTES +md5_transform_CPUbyteorder: + movl 12(%rdi),%eax + movl 8(%rdi),%ecx + movl (%rdi),%r8d + movl 4(%rdi),%r9d + movl (%rsi),%r10d + prefetcht0 60(%rsi) + movl %eax,%edx + xorl %ecx,%eax + +#define REPEAT1(p1w,p2x,p3z,p4c,p5s,p6Nin,p7Nz,p8Ny) \ + addl $p4c,p1w ;\ + andl p2x,%eax ;\ + addl %r10d,p1w ;\ + xorl p3z,%eax ;\ + movl p6Nin*4(%rsi),%r10d ;\ + addl %eax,p1w ;\ + movl p7Nz,%eax ;\ + roll $p5s,p1w ;\ + xorl p8Ny,%eax ;\ + addl p2x,p1w + + REPEAT1(%r8d,%r9d,%edx,0xd76aa478, 7, 1,%ecx,%r9d) + REPEAT1(%edx,%r8d,%ecx,0xe8c7b756,12, 2,%r9d,%r8d) + REPEAT1(%ecx,%edx,%r9d,0x242070db,17, 3,%r8d,%edx) + REPEAT1(%r9d,%ecx,%r8d,0xc1bdceee,22, 4,%edx,%ecx) + REPEAT1(%r8d,%r9d,%edx,0xf57c0faf, 7, 5,%ecx,%r9d) + REPEAT1(%edx,%r8d,%ecx,0x4787c62a,12, 6,%r9d,%r8d) + REPEAT1(%ecx,%edx,%r9d,0xa8304613,17, 7,%r8d,%edx) + REPEAT1(%r9d,%ecx,%r8d,0xfd469501,22, 8,%edx,%ecx) + REPEAT1(%r8d,%r9d,%edx,0x698098d8, 7, 9,%ecx,%r9d) + REPEAT1(%edx,%r8d,%ecx,0x8b44f7af,12,10,%r9d,%r8d) + REPEAT1(%ecx,%edx,%r9d,0xffff5bb1,17,11,%r8d,%edx) + REPEAT1(%r9d,%ecx,%r8d,0x895cd7be,22,12,%edx,%ecx) + REPEAT1(%r8d,%r9d,%edx,0x6b901122, 7,13,%ecx,%r9d) + REPEAT1(%edx,%r8d,%ecx,0xfd987193,12,14,%r9d,%r8d) + REPEAT1(%ecx,%edx,%r9d,0xa679438e,17,15,%r8d,%edx) + + addl $0x49b40821,%r9d + andl %ecx,%eax + addl %r10d,%r9d + xorl %r8d,%eax + movl 1*4(%rsi),%r10d + addl %eax,%r9d + movl %ecx,%eax + roll $22,%r9d + addl %ecx,%r9d + +#define REPEAT2(p1w,p2x,p3y,p4z,p5c,p6s,p7Nin,p8Ny) \ + xorl p2x,%eax ;\ + addl $p5c,p1w ;\ + andl p4z,%eax ;\ + addl %r10d,p1w ;\ + xorl 
p3y,%eax ;\ + movl p7Nin*4(%rsi),%r10d ;\ + addl %eax,p1w ;\ + movl p8Ny,%eax ;\ + roll $p6s,p1w ;\ + addl p2x,p1w + + REPEAT2(%r8d,%r9d,%ecx,%edx,0xf61e2562, 5, 6,%r9d) + REPEAT2(%edx,%r8d,%r9d,%ecx,0xc040b340, 9,11,%r8d) + REPEAT2(%ecx,%edx,%r8d,%r9d,0x265e5a51,14, 0,%edx) + REPEAT2(%r9d,%ecx,%edx,%r8d,0xe9b6c7aa,20, 5,%ecx) + REPEAT2(%r8d,%r9d,%ecx,%edx,0xd62f105d, 5,10,%r9d) + REPEAT2(%edx,%r8d,%r9d,%ecx,0x02441453, 9,15,%r8d) + REPEAT2(%ecx,%edx,%r8d,%r9d,0xd8a1e681,14, 4,%edx) + REPEAT2(%r9d,%ecx,%edx,%r8d,0xe7d3fbc8,20, 9,%ecx) + REPEAT2(%r8d,%r9d,%ecx,%edx,0x21e1cde6, 5,14,%r9d) + REPEAT2(%edx,%r8d,%r9d,%ecx,0xc33707d6, 9, 3,%r8d) + REPEAT2(%ecx,%edx,%r8d,%r9d,0xf4d50d87,14, 8,%edx) + REPEAT2(%r9d,%ecx,%edx,%r8d,0x455a14ed,20,13,%ecx) + REPEAT2(%r8d,%r9d,%ecx,%edx,0xa9e3e905, 5, 2,%r9d) + REPEAT2(%edx,%r8d,%r9d,%ecx,0xfcefa3f8, 9, 7,%r8d) + REPEAT2(%ecx,%edx,%r8d,%r9d,0x676f02d9,14,12,%edx) + + xorl %ecx,%eax + addl $0x8d2a4c8a,%r9d + andl %r8d,%eax + addl %r10d,%r9d + xorl %edx,%eax + movl 5*4(%rsi),%r10d + addl %eax,%r9d + movl %ecx,%eax + roll $20,%r9d + xorl %edx,%eax + addl %ecx,%r9d + +#define REPEAT3(p1w,p2x,p3c,p4s,p5Nin,p6Ny,p7Nz) \ + addl $p3c,p1w ;\ + xorl p2x,%eax ;\ + addl %r10d,p1w ;\ + movl p5Nin*4(%rsi),%r10d ;\ + addl %eax,p1w ;\ + movl p6Ny,%eax ;\ + roll $p4s,p1w ;\ + xorl p7Nz,%eax ;\ + addl p2x,p1w + + REPEAT3(%r8d,%r9d,0xfffa3942, 4, 8,%r9d,%ecx) + REPEAT3(%edx,%r8d,0x8771f681,11,11,%r8d,%r9d) + REPEAT3(%ecx,%edx,0x6d9d6122,16,14,%edx,%r8d) + REPEAT3(%r9d,%ecx,0xfde5380c,23, 1,%ecx,%edx) + REPEAT3(%r8d,%r9d,0xa4beea44, 4, 4,%r9d,%ecx) + REPEAT3(%edx,%r8d,0x4bdecfa9,11, 7,%r8d,%r9d) + REPEAT3(%ecx,%edx,0xf6bb4b60,16,10,%edx,%r8d) + REPEAT3(%r9d,%ecx,0xbebfbc70,23,13,%ecx,%edx) + REPEAT3(%r8d,%r9d,0x289b7ec6, 4, 0,%r9d,%ecx) + REPEAT3(%edx,%r8d,0xeaa127fa,11, 3,%r8d,%r9d) + REPEAT3(%ecx,%edx,0xd4ef3085,16, 6,%edx,%r8d) + REPEAT3(%r9d,%ecx,0x04881d05,23, 9,%ecx,%edx) + REPEAT3(%r8d,%r9d,0xd9d4d039, 4,12,%r9d,%ecx) + 
REPEAT3(%edx,%r8d,0xe6db99e5,11,15,%r8d,%r9d) + REPEAT3(%ecx,%edx,0x1fa27cf8,16, 2,%edx,%r8d) + + addl $0xc4ac5665,%r9d + xorl %ecx,%eax + addl %r10d,%r9d + movl (%rsi),%r10d + addl %eax,%r9d + movl %edx,%eax + roll $23,%r9d + notl %eax + addl %ecx,%r9d + +#define REPEAT4(p1w,p2x,p3y,p4c,p5s,p6Nin,p7Nz) \ + addl $p4c,p1w ;\ + orl p2x,%eax ;\ + addl %r10d,p1w ;\ + xorl p3y,%eax ;\ + movl p6Nin*4(%rsi),%r10d ;\ + addl %eax,p1w ;\ + movl p7Nz,%eax ;\ + roll $p5s,p1w ;\ + notl %eax ;\ + addl p2x,p1w + + REPEAT4(%r8d,%r9d,%ecx,0xf4292244, 6, 7,%ecx) + REPEAT4(%edx,%r8d,%r9d,0x432aff97,10,14,%r9d) + REPEAT4(%ecx,%edx,%r8d,0xab9423a7,15, 5,%r8d) + REPEAT4(%r9d,%ecx,%edx,0xfc93a039,21,12,%edx) + REPEAT4(%r8d,%r9d,%ecx,0x655b59c3, 6, 3,%ecx) + REPEAT4(%edx,%r8d,%r9d,0x8f0ccc92,10,10,%r9d) + REPEAT4(%ecx,%edx,%r8d,0xffeff47d,15, 1,%r8d) + REPEAT4(%r9d,%ecx,%edx,0x85845dd1,21, 8,%edx) + REPEAT4(%r8d,%r9d,%ecx,0x6fa87e4f, 6,15,%ecx) + REPEAT4(%edx,%r8d,%r9d,0xfe2ce6e0,10, 6,%r9d) + REPEAT4(%ecx,%edx,%r8d,0xa3014314,15,13,%r8d) + REPEAT4(%r9d,%ecx,%edx,0x4e0811a1,21, 4,%edx) + REPEAT4(%r8d,%r9d,%ecx,0xf7537e82, 6,11,%ecx) + REPEAT4(%edx,%r8d,%r9d,0xbd3af235,10, 2,%r9d) + REPEAT4(%ecx,%edx,%r8d,0x2ad7d2bb,15, 9,%r8d) + + addl $0xeb86d391,%r9d + orl %ecx,%eax + addl %r10d,%r9d + xorl %edx,%eax + addl %eax,%r9d + roll $21,%r9d + addl %ecx,%r9d + + addl %r8d,(%rdi) + addl %r9d,4(%rdi) + addl %ecx,8(%rdi) + addl %edx,12(%rdi) + ret diff -urN linux-2.4.28/drivers/misc/md5-x86.S linux-2.4.28-loop-AES-v3.0b/drivers/misc/md5-x86.S --- linux-2.4.28/drivers/misc/md5-x86.S Thu Jan 1 01:00:00 1970 +++ linux-2.4.28-loop-AES-v3.0b/drivers/misc/md5-x86.S Sun Feb 6 18:45:39 2005 @@ -0,0 +1,207 @@ +// +// md5-x86.S +// +// Written by Jari Ruusu, October 1 2003 +// +// Copyright 2003 by Jari Ruusu. +// Redistribution of this file is permitted under the GNU Public License. +// + +// A MD5 transform implementation for x86 compatible processors. 
This +// version uses i386 instruction set but instruction scheduling is optimized +// for Pentium-2. This code does not preserve the eax, ecx or edx registers +// or the arithmetic status flags. However, the ebx, esi, edi, and ebp +// registers are preserved across calls. + +// void md5_transform_CPUbyteorder(u_int32_t *hash, u_int32_t *in) + +#if defined(USE_UNDERLINE) +# define md5_transform_CPUbyteorder _md5_transform_CPUbyteorder +#endif +#if !defined(ALIGN32BYTES) +# define ALIGN32BYTES 32 +#endif + + .file "md5-x86.S" + .globl md5_transform_CPUbyteorder + .text + .align ALIGN32BYTES + +md5_transform_CPUbyteorder: + push %ebp + mov 4+4(%esp),%eax // pointer to 'hash' input + mov 8+4(%esp),%ebp // pointer to 'in' array + push %ebx + push %esi + push %edi + + mov (%eax),%esi + mov 4(%eax),%edi + mov 8(%eax),%ecx + mov 12(%eax),%eax + mov (%ebp),%ebx + mov %eax,%edx + xor %ecx,%eax + +#define REPEAT1(p1w,p2x,p3z,p4c,p5s,p6Nin,p7Nz,p8Ny) \ + add $p4c,p1w ;\ + and p2x,%eax ;\ + add %ebx,p1w ;\ + xor p3z,%eax ;\ + mov p6Nin*4(%ebp),%ebx ;\ + add %eax,p1w ;\ + mov p7Nz,%eax ;\ + rol $p5s,p1w ;\ + xor p8Ny,%eax ;\ + add p2x,p1w + + REPEAT1(%esi,%edi,%edx,0xd76aa478, 7, 1,%ecx,%edi) + REPEAT1(%edx,%esi,%ecx,0xe8c7b756,12, 2,%edi,%esi) + REPEAT1(%ecx,%edx,%edi,0x242070db,17, 3,%esi,%edx) + REPEAT1(%edi,%ecx,%esi,0xc1bdceee,22, 4,%edx,%ecx) + REPEAT1(%esi,%edi,%edx,0xf57c0faf, 7, 5,%ecx,%edi) + REPEAT1(%edx,%esi,%ecx,0x4787c62a,12, 6,%edi,%esi) + REPEAT1(%ecx,%edx,%edi,0xa8304613,17, 7,%esi,%edx) + REPEAT1(%edi,%ecx,%esi,0xfd469501,22, 8,%edx,%ecx) + REPEAT1(%esi,%edi,%edx,0x698098d8, 7, 9,%ecx,%edi) + REPEAT1(%edx,%esi,%ecx,0x8b44f7af,12,10,%edi,%esi) + REPEAT1(%ecx,%edx,%edi,0xffff5bb1,17,11,%esi,%edx) + REPEAT1(%edi,%ecx,%esi,0x895cd7be,22,12,%edx,%ecx) + REPEAT1(%esi,%edi,%edx,0x6b901122, 7,13,%ecx,%edi) + REPEAT1(%edx,%esi,%ecx,0xfd987193,12,14,%edi,%esi) + REPEAT1(%ecx,%edx,%edi,0xa679438e,17,15,%esi,%edx) + + add $0x49b40821,%edi + and %ecx,%eax + add %ebx,%edi +
xor %esi,%eax + mov 1*4(%ebp),%ebx + add %eax,%edi + mov %ecx,%eax + rol $22,%edi + add %ecx,%edi + +#define REPEAT2(p1w,p2x,p3y,p4z,p5c,p6s,p7Nin,p8Ny) \ + xor p2x,%eax ;\ + add $p5c,p1w ;\ + and p4z,%eax ;\ + add %ebx,p1w ;\ + xor p3y,%eax ;\ + mov p7Nin*4(%ebp),%ebx ;\ + add %eax,p1w ;\ + mov p8Ny,%eax ;\ + rol $p6s,p1w ;\ + add p2x,p1w + + REPEAT2(%esi,%edi,%ecx,%edx,0xf61e2562, 5, 6,%edi) + REPEAT2(%edx,%esi,%edi,%ecx,0xc040b340, 9,11,%esi) + REPEAT2(%ecx,%edx,%esi,%edi,0x265e5a51,14, 0,%edx) + REPEAT2(%edi,%ecx,%edx,%esi,0xe9b6c7aa,20, 5,%ecx) + REPEAT2(%esi,%edi,%ecx,%edx,0xd62f105d, 5,10,%edi) + REPEAT2(%edx,%esi,%edi,%ecx,0x02441453, 9,15,%esi) + REPEAT2(%ecx,%edx,%esi,%edi,0xd8a1e681,14, 4,%edx) + REPEAT2(%edi,%ecx,%edx,%esi,0xe7d3fbc8,20, 9,%ecx) + REPEAT2(%esi,%edi,%ecx,%edx,0x21e1cde6, 5,14,%edi) + REPEAT2(%edx,%esi,%edi,%ecx,0xc33707d6, 9, 3,%esi) + REPEAT2(%ecx,%edx,%esi,%edi,0xf4d50d87,14, 8,%edx) + REPEAT2(%edi,%ecx,%edx,%esi,0x455a14ed,20,13,%ecx) + REPEAT2(%esi,%edi,%ecx,%edx,0xa9e3e905, 5, 2,%edi) + REPEAT2(%edx,%esi,%edi,%ecx,0xfcefa3f8, 9, 7,%esi) + REPEAT2(%ecx,%edx,%esi,%edi,0x676f02d9,14,12,%edx) + + xor %ecx,%eax + add $0x8d2a4c8a,%edi + and %esi,%eax + add %ebx,%edi + xor %edx,%eax + mov 5*4(%ebp),%ebx + add %eax,%edi + mov %ecx,%eax + rol $20,%edi + xor %edx,%eax + add %ecx,%edi + +#define REPEAT3(p1w,p2x,p3c,p4s,p5Nin,p6Ny,p7Nz) \ + add $p3c,p1w ;\ + xor p2x,%eax ;\ + add %ebx,p1w ;\ + mov p5Nin*4(%ebp),%ebx ;\ + add %eax,p1w ;\ + mov p6Ny,%eax ;\ + rol $p4s,p1w ;\ + xor p7Nz,%eax ;\ + add p2x,p1w + + REPEAT3(%esi,%edi,0xfffa3942, 4, 8,%edi,%ecx) + REPEAT3(%edx,%esi,0x8771f681,11,11,%esi,%edi) + REPEAT3(%ecx,%edx,0x6d9d6122,16,14,%edx,%esi) + REPEAT3(%edi,%ecx,0xfde5380c,23, 1,%ecx,%edx) + REPEAT3(%esi,%edi,0xa4beea44, 4, 4,%edi,%ecx) + REPEAT3(%edx,%esi,0x4bdecfa9,11, 7,%esi,%edi) + REPEAT3(%ecx,%edx,0xf6bb4b60,16,10,%edx,%esi) + REPEAT3(%edi,%ecx,0xbebfbc70,23,13,%ecx,%edx) + REPEAT3(%esi,%edi,0x289b7ec6, 4, 0,%edi,%ecx) + 
REPEAT3(%edx,%esi,0xeaa127fa,11, 3,%esi,%edi) + REPEAT3(%ecx,%edx,0xd4ef3085,16, 6,%edx,%esi) + REPEAT3(%edi,%ecx,0x04881d05,23, 9,%ecx,%edx) + REPEAT3(%esi,%edi,0xd9d4d039, 4,12,%edi,%ecx) + REPEAT3(%edx,%esi,0xe6db99e5,11,15,%esi,%edi) + REPEAT3(%ecx,%edx,0x1fa27cf8,16, 2,%edx,%esi) + + add $0xc4ac5665,%edi + xor %ecx,%eax + add %ebx,%edi + mov (%ebp),%ebx + add %eax,%edi + mov %edx,%eax + rol $23,%edi + not %eax + add %ecx,%edi + +#define REPEAT4(p1w,p2x,p3y,p4c,p5s,p6Nin,p7Nz) \ + add $p4c,p1w ;\ + or p2x,%eax ;\ + add %ebx,p1w ;\ + xor p3y,%eax ;\ + mov p6Nin*4(%ebp),%ebx ;\ + add %eax,p1w ;\ + mov p7Nz,%eax ;\ + rol $p5s,p1w ;\ + not %eax ;\ + add p2x,p1w + + REPEAT4(%esi,%edi,%ecx,0xf4292244, 6, 7,%ecx) + REPEAT4(%edx,%esi,%edi,0x432aff97,10,14,%edi) + REPEAT4(%ecx,%edx,%esi,0xab9423a7,15, 5,%esi) + REPEAT4(%edi,%ecx,%edx,0xfc93a039,21,12,%edx) + REPEAT4(%esi,%edi,%ecx,0x655b59c3, 6, 3,%ecx) + REPEAT4(%edx,%esi,%edi,0x8f0ccc92,10,10,%edi) + REPEAT4(%ecx,%edx,%esi,0xffeff47d,15, 1,%esi) + REPEAT4(%edi,%ecx,%edx,0x85845dd1,21, 8,%edx) + REPEAT4(%esi,%edi,%ecx,0x6fa87e4f, 6,15,%ecx) + REPEAT4(%edx,%esi,%edi,0xfe2ce6e0,10, 6,%edi) + REPEAT4(%ecx,%edx,%esi,0xa3014314,15,13,%esi) + REPEAT4(%edi,%ecx,%edx,0x4e0811a1,21, 4,%edx) + REPEAT4(%esi,%edi,%ecx,0xf7537e82, 6,11,%ecx) + REPEAT4(%edx,%esi,%edi,0xbd3af235,10, 2,%edi) + REPEAT4(%ecx,%edx,%esi,0x2ad7d2bb,15, 9,%esi) + + add $0xeb86d391,%edi + or %ecx,%eax + add %ebx,%edi + xor %edx,%eax + mov 4+16(%esp),%ebp // pointer to 'hash' output + add %eax,%edi + rol $21,%edi + add %ecx,%edi + + add %esi,(%ebp) + add %edi,4(%ebp) + add %ecx,8(%ebp) + add %edx,12(%ebp) + + pop %edi + pop %esi + pop %ebx + pop %ebp + ret diff -urN linux-2.4.28/drivers/misc/md5.c linux-2.4.28-loop-AES-v3.0b/drivers/misc/md5.c --- linux-2.4.28/drivers/misc/md5.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.28-loop-AES-v3.0b/drivers/misc/md5.c Sun Feb 6 18:45:39 2005 @@ -0,0 +1,106 @@ +/* + * MD5 Message Digest Algorithm (RFC1321). 
* + * Derived from cryptoapi implementation, originally based on the + * public domain implementation written by Colin Plumb in 1993. + * + * Copyright (c) Cryptoapi developers. + * Copyright (c) 2002 James Morris + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include "md5.h" + +#define MD5_F1(x, y, z) (z ^ (x & (y ^ z))) +#define MD5_F2(x, y, z) MD5_F1(z, x, y) +#define MD5_F3(x, y, z) (x ^ y ^ z) +#define MD5_F4(x, y, z) (y ^ (x | ~z)) +#define MD5_STEP(f, w, x, y, z, in, s) \ + (w += f(x, y, z) + in, w = (w<<s | w>>(32-s)) + x) + +void md5_transform_CPUbyteorder(u_int32_t *hash, u_int32_t const *in) +{ + u_int32_t a, b, c, d; + + a = hash[0]; + b = hash[1]; + c = hash[2]; + d = hash[3]; + + MD5_STEP(MD5_F1, a, b, c, d, in[0] + 0xd76aa478, 7); + MD5_STEP(MD5_F1, d, a, b, c, in[1] + 0xe8c7b756, 12); + MD5_STEP(MD5_F1, c, d, a, b, in[2] + 0x242070db, 17); + MD5_STEP(MD5_F1, b, c, d, a, in[3] + 0xc1bdceee, 22); + MD5_STEP(MD5_F1, a, b, c, d, in[4] + 0xf57c0faf, 7); + MD5_STEP(MD5_F1, d, a, b, c, in[5] + 0x4787c62a, 12); + MD5_STEP(MD5_F1, c, d, a, b, in[6] + 0xa8304613, 17); + MD5_STEP(MD5_F1, b, c, d, a, in[7] + 0xfd469501, 22); + MD5_STEP(MD5_F1, a, b, c, d, in[8] + 0x698098d8, 7); + MD5_STEP(MD5_F1, d, a, b, c, in[9] + 0x8b44f7af, 12); + MD5_STEP(MD5_F1, c, d, a, b, in[10] + 0xffff5bb1, 17); + MD5_STEP(MD5_F1, b, c, d, a, in[11] + 0x895cd7be, 22); + MD5_STEP(MD5_F1, a, b, c, d, in[12] + 0x6b901122, 7); + MD5_STEP(MD5_F1, d, a, b, c, in[13] + 0xfd987193, 12); + MD5_STEP(MD5_F1, c, d, a, b, in[14] + 0xa679438e, 17); + MD5_STEP(MD5_F1, b, c, d, a, in[15] + 0x49b40821, 22); + + MD5_STEP(MD5_F2, a, b, c, d, in[1] + 0xf61e2562, 5); + MD5_STEP(MD5_F2, d, a, b, c, in[6] + 0xc040b340, 9); + MD5_STEP(MD5_F2, c, d, a, b, in[11] + 0x265e5a51, 14); +
MD5_STEP(MD5_F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); + MD5_STEP(MD5_F2, a, b, c, d, in[5] + 0xd62f105d, 5); + MD5_STEP(MD5_F2, d, a, b, c, in[10] + 0x02441453, 9); + MD5_STEP(MD5_F2, c, d, a, b, in[15] + 0xd8a1e681, 14); + MD5_STEP(MD5_F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); + MD5_STEP(MD5_F2, a, b, c, d, in[9] + 0x21e1cde6, 5); + MD5_STEP(MD5_F2, d, a, b, c, in[14] + 0xc33707d6, 9); + MD5_STEP(MD5_F2, c, d, a, b, in[3] + 0xf4d50d87, 14); + MD5_STEP(MD5_F2, b, c, d, a, in[8] + 0x455a14ed, 20); + MD5_STEP(MD5_F2, a, b, c, d, in[13] + 0xa9e3e905, 5); + MD5_STEP(MD5_F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); + MD5_STEP(MD5_F2, c, d, a, b, in[7] + 0x676f02d9, 14); + MD5_STEP(MD5_F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); + + MD5_STEP(MD5_F3, a, b, c, d, in[5] + 0xfffa3942, 4); + MD5_STEP(MD5_F3, d, a, b, c, in[8] + 0x8771f681, 11); + MD5_STEP(MD5_F3, c, d, a, b, in[11] + 0x6d9d6122, 16); + MD5_STEP(MD5_F3, b, c, d, a, in[14] + 0xfde5380c, 23); + MD5_STEP(MD5_F3, a, b, c, d, in[1] + 0xa4beea44, 4); + MD5_STEP(MD5_F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); + MD5_STEP(MD5_F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); + MD5_STEP(MD5_F3, b, c, d, a, in[10] + 0xbebfbc70, 23); + MD5_STEP(MD5_F3, a, b, c, d, in[13] + 0x289b7ec6, 4); + MD5_STEP(MD5_F3, d, a, b, c, in[0] + 0xeaa127fa, 11); + MD5_STEP(MD5_F3, c, d, a, b, in[3] + 0xd4ef3085, 16); + MD5_STEP(MD5_F3, b, c, d, a, in[6] + 0x04881d05, 23); + MD5_STEP(MD5_F3, a, b, c, d, in[9] + 0xd9d4d039, 4); + MD5_STEP(MD5_F3, d, a, b, c, in[12] + 0xe6db99e5, 11); + MD5_STEP(MD5_F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); + MD5_STEP(MD5_F3, b, c, d, a, in[2] + 0xc4ac5665, 23); + + MD5_STEP(MD5_F4, a, b, c, d, in[0] + 0xf4292244, 6); + MD5_STEP(MD5_F4, d, a, b, c, in[7] + 0x432aff97, 10); + MD5_STEP(MD5_F4, c, d, a, b, in[14] + 0xab9423a7, 15); + MD5_STEP(MD5_F4, b, c, d, a, in[5] + 0xfc93a039, 21); + MD5_STEP(MD5_F4, a, b, c, d, in[12] + 0x655b59c3, 6); + MD5_STEP(MD5_F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); + MD5_STEP(MD5_F4, c, d, a, b, 
in[10] + 0xffeff47d, 15); + MD5_STEP(MD5_F4, b, c, d, a, in[1] + 0x85845dd1, 21); + MD5_STEP(MD5_F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); + MD5_STEP(MD5_F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); + MD5_STEP(MD5_F4, c, d, a, b, in[6] + 0xa3014314, 15); + MD5_STEP(MD5_F4, b, c, d, a, in[13] + 0x4e0811a1, 21); + MD5_STEP(MD5_F4, a, b, c, d, in[4] + 0xf7537e82, 6); + MD5_STEP(MD5_F4, d, a, b, c, in[11] + 0xbd3af235, 10); + MD5_STEP(MD5_F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); + MD5_STEP(MD5_F4, b, c, d, a, in[9] + 0xeb86d391, 21); + + hash[0] += a; + hash[1] += b; + hash[2] += c; + hash[3] += d; +} diff -urN linux-2.4.28/drivers/misc/md5.h linux-2.4.28-loop-AES-v3.0b/drivers/misc/md5.h --- linux-2.4.28/drivers/misc/md5.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.28-loop-AES-v3.0b/drivers/misc/md5.h Sun Feb 6 18:45:39 2005 @@ -0,0 +1,11 @@ +/* md5.h -- declares md5_transform_CPUbyteorder(); asmlinkage is applied only when CONFIG_X86 or CONFIG_X86_64 is defined */ + +#include /* NOTE(review): the header names after these four #include directives are missing in this copy of the patch -- restore them from the upstream patch */ +#include +#include +#include + +#if defined(CONFIG_X86) || defined(CONFIG_X86_64) + asmlinkage +#endif +extern void md5_transform_CPUbyteorder(u_int32_t *, u_int32_t const *); diff -urN linux-2.4.28/include/linux/loop.h linux-2.4.28-loop-AES-v3.0b/include/linux/loop.h --- linux-2.4.28/include/linux/loop.h Sun Aug 4 14:26:52 2002 +++ linux-2.4.28-loop-AES-v3.0b/include/linux/loop.h Sun Feb 6 18:45:39 2005 @@ -17,6 +17,11 @@ #ifdef __KERNEL__ +/* definitions for IV metric -- cryptoapi specific */ +#define LOOP_IV_SECTOR_BITS 9 /* log2 of LOOP_IV_SECTOR_SIZE */ +#define LOOP_IV_SECTOR_SIZE (1 << LOOP_IV_SECTOR_BITS) /* 1 << 9 = 512 */ +typedef int loop_iv_t; + /* Possible states of device */ enum { Lo_unbound, @@ -27,35 +32,47 @@ struct loop_device { int lo_number; int lo_refcnt; - kdev_t lo_device; - int lo_offset; + loff_t lo_offset; /* widened from int to loff_t by this patch */ + loff_t lo_sizelimit; /* NOTE(review): presumably bytes with 0 == max available, as documented on loop_info64.lo_sizelimit -- confirm */ int lo_encrypt_type; int lo_encrypt_key_size; - int lo_flags; int (*transfer)(struct loop_device *, int cmd, char *raw_buf, char *loop_buf, int size, int real_block); + int (*ioctl)(struct loop_device *, int cmd, + unsigned long arg); char lo_name[LO_NAME_SIZE]; char lo_encrypt_key[LO_KEY_SIZE]; __u32 lo_init[2]; 
uid_t lo_key_owner; /* Who set the key */ - int (*ioctl)(struct loop_device *, int cmd, - unsigned long arg); + kdev_t lo_device; + int lo_flags; struct file * lo_backing_file; - void *key_data; + void *key_data; char key_reserved[48]; /* for use by the filter modules */ int old_gfp_mask; + int lo_state; + struct buffer_head *lo_bh_que0; + struct buffer_head *lo_bh_que1; + struct buffer_head *lo_bh_que2; + struct buffer_head *lo_bh_free; /* lo_bh_que0..2 and lo_bh_free are added by this patch; the old lo_bh/lo_bhtail pointers are removed */ spinlock_t lo_lock; - struct buffer_head *lo_bh; - struct buffer_head *lo_bhtail; - int lo_state; struct semaphore lo_sem; struct semaphore lo_ctl_mutex; - struct semaphore lo_bh_mutex; atomic_t lo_pending; + int lo_bh_flsh; + int lo_bh_need; + wait_queue_head_t lo_bh_wait; + unsigned long lo_offs_sec; + unsigned long lo_iv_remove; + unsigned char lo_crypt_name[LO_NAME_SIZE]; +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB /* key-scrub hook fields exist only when this option is set; an undefined macro evaluates to 0 in #if */ + void (*lo_keyscrub_fn)(void *); + void *lo_keyscrub_ptr; +#endif }; typedef int (* transfer_proc_t)(struct loop_device *, int cmd, @@ -77,20 +94,19 @@ */ #define LO_FLAGS_DO_BMAP 1 #define LO_FLAGS_READ_ONLY 2 -#define LO_FLAGS_BH_REMAP 4 -/* +/* * Note that this structure gets the wrong offsets when directly used * from a glibc program, because glibc has a 32bit dev_t. - * Prevent people from shooting in their own foot. + * Prevent people from shooting in their own foot. */ #if __GLIBC__ >= 2 && !defined(dev_t) #error "Wrong dev_t in loop.h" -#endif +#endif /* * This uses kdev_t because glibc currently has no appropiate - * conversion version for the loop ioctls. + * conversion version for the loop ioctls. 
* The situation is very unpleasant */ @@ -109,6 +125,22 @@ char reserved[4]; }; +struct loop_info64 { /* status record with 64-bit device/inode/offset fields, added by this patch; NOTE(review): presumably the payload of LOOP_SET_STATUS64 / LOOP_GET_STATUS64 -- confirm in loop.c */ + __u64 lo_device; /* ioctl r/o */ + __u64 lo_inode; /* ioctl r/o */ + __u64 lo_rdevice; /* ioctl r/o */ + __u64 lo_offset; + __u64 lo_sizelimit;/* bytes, 0 == max available */ + __u32 lo_number; /* ioctl r/o */ + __u32 lo_encrypt_type; + __u32 lo_encrypt_key_size; /* ioctl w/o */ + __u32 lo_flags; /* ioctl r/o */ + __u8 lo_file_name[LO_NAME_SIZE]; + __u8 lo_crypt_name[LO_NAME_SIZE]; + __u8 lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ + __u64 lo_init[2]; +}; + /* * Loop filter types */ @@ -122,25 +154,27 @@ #define LO_CRYPT_IDEA 6 #define LO_CRYPT_DUMMY 9 #define LO_CRYPT_SKIPJACK 10 +#define LO_CRYPT_AES 16 +#define LO_CRYPT_CRYPTOAPI 18 #define MAX_LO_CRYPT 20 #ifdef __KERNEL__ /* Support for loadable transfer modules */ struct loop_func_table { - int number; /* filter type */ + int number; /* filter type */ int (*transfer)(struct loop_device *lo, int cmd, char *raw_buf, char *loop_buf, int size, int real_block); - int (*init)(struct loop_device *, struct loop_info *); + int (*init)(struct loop_device *, struct loop_info *); /* release is called from loop_unregister_transfer or clr_fd */ - int (*release)(struct loop_device *); + int (*release)(struct loop_device *); int (*ioctl)(struct loop_device *, int cmd, unsigned long arg); - /* lock and unlock manage the module use counts */ + /* lock and unlock manage the module use counts */ void (*lock)(struct loop_device *); void (*unlock)(struct loop_device *); -}; +}; -int loop_register_transfer(struct loop_func_table *funcs); -int loop_unregister_transfer(int number); +int loop_register_transfer(struct loop_func_table *funcs); +int loop_unregister_transfer(int number); #endif /* @@ -151,5 +185,10 @@ #define LOOP_CLR_FD 0x4C01 #define LOOP_SET_STATUS 0x4C02 #define LOOP_GET_STATUS 0x4C03 +#define LOOP_SET_STATUS64 0x4C04 +#define LOOP_GET_STATUS64 0x4C05 + +#define LOOP_MULTI_KEY_SETUP 0x4C4D +#define 
LOOP_MULTI_KEY_SETUP_V3 0x4C4E #endif