/*
 *  fuc microcode for nv98 pcrypt engine
 *  Copyright (C) 2010  Marcin Kościelnicki
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

.section #nv98_pcrypt_data

// ---------------------------------------------------------------------
// Per-channel context. The context switch code in ih saves/restores a
// 128-byte block starting at data offset 0, which covers everything from
// ctx_dma up to and including ctx_iv.
// ---------------------------------------------------------------------
ctx_dma:
ctx_dma_query:		.b32 0
ctx_dma_src:		.b32 0
ctx_dma_dst:		.b32 0
.equ #dma_count 3
ctx_query_address_high:	.b32 0
ctx_query_address_low:	.b32 0
ctx_query_counter:	.b32 0
ctx_cond_address_high:	.b32 0
ctx_cond_address_low:	.b32 0
ctx_cond_off:		.b32 0
ctx_src_address_high:	.b32 0
ctx_src_address_low:	.b32 0
ctx_dst_address_high:	.b32 0
ctx_dst_address_low:	.b32 0
ctx_mode:		.b32 0
.align 16
ctx_key:		.skip 16
ctx_iv:			.skip 16

// 32-byte scratch buffer used as the local end of 16-byte DMA transfers.
.align 0x80
swap:
.skip 32

// ---------------------------------------------------------------------
// Method dispatch tables, consumed by dtable_cmd in ih.
//
// Each entry is two 32-bit words:
//   word 0, low 16 bits  - data offset (type 2) or handler address
//   word 0, high 16 bits - entry type (the "+ 0xN0000" part):
//        2: store the method parameter at the data offset
//        0: call the handler
//        1: call the handler, unless bit 0 of ctx_cond_off is set,
//           in which case the method is silently dropped
//   word 1               - mask of parameter bits that must be zero;
//                          any such bit set takes the invalid_bitfield path
// ---------------------------------------------------------------------

// method indices 0x80 .. #common_cmd_max - 1
.align 8
common_cmd_dtable:
.b32 #ctx_query_address_high + 0x20000 ~0xff
.b32 #ctx_query_address_low + 0x20000 ~0xfffffff0
.b32 #ctx_query_counter + 0x20000 ~0xffffffff
.b32 #cmd_query_get + 0x00000 ~1
.b32 #ctx_cond_address_high + 0x20000 ~0xff
.b32 #ctx_cond_address_low + 0x20000 ~0xfffffff0
.b32 #cmd_cond_mode + 0x00000 ~7
.b32 #cmd_wrcache_flush + 0x00000 ~0
.equ #common_cmd_max 0x88

// method indices 0xc0 .. #engine_cmd_max - 1
.align 8
engine_cmd_dtable:
.b32 #ctx_key + 0x0 + 0x20000 ~0xffffffff
.b32 #ctx_key + 0x4 + 0x20000 ~0xffffffff
.b32 #ctx_key + 0x8 + 0x20000 ~0xffffffff
.b32 #ctx_key + 0xc + 0x20000 ~0xffffffff
.b32 #ctx_iv + 0x0 + 0x20000 ~0xffffffff
.b32 #ctx_iv + 0x4 + 0x20000 ~0xffffffff
.b32 #ctx_iv + 0x8 + 0x20000 ~0xffffffff
.b32 #ctx_iv + 0xc + 0x20000 ~0xffffffff
.b32 #ctx_src_address_high + 0x20000 ~0xff
.b32 #ctx_src_address_low + 0x20000 ~0xfffffff0
.b32 #ctx_dst_address_high + 0x20000 ~0xff
.b32 #ctx_dst_address_low + 0x20000 ~0xfffffff0
.b32 #crypt_cmd_mode + 0x00000 ~0xf
.b32 #crypt_cmd_length + 0x10000 ~0x0ffffff0
.equ #engine_cmd_max 0xce

// Indexed by ctx_mode (0..14), 4 bytes per entry: the "prep" routine that
// records the per-block crypto script, then the "do" routine that runs the
// script over the data. Used by crypt_cmd_length.
.align 4
crypt_dtable:
.b16 #crypt_copy_prep #crypt_do_inout
.b16 #crypt_store_prep #crypt_do_out
.b16 #crypt_ecb_e_prep #crypt_do_inout
.b16 #crypt_ecb_d_prep #crypt_do_inout
.b16 #crypt_cbc_e_prep #crypt_do_inout
.b16 #crypt_cbc_d_prep #crypt_do_inout
.b16 #crypt_pcbc_e_prep #crypt_do_inout
.b16 #crypt_pcbc_d_prep #crypt_do_inout
.b16 #crypt_cfb_e_prep #crypt_do_inout
.b16 #crypt_cfb_d_prep #crypt_do_inout
.b16 #crypt_ofb_prep #crypt_do_inout
.b16 #crypt_ctr_prep #crypt_do_inout
.b16 #crypt_cbc_mac_prep #crypt_do_in
.b16 #crypt_cmac_finish_complete_prep #crypt_do_in
.b16 #crypt_cmac_finish_partial_prep #crypt_do_in

.align 0x100

.section #nv98_pcrypt_code

// ---------------------------------------------------------------------
// Entry point: one-time setup, then sleep forever. All real work is done
// from the interrupt handler ih.
// ---------------------------------------------------------------------

	// $r0 is always set to 0 in our code - this allows some space savings.
	clear b32 $r0

	// set up the interrupt handler
	mov $r1 #ih
	mov $iv0 $r1

	// init stack pointer
	mov $sp $r0

	// set interrupt dispatch - route timer, fifo, ctxswitch to i0, others to host
	movw $r1 0xfff0
	sethi $r1 0
	mov $r2 0x400
	iowr I[$r2 + 0x300] $r1

	// enable the interrupts
	or $r1 0xc
	iowr I[$r2] $r1

	// enable fifo access and context switching
	mov $r1 3
	mov $r2 0x1200
	iowr I[$r2] $r1

	// enable i0 delivery
	bset $flags ie0

	// sleep forever, waking only for interrupts.
	bset $flags $p0
spin:
	sleep $p0
	bra #spin

// ---------------------------------------------------------------------
// i0 handler.
//
// $r1 holds the pending-interrupt word for the whole handler: bit 3 is
// treated as "context switch requested", bit 2 as "fifo command pending".
// Both are acked together at nocmd.
// ---------------------------------------------------------------------
ih:
	// see which interrupts we got
	iord $r1 I[$r0 + 0x200]

	and $r2 $r1 0x8
	cmpu b32 $r2 0
	bra e #noctx

	// context switch... prepare the regs for xfer
	mov $r2 0x7700
	mov $xtargets $r2
	mov $xdbase $r0
	// 128-byte context.
	mov $r2 0
	sethi $r2 0x50000

	// read current channel
	mov $r3 0x1400
	iord $r4 I[$r3]
	// if bit 30 set, it's active, so we have to unload it first.
	// (shift bit 30 into the sign bit and test it with a signed compare)
	shl b32 $r5 $r4 1
	cmps b32 $r5 0
	bra nc #ctxload

	// unload the current channel - save the context
	xdst $r0 $r2
	xdwait
	// and clear bit 30, then write back
	bclr $r4 0x1e
	iowr I[$r3] $r4
	// tell PFIFO we unloaded
	mov $r4 1
	iowr I[$r3 + 0x200] $r4

	bra #noctx

ctxload:
	// no channel loaded - perhaps we're requested to load one
	iord $r4 I[$r3 + 0x100]
	shl b32 $r15 $r4 1
	cmps b32 $r15 0
	// if bit 30 of next channel not set, probably PFIFO is just
	// killing a context. do a faux load, without the active bit.
	bra nc #dummyload

	// ok, do a real context load.
	xdld $r0 $r2
	xdwait
	// re-program the DMA object registers from the restored ctx_dma[],
	// walking the index $r6 from #dma_count - 1 down to 0 (the loop ends
	// when the subtract borrows).
	mov $r5 #ctx_dma
	mov $r6 #dma_count - 1
ctxload_dma_loop:
	ld b32 $r7 D[$r5 + $r6 * 4]
	add b32 $r8 $r6 0x180
	shl b32 $r8 8
	iowr I[$r8] $r7
	sub b32 $r6 1
	bra nc #ctxload_dma_loop

dummyload:
	// tell PFIFO we're done
	mov $r5 2
	iowr I[$r3 + 0x200] $r5

noctx:
	and $r2 $r1 0x4
	cmpu b32 $r2 0
	bra e #nocmd

	// incoming fifo command.
	// From here on: $r2 = command address word, $r3 = method parameter,
	// $r4 = method index.
	mov $r3 0x1900
	iord $r2 I[$r3 + 0x100]
	iord $r3 I[$r3]
	// extract the method
	and $r4 $r2 0x7ff
	// shift the addr to proper position if we need to interrupt later
	shl b32 $r2 0x10

	// mthd 0 and 0x100 [NAME, NOP]: ignore
	and $r5 $r4 0x7bf
	cmpu b32 $r5 0
	bra e #cmddone

	// Pick a dispatch table by method range. $r5 is the table base
	// pre-biased by the first index of the range, $r6 the first index
	// past the end of the table.
	mov $r5 #engine_cmd_dtable - 0xc0 * 8
	mov $r6 #engine_cmd_max
	cmpu b32 $r4 0xc0
	bra nc #dtable_cmd
	mov $r5 #common_cmd_dtable - 0x80 * 8
	mov $r6 #common_cmd_max
	cmpu b32 $r4 0x80
	bra nc #dtable_cmd
	cmpu b32 $r4 0x60
	bra nc #dma_cmd
	cmpu b32 $r4 0x50
	bra ne #illegal_mthd

	// mthd 0x140: PM_TRIGGER
	mov $r2 0x2200
	clear b32 $r3
	sethi $r3 0x20000
	iowr I[$r2] $r3
	bra #cmddone

dma_cmd:
	// mthd 0x180...: DMA_*
	cmpu b32 $r4 0x60+#dma_count
	bra nc #illegal_mthd
	// remember the object (with bit 30 set) in ctx_dma[index - 0x60] ...
	shl b32 $r5 $r4 2
	add b32 $r5 (#ctx_dma - 0x60 * 4) & 0xffff
	bset $r3 0x1e
	st b32 D[$r5] $r3
	// ... and write it to the matching DMA object register
	add b32 $r4 0x180 - 0x60
	shl b32 $r4 8
	iowr I[$r4] $r3
	bra #cmddone

dtable_cmd:
	cmpu b32 $r4 $r6
	bra nc #illegal_mthd
	// $r4 = address of the 8-byte table entry
	shl b32 $r4 3
	add b32 $r4 $r5
	// word 1: parameter bits that must be zero
	ld b32 $r5 D[$r4 + 4]
	and $r5 $r3
	cmpu b32 $r5 0
	bra ne #invalid_bitfield
	// word 0: $r5 = offset/handler, $r6 = entry type
	ld b16 $r5 D[$r4]
	ld b16 $r6 D[$r4 + 2]
	cmpu b32 $r6 2
	bra e #cmd_setctx
	// type 1 entries are dropped while ctx_cond_off bit 0 is set
	ld b32 $r7 D[$r0 + #ctx_cond_off]
	and $r6 $r7
	cmpu b32 $r6 1
	bra e #cmddone
	// handler contract: $r3 = parameter, $r2 = address word (handler may
	// or in an error code); handler returns with $p1 set to request the
	// error/interrupt path below.
	call $r5
	bra $p1 #dispatch_error
	bra #cmddone

cmd_setctx:
	st b32 D[$r5] $r3
	bra #cmddone

invalid_bitfield:
	or $r2 1
dispatch_error:
illegal_mthd:
	// publish address/code word and parameter, raise interrupt bit 6,
	// then wait until that bit is no longer pending.
	mov $r4 0x1000
	iowr I[$r4] $r2
	iowr I[$r4 + 0x100] $r3
	mov $r4 0x40
	iowr I[$r0] $r4

im_loop:
	iord $r4 I[$r0 + 0x200]
	and $r4 0x40
	cmpu b32 $r4 0
	bra ne #im_loop

cmddone:
	// remove the command from FIFO
	mov $r3 0x1d00
	mov $r4 1
	iowr I[$r3] $r4

nocmd:
	// ack the processed interrupts
	and $r1 $r1 0xc
	iowr I[$r0 + 0x100] $r1
	iret

// ---------------------------------------------------------------------
// Method handlers. Called from dtable_cmd with:
//   $r3 = method parameter
//   $r2 = address word, already shifted left by 16
// On return $p1 set means "take the dispatch_error path"; handlers or an
// error/notify code into the low bits of $r2 for that case.
// ---------------------------------------------------------------------

// QUERY_GET: write {ctx_query_counter, 0, time low, time high} to the
// query address through DMA target 0.
cmd_query_get:
	// if bit 0 of param set, trigger interrupt afterwards.
	setp $p1 $r3
	or $r2 3

	// read PTIMER, beware of races...
	// (re-read the high word until it is stable around the low-word read)
	mov $r4 0xb00
ptimer_retry:
	iord $r6 I[$r4 + 0x100]
	iord $r5 I[$r4]
	iord $r7 I[$r4 + 0x100]
	cmpu b32 $r6 $r7
	bra ne #ptimer_retry

	// prepare the query structure
	ld b32 $r4 D[$r0 + #ctx_query_counter]
	st b32 D[$r0 + #swap + 0x0] $r4
	st b32 D[$r0 + #swap + 0x4] $r0
	st b32 D[$r0 + #swap + 0x8] $r5
	st b32 D[$r0 + #swap + 0xc] $r6

	// will use target 0, DMA_QUERY.
	mov $xtargets $r0
	ld b32 $r4 D[$r0 + #ctx_query_address_high]
	shl b32 $r4 0x18
	mov $xdbase $r4
	ld b32 $r4 D[$r0 + #ctx_query_address_low]
	mov $r5 #swap
	sethi $r5 0x20000
	xdst $r4 $r5
	xdwait

	ret

// COND_MODE: compute ctx_cond_off from the mode in $r3.
//   0, 1 : ctx_cond_off = mode ^ 1, no memory access
//   2    : ctx_cond_off = (word at offset 4 of the QUERY at cond address == 0)
//   3, 4 : compare two consecutive 16-byte QUERY structures (words at
//          offsets 0 and 4 of each); mode 3 stores "not both equal",
//          mode 4 stores "both equal"
//   >= 5 : INVALID_ENUM
cmd_cond_mode:
	// if >= 5, INVALID_ENUM
	bset $flags $p1
	or $r2 2
	cmpu b32 $r3 5
	bra nc #return

	// otherwise, no error.
	bclr $flags $p1

	// if < 2, no QUERY object is involved
	cmpu b32 $r3 2
	bra nc #cmd_cond_mode_queryful

	xor $r3 1
	st b32 D[$r0 + #ctx_cond_off] $r3
return:
	ret

cmd_cond_mode_queryful:
	// ok, will need to pull a QUERY object, prepare offsets
	// $xdbase = address bits 8 and up, $r6 = low 8 bits of the address
	ld b32 $r4 D[$r0 + #ctx_cond_address_high]
	ld b32 $r5 D[$r0 + #ctx_cond_address_low]
	and $r6 $r5 0xff
	shr b32 $r5 8
	shl b32 $r4 0x18
	or $r4 $r5
	mov $xdbase $r4
	mov $xtargets $r0

	// pull the first one
	mov $r5 #swap
	sethi $r5 0x20000
	xdld $r6 $r5

	// if == 2, only a single QUERY is involved...
	cmpu b32 $r3 2
	bra ne #cmd_cond_mode_double

	xdwait
	ld b32 $r4 D[$r0 + #swap + 4]
	cmpu b32 $r4 0
	xbit $r4 $flags z
	st b32 D[$r0 + #ctx_cond_off] $r4
	ret

	// ok, we'll need to pull second one too
cmd_cond_mode_double:
	add b32 $r6 0x10
	add b32 $r5 0x10
	xdld $r6 $r5
	xdwait

	// compare COUNTERs
	ld b32 $r5 D[$r0 + #swap + 0x00]
	ld b32 $r6 D[$r0 + #swap + 0x10]
	cmpu b32 $r5 $r6
	xbit $r4 $flags z

	// compare RES fields
	ld b32 $r5 D[$r0 + #swap + 0x04]
	ld b32 $r6 D[$r0 + #swap + 0x14]
	cmpu b32 $r5 $r6
	xbit $r5 $flags z
	and $r4 $r5

	// and negate or not, depending on mode
	cmpu b32 $r3 3
	xbit $r5 $flags z
	xor $r4 $r5
	st b32 D[$r0 + #ctx_cond_off] $r4
	ret

// WRCACHE_FLUSH: never fails; writes 0x10000 << 16 to I[0x2200].
cmd_wrcache_flush:
	bclr $flags $p1
	mov $r2 0x2200
	clear b32 $r3
	sethi $r3 0x10000
	iowr I[$r2] $r3
	ret

// MODE: store the crypt mode (index into crypt_dtable) in ctx_mode.
crypt_cmd_mode:
	// if >= 0xf, INVALID_ENUM
	bset $flags $p1
	or $r2 2
	cmpu b32 $r3 0xf
	bra nc #crypt_cmd_mode_return

	bclr $flags $p1
	st b32 D[$r0 + #ctx_mode] $r3

crypt_cmd_mode_return:
	ret

// LENGTH: run the operation selected by ctx_mode over $r3 bytes (the
// dispatch table mask only lets through multiples of 16), then write the
// advanced src/dst addresses and the updated IV back to the context.
//
// Register use while the prep/do routines run:
//   $r4 = src $xdbase value, $r5 = src offset
//   $r6 = dst $xdbase value, $r7 = dst offset
//   $r3 = length in, clobbered by the do routine
//   $r8 = ctx_mode * 4, $r9 = scratch
crypt_cmd_length:
	// This handler cannot fail. Clear $p1 explicitly like every other
	// handler does: the dispatcher tests $p1 right after the call, and a
	// value left over from an earlier method would otherwise be reported
	// as an error for this one.
	bclr $flags $p1

	// nop if length == 0
	cmpu b32 $r3 0
	bra e #crypt_cmd_mode_return

	// init key, IV
	cxset 3
	mov $r4 #ctx_key
	sethi $r4 0x70000
	xdst $r0 $r4
	mov $r4 #ctx_iv
	sethi $r4 0x60000
	xdst $r0 $r4
	xdwait
	ckeyreg $c7

	// prepare the targets
	mov $r4 0x2100
	mov $xtargets $r4

	// prepare src address
	ld b32 $r4 D[$r0 + #ctx_src_address_high]
	ld b32 $r5 D[$r0 + #ctx_src_address_low]
	shr b32 $r8 $r5 8
	shl b32 $r4 0x18
	or $r4 $r8
	and $r5 $r5 0xff

	// prepare dst address
	ld b32 $r6 D[$r0 + #ctx_dst_address_high]
	ld b32 $r7 D[$r0 + #ctx_dst_address_low]
	shr b32 $r8 $r7 8
	shl b32 $r6 0x18
	or $r6 $r8
	and $r7 $r7 0xff

	// find the proper prep & do functions
	ld b32 $r8 D[$r0 + #ctx_mode]
	shl b32 $r8 2

	// run prep
	ld b16 $r9 D[$r8 + #crypt_dtable]
	call $r9

	// do it
	ld b16 $r9 D[$r8 + #crypt_dtable + 2]
	call $r9
	cxset 1
	xdwait
	cxset 0x61
	xdwait
	xdwait

	// update src address
	// (rebuild high:low from the base register and add the final offset)
	shr b32 $r8 $r4 0x18
	shl b32 $r9 $r4 8
	add b32 $r9 $r5
	adc b32 $r8 0
	st b32 D[$r0 + #ctx_src_address_high] $r8
	st b32 D[$r0 + #ctx_src_address_low] $r9

	// update dst address
	shr b32 $r8 $r6 0x18
	shl b32 $r9 $r6 8
	add b32 $r9 $r7
	adc b32 $r8 0
	st b32 D[$r0 + #ctx_dst_address_high] $r8
	st b32 D[$r0 + #ctx_dst_address_low] $r9

	// pull updated IV
	cxset 2
	mov $r4 #ctx_iv
	sethi $r4 0x60000
	xdld $r0 $r4
	xdwait

	ret

// ---------------------------------------------------------------------
// "prep" routines, one per crypt mode. Each records a crypto script with
// cs0begin N followed by exactly N crypto instructions; the "do" routines
// replay it once per 16-byte block with cs0exec. crypt_cmd_length loads
// the key into $c7 and the IV into $c6 before calling these. The
// decryption modes first run ckexp on $c7.
// ---------------------------------------------------------------------

crypt_copy_prep:
	cs0begin 2
	cxsin $c0
	cxsout $c0
	ret

crypt_store_prep:
	cs0begin 1
	cxsout $c6
	ret

crypt_ecb_e_prep:
	cs0begin 3
	cxsin $c0
	cenc $c0 $c0
	cxsout $c0
	ret

crypt_ecb_d_prep:
	ckexp $c7 $c7
	cs0begin 3
	cxsin $c0
	cdec $c0 $c0
	cxsout $c0
	ret

crypt_cbc_e_prep:
	cs0begin 4
	cxsin $c0
	cxor $c6 $c0
	cenc $c6 $c6
	cxsout $c6
	ret

crypt_cbc_d_prep:
	ckexp $c7 $c7
	cs0begin 5
	cmov $c2 $c6
	cxsin $c6
	cdec $c0 $c6
	cxor $c0 $c2
	cxsout $c0
	ret

crypt_pcbc_e_prep:
	cs0begin 5
	cxsin $c0
	cxor $c6 $c0
	cenc $c6 $c6
	cxsout $c6
	cxor $c6 $c0
	ret

crypt_pcbc_d_prep:
	ckexp $c7 $c7
	cs0begin 5
	cxsin $c0
	cdec $c1 $c0
	cxor $c6 $c1
	cxsout $c6
	cxor $c6 $c0
	ret

crypt_cfb_e_prep:
	cs0begin 4
	cenc $c6 $c6
	cxsin $c0
	cxor $c6 $c0
	cxsout $c6
	ret

crypt_cfb_d_prep:
	cs0begin 4
	cenc $c0 $c6
	cxsin $c6
	cxor $c0 $c6
	cxsout $c0
	ret

crypt_ofb_prep:
	cs0begin 4
	cenc $c6 $c6
	cxsin $c0
	cxor $c0 $c6
	cxsout $c0
	ret

crypt_ctr_prep:
	cs0begin 5
	cenc $c1 $c6
	cadd $c6 1
	cxsin $c0
	cxor $c0 $c1
	cxsout $c0
	ret

crypt_cbc_mac_prep:
	cs0begin 3
	cxsin $c0
	cxor $c6 $c0
	cenc $c6 $c6
	ret

crypt_cmac_finish_complete_prep:
	cs0begin 7
	cxsin $c0
	cxor $c6 $c0
	cxor $c0 $c0
	cenc $c0 $c0
	cprecmac $c0 $c0
	cxor $c6 $c0
	cenc $c6 $c6
	ret

crypt_cmac_finish_partial_prep:
	cs0begin 8
	cxsin $c0
	cxor $c6 $c0
	cxor $c0 $c0
	cenc $c0 $c0
	cprecmac $c0 $c0
	cprecmac $c0 $c0
	cxor $c6 $c0
	cenc $c6 $c6
	ret

// ---------------------------------------------------------------------
// "do" routines: replay the recorded script once per 16-byte block.
// In:  $r3 = length (non-zero multiple of 16),
//      $r4/$r5 = src base/offset, $r6/$r7 = dst base/offset.
// Out: the offset register(s) actually used are advanced by the length.
// Clobbers $r3 (turned into the end offset), $r9 and $xdbase.
// ---------------------------------------------------------------------

// TODO
// input only: consume source blocks, produce no output stream
crypt_do_in:
	add b32 $r3 $r5
	mov $xdbase $r4
	mov $r9 #swap
	sethi $r9 0x20000
crypt_do_in_loop:
	xdld $r5 $r9
	xdwait
	cxset 0x22
	xdst $r0 $r9
	cs0exec 1
	xdwait
	add b32 $r5 0x10
	cmpu b32 $r5 $r3
	bra ne #crypt_do_in_loop
	cxset 1
	xdwait
	ret

// output only: produce destination blocks without reading the source
crypt_do_out:
	add b32 $r3 $r7
	mov $xdbase $r6
	mov $r9 #swap
	sethi $r9 0x20000
crypt_do_out_loop:
	cs0exec 1
	cxset 0x61
	xdld $r7 $r9
	xdst $r7 $r9
	cxset 1
	xdwait
	add b32 $r7 0x10
	cmpu b32 $r7 $r3
	bra ne #crypt_do_out_loop
	ret

// input and output: read a source block, run the script, write a
// destination block. $xdbase is switched between src and dst each pass.
crypt_do_inout:
	add b32 $r3 $r5
	mov $r9 #swap
	sethi $r9 0x20000
crypt_do_inout_loop:
	mov $xdbase $r4
	xdld $r5 $r9
	xdwait
	cxset 0x21
	xdst $r0 $r9
	cs0exec 1
	cxset 0x61
	mov $xdbase $r6
	xdld $r7 $r9
	xdst $r7 $r9
	cxset 1
	xdwait
	add b32 $r5 0x10
	add b32 $r7 0x10
	cmpu b32 $r5 $r3
	bra ne #crypt_do_inout_loop
	ret

.align 0x100