dnl -*- mode: m4; comment-start: "%" -*-
include(`macros.m4')divert(-1)
% $Id: rc6.m4,v 1.9 1999/02/23 14:01:21 geoffk Exp $

% Fixed addresses and parameters.  The key scheduler and the encryption
% routine reuse the same RAM: v_L and v_A are both 0xD0, v_sch_round and
% v_round are both 0xE0, v_schtmp and v_crotate are both 0xE1, and
% v_trotate and v_t both start at 0xE2.

% Number of encryption rounds.
define(NUMROUNDS,20)

% The key schedule, 0x100-0x1AF.
define(v_S, 0x100)

% --- Key scheduler ---
% The key to schedule.
define(v_L, 0xD0)
% The round number.
define(v_sch_round,0xE0)
% Temporary for scheduler.
define(v_schtmp, 0xE1)
% Size of rotation.
define(v_trotate, 0xE2)
% Variable to rotate.
define(v_rotvar, 0xE3)
% Variables A, B in scheduler.
define(v_sA, 0xE4)
define(v_sB, 0xE8)

% --- Encryption ---
% The block to encrypt, MSB-first: four 4-byte words A, B, C, D.
define(v_A, 0xD0)
define(v_B, 0xD4)
define(v_C, 0xD8)
define(v_D, 0xDC)
% The round number.
define(v_round, 0xE0)
% The amount by which C will be rotated.
define(v_crotate, 0xE1)
% Temporary for encrypt (4 bytes).
define(v_t, 0xE2)

% Data for test program.
define(test_ram, 0xEC)

% quad: compute $1 <- $2**2 * 2 + $2, i.e. x (2 x + 1), modulo 2^32.
% $1 and $2 are 4-byte variables stored most significant byte first
% (offset 0 holds x_3, offset 3 holds x_0).  $1 is written before all of $2
% has been read, so the two must not overlap.  A and X are used as scratch.
%
% if x = 2^24 x_3 + 2^16 x_2 + 2^8 x_1 + x_0, then
% 2 x**2 + x =   2^24 (4 x_3 x_0 + 4 x_2 x_1 + x_3) 
%		 + 2^16 (4 x_2 x_0 + 2 x_1 x_1 + x_2)
%		 + 2^8  (4 x_1 x_0 + x_1) 
%		 +      (2 x_0 x_0 + x_0)
% (terms of weight 2^32 and above are dropped).  Each multiplier below is
% formed in A as 8 bits; the bits shifted in from the next lower byte stand
% for the part of the lower column multiplier that overflows into this one.
define(quad,`dnl
%  do (4 x_3 + x_2>>6) x_0
%  (only the low byte of this product is kept: it belongs to result byte 0)
	lda	$2+0
	ldx	$2+1
	aslx
	rola
	aslx
	rola
	ldx	$2+3
	mul
	sta	$1+0

%  do (4 x_2 + x_1>>7) x_1
%  (again only the low byte is kept and added into result byte 0;
%  x_1>>7 is the overflow of 2 x_1)
	lda	$2+1
	ldx	$2+2
	asla
	aslx
	rola
	ldx	$2+2
	mul
	add	$1+0
	sta	$1+0

%  do (4 x_2 + x_1>>6) x_0
%  (16-bit product: the low byte starts result byte 1, the high byte in X
%  is added to result byte 0.  No add precedes the adc, so this relies on
%  the carry being clear after mul -- NOTE(review): confirm for the target)
	lda	$2+1
	ldx	$2+2
	aslx
	rola
	aslx
	rola
	ldx	$2+3
	mul
	sta	$1+1
	txa
	adc	$1+0
	sta	$1+0

%  do (2 x_1) x_1
%  (low byte into result byte 1, high byte plus carry into result byte 0)
	lda	$2+2
	tax
	lsla
	mul
	add	$1+1
	sta	$1+1
	txa
	adc	$1+0
	sta	$1+0
	
%  do (4 x_1 + x_0>>7) x_0 + x_1
%  (low byte plus x_1 becomes result byte 2; the high byte and the carries
%  ripple up through result bytes 1 and 0)
	lda	$2+2
	ldx	$2+3
	asla
	aslx
	rola
	ldx	$2+3
	mul
	add	$2+2
	sta	$1+2
	txa
	adc	$1+1
	sta	$1+1
	lda	#0
	adc	$1+0
	sta	$1+0

%  finally, do (2 x_0 + 1) x_0, and add in the x_3 and x_2 terms
%  (sec then rola builds 2 x_0 + 1; the low byte is result byte 3; the high
%  byte, x_2 and x_3 are then added with carry into bytes 2, 1 and 0)
	lda	$2+3
	tax
	sec
	rola
	mul
	sta	$1+3
	txa
	add	$1+2
	sta	$1+2
	lda	$2+1
	adc	$1+1
	sta	$1+1
	lda	$2+0
	adc	$1+0
	sta	$1+0')

divert`'dnl

% RC6 block encryption.
% Entry: the 16-byte block is in v_A (words A, B, C, D), the key schedule
%        is in v_S.
% Exit:  the encrypted block has replaced the input in v_A.
% X holds the byte offset into v_S of the next pair of round keys; v_round
% keeps the offset for the following round while X is used for other things.
rc6:
	ldx	#0
rc6_round_top:
% Add the current pair of schedule words to B and D.
	add4(v_B,v_B,`v_S+4,X')
	add4(v_D,v_D,`v_S,X')
	txa
	add	#8
	sta	v_round
	cmp	#eval(NUMROUNDS*8+8)
	bne	rc6_round_body
% Every round has been done: add the last two schedule words and return.
	add4(v_A,v_A,eval(v_S+8*NUMROUNDS+8))
	add4(v_C,v_C,eval(v_S+8*NUMROUNDS+12))
	rts
rc6_round_body:
% Mix the transformed B into A.  The byte returned in the accumulator is
% saved: it is the count by which C gets rotated further down.
	quad(v_t, v_B)
	ldx	#v_A
	jsr	rot5andxor
	sta	v_crotate
% Mix the transformed D into C.  The byte returned this time is still in
% the accumulator when A is rotated.
	quad(v_t, v_D)
	ldx	#v_C
	jsr	rot5andxor
	ldx	#v_A
	jsr	dorotate
	ldx	#v_C
	lda	v_crotate
	jsr	dorotate

% Move the words round one place, (A,B,C,D) <- (B,C,D,A), handling one byte
% of each word per pass, from offset 3 down to offset 0.  The first byte of
% v_t is the spare.
	ldx	#3
rc6_shuffle:
	lda	v_A,X
	sta	v_t
	lda	v_B,X
	sta	v_A,X
	lda	v_C,X
	sta	v_B,X
	lda	v_D,X
	sta	v_C,X
	lda	v_t
	sta	v_D,X
	decx
	bpl	rc6_shuffle
	ldx	v_round
	jmp	rc6_round_top

%  compute ,X = ,X ^ (v_t <<< 5)
%  and leave the low byte of (v_t <<< 5) in the accumulator; its low 5
%  bits are what the callers in this file go on to use as a rotate count.
%  Entry: X = address of the 4-byte variable to update (most significant
%         byte first), v_t = the value to rotate and fold in.
%  Exit:  A = low byte of the rotated value; v_t is overwritten.
%  The rotate left by 5 is done as a rotate right by 3 combined with a move
%  of one whole byte: presumably the macro on the first line rotates v_t
%  right by 2 in place, and the rora chain below supplies the last bit while
%  each byte of v_t is combined with the byte one place higher in the target.
rot5andxor:
	rotr4(v_t,2)
%  NOTE(review): the lsra takes its bit from whatever the rotate macro left
%  in A; for the result to be right that must be bit 0 of the most
%  significant byte of the rotated v_t -- confirm against the macro.
	lsra
	lda	v_t+1
	rora
	eor	,X
	sta	,X
	lda	v_t+2
	rora
	eor	1,X
	sta	1,X
	lda	v_t+3
	rora
	eor	2,X
	sta	2,X
%  The low byte of the result is stored back into v_t so that it can be
%  reloaded as the return value after the store into the target.
	lda	v_t
	rora
	sta	v_t
	eor	3,X
	sta	3,X
	lda	v_t
	rts

%  Rotate the variable at X left by A.
%  As called in this file: X = address of a 4-byte variable, A = rotate
%  count.  The body is produced by the macro on the label line, which is not
%  defined in this file (presumably it comes from macros.m4); its register
%  and scratch usage must be checked there.
dorotate: variable_rotate

%  key schedule for rc6
% Input: 16-byte key in v_L (the same address as v_A)
% Output: key schedule in v_S, and the key area (v_L, i.e. v_A) trashed.
% Also uses v_sA, v_sB, v_schtmp, v_sch_round and v_trotate.
rc6_schedule:
% Fill the table with an arithmetic progression: the first word is
% 0xb7e15163 and each later word is the previous one plus 0x9e3779b9
% (these values match the P32 and Q32 constants of the RC6 specification).
	seti4(v_S,0xb7e15163)

% X is the byte offset of the word being written.
	ldx	#4
firstschedloop:
	add4i(`v_S,X',`v_S-4,X',0x9e3779b9)
	txa
	add	#4
	tax
	cmp	#eval((NUMROUNDS*2+4)*4)
	bne	firstschedloop

% Start with A=B=0.
	lda	#0
	ldx	#3
ab_clear_loop:
	sta	v_sA,X
	sta	v_sB,X
	decx
	bpl	ab_clear_loop

% v_sch_round starts at 4 and is decremented before it is tested, so the
% mixing pass below runs three times over the whole table.
	lda	#4
	sta	v_sch_round

sch_round_loop:
% A = 0 is the table offset at which each pass starts.  It is loaded before
% the dec so that the bne tests the result of the dec.
	lda	#0
	dec	v_sch_round
	bne	sch_S_loop

% switch the first two words in the schedule, which is what `rc6' expects.
% The first byte of v_sA serves as the temporary for the exchange.
	ldx     #3
sch_switch_loop:
	lda     v_S,X
	sta     v_sA
	lda     v_S+4,X
	sta     v_S,X
	lda     v_sA
	sta     v_S+4,X
	decx
	bpl     sch_switch_loop
	rts

% Advance to the next table word; at the end of the table go back and
% start another pass.
sch_next_S_loop:
	lda	v_schtmp
	add	#4
	cmp	#eval((NUMROUNDS*2+4)*4)
	beq	sch_round_loop

% Entry: A = byte offset of the table word to mix.  It is kept in v_schtmp
% because X is reused further down.
sch_S_loop:
	sta	v_schtmp
	tax

% sA = sA + sB + the table word at X, then (assuming the third macro below
% rotates left by its second argument) rotated left by 3.
	add4(v_sA,v_sA,v_sB)
	add4(v_sA,v_sA,`v_S,X')
	rotl4(v_sA,3)
% The generated code below works from the least significant byte (offset 3)
% up to offset 0: it stores sA into the table word and replaces sB with
% sA + sB.  The low byte of that sum is saved in v_trotate as the rotate
% count.  The adc chain relies on the intervening loads and stores
% preserving the carry.
foreach(`i',`3,2,1,0',`dnl
	lda	v_sA+i
	sta	v_S+i,X
ifelse(i,3,`dnl
	add	v_sB+i
	sta	v_trotate
',`dnl
	adc	v_sB+i
')dnl
	sta	v_sB+i
')dnl
% X = offset of the key word to use: the table offset masked with 0xC, so
% the four key words are used in rotation.
	txa
	and	#0xC
	tax
% sB = sB + that key word, then rotate sB left by the saved count.
	add4(v_sB,v_sB,`v_L,X')
	ldx	#v_sB
	lda	v_trotate
	jsr	dorotate
% X was used to pass the address, so the key word offset is recomputed
% before sB is stored back into the key.
	lda	v_schtmp
	and	#0xC
	tax
	set4(`v_L,X',v_sB)
	jmp	sch_next_S_loop

% Build-time switch: with TIME_ROTATE defined (uncomment the line below) a
% timing harness for dorotate is assembled; otherwise the encryption
% self-test and its vectors are assembled.
dnl define(TIME_ROTATE,1)
ifdef(`TIME_ROTATE',`
% Test program for timing the rotate operation
% Calls dorotate on v_A once for each count from 0 to 0x1F, keeping the
% count in v_round.  NOTE(review): the two debug operations used in the loop
% are not defined in this file; presumably they are provided elsewhere.
rottime:
	clr	v_round
rottime_loop:
	lda	v_round
	ldx	#v_A
	dbga
	jsr	dorotate
	dbg	v_round
	inc	v_round
	lda	v_round
	cmp	#0x20
	bne	rottime_loop
	stop

% Store the harness entry point at the address given to org below
% (presumably the reset vector).
	org	m_RESETv
	word	rottime
',`
% Self-test.  The driver macro on the next line is defined outside this
% file; presumably its arguments are a scratch area, the key destination and
% length, the block source and destination and their length, and the two
% calls to make -- TODO confirm.
test_program(test_ram,v_L,16,v_A,v_A,16,jsr rc6_schedule,jsr rc6)

% Note that the implementation here has its endianness reversed.
% Each vector below is three groups of 32 hex digits, presumably key,
% plaintext and expected ciphertext.  They are emitted one byte at a time,
% and the offset arithmetic reverses the byte order inside each group of
% four bytes (assuming ^ is bitwise exclusive-or in m4 arithmetic).
test_data:
define(t,`dnl
forloop(`i',0,2,`dnl
	forloop(`j',0,15,`byte 0x`'substr($1,eval(i*33+(j^3)*2),2);')
')')dnl
t(00000000000000000000000000000000 00000000000000000000000000000000 dnl
8fc3a53656b1f778c129df4e9848a41e)
t(0123456789abcdef0112233445566778 02132435465768798a9bacbdcedfe0f1 dnl
524e192f4715c6231f51f6367ea43f18)
test_data_end:
')dnl
