dnl -*- mode: m4; comment-start: "%" -*-
dnl Ignore all the whitespace added by the below.
divert(-1)
dnl For the purposes of M4, things after % will be unprocessed
dnl  (use `%' to avoid this)
changecom(`%')

% $Id: macros.m4,v 1.18 1999/04/15 03:01:06 geoffk Exp $

% Quoting and unquoting helpers.
%  quote(ARGS...) expands to its arguments, each one individually quoted
%  and joined by commas (m4's $@ wrapped in one extra level of quotes).
%  This is not the same as writing `x': after define(x,`1,2,3'),
%  `x' is only the name x, whereas quote(x) is ``1',`2',`3''.
%  It is not the same as `defn' either, because it accepts any expansion
%  rather than just a macro name: defn(shift(x)) makes no sense,
%  but quote(shift(x)) is ``2',`3''.
define(`quote',``$@'')
%  first(ARGS...) expands to its first argument: with define(x,`1,2,3'),
%  first(x) is 1 and first(shift(x)) is 2.
define(`first',`$1')
%  length(ARGS...) expands to the number of arguments it was given: with
%  define(x,`1,2,3'), length(x) is 3.
define(`length',`$#')
% (Pass list macros to quote, first and length unquoted, so that the list
%  expands into separate arguments.)

% forloop(VAR, FROM, TO, BODY)
%  Expand BODY once for each value VAR = FROM, FROM+1, ..., TO.
%  VAR is pushdef'ed for the duration of the loop and popdef'ed afterwards,
%  so an outer definition of the same name is restored.
%  FROM and TO must be literal integers: the end of the loop is detected by
%  comparing VAR with TO as strings.
%  If TO < FROM the loop body is not expanded at all (previously the end
%  test could never match and the macro recursed without bound).
define(`forloop',`ifelse(eval(($2) <= ($3)),1,dnl
`pushdef(`$1',`$2')_forloop(`$1',`$2',`$3',`$4')popdef(`$1')')')
% _forloop is the recursive worker: expand the body, then either stop
% (VAR equals TO) or increment VAR and go round again.
define(`_forloop',dnl
`$4`'ifelse($1, `$3', ,dnl
`define(`$1', incr($1))_forloop(`$1', `$2', `$3', `$4')')')

% whileloop(COND, BODY)
%  Expand BODY repeatedly for as long as COND does not expand to `0'.
%  COND is re-expanded before every pass, so BODY has to change whatever
%  COND depends on or the loop never ends.
define(`whileloop',`ifelse($1,`0',`',`$2`'$0(`$1',`$2')')')

%  choosei(INDEX, LIST)
%  Expand to element number INDEX of LIST, counting from 0 (that is, the
%  INDEX+1th element).  For instance, with define(x,`1,2,3'),
%  choosei(2,`x') is 3.
%  How it works: a scratch macro is defined whose body is the text `$N'
%  with N = INDEX+1 (the lone `$' followed by eval(...) builds that text),
%  and the scratch macro is then applied to the expanded LIST.
%  The scratch macro is called tmp_choosei so that it cannot shadow a list
%  macro supplied by the caller; with the former scratch name `x' the
%  example above expanded to nothing.
define(`choosei',`pushdef(`tmp_choosei',$eval($1+1))tmp_choosei($2)`'popdef(`tmp_choosei')')

%  search(VALUE, LIST)
%  Expand to the index (counting from 0) of the first element of LIST that
%  is numerically equal to VALUE, or to -1 when no element matches.
%  VALUE and the elements of LIST are compared with eval, so they must be
%  integer expressions.  Uses the quote and first helpers.
%  The remaining part of the list is kept in the scratch macro tmp_search.
define(`search',`pushdef(`tmp_search',quote($2))_search($1,0)popdef(`tmp_search')')
%  _search(VALUE, INDEX) is the recursive worker.  The end-of-list test
%  compares the quoted remainder with a quoted empty string (the same test
%  foreach uses); it is done before, and separately from, the eval so that
%  an exhausted list yields -1 instead of an eval on an empty operand.
define(`_search',`ifelse(quote(tmp_search),``'',-1,dnl
`ifelse(eval(($1)==first(tmp_search)),1,`$2',dnl
`define(`tmp_search',quote(shift(tmp_search)))_search($1,eval($2+1))')')')

%  foreach(VAR, LIST, BODY)
%  Expand BODY once per element of LIST, with VAR defined as first(LIST),
%  then first(shift(LIST)), and so on.  VAR is pushdef'ed around each pass
%  and popdef'ed again afterwards.  The recursion stops when the remaining
%  list is a single quoted empty string, which is what quote(shift(...))
%  leaves behind after the last element.
define(`foreach',`ifelse(`$2',``'',`',dnl
`pushdef(`$1',first($2))`'$3`'popdef(`$1')dnl
$0(`$1',quote(shift($2)),`$3')')')
%  foreach_c(VAR, LIST, BODY)
%  Same as foreach, except that the results are separated by commas: the
%  first element is expanded here, and the rest of the list is handed to
%  foreach with a comma glued in front of BODY.
define(`foreach_c',`ifelse(`$2',``'',`',dnl
`pushdef(`$1',first($2))`'$3`'popdef(`$1')dnl
foreach(`$1',quote(shift($2)),`,`'$3')')')

%  max(A, B, ...)
%  Expand to the largest of the arguments, which must be integer
%  expressions acceptable to eval.  With a single argument the result is
%  that argument.  Otherwise the maximum of the tail is computed
%  recursively into the scratch macro tmp_max and compared with the head.
%  The popdef of tmp_max sits inside the branch that did the pushdef; it
%  used to run in the single-argument case as well, where it could pop a
%  tmp_max definition that this macro had not created.
define(`max',`ifelse($#,1,$1,`pushdef(`tmp_max',max(shift($*)))dnl
ifelse(eval($1 > tmp_max),1,$1,tmp_max)popdef(`tmp_max')')')

%  rotli(VALUE, COUNT)
%  Expand to VALUE rotated left by COUNT bits, computed by eval at macro
%  time.  The bits shifted out at the top are recovered by shifting VALUE
%  right by 32-COUNT and keeping only the low COUNT bits (the mask removes
%  whatever the right shift brought in at the top).
define(`rotli',`eval(((($1) >> (32-($2))) & ((1 << ($2))-1)) | (($1) << ($2)))')

%  bytes(A, B, ...)
%  Emit each argument as the operand of a `byte' pseudo-op, with the
%  statements separated by `;' and no separator after the last one.
%  Used without an argument list it expands to nothing.
define(`bytes',`ifelse($#,1,byte $1,$#,0,,`byte $1;$0(shift($*))')')

%  xbytes(HEXSTRING)
%  Given a string of hex digits, write it out as `byte' pseudo-ops, one
%  per pair of digits, each on its own tab-indented line as `byte 0xNN'.
%  Newlines, tabs and spaces in the string are removed first, so the
%  digits may be laid out over several lines.
%  The number of bytes is len/2 rounded down, so a trailing odd digit is
%  not emitted.
%  NOTE(review): the string is assumed to hold at least one pair of digits;
%  with none, the loop bound becomes -1 - confirm callers never do this.
%  Uses forloop with loop variable i and the scratch macro tmp_xbytes.
define(xbytes,`dnl
pushdef(`tmp_xbytes',translit(`$1',`
	 '))dnl
forloop(`i',0,eval(len(tmp_xbytes)/2-1),`dnl
	byte	0x`'substr(tmp_xbytes,eval(i*2),2)
')popdef(`tmp_xbytes')')

% Machine-generated labels.
% labelcount is the running label number; it starts at 0.
define(`labelcount',0)
% Tlbl expands to the current label, TL_ followed by labelcount.
define(`Tlbl',`TL_`'labelcount')
% newlabel increments labelcount, so that Tlbl names a fresh label.
define(`newlabel',`define(`labelcount',incr(labelcount))')

% Arbitrary bitwise permutations.

% These all work on a table $3, and an input $2, and permute $2 so that
% bit choosei(T,$3) of $2 goes to bit T of the result.
% Table entries number the input bits MSB-first: entry E selects byte E/8
% of the input and, within that byte, bit (E&7)^7 of the brclr numbering.

% permute_byte(N, INPUT, TABLE)
%  Emit code that leaves byte N of the permuted result in the accumulator.
%  For each of the eight table entries N*8 .. N*8+7 it emits one
%  `brclr<bit> INPUT+<byte>,<label>' whose branch target is the very next
%  line, followed by `rola' at that label; a fresh Tlbl label is made for
%  every bit with newlabel.
%  NOTE(review): this relies on brclr leaving the tested bit in the carry
%  flag so that rola shifts it into A - confirm against the CPU manual.
%  The expansion starts with an empty line.  Uses forloop with the loop
%  variable shftn and the scratch macro tmp_permute_byte.
define(permute_byte,`
pushdef(`tmp_permute_byte')dnl
forloop(`shftn',0,7,`newlabel`'dnl
define(`tmp_permute_byte',choosei(eval($1*8+shftn),`$3'))dnl
	brclr`'eval((tmp_permute_byte&7)^7)dnl
	$2+eval(tmp_permute_byte/8),Tlbl
Tlbl:	rola
')popdef(`tmp_permute_byte')')

% permute_and_store(DEST, INPUT, TABLE)
%  Store the permuted result to $1: for every result byte (the table
%  length divided by 8, rounded up) expand permute_byte and then emit
%  `sta <byte>+DEST'.
%  Uses forloop with the loop variable vrn.
define(permute_and_store,`dnl
forloop(`vrn',0,eval((length($3)-1)/8),`dnl
permute_byte(vrn,`$2',`$3')dnl
	sta	vrn+$1
')')

% permute_and_eor(DEST, INPUT, TABLE)
%  Eor the permuted result with $1: like permute_and_store, but each
%  permuted byte is exclusive-ored with the byte already at DEST before
%  being stored back there.
%  Uses forloop with the loop variable vrn.
define(permute_and_eor,`dnl
forloop(`vrn',0,eval((length($3)-1)/8),`dnl
permute_byte(vrn,`$2',`$3')dnl
	eor	vrn+$1
	sta	vrn+$1
')')

% The following macros work on 32-bit-wide, MSB-first values.
% const4(VALUE)
%  Emit VALUE as two `word' pseudo-ops, upper 16 bits first.
%  VALUE is parenthesised inside eval, as in seti4 and the other constant
%  macros, so that an expression with a low-precedence operator (such as
%  a|b) is split correctly.
define(`const4',`word	eval(($1) >> 16 & 0xFFFF)
	word	eval(($1) & 0xFFFF)')
% const4r(VALUE)
%  Emit VALUE as four `byte' pseudo-ops, least significant byte first,
%  each on its own tab-indented line.
%  VALUE is parenthesised inside eval so that an expression argument
%  (such as a|b) is shifted as a whole.
%  Uses forloop with the loop variable i.
define(`const4r',`forloop(`i',0,3,`dnl
	byte	eval((($1) >> (i*8)) & 0xFF)
')')
% set4(DEST, SRC)
%  Set $1 <- $2: copy the four bytes at SRC to DEST through the
%  accumulator, starting with the byte at offset 0.
%  The accumulator is overwritten (it ends up holding the byte at 3+SRC).
%  The expansion has no leading tab and no trailing newline.
define(set4,`lda	$2
	sta	$1
	lda	1+$2
	sta	1+$1
	lda	2+$2
	sta	2+$1
	lda	3+$2
	sta	3+$1')
% seti4(DEST, CONST)
%  Set $1 <- $2, where $2 is a constant: the constant is split with eval
%  into four immediate bytes which are stored MSB-first, i.e. bits 31..24
%  go to DEST and bits 7..0 go to 3+DEST.
%  The accumulator is overwritten.
define(seti4,`lda	#eval(($2) >> 24 & 0xFF)
	sta	$1
	lda	#eval(($2) >> 16 & 0xFF)
	sta	1+$1
	lda	#eval(($2) >>  8 & 0xFF)
	sta	2+$1
	lda	#eval(($2) >>  0 & 0xFF)
	sta	3+$1')
% add4(DEST, A, B)
%  Compute $1 <- $2+$3 on 4-byte MSB-first values.
%  The bytes at offset 3 (least significant) are added with `add', the
%  remaining ones with `adc', working towards offset 0 so that the carry
%  propagates upwards.  The accumulator is overwritten.
define(add4,`lda	3+$3
	add	3+$2
	sta	3+$1
	lda	2+$3
	adc	2+$2
	sta	2+$1
	lda	1+$3
	adc	1+$2
	sta	1+$1
	lda	0+$3
	adc	0+$2
	sta	0+$1')
% add4i(DEST, A, CONST)
%  Compute $1 <- $2+$3, where $3 is a constant.
%  Same byte order and carry chain as add4; the constant is split with
%  eval into immediate bytes, bits 7..0 being added at offset 3.
%  The accumulator is overwritten.
define(add4i,`lda	#eval(($3) >>  0 & 0xFF)
	add	3+$2
	sta	3+$1
	lda	#eval(($3) >>  8 & 0xFF)
	adc	2+$2
	sta	2+$1
	lda	#eval(($3) >> 16 & 0xFF)
	adc	1+$2
	sta	1+$1
	lda	#eval(($3) >> 24 & 0xFF)
	adc	0+$2
	sta	0+$1')
% add4Au(DEST, SRC)
%  Compute $1 <- $2+A (assumes A is unsigned).
%  The accumulator is added to the byte at 3+SRC; the three upper bytes
%  then have zero added with carry.  The accumulator is overwritten.
define(add4Au,`add	3+$2
	sta	3+$1
	lda	#0
	adc	2+$2
	sta	2+$1
	lda	#0
	adc	1+$2
	sta	1+$1
	lda	#0
	adc	0+$2
	sta	0+$1')
% sub4(DEST, A, B)
%  Compute $1 <- $2-$3 on 4-byte MSB-first values.
%  The bytes at offset 3 are subtracted with `sub', the remaining ones
%  with `sbc', working towards offset 0 so that the borrow propagates
%  upwards.  The accumulator is overwritten.
define(sub4,`lda	3+$2
	sub	3+$3
	sta	3+$1
	lda	2+$2
	sbc	2+$3
	sta	2+$1
	lda	1+$2
	sbc	1+$3
	sta	1+$1
	lda	0+$2
	sbc	0+$3
	sta	0+$1')
% eor4(DEST, A, B)
%  Compute $1 <- $2^$3, one byte at a time from offset 0 to offset 3.
%  Every emitted line is tab-indented and newline-terminated.
%  The accumulator is overwritten.
define(`eor4',`	lda	0+$3
	eor	0+$2
	sta	0+$1
	lda	1+$3
	eor	1+$2
	sta	1+$1
	lda	2+$3
	eor	2+$2
	sta	2+$1
	lda	3+$3
	eor	3+$2
	sta	3+$1
')
% eor4i(DEST, A, CONST)
%  Compute $1 <- $2^$3, where $3 is a constant.
%  The constant is split with eval into immediate bytes; bits 7..0 are
%  combined with the byte at offset 3 and bits 31..24 with the byte at
%  offset 0.  The accumulator is overwritten.
define(eor4i,`lda	#eval(($3) >>  0 & 0xFF)
	eor	3+$2
	sta	3+$1
	lda	#eval(($3) >>  8 & 0xFF)
	eor	2+$2
	sta	2+$1
	lda	#eval(($3) >> 16 & 0xFF)
	eor	1+$2
	sta	1+$1
	lda	#eval(($3) >> 24 & 0xFF)
	eor	0+$2
	sta	0+$1')
% rotr4(VAR, COUNT)
%  Rotate the 4-byte MSB-first value at $1 right by $2 steps (bits), in
%  place.  COUNT must be a literal integer; 0 expands to nothing.
%  Each step shifts a copy of byte 0 held in the accumulator right to get
%  its low bit into the carry, then rotates bytes 1, 2, 3 and finally
%  byte 0 through the carry.
%  For COUNT up to 8 the accumulator is loaded once, before the first
%  step: after k steps it holds the original byte 0 shifted right k times,
%  whose low bit still equals the low bit of the current byte 0.  That no
%  longer holds after eight steps, so for larger counts the accumulator is
%  reloaded from byte 0 at the end of every step.  The emitted code for
%  counts 0..8 is unchanged.
%  Uses forloop with the loop variable i.  The accumulator is overwritten.
define(`rotr4',`ifelse($2,0,,eval(($2) <= 8),1,`lda	$1 dnl
forloop(`i',1,$2,`
	lsra
	ror	1+$1
	ror	2+$1
	ror	3+$1
	ror	$1')',`lda	$1 dnl
forloop(`i',1,$2,`
	lsra
	ror	1+$1
	ror	2+$1
	ror	3+$1
	ror	$1
	lda	$1')')')
% rotl4(VAR, COUNT)
%  Rotate the 4-byte MSB-first value at $1 left by $2 steps (bits), in
%  place.  COUNT must be a literal integer; 0 expands to nothing.
%  Each step shifts a copy of byte 0 held in the accumulator left to get
%  its top bit into the carry, then rotates bytes 3, 2, 1 and finally
%  byte 0 through the carry.
%  For COUNT up to 8 the accumulator is loaded once, before the first
%  step: after k steps it holds the original byte 0 shifted left k times,
%  whose top bit still equals the top bit of the current byte 0.  That no
%  longer holds after eight steps, so for larger counts the accumulator is
%  reloaded from byte 0 at the end of every step.  The emitted code for
%  counts 0..8 is unchanged.
%  Uses forloop with the loop variable i.  The accumulator is overwritten.
define(`rotl4',`ifelse($2,0,,eval(($2) <= 8),1,`lda	$1 dnl
forloop(`i',1,$2,`
	lsla
	rol	3+$1
	rol	2+$1
	rol	1+$1
	rol	$1')',`lda	$1 dnl
forloop(`i',1,$2,`
	lsla
	rol	3+$1
	rol	2+$1
	rol	1+$1
	rol	$1
	lda	$1')')')
% rot9lc4(DEST, SRC)
%  Rotate $2 left by 9 steps, put result in $1 (must have $1 != $2).
%  The top bit of SRC byte 0 is first moved into the carry; then source
%  bytes 3, 2, 1, 0 are each rotated left by one bit through the carry and
%  stored one byte position lower in DEST (source byte i goes to
%  (i+3)&3), which supplies the remaining eight bits of rotation.
%  SRC is only read.  The accumulator is overwritten.
%  Uses foreach with the loop variable i.
define(rot9lc4,`lda	$2
	lsla
foreach(`i',`3,2,1,0',`dnl
	lda	i+$2
	rola
	sta	eval((i+3)&0x3)+$1
')')
% rot15lc4(DEST, SRC)
%  Rotate $2 left by 15 steps, put result in $1 (must have $1 != $2).
%  Done as a one-bit right rotation combined with a two-byte move: the low
%  bit of SRC byte 3 is first moved into the carry; then source bytes
%  0, 1, 2, 3 are each rotated right by one bit through the carry and
%  stored two byte positions away in DEST (source byte i goes to (i+2)&3).
%  SRC is only read.  The accumulator is overwritten.
%  Uses foreach with the loop variable i.
define(rot15lc4,`lda	$2+3
	lsra
foreach(`i',`0,1,2,3',`dnl
	lda	i+$2
	rora
	sta	eval((i+2)&0x3)+$1
')')
% rotblc4(DEST, SRC, NBYTES)
%  Rotate $2 left by $3 bytes, put result in $1 (must have $1 != $2, and
%  the two 4-byte areas must not overlap, since SRC is read while DEST is
%  being written).
%  Values are MSB-first, so a left rotation by n bytes moves source byte i
%  to destination byte (i-n)&3, the same direction as rot9lc4 and rotbl4;
%  the index is computed as (i+4-n)&3.
%  Two defects are corrected: the stores went to SRC (DEST was never
%  referenced), and the byte index was advanced by +n, which is the
%  opposite direction.
%  Uses forloop with the loop variable i.  The accumulator is overwritten.
define(`rotblc4',`forloop(`i',0,3,`dnl
	lda	$2+i
	sta	$1+eval((i+4-($3))&3)
')')
% rotbl4(VAR, NBYTES, TEMP)
%  Rotate $1 left by $2 bytes, in place, using $3 as a temporary, and
%  leave $1[0] in the accumulator.
%  As a special case, if $3 is `,X' the index register will
%  be used as the temporary, and $1[0] will be in the index register.
%  NBYTES selects one of three fixed sequences: 1, 2, or anything else,
%  which is treated as 3.
%  Two helper macros are defined for the duration of the expansion:
%  tmp_rotbl4_settmp(ADDR) emits code saving the byte at ADDR in the
%  temporary, and tmp_rotbl4_gettmp(ADDR) emits code storing the temporary
%  to ADDR.  Their bodies are built with $`'1 so that the `$1' in them
%  refers to the helper's own argument rather than to VAR.
define(rotbl4,`dnl
pushdef(`tmp_rotbl4_settmp',ifelse(`$3',`,X',`ldx	$`'1',`lda	$`'1
	sta	$3'))dnl
pushdef(`tmp_rotbl4_gettmp',ifelse(`$3',`,X',`stx	$`'1',`lda	$3
	sta	$`'1'))dnl
ifelse($2,1,`tmp_rotbl4_settmp(`1+$1')
	lda	2+$1
	sta	1+$1
	lda	3+$1
	sta	2+$1
	lda	$1
	sta	3+$1
	tmp_rotbl4_gettmp(`$1')',dnl
$2,2,`tmp_rotbl4_settmp(`1+$1')
	lda	3+$1
	sta	1+$1
	tmp_rotbl4_gettmp(`3+$1')
	tmp_rotbl4_settmp(`2+$1')
	lda	$1
	sta	2+$1
	tmp_rotbl4_gettmp(`$1')',dnl
`tmp_rotbl4_settmp(`3+$1')
	lda	2+$1
	sta	3+$1
	lda	1+$1
	sta	2+$1
	lda	$1
	sta	1+$1
	tmp_rotbl4_gettmp(`$1')')`'dnl
popdef(`tmp_rotbl4_settmp')popdef(`tmp_rotbl4_gettmp')')
% shl4(VAR, COUNT)
%  Shift the 4-byte MSB-first value at $1 left by $2 steps (bits), in
%  place.  Each step shifts byte 3 left and rotates bytes 2, 1 and 0
%  through the carry.  COUNT must be a literal integer; 0 expands to
%  nothing.  The expansion starts with a newline.
%  Uses forloop with the loop variable i.
define(shl4,`ifelse($2,0,,`dnl
forloop(`i',1,$2,`
	lsl	3+$1
	rol	2+$1
	rol	1+$1
	rol	$1')')')
% shr4(VAR, COUNT)
%  Shift the 4-byte MSB-first value at $1 right by $2 steps (bits), in
%  place.  Each step shifts byte 0 right (logical shift, `lsr') and
%  rotates bytes 1, 2 and 3 through the carry.  COUNT must be a literal
%  integer; 0 expands to nothing.  The expansion starts with a newline.
%  Uses forloop with the loop variable i.
define(shr4,`ifelse($2,0,,`dnl
forloop(`i',1,$2,`
	lsr	$1
	ror	1+$1
	ror	2+$1
	ror	3+$1')')')
% mul4(DEST, X, Y)
%  Compute $1 <- $2*$3.  $1 must be different from $2,$3.
%  Only the low 32 bits of the product are kept.  The ten byte-by-byte
%  partial products that contribute to those bits are formed with `mul'
%  and accumulated into DEST, lowest weight first; byte 3 of each operand
%  is the least significant (x_0, y_0 in the comments below).
%  For the products of weight 2^24 only the low byte of each `mul' result
%  is added.
%  The accumulator and the index register are overwritten.
%  Note that the `%' comment lines inside the body are part of the macro
%  text and so appear in the generated output.
define(mul4,`dnl
`%' compute $1 <- $2 ``x'' * $3 ``y''
% if x = 2^24 x_3 + 2^16 x_2 + 2^8 x_1 + x_0, and y similarly, then
% x*y =	(x_3 y_0 + x_2 y_1 + x_1 y_2 + x_0 y_3) * 2^24
%	+ (x_2 y_0 + x_1 y_1 + x_0 y_2) * 2^16
%	+ (x_1 y_0 + x_0 y_1) * 2^8
%	+ x_0 y_0

% Do x_0 y_0
	lda	3+$3
	ldx	3+$2
	mul
	sta	3+$1
	stx	2+$1
% Do x_1 y_0
	lda	3+$3
	ldx	2+$2
	mul
	add	2+$1
	sta	2+$1
	txa
	adc	#0
	sta	1+$1
% Do x_0 y_1
	lda	2+$3
	ldx	3+$2
	mul
	add	2+$1
	sta	2+$1
	txa
	adc	1+$1
	sta	1+$1
	lda	#0
	adc	#0
	sta	0+$1
% Do x_2 y_0
	lda	3+$3
	ldx	1+$2
	mul
	add	1+$1
	sta	1+$1
	txa
	adc	0+$1
	sta	0+$1
% Do x_1 y_1
	lda	2+$3
	ldx	2+$2
	mul
	add	1+$1
	sta	1+$1
	txa
	adc	0+$1
	sta	0+$1
% Do x_0 y_2
	lda	1+$3
	ldx	3+$2
	mul
	add	1+$1
	sta	1+$1
	txa
	adc	0+$1
	sta	0+$1
% Do x_3 y_0
	lda	3+$3
	ldx	0+$2
	mul
	add	0+$1
	sta	0+$1
% Do x_2 y_1
	lda	2+$3
	ldx	1+$2
	mul
	add	0+$1
	sta	0+$1
% Do x_1 y_2
	lda	1+$3
	ldx	2+$2
	mul
	add	0+$1
	sta	0+$1
% Do x_0 y_3
	lda	0+$3
	ldx	3+$2
	mul
	add	0+$1
	sta	0+$1
')

% dbg4(VAR)
%  Output for debugging: emit a `dbg' statement for each of the four
%  bytes of VAR, offsets 0 to 3, each terminated by `;'.
define(`dbg4',`dbg 0+$1;dbg 1+$1;dbg 2+$1;dbg 3+$1;')

% delay_routine
%  A routine to delay: emits inline code that, per the comment in its
%  body, takes 18+A clock cycles.
%  Structure of the emitted code: 5 is subtracted from A in a loop until
%  the subtraction borrows; the remainder is then used as an index into
%  the five-entry table `delays' of offsets, and `jmp delays,X' enters a
%  short chain of nops at one of the entry points delay0 .. delay4.
%  NOTE(review): the exact cycle count depends on instruction timings
%  that are not visible in this file - confirm before changing anything.
%  The accumulator and the index register are overwritten.
%  The labels delay_loop, delays, delay0 .. delay4 and delay_done are
%  fixed names.  Uses the bytes macro for the offset table.
define(delay_routine,`dnl
% Delay for 18+A clock cycles.
delay_loop:
	sub	#5
	bcc	delay_loop
	tax
	ldx	delays-0x100+5,X
	jmp	delays,X
delays:	bytes(delay0-delays, delay1-delays, delay2-delays, delay3-delays)
	bytes(delay4-delays)
delay3: nop
delay1:	bra	delay_done
delay4: nop
delay2: nop
delay0:	nop
delay_done:
')

% Program for testing.
% Arguments:
% $1 - 5 bytes of RAM (not used by scheduler or encryption code)
% $2 - where to put the key
% $3 - size of key
% $4 - where to put the plaintext
% $5 - where the ciphertext is returned
% $6 - size of plain/ciphertext
% $7 - how to call schedule (should be like `jsr schedule')
% $8 - how to call encrypt (should be like `jsr encrypt')
%
% The generated program walks through records stored between the labels
% test_data and test_data_end, which must be supplied by the including
% file.  Each record is $3+2*$6 bytes long: the key, then the plaintext,
% then the expected ciphertext.
% RAM use: byte $1+0 is the record counter (test_counter) and bytes
% $1+1 .. $1+4 hold a copy of the four-byte routine at test_load_code,
% whose address operand is patched to point at the current record.
% For every record the key is copied to $2 and $7 is run, the plaintext is
% copied to $4 and $8 is run, and the result at $5 is compared with the
% expected ciphertext; on a mismatch dbga/dbg statements are emitted for
% the accumulator, the compared byte and the counter.
% When the computed record address equals test_data_end, a final run of
% $7 and $8 bracketed by dbga statements is made ("a quick timing run")
% and the program executes `stop'.
% $5 is used inside eval together with $6 and $3, so these three must be
% numeric.  The macro also defines test_counter and test_load, places the
% address of testprogram at m_RESETv, and uses fixed label names.
define(test_program,`dnl
dnl
define(test_counter,`$1+0')dnl
define(test_load,`$1+1')dnl
dnl
% The test program.

% This is a little self-modifying code used to read the test data array.
test_load_code:
	lda	test_data,X
	rts
test_load_code_end:

% This is the test program.

testprogram:
	clr	test_counter

testloop:
	ldx	#test_load_code_end-test_load_code-1
test_load_copy_loop:
	lda	test_load_code,X
	sta	test_load,X
	decx
	bpl	test_load_copy_loop

	lda	test_counter
	ldx	#eval(($3)+2*($6))
	mul
	add	#test_data&0xFF
	sta	test_load+2
	txa
	adc	#test_data>>8
	sta	test_load+1

	cmp	#test_data_end>>8
	bne	test_loop_continues
	lda	test_load+2
	cmp	#test_data_end&0xFF
	bne	test_loop_continues
% now, a quick timing run.
	dbga
	$7
	dbga
	$8
	dbga
	stop

test_loop_continues:
	ldx	#$3-1
test_copy_key_loop:
	jsr	test_load
	sta	$2,X
	decx
	bpl	test_copy_key_loop

	$7

	ldx	#$6+$3-1
test_copy_plain_loop:
	jsr	test_load
	sta	$4-$3,X
	decx
	cpx	#$3
	bpl	test_copy_plain_loop

	$8

	ldx	#$6+$6+$3-1
test_check_loop:
	jsr	test_load
	cmp	eval($5-$6-$3),X
	bne	test_failed
	decx
	cpx	#$6+$3
	bpl	test_check_loop
	bra	test_succeded
test_failed:
	dbga
	dbg	eval($5-$6-$3),X
	dbg	test_counter
test_succeded:
	inc	test_counter
	bra	testloop
end_of_testprogram:

	org	m_RESETv
	word	testprogram
	org	end_of_testprogram
')

% A variable-width rotate subroutine.
% variable_rotate
%  Emits the body of a subroutine (it ends in rts) that rotates the 4-byte
%  value addressed by X left by the amount in A; only the low five bits of
%  A are used.  The entry label is not part of the macro.
%  According to the comment in its body it is written to take a constant
%  time: before rotating it looks up a per-amount delay in the rotdelay
%  table and runs delay_routine, which is expanded inline here.
%  The rotation itself is split into a whole-byte move (none, or a move of
%  one, two or three byte positions) followed by a bit rotation of the
%  signed residual count kept in v_trotate: positive counts go through
%  the rotl loop, negative counts through the rotr loop.
%  NOTE(review): by my reading the split is 0-4 bits only, 5-11 around an
%  8-bit move, 12-20 around a 16-bit move, 21-27 around a 24-bit move and
%  28-31 as a right rotation; this depends on the CPU's flag behaviour for
%  cmp/bcs/bpl - confirm against the CPU manual.
%  v_rotvar and v_trotate are scratch bytes that are not defined in this
%  file and must be supplied by the including file.
%  The macro (re)defines rotate_boundary as 12 and uses fixed label names,
%  including those of delay_routine.
define(`variable_rotate',`dnl
%  Rotate the variable at X left by A.
%  This has been written to take a constant time.
	and	#0x1F
	stx	v_rotvar
	sta	v_trotate
	tax
	lda	rotdelay,X
delay_routine
	lda	v_trotate
	ldx	v_rotvar
define(rotate_boundary,12)dnl
	sub	#rotate_boundary
	cmp	#9
	bcs	rotate_16
	bpl	rotate_r8
	add	#7
	bmi	rotate_bits

	sub	#8+7-rotate_boundary
	sta	v_trotate
	lda	1,X
	sta	v_rotvar
	lda	2,X
	sta	1,X
	lda	3,X
	sta	2,X
	lda	,X
	sta	3,X
	bra	rotate_bits_0

rotate_r8:
	cmp	#16
	bpl	rotate_bits_2
	sub	#32-8-rotate_boundary
	sta	v_trotate
	lda	3,X
	sta	v_rotvar
	lda	2,X
	sta	3,X
	lda	1,X
	sta	2,X
	lda	,X
	sta	1,X
	bra	rotate_bits_0
rotate_bits_2:
	sub	#32-rotate_boundary
	sta	v_trotate
rotate_bits:
	lda	,X
	bra	rotate_bits_1

rotate_16:
	sub	#16-rotate_boundary
	sta	v_trotate
	lda	1,X
	sta	v_rotvar
	lda	3,X
	sta	1,X
	lda	v_rotvar
	sta	3,X
	lda	2,X
	sta	v_rotvar
	lda	,X
	sta	2,X
rotate_bits_0:
	lda	v_rotvar
	sta	,X

rotate_bits_1:
	tst	v_trotate
	bmi	rotr
	beq	rot_done

rotl:	lsla
	rol	3,X
	rol	2,X
	rol	1,X
	rol	,X
	dec	v_trotate
	bne	rotl
rot_done:
	rts

rotr:	lsra
	ror	1,X
	ror	2,X
	ror	3,X
	ror	,X
	inc	v_trotate
	bne	rotr
	rts

% This is the # of clock cycles to delay for a given size of rotation.
rotdelay:
	bytes(176,142,108, 74, 40, 33, 67,101,132, 98, 64, 30,  3, 37, 71,105)
	bytes(136,102, 68, 34,  0, 33, 67,101,132, 98, 64, 30, 37, 71,105,139)
dnl  for testing, use
dnl	forloop(`i',0,31,`byte 0;')
')

% Some address-space parameters.
% I/O port data and data-direction registers.
define(`m_PORTA',0x00)
define(`m_PORTB',0x01)
define(`m_PORTC',0x02)
define(`m_PORTD',0x03)
define(`m_DDRA',0x04)
define(`m_DDRB',0x05)
define(`m_DDRC',0x06)
define(`m_DDRD',0x07)
% Other registers.
define(`m_TSCR',0x08)
define(`m_TCR',0x09)
define(`m_ISCR',0x0A)
define(`m_PDRA',0x10)
define(`m_PDRB',0x11)
define(`m_EPROG',0x18)
% m_VECSTART is the base address from which the remaining addresses are
% derived.  m_COPR is the base itself (kept as the hex text); the others
% are evaluated to decimal by eval when this file is read.
define(`m_VECSTART',0x3FF0)
define(`m_COPR',m_VECSTART)
define(`m_MOR',eval(m_VECSTART+1))
define(`m_TIMERv',eval(m_VECSTART+8))
define(`m_IRQv',eval(m_VECSTART+10))
define(`m_SWIv',eval(m_VECSTART+12))
define(`m_RESETv',eval(m_VECSTART+14))

divert`'dnl
