dnl -*- mode: m4; comment-start: "%" -*-
include(`macros.m4')divert(-1)
% $Id: serpent.m4,v 1.4 1999/02/20 13:56:41 geoffk Exp $

include(`serpent-sboxes.m4')

% Zero-page RAM layout.
% The macro names are quoted, matching the quoted style used for the
% subroutine generator further down, so that a name which already has a
% definition is redefined instead of being expanded inside the call.
define(`v_k', 0x90)		% The key, 16 bytes

define(`v_pkblock', 0xA0)	% The half of the prekey we are working on
define(`v_keydata', 0xB0)	% The key data, after the S-box
define(`v_X', 0xC0)		% The block to encrypt, MSB-first

define(`v_u', 0xD0)		% The prekey, 32 bytes.
define(`v_round', 0xF0)		% Four times the current round number.
define(`v_sch_cnt', 0xF1)	% Pointer into the prekey for the affine recurrence.
define(`v_sch_tmp', 0xF2)	% Temporary for the key rotate
% The S-box temporaries deliberately share 0xF1 and 0xF2 with the two
% key schedule bytes above, which are only live during the affine recurrence.
define(`v_t', 0xF1)		% Temporaries for the S-boxes, 4 bytes

define(`test_ram', 0xF5)	% Some RAM for the test program.

define(`NUMROUNDS', 32)

% Generator for the S-box subroutines: expanding it with argument N emits
% the routine do_sbox_N.
% On entry to do_sbox_N, A holds a zero-page address, which is moved to X.
% Each pass of the loop expands the sbox_N macro with the operands
% 0*4+16,X and 0*4,X plus v_t as scratch, then increments X.  The loop
% ends when the low two bits of X are zero again, which is after four
% passes when the entry address is a multiple of four.
% NOTE(review): the sbox_N macros are defined outside this file, presumably
% in serpent-sboxes.m4.  Which operand is the input and which the output,
% and that they leave X untouched, should be confirmed there.  The callers
% pass v_pkblock or v_keydata and read the result 16 bytes further on.
define(`sbox_subr',`dnl
do_sbox_`'$1:
	tax
sbox_`'$1`'_loop:
	sbox_$1(`0*4+16,X',`0*4,X',`v_t')
	incx
	txa
	bit	#3
	bne	sbox_`'$1`'_loop
	rts
')

divert`'dnl
% key_table holds 44 one-byte entries; entry n is the zero-page address
% v_u + (n mod 32).  The key schedule indexes it with X to move a pointer
% forward inside the 32-byte prekey buffer with wrap-around.  The largest
% index used is 31 + 12 = 43, hence 44 entries.
key_table:
forloop(`i',0,43,`dnl
	byte	v_u+eval(i&31)
')dnl
% key_const holds the four bytes 9e 37 79 b9.  The affine recurrence reads
% them with an index of 0 to 3 and XORs one into each byte of a prekey word.
key_const:
	xbytes(9e3779b9)

% S-box dispatch table.  Callers set X to three times the S-box number and
% do a jsr to sbox_jumptable,X, so every entry must occupy exactly three
% bytes.
% NOTE(review): this relies on the assembler choosing the 3-byte form of
% jmp for these targets.
% There is no jmp for S-box 7: its subroutine is expanded immediately after
% the seven jmp entries, so offset 21 lands directly on do_sbox_7.  Do not
% reorder the lines below.
sbox_jumptable:
	jmp	do_sbox_0
	jmp	do_sbox_1
	jmp	do_sbox_2
	jmp	do_sbox_3
	jmp	do_sbox_4
	jmp	do_sbox_5
	jmp	do_sbox_6
sbox_subr(7)
sbox_subr(0)
sbox_subr(1)
sbox_subr(2)
sbox_subr(3)
sbox_subr(4)
sbox_subr(5)
sbox_subr(6)

% Serpent encryption
% Input: The key in v_k (16 bytes are read; the padding is added below),
%        the block to encrypt in v_X
% Output: The encrypted block in v_X
% Uses A and X, the prekey buffer v_u, the work areas v_pkblock and
% v_keydata, v_round, and the scratch bytes starting at v_t.
serpent:
% Copy in the key
% The 16 key bytes go to the low half of v_u; the high half is zeroed.
	ldx	#15
key_setup_loop:
	lda	v_k,X
	sta	v_u,X
	clr	v_u+16,X
	decx
	bpl	key_setup_loop
% Set bit 0 of byte 19 of v_u, the last byte of the fifth 4-byte word.
% NOTE(review): presumably the single 1 bit that pads a short key up to
% the full prekey length; confirm against the Serpent specification.
	bset0	v_u+16+3

% v_round starts at 0 and is advanced by 4 per round further down.
	clr	v_round

round_loop:
% v_round holds four times the round number, so bit 2 is set on odd rounds.
% The prekey buffer v_u is regenerated only when that bit is clear; on odd
% rounds the branch skips straight to copying the second half of v_u.
	brset2	v_round,no_affine_loop

% First, do the key schedule.
% v_sch_cnt is a zero-page pointer that walks through the 32 bytes of v_u.
	lda	#v_u
	sta	v_sch_cnt
affine_loop_outer:
% One pass of the outer loop per 4-byte word.  A is the byte index within
% the word and starts at 0.
	lda	#0
affine_loop:
% One pass of the inner loop per byte.  XOR together the key_const byte
% for this index, the byte at the current position, and the bytes 12, 20
% and 28 positions further on, wrapping at 32 by way of key_table.  The
% table look-ups are chained, each one starting from the X left by the
% previous one, which is why the offsets read 12, 8, 8.
	tax
	lda	key_const,X
	ldx	v_sch_cnt
	eor	,X
	ldx	key_table+eval(3*4)-v_u,X
	eor	,X
	ldx	key_table+eval(2*4)-v_u,X
	eor	,X
	ldx	key_table+eval(2*4)-v_u,X
	eor	,X
% Store the result over the byte at the current position and advance the
% pointer.  The low two bits of the new pointer value serve as the next
% byte index, and zero means the word is complete.  This relies on v_u
% being a multiple of 4.
	ldx	v_sch_cnt
	sta	,X
	txa
	add	#1
	sta	v_sch_cnt
	and	#3
	bne	affine_loop

% X still addresses the last byte of the word just written.  The index of
% that word within v_u, 0 to 7, plus v_round is XORed into that byte.
	txa
	sub	#v_u
	lsra
	lsra
	add	v_round
	eor	,X
	sta	,X
% Step X back to the first byte of the word, then rotate the word.
% NOTE(review): the byte-rotate macro on the line after tax is defined
% outside this file.  The three unrolled passes that follow each shift the
% four bytes left by one bit, feeding in the top bit of A, so they assume
% the macro leaves a suitable byte in A.  The net effect is presumably a
% left rotation by 11 bits; confirm in macros.m4.
	txa
	sub	#3
	tax
	rotbl4(`0,X',1,v_sch_tmp)
forloop(`i',0,2,`dnl
	lsla
	rol	3,X
	rol	2,X
	rol	1,X
	rol	0,X
')dnl
% Finished once the word starting at v_u+28, the last of the eight, is done.
	cpx	#32-4+v_u
	bne	affine_loop_outer

% Do the key schedule S-box.
% First, select the appropriate half of the prekey.
% (the branch for this is way above, at the start of the round function).
% Even rounds arrive here by falling through from the affine recurrence
% and copy bytes 0 to 15 of v_u into v_pkblock.

	ldx	#15
copy_even_prekey_loop:
	lda	v_u,X
	sta	v_pkblock,X
	decx
	bpl	copy_even_prekey_loop
	bra	done_key_copy

no_affine_loop:
% Odd rounds arrive here from the brset2 at round_loop and copy bytes
% 16 to 31 of v_u into v_pkblock instead.
	ldx	#15
copy_odd_prekey_loop:
	lda	v_u+16,X
	sta	v_pkblock,X
	decx
	bpl	copy_odd_prekey_loop

done_key_copy:

% Now call the appropriate s-box.
% Jump table offset: ((12 - v_round) >> 1) & 0xE is 2 * ((3 - round) mod 8),
% and adding half of that again gives 3 * ((3 - round) mod 8), the byte
% offset of the 3-byte table entry.  v_t is used as scratch for the sum.
	lda	#3*4
	sub	v_round
	lsra
	and	#0xE
	sta	v_t
	lsra
	add	v_t
	tax
% A carries the address of the block to transform.  The result is read
% from v_keydata below, which lies 16 bytes above v_pkblock.
	lda	#v_pkblock
	jsr	sbox_jumptable,X

% Do the key material XOR
% (in the post-final round, it's slightly different)
% v_round equals NUMROUNDS*4 only on the extra pass made after the last
% round; in that case the round key is XORed into v_X and serpent returns.
	lda	v_round
	cmp	#NUMROUNDS*4
	bne	continuing_round_loop

	ldx	#15
final_key_eor_loop:
	lda	v_keydata,X
	eor	v_X,X
	sta	v_X,X
	decx
	bpl	final_key_eor_loop
	rts

continuing_round_loop:
% Ordinary round: XOR the round key with the block.  The result is written
% to v_keydata, not v_X, because the S-box call below takes v_keydata as
% its source.
	ldx	#15
round_key_eor_loop:
	lda	v_keydata,X
	eor	v_X,X
	sta	v_keydata,X
	decx
	bpl	round_key_eor_loop

% Do the s-box
% Jump table offset: (v_round >> 1) & 0xE is 2 * (round mod 8), and adding
% half of that again gives 3 * (round mod 8).
	lda	v_round
	lsra
	and	#0xE
	sta	v_t
	lsra
	add	v_t
	tax
% A carries the source address.  The code that follows works on v_X, which
% lies 16 bytes above v_keydata.
	lda	#v_keydata
	jsr	sbox_jumptable,X

% In the final round, no linear xform is done.
% v_round is advanced here, so from this point on it names the next round.
% When it reaches NUMROUNDS*4 control goes back to round_loop for the extra
% pass that ends in the final key XOR.
	lda	v_round
	add	#4
	sta	v_round
	cmp	#NUMROUNDS*4
	bne	do_lin_xform
	jmp	round_loop

do_lin_xform:
% Do the linear transformation
% The block is treated as four 4-byte words: W0 at v_X, W1 at v_X+4,
% W2 at v_X+8 and W3 at v_X+12, each stored most significant byte first.
% NOTE(review): the rotate macros used here are defined outside this file.
% The step notes below assume they rotate a 4-byte word left or right by
% the given number of bits, or by whole bytes for the byte-rotate variant.
% The intended sequence appears to be the standard Serpent one,
%   W0 <<<= 13; W2 <<<= 3; W1 ^= W0 ^ W2; W3 ^= W2 ^ (W0 << 3);
%   W1 <<<= 1; W3 <<<= 7; W0 ^= W1 ^ W3; W2 ^= W3 ^ (W1 << 7);
%   W0 <<<= 5; W2 <<<= 22
% carried out in a different order, with some rotations folded into byte
% moves.  Statement order matters throughout; do not rearrange.
%
% Step 1: rotate W2 left by 3.
	rotl4(v_X+8,3)
% Step 2: swap the two halves of W0, which is a rotation by 16 bits.  Each
% byte, while it is in A, is also XORed together with the matching byte
% of W2 into W3.  X holds the byte displaced by each swap.
	ldx	v_X
	lda	v_X+2
	sta	v_X
	eor	v_X+8
	eor	v_X+12
	sta	v_X+12
	txa
	sta	v_X+2
	eor	v_X+8+2
	eor	v_X+12+2
	sta	v_X+12+2
	ldx	v_X+1
	lda	v_X+3
	sta	v_X+1
	eor	v_X+8+1
	eor	v_X+12+1
	sta	v_X+12+1
	txa
	sta	v_X+3
% For the least significant byte the low three bits are masked off before
% the XOR into W3, matching the bits a left shift by 3 would clear.
	and	#0xF8
	eor	v_X+8+3
	eor	v_X+12+3
	sta	v_X+12+3
% Step 3: 16 bits left followed by 3 bits right leaves W0 rotated left
% by 13 in total.
	rotr4(v_X,3)

% Step 4: W1 = W0 ^ W1 ^ W2, one byte at a time.
	ldx	#3
lin_xform_1_loop:
	lda	v_X,X
	eor	v_X+4,X
	eor	v_X+8,X
	sta	v_X+4,X
	decx
	bpl	lin_xform_1_loop

% Step 5: W3 gets a byte rotation followed by a one-bit right rotation
% through the carry, presumably a net left rotation by 7.
% NOTE(review): the lsrx implies that the byte-rotate macro leaves a copy
% of byte 0 of W3 in X.  Its low bit seeds the carry, and the ror chain
% runs over bytes 1, 2, 3 and finally 0 so the bit shifted out of byte 3
% wraps into byte 0.  Confirm in macros.m4.
	rotbl4(v_X+12,1,`,X')
	lsrx
	ror	1+v_X+12
	ror	2+v_X+12
	ror	3+v_X+12
	ror	0+v_X+12

% Step 6: W2 = W2 ^ W3 ^ (shifted W1), computed one byte at a time.  W1 has
% not had its one-bit rotation yet, so the W1 byte taken for each result
% byte is the one at the next higher offset, and for the least significant
% result byte only the top bit of W1 byte 0 is kept by the 0x80 mask.
% Each result byte is stored one offset further on, wrapping from 3 to 0,
% which leaves W2 rotated right by one byte.  The first result is parked
% in X because its destination v_X+9 is still needed as an input; the stx
% after the loop stores it.  The byte order 0, 3, 2, 1 ensures that every
% byte of W2 is read before it is overwritten.
foreach(`i',`0,3,2,1',`dnl
	lda	v_X+4+eval((i+1)&3)
ifelse(i,3,`dnl
	and	#0x80
')dnl
	eor	v_X+8+i
	eor	v_X+12+i
ifelse(i,0,`dnl
	tax
',`dnl
	sta	v_X+8+eval((i+1)&3)
')dnl
')dnl
	stx	v_X+8+1

% Step 7: rotate W1 left by 1.
	rotl4(v_X+4,1)

% Step 8: W0 = W0 ^ W1 ^ W3, one byte at a time.
	ldx	#3
lin_xform_2_loop:
	lda	v_X,X
	eor	v_X+4,X
	eor	v_X+12,X
	sta	v_X,X
	decx
	bpl	lin_xform_2_loop

% Step 9: rotate W0 left by 5.  W2 was stored rotated right by one byte in
% step 6, so two more bits to the right presumably complete its left
% rotation by 22.
	rotl4(v_X,5)
	rotr4(v_X+8,2)

% Do the next round
	jmp	round_loop

% Test harness, expanded from a macro defined outside this file.
% NOTE(review): the meaning of the arguments is not visible here.  They
% appear to be scratch RAM at test_ram, the key destination v_k with
% length 16, v_X as both input and output with length 16, an empty
% argument, and the call jsr serpent.  Confirm in macros.m4.
test_program(test_ram,v_k,16,v_X,v_X,16,`',jsr serpent)

% Test vectors.
% NOTE(review): inferred from the layout only.  Each record looks like one
% line holding a 16-byte key followed by one line holding 16 bytes of
% plaintext and 16 bytes of expected ciphertext.  In the first three records
% the ciphertext of one record is the plaintext of the next; the last two
% records use the third ciphertext as the key.
test_data:
xbytes(
00000000000000000000000000000000
  00000000 00000000 00000000 00000000 7ab12036 d093a9e6 76b81896 e9ba6682
00000000000000000000000000000000
  7ab12036 d093a9e6 76b81896 e9ba6682  670928d7 b4a05cca b1bc0a5e 2d521631
00000000000000000000000000000000
  670928d7 b4a05cca b1bc0a5e 2d521631  1d1b0682 c701cf61 beaafae4 be3ad30c
1d1b0682 c701cf61 beaafae4 be3ad30c
  00000000 00000000 00000000 00000000  a20324ed 7e79eef0 e80e7c6b bc95654e
1d1b0682 c701cf61 beaafae4 be3ad30c
  a20324ed 7e79eef0 e80e7c6b bc95654e  fc907c8b 511c8f2d c46ae1ad 49fc2fbf)
test_data_end:
