* ========== Copyright Header Begin ==========================================
* OpenSPARC T2 Processor File: modarith.c
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
* ========== Copyright Header End ============================================
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
#pragma ident "@(#)modarith.c 1.8 07/02/26 SMI"
* The module implements the modular arithmetic unit.
/* exact cut and paste from ss_common.c */
#include <string.h> /* memcpy/memset */
/*
 * Forward declarations of the three arithmetic operations implemented
 * further down in this file.  Each takes the per-core modular arithmetic
 * state and returns a mod_arith_rv_t status.
 */
static mod_arith_rv_t mod_mpy(mod_arith_t *masp);
static mod_arith_rv_t mod_reduce(mod_arith_t *masp);
static mod_arith_rv_t mod_exp(mod_arith_t *masp);
/* Debug hook, compiled out: expands to an empty statement and discards its argument. */
#define DBGX(s) do { } while (0)
* Make bignums be twice the largest possible size + 1. (Size is in
* words determined by the BIG_CHUNK_SIZE.)
/* Width, in bits, of one bignum chunk (big_init2 and big_flush assert this against the element size). */
#define BIG_CHUNK_SIZE 32
/* Chunk count passed to big_init for every bignum; evaluates to 129 with 32-bit chunks. */
#define BIGSIZE (2 * 2048 / BIG_CHUNK_SIZE + 1)
/*
 * EXTR(val, hibit, lobit): extract bits hibit..lobit of val (inclusive,
 * bit 0 is the least significant) and return them right-justified as an
 * unsigned 64-bit quantity.
 *
 * The mask is built by shifting an all-ones value right instead of
 * computing (1ULL << width) - 1, so that a full-width field (hibit == 63,
 * lobit == 0) never shifts by the width of the type, which is undefined
 * behavior in C.  Requires 0 <= lobit <= hibit <= 63.  The bit-position
 * arguments are evaluated more than once; do not pass expressions with
 * side effects.
 */
#define EXTR(val, hibit, lobit) \
	(((val) >> (lobit)) & (~0ULL >> (63 - ((hibit) - (lobit)))))
* addr RW SZ Function Comment
* ----- -- ------ -------------- --------------------------------
* 0x80 RW 64-bit ASI_MA_CONTROL_REG strand, busy, int, opcode, len +
* 0x88 RW 64-bit ASI_MA_MPA_REG pointer to MA memory region
* 0x90 RW 64-bit ASI_MA_ADDR_REG 6 8-bit offsets into MA mem reg'n
* 0x98 RW 64-bit ASI_MA_NP_REG N' (for Montgomery Mpy)
* 0xa0 R 64-bit ASI_MA_SYNC_REG load blocks until op done
/*
 * send_interrupt: post a precise trap of the given type through the cpu
 * structure of a selected strand.
 *
 *   sp     - simcpu of the strand performing the access
 *   thread - strand number; only its low two bits are used
 *   type   - trap type handed to post_precise_trap
 *
 * The target is npp->strand[idx], where idx is STRANDID2IDX applied to an
 * id built from the low three bits of core (shifted left by two) and the
 * low two bits of thread.  The declaration of core is not among the lines
 * shown here.
 */
send_interrupt(simcpu_t
*sp
, int thread
, sparcv9_trap_type_t type
)
sparcv9_cpu_t
*v9p
= (sparcv9_cpu_t
*)(sp
->specificp
);
ss_strand_t
*nsp
= v9p
->impl_specificp
;
ss_proc_t
*npp
= (ss_proc_t
*)(sp
->config_procp
->procp
);
int idx
= STRANDID2IDX(npp
,
((core
& 0x7) << 2) | (thread
& 0x3));
sparcv9_cpu_t
*target_v9p
= npp
->strand
[idx
];
/*
 * NOTE(review): target_sp is read from v9p (the calling strand), not from
 * target_v9p, so the trap is posted with the caller simcpu even though the
 * function pointer comes from the target.  Confirm this is intended.
 */
simcpu_t
*target_sp
= v9p
->simp
;
target_v9p
->post_precise_trap(target_sp
, type
);
* Loads from the ASI_MA_SYNC_REG (it appears as a word in the address
* space) are tricky. A load blocks until the current MA operation
* completes or is aborted. If it completes normally, a zero is
* returned. If it is aborted, the target register of the load is
* Some operations need to be continued back in ss_common.c. So if
* lstmp->mtxp is set, the mutex is still held and needs to be
* released in the calling environment. (It seems like offset should
/*
 * modarith_cpu_access: handle a load or store to one of the modular
 * arithmetic ASI registers.  The register is selected by offset: the
 * control register, 0x88 (ASI_MA_MPA_REG), 0x90 (ASI_MA_ADDR_REG),
 * 0x98 (ASI_MA_NP_REG) and 0xa0 (ASI_MA_SYNC_REG).
 *
 * The per-core state is masp = &npp->mod_arith_p[core].  masp->lock is
 * taken before the register dispatch and released after it.  rv starts
 * out as MOD_ARITH_FATAL and is replaced by the branch that handles the
 * access; illegal access types, reserved bits and illegal opcodes yield
 * MOD_ARITH_ILLEGAL_INST_TRAP.
 */
modarith_cpu_access(simcpu_t
*sp
, tvaddr_t offset
, maccess_t op
,
int len
; /* count in words */
mod_arith_rv_t rv
= MOD_ARITH_FATAL
;
v9p
= (sparcv9_cpu_t
*)(sp
->specificp
);
nsp
= v9p
->impl_specificp
;
npp
= (ss_proc_t
*)(sp
->config_procp
->procp
);
domainp
= sp
->config_procp
->domainp
;
masp
= &npp
->mod_arith_p
[core
];
size
= op
& MA_Size_Mask
;
/* All register state below is read and written under masp->lock. */
pthread_mutex_lock(&masp
->lock
);
switch (offset
) { /* control register (ASI_MA_CONTROL_REG) */
*valp
= masp
->strand
<< 11 |
masp
->do_interrupt
<< 9 |
rv
= MOD_ARITH_LD_COMPLETE
;
/* Bits 63:14 of a stored control value are reserved. */
if (EXTR(*valp
, 63, 14)) {
EXEC_WARNING(("modarith store to control reg: "
rv
= MOD_ARITH_ILLEGAL_INST_TRAP
;
* We are supposed to do an abort if the
* ma_unit is busy, but we are going to just
* issue a warning and wait.
EXEC_WARNING(("store to ASI_MA_CONTROL_REG "
"while mod_arith unit is busy"));
* Should use a cv, but that is a big
* pain (had to do a cond_broadcast
* every place busy is cleared), and
* we are treating this as an error
pthread_mutex_unlock(&masp
->lock
);
pthread_mutex_lock(&masp
->lock
);
/*
 * Latch the control fields: interrupt enable is bit 9, the opcode is
 * bits 8:6, and the length is the field in bits 5:0 plus one.
 */
masp
->do_interrupt
= EXTR(*valp
, 9, 9);
masp
->op
= EXTR(*valp
, 8, 6);
masp
->length
= EXTR(*valp
, 5, 0) + 1;
case 0: /* load MA memory */
if (len
> MA_MEM_XWORDS
) {
* Niagara PRM 20.3 says MA
* length_field + 1 > 160 will
* produce undefined results.
EXEC_WARNING(("modarith load ma_mem: "
"length = %d, set to 160",
ASSERT((masp
->ADDR
[0] + len
) <= MA_MEM_XWORDS
);
bytes_moved
= ss_cpu_mem(domainp
,
masp
->ma_data_p
, /* physaddr */
0, /* already physical */
(unsigned char *) &masp
->ma_mem
[
masp
->ADDR
[0]], /* buffer */
/* A transfer is expected to move eight bytes per word. */
if (bytes_moved
!= 8 * len
) {
IMPL_WARNING(("modarith: ma_load "
if (masp
->do_interrupt
) {
send_interrupt(sp
, masp
->strand
,
N1_trap_modular_arithmetic
);
case 1: /* store MA memory */
if (len
> MA_MEM_XWORDS
) {
* Niagara PRM 20.3 says MA
* length_field + 1 > 160 will
* produce undefined results.
EXEC_WARNING(("modarith store ma_mem: "
"length = %d, set to 160",
ASSERT((masp
->ADDR
[0] + len
) <= MA_MEM_XWORDS
);
bytes_moved
= ss_cpu_mem(domainp
,
masp
->ma_data_p
, /* phys addr */
0, /* already physical */
(unsigned char *) &masp
->ma_mem
[
masp
->ADDR
[0]], /* buffer */
if (bytes_moved
!= 8 * len
) {
IMPL_WARNING(("modarith: ma_store "
"moved %d bytes, expected %d\n",
if (masp
->do_interrupt
) {
send_interrupt(sp
, masp
->strand
,
N1_trap_modular_arithmetic
);
case 2: /* modular multiply */
if (masp
->do_interrupt
) {
send_interrupt(sp
, masp
->strand
,
N1_trap_modular_arithmetic
);
if (masp
->do_interrupt
) {
send_interrupt(sp
,masp
->strand
,
N1_trap_modular_arithmetic
);
if (masp
->do_interrupt
) {
send_interrupt(sp
, masp
->strand
,
N1_trap_modular_arithmetic
);
EXEC_WARNING(("modarith store to control reg: "
"Illegal opcode %d", masp
->op
));
rv
= MOD_ARITH_ILLEGAL_INST_TRAP
;
EXEC_WARNING(("modarith: Illegal memory access type "
rv
= MOD_ARITH_ILLEGAL_INST_TRAP
;
case 0x88: /* Address register (ASI_MA_MPA_REG) */
rv
= MOD_ARITH_LD_COMPLETE
;
/* Bits 63:48 of a stored address are reserved. */
if (*valp
& (0 - (1ULL << 48))) {
EXEC_WARNING(("modarith: attempt to set "
"reserved bits in ASI_MA_MPA_REG"));
rv
= MOD_ARITH_ILLEGAL_INST_TRAP
;
/*
 * Bit 39 and the low three bits are not kept in the register.  The mask
 * has to be built with a bitwise OR: a logical OR of the two constants
 * evaluates to 1, which would test and clear bit 0 only.
 */
if (*valp
& ((1ULL << 39) | 0x7)) {
EXEC_WARNING(("modarith: zeroing bits in "
masp
->ma_data_p
= *valp
& ~((1ULL << 39) | 0x7);
EXEC_WARNING(("modarith: Illegal memory access type "
rv
= MOD_ARITH_ILLEGAL_INST_TRAP
;
case 0x90: /* offsets register (ASI_MA_ADDR_REG) */
/*
 * Pack the offsets into the loaded value, eight bits apart.  Each offset
 * is widened to 64 bits before the shift so that shifts of 32 bits or
 * more are well defined whatever the element type of ADDR is.
 */
for (i
= 0; i
< MA_N_ADDR
; ++i
) {
*valp
|= (uint64_t)masp
->ADDR
[i
] << (8 * i
);
rv
= MOD_ARITH_LD_COMPLETE
;
/* Bits above the MA_N_ADDR packed offsets are reserved on a store. */
if (*valp
& (0 - (1ULL << (MA_N_ADDR
*8)))) {
EXEC_WARNING(("modarith offsets: reserved "
rv
= MOD_ARITH_ILLEGAL_INST_TRAP
;
/* Unpack the stored value into the individual eight-bit offsets. */
for (i
= 0; i
< MA_N_ADDR
; ++i
) {
masp
->ADDR
[i
] = EXTR(*valp
, 8 * i
+ 7, 8 * i
);
EXEC_WARNING(("modarith: Illegal memory access type "
rv
= MOD_ARITH_ILLEGAL_INST_TRAP
;
case 0x98: /* N' register (ASI_MA_NP_REG)---Montgomery mpy, exp, etc */
EXEC_WARNING(("modarith: Illegal memory access type "
rv
= MOD_ARITH_ILLEGAL_INST_TRAP
;
case 0xa0: /* sync register (ASI_MA_SYNC_REG) */
v9p
= (sparcv9_cpu_t
*)(sp
->specificp
);
if (nsp
->vcore_id
== masp
->strand
) {
* Normal case. We are supposed to
* wait until the operation is done.
* But for now we have all operations
* complete instantly, so no waiting
* is necessary. If the calling
* strand does not match the STRAND
* field in the control register, we
* do not update the register.
rv
= MOD_ARITH_LD_COMPLETE
;
* Should cause a data_access_exception trap.
* Do that when we learn how.
EXEC_WARNING(("modarith: Illegal access - only "
"loads allowed to ASI_MA_SYNC_REG"));
rv
= MOD_ARITH_DATA_ACCESS_EX_TRAP
;
* Should take a mem_address_not_aligned trap.
* We'll do that when we learn how.
EXEC_WARNING(("modarith: unaligned memory access"));
rv
= MOD_ARITH_MEM_ALIGN_TRAP
;
EXEC_WARNING(("modarith: access to illegal or "
"unimplmented address"));
rv
= MOD_ARITH_UNIMPLEMENTED
;
pthread_mutex_unlock(&masp
->lock
);
* print messages for BEG_ERROR_CODE values and return a mod_arith
/*
 * bigrv_print_conv: report a bignum-package result code through the
 * warning macros and convert it to a mod_arith return value.
 *
 *   bigcode - result code produced by a bignum-package call
 *
 * Of the converted values, only MOD_ARITH_FATAL (returned after the
 * allocation-failure warning) is visible in the lines shown here.
 */
bigrv_print_conv(int bigcode
)
/* BIG_OK is not expected by this routine and is reported as an implementation warning. */
case BIG_OK
: /* can't happen */
IMPL_WARNING(("modarith: bigrv_called with BIG_OK---shouldn't "
IMPL_WARNING(("modarith: malloc failed"));
return (MOD_ARITH_FATAL
);
EXEC_WARNING(("modarith: bignum package complains of invalid "
EXEC_WARNING(("modarith: bignum package complains of zero "
IMPL_WARNING(("modarith: bignum package returned unexpected "
* Initializes *numberp from reg.
/*
 * big_init2: set up *numberp with big_init(numberp, BIGSIZE) and fill it
 * from the MA memory operand selected by reg.
 *
 *   numberp - bignum to initialize
 *   masp    - modular arithmetic state holding ma_mem, ADDR[] and length
 *   reg     - index into masp->ADDR[] giving the operand offset
 *
 * masp->length 64-bit words are read starting at ma_mem[ADDR[reg]].  Word
 * i is split into two 32-bit chunks: the low half is stored in
 * value[2 * i] and the high half in value[2 * i + 1].  numberp->len is
 * then set to the resulting chunk count, len64 * 64 / BIG_CHUNK_SIZE.
 */
big_init2(BIGNUM
*numberp
, mod_arith_t
*masp
, uint_t reg
)
uint_t len64
= masp
->length
; /* in 64 bit words */
uint64_t *p
= &masp
->ma_mem
[masp
->ADDR
[reg
]];
/* The operand must lie entirely inside the MA memory array. */
ASSERT((masp
->ADDR
[reg
] + len64
) <= MA_MEM_XWORDS
);
* This code depends on the bignum value being an array of 32 bit
* words. Verify that this is so.
ASSERT(sizeof (numberp
->value
[0]) == BIG_CHUNK_SIZE
/ 8);
rv
= big_init(numberp
, BIGSIZE
);
/* The operand may be at most 4096 bits long. */
ASSERT(64 * len64
<= 4096);
for (i
= 0; i
< len64
; ++i
) {
/* Low 32 bits of word i. */
numberp
->value
[2 * i
] = p
[i
] & 0xffffffffULL
;
/* High 32 bits of word i. */
numberp
->value
[2 * i
+ 1] = p
[i
] >> 32;
numberp
->len
= len64
* 64 / BIG_CHUNK_SIZE
;
* Copies the value out to reg and destroys *numberp. If there is
* insufficient room, BIG_INVALID_ARGS is returned, and *numberp is
* destroyed anyway. This stores the result little-endian by word.
* Thus it must not be used for the exponent.
/*
 * big_flush: write the chunks of *numberp into the MA memory operand
 * selected by reg.
 *
 *   numberp - bignum whose value[] chunks are copied out
 *   masp    - modular arithmetic state holding ma_mem, ADDR[] and length
 *   reg     - index into masp->ADDR[] giving the operand offset
 *
 * The target, masp->length 64-bit words at ma_mem[ADDR[reg]], is zeroed
 * first; chunk i is then ORed into target word i / 2.  Returns
 * BIG_INVALID_ARGS when the overflow flag ends up set and BIG_OK
 * otherwise.
 */
big_flush(BIGNUM
*numberp
, mod_arith_t
*masp
, uint_t reg
)
uint64_t *tgt
= &masp
->ma_mem
[masp
->ADDR
[reg
]];
uint_t tgtsize
= masp
->length
; /* in words */
/* The operand must lie entirely inside the MA memory array. */
ASSERT((masp
->ADDR
[reg
] + tgtsize
) <= MA_MEM_XWORDS
);
* This code depends on the bignum value being an array of 32 bit
* words. Verify that this is so.
ASSERT(sizeof (numberp
->value
[0]) == BIG_CHUNK_SIZE
/ 8);
/* Clear the whole target, eight bytes per word, before ORing chunks in. */
memset(tgt
, 0, 8 * tgtsize
);
for (i
= 0; i
< numberp
->len
; ++i
) {
/* NOTE(review): the shift amount of this expression is on a line not shown here. */
tgt
[i
/2] |= ((uint64_t)(numberp
->value
[i
])) <<
/*
 * overflow becomes nonzero as soon as a chunk reaching this statement is
 * nonzero.  NOTE(review): the condition selecting this statement is not
 * shown; presumably it covers chunks beyond the target size - confirm.
 */
overflow
|= !!numberp
->value
[i
];
return (overflow
? BIG_INVALID_ARGS
: BIG_OK
);
* There is a gigantic hack in all the code below. The N-Prime value
* for Niagara is the inverse of the modulus mod 2^64. But the
* N-Prime value that is needed by the 32 bit big number package is
* the inverse of the modulus mod 2^32. But the latter value is just
* the 32 lower bits of the former. So we just pass the former, and
* it gets cut down to 32 bits in the parameter passing mechanism.
* Can you spell "sleazy hack"? We really need to convert to a 64 bit
* bignum library so we test the upper bits. Oh well.
* Montgomery multiplication, i.e. R = A * B * 2^-modbits mod N, where
* modbits is rounded up to a multiple of the wordsize.
/*
 * mod_mpy: Montgomery multiply on the operands described by masp.
 *
 * A, B and N are loaded with big_init2 from the operands at ADDR[0],
 * ADDR[1] and ADDR[2]; X is created with big_init.  masp->lock is released
 * around big_mont_mul(&X, &A, &B, &N, masp->n_prime) and taken again
 * afterwards.  The bignum result code in rv is converted by
 * bigrv_print_conv for the return value.
 */
mod_mpy(mod_arith_t
*masp
)
BIGNUM A
; /* multiplier */
BIGNUM B
; /* multiplicand */
/* masp->lock must always be held when getting to cleanexit */
rv
= big_init2(&A
, masp
, 0);
rv
= big_init2(&B
, masp
, 1);
rv
= big_init2(&N
, masp
, 2);
rv
= big_init(&X
, BIGSIZE
);
/* The multiplication works on private copies only, so it runs unlocked. */
pthread_mutex_unlock(&masp
->lock
);
rv
= big_mont_mul(&X
, &A
, &B
, &N
, masp
->n_prime
);
pthread_mutex_lock(&masp
->lock
);
/* Do calls to free with lock released */
/*
 * NOTE(review): the call below takes the lock although the comment above
 * says the frees run with it released, and no unlock is visible between
 * it and the previous lock call.  Confirm against the complete source.
 */
pthread_mutex_lock(&masp
->lock
);
big_finish(&A
); /* idempotent and fast if nothing to do */
ASSERT((masp
->ADDR
[4] + (masp
->length
+ sizeof (uint64_t) - 1) / sizeof (uint64_t)) <= MA_MEM_XWORDS
);
/* the tmp reg (X) gets destroyed; we just set it to a bogus value */
/*
 * NOTE(review): this fills masp->length bytes, while big_init2 treats
 * length as a count of 64-bit words and mod_exp fills 8 * length bytes
 * for its temporary.  Confirm which unit is intended here.
 */
memset(&masp
->ma_mem
[masp
->ADDR
[4]], 0x57, masp
->length
);
return (bigrv_print_conv(rv
));
* mod_reduce is just a Montgomery multiply by 1, i.e. R = A * 2^-modbits
* mod N, where modbits is rounded up to a multiple of the
/*
 * mod_reduce: reduction step on the operands described by masp.
 *
 * A and N are loaded with big_init2 from the operands at ADDR[0] and
 * ADDR[1].  With masp->lock released, A is compared with N by magnitude
 * (big_cmp_abs).  After the lock is retaken, R is written to the operand
 * at ADDR[2] with big_flush, and the bignum result code is converted by
 * bigrv_print_conv for the return value.
 */
mod_reduce(mod_arith_t
*masp
)
rv
= big_init2(&A
, masp
, 0);
rv
= big_init2(&N
, masp
, 1);
pthread_mutex_unlock(&masp
->lock
);
/*
 * NOTE(review): the statements of both branches are not among the lines
 * shown.  The comment that follows describes the case A >= N, which is
 * the opposite of the condition tested here; presumably it belongs to a
 * missing else branch - confirm.
 */
if (big_cmp_abs(&A
, &N
) < 0) {
/* A >= N, so do R = A - N */
pthread_mutex_lock(&masp
->lock
);
rv
= big_flush(&R
, masp
, 2);
big_finish(&A
); /* only do work in error cases */
/* masp->lock must be held when we get here */
return (bigrv_print_conv(rv
));
/*
 * exponentbit: return one bit (0 or 1) of an exponent stored as an array
 * of 64-bit words.
 *
 *   exponent     - array of 64-bit words holding the exponent
 *   exponentsize - not referenced in the lines shown here
 *   bit          - bit number, counted from the most significant bit of
 *                  exponent[0]
 *
 * Bits 0..63 select from exponent[0], bits 64..127 from exponent[1], and
 * so on; within a word, the lowest bit number maps to the most
 * significant bit.  No range check on bit is visible.
 */
exponentbit(uint64_t *exponent
, int exponentsize
, int bit
)
int wordfromleft
= bit
/ 64;
int bitpos
= 63 - bit
% 64; /* lsb is bitpos 0 */
return ((exponent
[wordfromleft
] >> bitpos
) & 1);
/*
 * mod_exp: modular exponentiation on the operands described by masp.
 *
 * The exponent is read in place from ma_mem[ADDR[4]]; its length in bits
 * is explen = 8 * (ADDR[5] + 1).  A, N and X are loaded with big_init2
 * from the operands at ADDR[0], ADDR[2] and ADDR[3].  With masp->lock
 * released, each of the explen iterations replaces X by the Montgomery
 * product of X with itself and, when exponentbit reports a set bit for
 * that iteration, by the Montgomery product of X and A.  Afterwards the
 * operand at ADDR[1] is overwritten with 0x87 bytes, X is written back to
 * the operand at ADDR[3], and the bignum result code is converted by
 * bigrv_print_conv for the return value.
 */
mod_exp(mod_arith_t
*masp
)
int explen
= 8 * (masp
->ADDR
[5] + 1);
uint64_t *exponentp
= &masp
->ma_mem
[masp
->ADDR
[4]];
ASSERT((masp
->ADDR
[4] + (explen
/ 8)) <= MA_MEM_XWORDS
);
A
.malloced
= 0; /* make safe to call big_finish */
rv
= big_init2(&A
, masp
, 0);
rv
= big_init2(&N
, masp
, 2);
rv
= big_init2(&X
, masp
, 3);
/* The loop works on private bignum copies, so it runs unlocked. */
pthread_mutex_unlock(&masp
->lock
);
for (i
= 0; i
< explen
; ++i
) {
/* Square step: X = X * X. */
rv
= big_mont_mul(&X
, &X
, &X
, &N
, masp
->n_prime
);
/* Multiply step, taken only when exponent bit i is set: X = X * A. */
if (exponentbit(exponentp
, explen
, i
)) {
rv
= big_mont_mul(&X
, &X
, &A
, &N
, masp
->n_prime
);
pthread_mutex_lock(&masp
->lock
);
ASSERT((masp
->ADDR
[1] + masp
->length
) <= MA_MEM_XWORDS
);
* The tmp reg (M) gets destroyed; we just set it to an
* intentional bogus value, every byte 0x87.
memset(&masp
->ma_mem
[masp
->ADDR
[1]], 0x87, 8 * masp
->length
);
rv
= big_flush(&X
, masp
, 3); /* copies out and finishes X */
big_finish(&A
); /* these only do work in error branches */
/*
 * NOTE(review): this is the second lock call after the loop, and no
 * unlock is visible between the two in the lines shown.  Confirm the
 * lock/unlock pairing against the complete source.
 */
pthread_mutex_lock(&masp
->lock
);
return (bigrv_print_conv(rv
));