Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / legion / src / procs / sunsparc / libniagara / modarith.c
CommitLineData
920dae64
AT
1/*
2* ========== Copyright Header Begin ==========================================
3*
4* OpenSPARC T2 Processor File: modarith.c
5* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
6* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
7*
8* The above named program is free software; you can redistribute it and/or
9* modify it under the terms of the GNU General Public
10* License version 2 as published by the Free Software Foundation.
11*
12* The above named program is distributed in the hope that it will be
13* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
14* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15* General Public License for more details.
16*
17* You should have received a copy of the GNU General Public
18* License along with this work; if not, write to the Free Software
19* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
20*
21* ========== Copyright Header End ============================================
22*/
23/*
24 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
25 * Use is subject to license terms.
26 */
27
28#pragma ident "@(#)modarith.c 1.8 07/02/26 SMI"
29
30/*
31 * The module implements the modular arithmetic unit.
32 */
33
34/* exact cut and paste from ss_common.c */
35
36#include <stdio.h>
37#include <stdlib.h>
38#include <unistd.h>
39#include <string.h> /* memcpy/memset */
40#include <strings.h>
41#include <thread.h>
42
43#include "ss_common.h"
44
45#ifdef NIAGARA1
46#include "niagara.h"
47#endif
48#ifdef NIAGARA2
49#include "niagara2.h"
50#endif
51
52#include "modarith.h"
53#include "bignum.h"
54
55static mod_arith_rv_t mod_mpy(mod_arith_t *masp);
56static mod_arith_rv_t mod_reduce(mod_arith_t *masp);
57static mod_arith_rv_t mod_exp(mod_arith_t *masp);
58
59extern BIGNUM One;
60
/* Debug trace hook; expands to an empty statement. */
#define	DBGX(s) do { } while (0)

/*
 * Make bignums be twice the largest possible size + 1. (Size is in
 * words determined by the BIG_CHUNK_SIZE.)
 */
#define	BIG_CHUNK_SIZE 32
#define	BIGSIZE (2 * 2048 / BIG_CHUNK_SIZE + 1)

/*
 * Extract bits hibit..lobit (inclusive) of val, right justified.
 * Requires 0 <= lobit <= hibit <= 63.
 *
 * The mask is built by shifting an all-ones value right rather than
 * by computing (1ULL << width) - 1, so that a full 64 bit field
 * (hibit = 63, lobit = 0) does not shift by the width of the type.
 */
#define	EXTR(val, hibit, lobit) \
	(((val) >> (lobit)) & (~0ULL >> (63 - ((hibit) - (lobit)))))
71
72/*
73 * Registers
74 * addr RW SZ Function Comment
75 * ----- -- ------ -------------- --------------------------------
76 * 0x80 RW 64-bit ASI_MA_CONTROL_REG strand, busy, int, opcode, len +
77 * 0x88 RW 64-bit ASI_MA_MPA_REG pointer to MA memory region
78 * 0x90 RW 64-bit ASI_MA_ADDR_REG 6 8-bit offsets into MA mem reg'n
79 * 0x98 RW 64-bit ASI_MA_NP_REG N' (for Montgomery Mpy)
80 * 0xa0 R 64-bit ASI_MA_SYNC_REG load blocks until op done
81 */
82
83
84
85static void
86send_interrupt(simcpu_t *sp, int thread, sparcv9_trap_type_t type)
87{
88 sparcv9_cpu_t *v9p = (sparcv9_cpu_t *)(sp->specificp);
89 ss_strand_t *nsp = v9p->impl_specificp;
90 ss_proc_t *npp = (ss_proc_t *)(sp->config_procp->procp);
91 int core = nsp->core;
92 int idx = STRANDID2IDX(npp,
93 ((core & 0x7) << 2) | (thread & 0x3));
94 sparcv9_cpu_t *target_v9p = npp->strand[idx];
95 simcpu_t *target_sp = v9p->simp;
96
97 target_v9p->post_precise_trap(target_sp, type);
98}
99
/*
 * Loads from the ASI_MA_SYNC_REG (it appears as a word in the address
 * space) are tricky. A load blocks until the current MA operation
 * completes or is aborted. If it completes normally, a zero is
 * returned. If it is aborted, the target register of the load is
 * unchanged.
 */
107
108/*
109 * Some operations need to be continued back in ss_common.c. So if
110 * lstmp->mtxp is set, the mutex is still held and needs to be
111 * released in the calling environment. (It seems like offset should
112 * be tpaddr_t. XXX)
113 */
114mod_arith_rv_t
115modarith_cpu_access(simcpu_t *sp, tvaddr_t offset, maccess_t op,
116 uint64_t *valp)
117{
118 int size;
119 int len; /* count in words */
120 int i;
121 mod_arith_rv_t rv = MOD_ARITH_FATAL;
122 sparcv9_cpu_t *v9p;
123 ss_strand_t *nsp;
124 ss_proc_t *npp;
125 mod_arith_t *masp;
126 domain_t *domainp;
127 int bytes_moved;
128 int core;
129
130 v9p = (sparcv9_cpu_t *)(sp->specificp);
131 nsp = v9p->impl_specificp;
132 npp = (ss_proc_t *)(sp->config_procp->procp);
133 domainp = sp->config_procp->domainp;
134 core = nsp->core;
135 masp = &npp->mod_arith_p[core];
136
137 size = op & MA_Size_Mask;
138 op &= MA_Op_Mask;
139
140 pthread_mutex_lock(&masp->lock);
141
142 switch (offset) { /* control register (ASI_MA_CONTROL_REG) */
143 case 0x80:
144 switch (op) {
145 case MA_Ld:
146 case MA_LdSigned:
147 *valp = masp->strand << 11 |
148 masp->busy << 10 |
149 masp->do_interrupt << 9 |
150 masp->op << 6 |
151 (masp->length - 1);
152 rv = MOD_ARITH_LD_COMPLETE;
153 break;
154 case MA_St:
155 if (EXTR(*valp, 63, 14)) {
156 EXEC_WARNING(("modarith store to control reg: "
157 "reserved bits set"));
158 rv = MOD_ARITH_ILLEGAL_INST_TRAP;
159 goto cleanexit;
160 }
161 /*
162 * We are supposed to do an abort if the
163 * ma_unit is busy, but we are going to just
164 * issue a warning and wait.
165 */
166 if (masp->busy) {
167 EXEC_WARNING(("store to ASI_MA_CONTROL_REG "
168 "while mod_arith unit is busy"));
169 /*
170 * Should use a cv, but that is a big
171 * pain (had to do a cond_broadcast
172 * every place busy is cleared), and
173 * we are treating this as an error
174 * case anyway.
175 */
176 while (masp->busy) {
177 pthread_mutex_unlock(&masp->lock);
178 sleep(1);
179 pthread_mutex_lock(&masp->lock);
180 }
181 }
182 masp->busy = 1;
183 masp->do_interrupt = EXTR(*valp, 9, 9);
184 masp->op = EXTR(*valp, 8, 6);
185 masp->length = EXTR(*valp, 5, 0) + 1;
186 switch (masp->op) {
187 case 0: /* load MA memory */
188 len = masp->length;
189 if (len > MA_MEM_XWORDS) {
190 /*
191 * Niagara PRM 20.3 says MA
192 * loads and stores with
193 * length_field + 1 > 160 will
194 * produce undefined results.
195 */
196 EXEC_WARNING(("modarith load ma_mem: "
197 "length = %d, set to 160",
198 len));
199 len = MA_MEM_XWORDS;
200 }
201 rv = MOD_ARITH_DONE;
202 ASSERT((masp->ADDR[0] + len) <= MA_MEM_XWORDS);
203 bytes_moved = ss_cpu_mem(domainp,
204 npp, v9p,
205 NA_mem_read,
206 masp->ma_data_p, /* physaddr */
207 0, /* already physical */
208 (unsigned char *) &masp->ma_mem[
209 masp->ADDR[0]], /* buffer */
210 8 * len);
211 if (bytes_moved != 8 * len) {
212 IMPL_WARNING(("modarith: ma_load "
213 "moved %d bytes, "
214 "expected %d\n",
215 bytes_moved, 8 * len));
216 }
217 if (masp->do_interrupt) {
218 send_interrupt(sp, masp->strand,
219 (sparcv9_trap_type_t)
220 N1_trap_modular_arithmetic);
221
222 }
223 break;
224 case 1: /* store MA memory */
225 len = masp->length;
226 if (len > MA_MEM_XWORDS) {
227 /*
228 * Niagara PRM 20.3 says MA
229 * loads and stores with
230 * length_field + 1 > 160 will
231 * produce undefined results.
232 */
233 EXEC_WARNING(("modarith store ma_mem: "
234 "length = %d, set to 160",
235 len));
236 len = MA_MEM_XWORDS;
237 }
238 rv = MOD_ARITH_DONE;
239 ASSERT((masp->ADDR[0] + len) <= MA_MEM_XWORDS);
240 bytes_moved = ss_cpu_mem(domainp,
241 npp, v9p,
242 NA_mem_write,
243 masp->ma_data_p, /* phys addr */
244 0, /* already physical */
245 (unsigned char *) &masp->ma_mem[
246 masp->ADDR[0]], /* buffer */
247 8 * len); /* size */
248 if (bytes_moved != 8 * len) {
249 IMPL_WARNING(("modarith: ma_store "
250 "moved %d bytes, expected %d\n",
251 bytes_moved, 8 * len));
252 }
253 if (masp->do_interrupt) {
254 send_interrupt(sp, masp->strand,
255 (sparcv9_trap_type_t)
256 N1_trap_modular_arithmetic);
257 }
258 break;
259 case 2: /* modular multiply */
260 rv = mod_mpy(masp);
261 if (masp->do_interrupt) {
262 send_interrupt(sp, masp->strand,
263 (sparcv9_trap_type_t)
264 N1_trap_modular_arithmetic);
265 }
266 break;
267 case 3:
268 rv = mod_reduce(masp);
269 if (masp->do_interrupt) {
270 send_interrupt(sp,masp->strand,
271 (sparcv9_trap_type_t)
272 N1_trap_modular_arithmetic);
273 }
274 break;
275 case 4:
276 rv = mod_exp(masp);
277 if (masp->do_interrupt) {
278 send_interrupt(sp, masp->strand,
279 (sparcv9_trap_type_t)
280 N1_trap_modular_arithmetic);
281 }
282 break;
283 default:
284 EXEC_WARNING(("modarith store to control reg: "
285 "Illegal opcode %d", masp->op));
286 masp->busy = 0;
287 rv = MOD_ARITH_ILLEGAL_INST_TRAP;
288 }
289 break;
290 default:
291 EXEC_WARNING(("modarith: Illegal memory access type "
292 "%d", op));
293 rv = MOD_ARITH_ILLEGAL_INST_TRAP;
294 }
295 break;
296 case 0x88: /* Address register (ASI_MA_MPA_REG) */
297 switch (op) {
298 case MA_Ld:
299 case MA_LdSigned:
300 *valp = masp->ma_data_p;
301 rv = MOD_ARITH_LD_COMPLETE;
302 break;
303 case MA_St:
304 if (*valp & (0 - (1ULL << 48))) {
305 EXEC_WARNING(("modarith: attempt to set "
306 "reserved bits in ASI_MA_MPA_REG"));
307 rv = MOD_ARITH_ILLEGAL_INST_TRAP;
308 goto cleanexit;
309 }
310 if (*valp & ((1ULL << 39) || 0x7)) {
311 EXEC_WARNING(("modarith: zeroing bits in "
312 "ASI_MA_MPA_REG"));
313 }
314 masp->ma_data_p = *valp & ~((1ULL << 39) || 0x7);
315 rv = MOD_ARITH_DONE;
316 break;
317 default:
318 EXEC_WARNING(("modarith: Illegal memory access type "
319 "%d", op));
320 rv = MOD_ARITH_ILLEGAL_INST_TRAP;
321 }
322 break;
323 case 0x90: /* offsets register (ASI_MA_ADDR_REG) */
324 switch (op) {
325 case MA_Ld:
326 case MA_LdSigned:
327 *valp = 0;
328 for (i = 0; i < MA_N_ADDR; ++i) {
329 *valp |= masp->ADDR[i] << (8 * i);
330 }
331 rv = MOD_ARITH_LD_COMPLETE;
332 break;
333 case MA_St:
334 if (*valp & (0 - (1ULL << (MA_N_ADDR*8)))) {
335 EXEC_WARNING(("modarith offsets: reserved "
336 "bits set"));
337 rv = MOD_ARITH_ILLEGAL_INST_TRAP;
338 break;
339 }
340 for (i = 0; i < MA_N_ADDR; ++i) {
341 masp->ADDR[i] = EXTR(*valp, 8 * i + 7, 8 * i);
342 }
343 rv = MOD_ARITH_DONE;
344 break;
345 default:
346 EXEC_WARNING(("modarith: Illegal memory access type "
347 "%d", op));
348 rv = MOD_ARITH_ILLEGAL_INST_TRAP;
349 }
350 break;
351 case 0x98: /* N' register (ASI_MA_NP_REG)---Montgomery mpy, exp, etc */
352 switch (op) {
353 case MA_Ld:
354 case MA_LdSigned:
355 *valp = masp->n_prime;
356 rv = MOD_ARITH_DONE;
357 break;
358 case MA_St:
359 masp->n_prime = *valp;
360 rv = MOD_ARITH_DONE;
361 break;
362 default:
363 EXEC_WARNING(("modarith: Illegal memory access type "
364 "%d", op));
365 rv = MOD_ARITH_ILLEGAL_INST_TRAP;
366 }
367 break;
368 case 0xa0: /* sync register (ASI_MA_SYNC_REG) */
369 switch (op) {
370 sparcv9_cpu_t *v9p;
371 case MA_Ld:
372 case MA_LdSigned:
373 v9p = (sparcv9_cpu_t *)(sp->specificp);
374 if (nsp->vcore_id == masp->strand) {
375 /*
376 * Normal case. We are supposed to
377 * wait until the operation is done.
378 * But for now we have all operations
379 * complete instantly, so no waiting
380 * is necessary. If the calling
381 * strand does not match the STRAND
382 * field in the control register, we
383 * do not update the register.
384 */
385 *valp = 0;
386 rv = MOD_ARITH_LD_COMPLETE;
387 } else {
388 rv = MOD_ARITH_DONE;
389 }
390
391 break;
392 default:
393 /*
394 * Should cause a data_access_exception trap.
395 * Do that when we learn how.
396 */
397 EXEC_WARNING(("modarith: Illegal access - only "
398 "loads allowed to ASI_MA_SYNC_REG"));
399 rv = MOD_ARITH_DATA_ACCESS_EX_TRAP;
400 }
401 break;
402 default:
403 if (offset & 0x7) {
404 /*
405 * Should take a mem_address_not_aligned trap.
406 * We'll do that when we learn how.
407 */
408 EXEC_WARNING(("modarith: unaligned memory access"));
409 rv = MOD_ARITH_MEM_ALIGN_TRAP;
410 } else {
411 /*
412 * Something else wrong.
413 */
414 EXEC_WARNING(("modarith: access to illegal or "
415 "unimplmented address"));
416 rv = MOD_ARITH_UNIMPLEMENTED;
417 }
418 }
419cleanexit:
420 masp->busy = 0;
421 pthread_mutex_unlock(&masp->lock);
422 return (rv);
423}
424
425/*
426 * print messages for BEG_ERROR_CODE values and return a mod_arith
427 * return code
428 */
429static mod_arith_rv_t
430bigrv_print_conv(int bigcode)
431{
432 switch (bigcode) {
433 case BIG_OK: /* can't happen */
434 IMPL_WARNING(("modarith: bigrv_called with BIG_OK---shouldn't "
435 "happen"));
436 return (MOD_ARITH_DONE);
437 case BIG_NO_MEM:
438 IMPL_WARNING(("modarith: malloc failed"));
439 return (MOD_ARITH_FATAL);
440 case BIG_INVALID_ARGS:
441 EXEC_WARNING(("modarith: bignum package complains of invalid "
442 "args"));
443 return (MOD_ARITH_DONE);
444 case BIG_DIV_BY_0:
445 EXEC_WARNING(("modarith: bignum package complains of zero "
446 "divide"));
447 return (MOD_ARITH_DONE);
448 default:
449 IMPL_WARNING(("modarith: bignum package returned unexpected "
450 "error %d", bigcode));
451 return (MOD_ARITH_DONE);
452 }
453}
454
455
456/*
457 * Initializes *numberp from reg.
458 */
459static BIG_ERR_CODE
460big_init2(BIGNUM *numberp, mod_arith_t *masp, uint_t reg)
461{
462 int rv;
463 int i;
464 uint_t len64 = masp->length; /* in 64 bit words */
465 uint64_t *p = &masp->ma_mem[masp->ADDR[reg]];
466
467 ASSERT(reg < MA_N_ADDR);
468 ASSERT((masp->ADDR[reg] + len64) <= MA_MEM_XWORDS);
469/*
470 * This code depends on the bignum value being an array of 32 bit
471 * words. Verify that this is so.
472 */
473#if BIG_CHUNK_SIZE != 32
474#error
475#endif
476 ASSERT(sizeof (numberp->value[0]) == BIG_CHUNK_SIZE / 8);
477
478 rv = big_init(numberp, BIGSIZE);
479 if (rv) {
480 return (rv);
481 }
482
483 ASSERT(64 * len64 <= 4096);
484
485 for (i = 0; i < len64; ++i) {
486 numberp->value[2 * i] = p[i] & 0xffffffffULL;
487 numberp->value[2 * i + 1] = p[i] >> 32;
488 }
489 numberp->len = len64 * 64 / BIG_CHUNK_SIZE;
490 return (BIG_OK);
491}
492
493/*
494 * Copies the value out to reg and destroys *numberp. It there is
495 * insufficient room, BIG_INVALID_ARGS is returned, and *numberp is
496 * destroyed anyway. This stores the result little-endian by word.
497 * Thus it must not be used for the exponent.
498 */
499static BIG_ERR_CODE
500big_flush(BIGNUM *numberp, mod_arith_t *masp, uint_t reg)
501{
502
503 int i;
504 uint64_t *tgt = &masp->ma_mem[masp->ADDR[reg]];
505 uint_t tgtsize = masp->length; /* in words */
506 uint64_t overflow = 0;
507
508 ASSERT(reg < MA_N_ADDR);
509 ASSERT((masp->ADDR[reg] + tgtsize) <= MA_MEM_XWORDS);
510
511/*
512 * This code depends on the bignum value being an array of 32 bit
513 * words. Verify that this is so.
514 */
515#if BIG_CHUNK_SIZE != 32
516#error
517#endif
518 ASSERT(sizeof (numberp->value[0]) == BIG_CHUNK_SIZE / 8);
519
520 memset(tgt, 0, 8 * tgtsize);
521 for (i = 0; i < numberp->len; ++i) {
522 if (i/2 < tgtsize) {
523 tgt[i/2] |= ((uint64_t)(numberp->value[i])) <<
524 (32 * (i & 1));
525 } else {
526 overflow |= !!numberp->value[i];
527 }
528 }
529 big_finish(numberp);
530 numberp->malloced = 0;
531 return (overflow ? BIG_INVALID_ARGS : BIG_OK);
532}
533
534
/*
 * There is a gigantic hack in all the code below. The N-Prime value
 * for Niagara is the inverse of the modulus mod 2^64. But the
 * N-Prime value that is needed by the 32 bit big number package is
 * the inverse of the modulus mod 2^32. But the latter value is just
 * the 32 lower bits of the former. So we just pass the former, and
 * it gets cut down to 32 bits in the parameter passing mechanism.
 * Can you spell "sleazy hack"? We really need to convert to a 64 bit
 * bignum library so we test the upper bits. Oh well.
 */
545
546/*
547 * Montogomery multiplications, ie R = A * B * 2^-modbits mod N, where
548 * modbits is rounded up to a multiple of the wordsize.
549 */
550static mod_arith_rv_t
551mod_mpy(mod_arith_t *masp)
552{
553 BIG_ERR_CODE rv;
554 BIGNUM A; /* multiplier */
555 BIGNUM B; /* multiplicand */
556 BIGNUM N; /* modulus */
557 BIGNUM X; /* result */
558
559 A.malloced = 0;
560 B.malloced = 0;
561 N.malloced = 0;
562 X.malloced = 0;
563
564 /* masp->lock must always be held when getting to cleanexit */
565
566 rv = big_init2(&A, masp, 0);
567 if (rv) {
568 goto cleanexit;
569 }
570 rv = big_init2(&B, masp, 1);
571 if (rv) {
572 goto cleanexit;
573 }
574 rv = big_init2(&N, masp, 2);
575 if (rv) {
576 goto cleanexit;
577 }
578 rv = big_init(&X, BIGSIZE);
579 if (rv) {
580 goto cleanexit;
581 }
582
583 /* release lock */
584 pthread_mutex_unlock(&masp->lock);
585
586 rv = big_mont_mul(&X, &A, &B, &N, masp->n_prime);
587 if (rv) {
588 pthread_mutex_lock(&masp->lock);
589 goto cleanexit;
590 }
591
592 /* Do calls to free with lock released */
593 big_finish(&A);
594 big_finish(&B);
595 big_finish(&N);
596
597 pthread_mutex_lock(&masp->lock);
598
599 big_flush(&X, masp, 4);
600
601
602cleanexit:
603
604 big_finish(&A); /* idempotent and fast if nothing to do */
605 big_finish(&B);
606 big_finish(&N);
607 big_finish(&X);
608
609 ASSERT((masp->ADDR[4] + (masp->length + sizeof (uint64_t) - 1) / sizeof (uint64_t)) <= MA_MEM_XWORDS);
610 /* the tmp reg (X) gets destroyed; we just set it to a bogus value */
611 memset(&masp->ma_mem[masp->ADDR[4]], 0x57, masp->length);
612
613 if (rv) {
614 return (bigrv_print_conv(rv));
615 } else {
616 return (MOD_ARITH_DONE);
617 }
618}
619
620
621/*
622 * mod_reduce is just a Montgomery multiply by 1, i.e. R = A * 2^
623 * -modbits mod N, where modbits is rounded up to a multpiple of the
624 * wordsize.
625 */
626static mod_arith_rv_t
627mod_reduce(mod_arith_t *masp)
628{
629 BIG_ERR_CODE rv;
630 BIGNUM A; /* operand */
631 BIGNUM N; /* modulus */
632 BIGNUM R; /* result */
633
634 A.malloced = 0;
635 N.malloced = 0;
636 R.malloced = 0;
637
638 rv = big_init2(&A, masp, 0);
639 if (rv) {
640 goto cleanexit;
641
642 }
643 rv = big_init2(&N, masp, 1);
644 if (rv) {
645 goto cleanexit;
646 }
647
648 pthread_mutex_unlock(&masp->lock);
649
650 if (big_cmp_abs(&A, &N) < 0) {
651 /* A < N; so do R = A */
652 big_copy(&R, &A);
653 } else {
654 /* A >= N, so do R = A - N */
655 big_sub_pos(&R, &A, &N);
656 }
657
658 big_finish(&A);
659 big_finish(&N);
660
661 pthread_mutex_lock(&masp->lock);
662
663 rv = big_flush(&R, masp, 2);
664
665cleanexit:
666 big_finish(&A); /* only do work in error cases */
667 big_finish(&N);
668 big_finish(&R);
669
670 /* masp->lock must be held when we get here */
671 if (rv) {
672 return (bigrv_print_conv(rv));
673 } else {
674 return (MOD_ARITH_DONE);
675 }
676}
677
/*
 * Return bit number `bit' of the exponent, counting from the most
 * significant bit of exponent[0]: bit 0 is the top bit of the first
 * word, bit 64 the top bit of the second word, and so on.
 *
 * exponentsize is accepted but not used.
 */
static int
exponentbit(uint64_t *exponent, int exponentsize, int bit)
{
	uint64_t word = exponent[bit / 64];
	int shift = 63 - bit % 64;	/* lsb is shift 0 */

	(void) exponentsize;

	return ((word & (1ULL << shift)) ? 1 : 0);
}
685
686
687static mod_arith_rv_t
688mod_exp(mod_arith_t *masp)
689{
690 BIG_ERR_CODE rv;
691 int i;
692 int explen = 8 * (masp->ADDR[5] + 1);
693 uint64_t *exponentp = &masp->ma_mem[masp->ADDR[4]];
694 int masplocked = 1;
695 BIGNUM A; /* base */
696 BIGNUM N; /* modulus */
697 BIGNUM X; /* result */
698
699 ASSERT((masp->ADDR[4] + (explen / 8)) <= MA_MEM_XWORDS);
700
701 A.malloced = 0; /* make safe to call big_finish */
702 N.malloced = 0;
703 X.malloced = 0;
704
705 rv = big_init2(&A, masp, 0);
706 if (rv) {
707 goto cleanexit;
708 }
709
710 rv = big_init2(&N, masp, 2);
711 if (rv) {
712 goto cleanexit;
713 }
714
715 rv = big_init2(&X, masp, 3);
716 if (rv) {
717 goto cleanexit;
718 }
719
720 pthread_mutex_unlock(&masp->lock);
721 masplocked = 0;
722
723 for (i = 0; i < explen; ++i) {
724 rv = big_mont_mul(&X, &X, &X, &N, masp->n_prime);
725 if (rv) {
726 goto cleanexit;
727 }
728 if (exponentbit(exponentp, explen, i)) {
729 rv = big_mont_mul(&X, &X, &A, &N, masp->n_prime);
730 if (rv) {
731 goto cleanexit;
732 }
733 }
734 }
735
736 big_finish(&A);
737 big_finish(&N);
738
739 pthread_mutex_lock(&masp->lock);
740 masplocked = 1;
741
742 ASSERT((masp->ADDR[1] + masp->length) <= MA_MEM_XWORDS);
743 /*
744 * The tmp reg (M) gets destroyed; we just set it to an
745 * intentional bogus value, every byte 0x87.
746 */
747 memset(&masp->ma_mem[masp->ADDR[1]], 0x87, 8 * masp->length);
748
749 rv = big_flush(&X, masp, 3); /* copies out and finishes X */
750
751cleanexit:
752
753 big_finish(&A); /* these only do work in error branches */
754 big_finish(&N);
755 big_finish(&X);
756
757 if (!masplocked) {
758 pthread_mutex_lock(&masp->lock);
759 }
760 if (rv) {
761 return (bigrv_print_conv(rv));
762 } else {
763 return (MOD_ARITH_DONE);
764 }
765}