Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | /* |
2 | * ========== Copyright Header Begin ========================================== | |
3 | * | |
4 | * OpenSPARC T2 Processor File: modarith.c | |
5 | * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. | |
6 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. | |
7 | * | |
8 | * The above named program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public | |
10 | * License version 2 as published by the Free Software Foundation. | |
11 | * | |
12 | * The above named program is distributed in the hope that it will be | |
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public | |
18 | * License along with this work; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
20 | * | |
21 | * ========== Copyright Header End ============================================ | |
22 | */ | |
23 | /* | |
24 | * Copyright 2007 Sun Microsystems, Inc. All rights reserved. | |
25 | * Use is subject to license terms. | |
26 | */ | |
27 | ||
28 | #pragma ident "@(#)modarith.c 1.8 07/02/26 SMI" | |
29 | ||
30 | /* | |
31 | * The module implements the modular arithmetic unit. | |
32 | */ | |
33 | ||
34 | /* exact cut and paste from ss_common.c */ | |
35 | ||
36 | #include <stdio.h> | |
37 | #include <stdlib.h> | |
38 | #include <unistd.h> | |
39 | #include <string.h> /* memcpy/memset */ | |
40 | #include <strings.h> | |
41 | #include <thread.h> | |
42 | ||
43 | #include "ss_common.h" | |
44 | ||
45 | #ifdef NIAGARA1 | |
46 | #include "niagara.h" | |
47 | #endif | |
48 | #ifdef NIAGARA2 | |
49 | #include "niagara2.h" | |
50 | #endif | |
51 | ||
52 | #include "modarith.h" | |
53 | #include "bignum.h" | |
54 | ||
55 | static mod_arith_rv_t mod_mpy(mod_arith_t *masp); | |
56 | static mod_arith_rv_t mod_reduce(mod_arith_t *masp); | |
57 | static mod_arith_rv_t mod_exp(mod_arith_t *masp); | |
58 | ||
59 | extern BIGNUM One; | |
60 | ||
61 | #define DBGX(s) do { } while (0) | |
62 | ||
63 | /* | |
64 | * Make bignums be twice the largest possible size + 1. (Size is in | |
65 | * words determined by the BIG_CHUNK_SIZE.) | |
66 | */ | |
67 | #define BIG_CHUNK_SIZE 32 | |
68 | #define BIGSIZE (2 * 2048 / BIG_CHUNK_SIZE + 1) | |
/*
 * Extract bits hibit..lobit (inclusive, bit 0 is the lsb) of val, right
 * justified.  The mask is built by shifting all-ones right so that a full
 * 64-bit field (hibit - lobit == 63) does not require an undefined shift
 * by 64.  Requires 0 <= lobit <= hibit <= 63.
 */
#define	EXTR(val, hibit, lobit)	\
	(((val) >> (lobit)) & (~0ULL >> (63 - ((hibit) - (lobit)))))
71 | ||
72 | /* | |
73 | * Registers | |
74 | * addr RW SZ Function Comment | |
75 | * ----- -- ------ -------------- -------------------------------- | |
76 | * 0x80 RW 64-bit ASI_MA_CONTROL_REG strand, busy, int, opcode, len + | |
77 | * 0x88 RW 64-bit ASI_MA_MPA_REG pointer to MA memory region | |
78 | * 0x90 RW 64-bit ASI_MA_ADDR_REG 6 8-bit offsets into MA mem reg'n | |
79 | * 0x98 RW 64-bit ASI_MA_NP_REG N' (for Montgomery Mpy) | |
80 | * 0xa0 R 64-bit ASI_MA_SYNC_REG load blocks until op done | |
81 | */ | |
82 | ||
83 | ||
84 | ||
85 | static void | |
86 | send_interrupt(simcpu_t *sp, int thread, sparcv9_trap_type_t type) | |
87 | { | |
88 | sparcv9_cpu_t *v9p = (sparcv9_cpu_t *)(sp->specificp); | |
89 | ss_strand_t *nsp = v9p->impl_specificp; | |
90 | ss_proc_t *npp = (ss_proc_t *)(sp->config_procp->procp); | |
91 | int core = nsp->core; | |
92 | int idx = STRANDID2IDX(npp, | |
93 | ((core & 0x7) << 2) | (thread & 0x3)); | |
94 | sparcv9_cpu_t *target_v9p = npp->strand[idx]; | |
95 | simcpu_t *target_sp = v9p->simp; | |
96 | ||
97 | target_v9p->post_precise_trap(target_sp, type); | |
98 | } | |
99 | ||
100 | /* | |
101 | * Loads from the ASI_MA_SYNC_REG (it appears as a word in the address | |
102 | * space) are tricky. A load blocks until the current MA operation | |
103 | * completes or is aborted. If it completes normally, a zero is | |
104 | * returned. If it is aborted, the target register of the load is | |
105 | * unchanged. | |
106 | */ | |
107 | ||
108 | /* | |
109 | * Some operations need to be continued back in ss_common.c. So if | |
110 | * lstmp->mtxp is set, the mutex is still held and needs to be | |
111 | * released in the calling environment. (It seems like offset should | |
112 | * be tpaddr_t. XXX) | |
113 | */ | |
114 | mod_arith_rv_t | |
115 | modarith_cpu_access(simcpu_t *sp, tvaddr_t offset, maccess_t op, | |
116 | uint64_t *valp) | |
117 | { | |
118 | int size; | |
119 | int len; /* count in words */ | |
120 | int i; | |
121 | mod_arith_rv_t rv = MOD_ARITH_FATAL; | |
122 | sparcv9_cpu_t *v9p; | |
123 | ss_strand_t *nsp; | |
124 | ss_proc_t *npp; | |
125 | mod_arith_t *masp; | |
126 | domain_t *domainp; | |
127 | int bytes_moved; | |
128 | int core; | |
129 | ||
130 | v9p = (sparcv9_cpu_t *)(sp->specificp); | |
131 | nsp = v9p->impl_specificp; | |
132 | npp = (ss_proc_t *)(sp->config_procp->procp); | |
133 | domainp = sp->config_procp->domainp; | |
134 | core = nsp->core; | |
135 | masp = &npp->mod_arith_p[core]; | |
136 | ||
137 | size = op & MA_Size_Mask; | |
138 | op &= MA_Op_Mask; | |
139 | ||
140 | pthread_mutex_lock(&masp->lock); | |
141 | ||
142 | switch (offset) { /* control register (ASI_MA_CONTROL_REG) */ | |
143 | case 0x80: | |
144 | switch (op) { | |
145 | case MA_Ld: | |
146 | case MA_LdSigned: | |
147 | *valp = masp->strand << 11 | | |
148 | masp->busy << 10 | | |
149 | masp->do_interrupt << 9 | | |
150 | masp->op << 6 | | |
151 | (masp->length - 1); | |
152 | rv = MOD_ARITH_LD_COMPLETE; | |
153 | break; | |
154 | case MA_St: | |
155 | if (EXTR(*valp, 63, 14)) { | |
156 | EXEC_WARNING(("modarith store to control reg: " | |
157 | "reserved bits set")); | |
158 | rv = MOD_ARITH_ILLEGAL_INST_TRAP; | |
159 | goto cleanexit; | |
160 | } | |
161 | /* | |
162 | * We are supposed to do an abort if the | |
163 | * ma_unit is busy, but we are going to just | |
164 | * issue a warning and wait. | |
165 | */ | |
166 | if (masp->busy) { | |
167 | EXEC_WARNING(("store to ASI_MA_CONTROL_REG " | |
168 | "while mod_arith unit is busy")); | |
169 | /* | |
170 | * Should use a cv, but that is a big | |
171 | * pain (had to do a cond_broadcast | |
172 | * every place busy is cleared), and | |
173 | * we are treating this as an error | |
174 | * case anyway. | |
175 | */ | |
176 | while (masp->busy) { | |
177 | pthread_mutex_unlock(&masp->lock); | |
178 | sleep(1); | |
179 | pthread_mutex_lock(&masp->lock); | |
180 | } | |
181 | } | |
182 | masp->busy = 1; | |
183 | masp->do_interrupt = EXTR(*valp, 9, 9); | |
184 | masp->op = EXTR(*valp, 8, 6); | |
185 | masp->length = EXTR(*valp, 5, 0) + 1; | |
186 | switch (masp->op) { | |
187 | case 0: /* load MA memory */ | |
188 | len = masp->length; | |
189 | if (len > MA_MEM_XWORDS) { | |
190 | /* | |
191 | * Niagara PRM 20.3 says MA | |
192 | * loads and stores with | |
193 | * length_field + 1 > 160 will | |
194 | * produce undefined results. | |
195 | */ | |
196 | EXEC_WARNING(("modarith load ma_mem: " | |
197 | "length = %d, set to 160", | |
198 | len)); | |
199 | len = MA_MEM_XWORDS; | |
200 | } | |
201 | rv = MOD_ARITH_DONE; | |
202 | ASSERT((masp->ADDR[0] + len) <= MA_MEM_XWORDS); | |
203 | bytes_moved = ss_cpu_mem(domainp, | |
204 | npp, v9p, | |
205 | NA_mem_read, | |
206 | masp->ma_data_p, /* physaddr */ | |
207 | 0, /* already physical */ | |
208 | (unsigned char *) &masp->ma_mem[ | |
209 | masp->ADDR[0]], /* buffer */ | |
210 | 8 * len); | |
211 | if (bytes_moved != 8 * len) { | |
212 | IMPL_WARNING(("modarith: ma_load " | |
213 | "moved %d bytes, " | |
214 | "expected %d\n", | |
215 | bytes_moved, 8 * len)); | |
216 | } | |
217 | if (masp->do_interrupt) { | |
218 | send_interrupt(sp, masp->strand, | |
219 | (sparcv9_trap_type_t) | |
220 | N1_trap_modular_arithmetic); | |
221 | ||
222 | } | |
223 | break; | |
224 | case 1: /* store MA memory */ | |
225 | len = masp->length; | |
226 | if (len > MA_MEM_XWORDS) { | |
227 | /* | |
228 | * Niagara PRM 20.3 says MA | |
229 | * loads and stores with | |
230 | * length_field + 1 > 160 will | |
231 | * produce undefined results. | |
232 | */ | |
233 | EXEC_WARNING(("modarith store ma_mem: " | |
234 | "length = %d, set to 160", | |
235 | len)); | |
236 | len = MA_MEM_XWORDS; | |
237 | } | |
238 | rv = MOD_ARITH_DONE; | |
239 | ASSERT((masp->ADDR[0] + len) <= MA_MEM_XWORDS); | |
240 | bytes_moved = ss_cpu_mem(domainp, | |
241 | npp, v9p, | |
242 | NA_mem_write, | |
243 | masp->ma_data_p, /* phys addr */ | |
244 | 0, /* already physical */ | |
245 | (unsigned char *) &masp->ma_mem[ | |
246 | masp->ADDR[0]], /* buffer */ | |
247 | 8 * len); /* size */ | |
248 | if (bytes_moved != 8 * len) { | |
249 | IMPL_WARNING(("modarith: ma_store " | |
250 | "moved %d bytes, expected %d\n", | |
251 | bytes_moved, 8 * len)); | |
252 | } | |
253 | if (masp->do_interrupt) { | |
254 | send_interrupt(sp, masp->strand, | |
255 | (sparcv9_trap_type_t) | |
256 | N1_trap_modular_arithmetic); | |
257 | } | |
258 | break; | |
259 | case 2: /* modular multiply */ | |
260 | rv = mod_mpy(masp); | |
261 | if (masp->do_interrupt) { | |
262 | send_interrupt(sp, masp->strand, | |
263 | (sparcv9_trap_type_t) | |
264 | N1_trap_modular_arithmetic); | |
265 | } | |
266 | break; | |
267 | case 3: | |
268 | rv = mod_reduce(masp); | |
269 | if (masp->do_interrupt) { | |
270 | send_interrupt(sp,masp->strand, | |
271 | (sparcv9_trap_type_t) | |
272 | N1_trap_modular_arithmetic); | |
273 | } | |
274 | break; | |
275 | case 4: | |
276 | rv = mod_exp(masp); | |
277 | if (masp->do_interrupt) { | |
278 | send_interrupt(sp, masp->strand, | |
279 | (sparcv9_trap_type_t) | |
280 | N1_trap_modular_arithmetic); | |
281 | } | |
282 | break; | |
283 | default: | |
284 | EXEC_WARNING(("modarith store to control reg: " | |
285 | "Illegal opcode %d", masp->op)); | |
286 | masp->busy = 0; | |
287 | rv = MOD_ARITH_ILLEGAL_INST_TRAP; | |
288 | } | |
289 | break; | |
290 | default: | |
291 | EXEC_WARNING(("modarith: Illegal memory access type " | |
292 | "%d", op)); | |
293 | rv = MOD_ARITH_ILLEGAL_INST_TRAP; | |
294 | } | |
295 | break; | |
296 | case 0x88: /* Address register (ASI_MA_MPA_REG) */ | |
297 | switch (op) { | |
298 | case MA_Ld: | |
299 | case MA_LdSigned: | |
300 | *valp = masp->ma_data_p; | |
301 | rv = MOD_ARITH_LD_COMPLETE; | |
302 | break; | |
303 | case MA_St: | |
304 | if (*valp & (0 - (1ULL << 48))) { | |
305 | EXEC_WARNING(("modarith: attempt to set " | |
306 | "reserved bits in ASI_MA_MPA_REG")); | |
307 | rv = MOD_ARITH_ILLEGAL_INST_TRAP; | |
308 | goto cleanexit; | |
309 | } | |
310 | if (*valp & ((1ULL << 39) || 0x7)) { | |
311 | EXEC_WARNING(("modarith: zeroing bits in " | |
312 | "ASI_MA_MPA_REG")); | |
313 | } | |
314 | masp->ma_data_p = *valp & ~((1ULL << 39) || 0x7); | |
315 | rv = MOD_ARITH_DONE; | |
316 | break; | |
317 | default: | |
318 | EXEC_WARNING(("modarith: Illegal memory access type " | |
319 | "%d", op)); | |
320 | rv = MOD_ARITH_ILLEGAL_INST_TRAP; | |
321 | } | |
322 | break; | |
323 | case 0x90: /* offsets register (ASI_MA_ADDR_REG) */ | |
324 | switch (op) { | |
325 | case MA_Ld: | |
326 | case MA_LdSigned: | |
327 | *valp = 0; | |
328 | for (i = 0; i < MA_N_ADDR; ++i) { | |
329 | *valp |= masp->ADDR[i] << (8 * i); | |
330 | } | |
331 | rv = MOD_ARITH_LD_COMPLETE; | |
332 | break; | |
333 | case MA_St: | |
334 | if (*valp & (0 - (1ULL << (MA_N_ADDR*8)))) { | |
335 | EXEC_WARNING(("modarith offsets: reserved " | |
336 | "bits set")); | |
337 | rv = MOD_ARITH_ILLEGAL_INST_TRAP; | |
338 | break; | |
339 | } | |
340 | for (i = 0; i < MA_N_ADDR; ++i) { | |
341 | masp->ADDR[i] = EXTR(*valp, 8 * i + 7, 8 * i); | |
342 | } | |
343 | rv = MOD_ARITH_DONE; | |
344 | break; | |
345 | default: | |
346 | EXEC_WARNING(("modarith: Illegal memory access type " | |
347 | "%d", op)); | |
348 | rv = MOD_ARITH_ILLEGAL_INST_TRAP; | |
349 | } | |
350 | break; | |
351 | case 0x98: /* N' register (ASI_MA_NP_REG)---Montgomery mpy, exp, etc */ | |
352 | switch (op) { | |
353 | case MA_Ld: | |
354 | case MA_LdSigned: | |
355 | *valp = masp->n_prime; | |
356 | rv = MOD_ARITH_DONE; | |
357 | break; | |
358 | case MA_St: | |
359 | masp->n_prime = *valp; | |
360 | rv = MOD_ARITH_DONE; | |
361 | break; | |
362 | default: | |
363 | EXEC_WARNING(("modarith: Illegal memory access type " | |
364 | "%d", op)); | |
365 | rv = MOD_ARITH_ILLEGAL_INST_TRAP; | |
366 | } | |
367 | break; | |
368 | case 0xa0: /* sync register (ASI_MA_SYNC_REG) */ | |
369 | switch (op) { | |
370 | sparcv9_cpu_t *v9p; | |
371 | case MA_Ld: | |
372 | case MA_LdSigned: | |
373 | v9p = (sparcv9_cpu_t *)(sp->specificp); | |
374 | if (nsp->vcore_id == masp->strand) { | |
375 | /* | |
376 | * Normal case. We are supposed to | |
377 | * wait until the operation is done. | |
378 | * But for now we have all operations | |
379 | * complete instantly, so no waiting | |
380 | * is necessary. If the calling | |
381 | * strand does not match the STRAND | |
382 | * field in the control register, we | |
383 | * do not update the register. | |
384 | */ | |
385 | *valp = 0; | |
386 | rv = MOD_ARITH_LD_COMPLETE; | |
387 | } else { | |
388 | rv = MOD_ARITH_DONE; | |
389 | } | |
390 | ||
391 | break; | |
392 | default: | |
393 | /* | |
394 | * Should cause a data_access_exception trap. | |
395 | * Do that when we learn how. | |
396 | */ | |
397 | EXEC_WARNING(("modarith: Illegal access - only " | |
398 | "loads allowed to ASI_MA_SYNC_REG")); | |
399 | rv = MOD_ARITH_DATA_ACCESS_EX_TRAP; | |
400 | } | |
401 | break; | |
402 | default: | |
403 | if (offset & 0x7) { | |
404 | /* | |
405 | * Should take a mem_address_not_aligned trap. | |
406 | * We'll do that when we learn how. | |
407 | */ | |
408 | EXEC_WARNING(("modarith: unaligned memory access")); | |
409 | rv = MOD_ARITH_MEM_ALIGN_TRAP; | |
410 | } else { | |
411 | /* | |
412 | * Something else wrong. | |
413 | */ | |
414 | EXEC_WARNING(("modarith: access to illegal or " | |
415 | "unimplmented address")); | |
416 | rv = MOD_ARITH_UNIMPLEMENTED; | |
417 | } | |
418 | } | |
419 | cleanexit: | |
420 | masp->busy = 0; | |
421 | pthread_mutex_unlock(&masp->lock); | |
422 | return (rv); | |
423 | } | |
424 | ||
425 | /* | |
426 | * print messages for BEG_ERROR_CODE values and return a mod_arith | |
427 | * return code | |
428 | */ | |
429 | static mod_arith_rv_t | |
430 | bigrv_print_conv(int bigcode) | |
431 | { | |
432 | switch (bigcode) { | |
433 | case BIG_OK: /* can't happen */ | |
434 | IMPL_WARNING(("modarith: bigrv_called with BIG_OK---shouldn't " | |
435 | "happen")); | |
436 | return (MOD_ARITH_DONE); | |
437 | case BIG_NO_MEM: | |
438 | IMPL_WARNING(("modarith: malloc failed")); | |
439 | return (MOD_ARITH_FATAL); | |
440 | case BIG_INVALID_ARGS: | |
441 | EXEC_WARNING(("modarith: bignum package complains of invalid " | |
442 | "args")); | |
443 | return (MOD_ARITH_DONE); | |
444 | case BIG_DIV_BY_0: | |
445 | EXEC_WARNING(("modarith: bignum package complains of zero " | |
446 | "divide")); | |
447 | return (MOD_ARITH_DONE); | |
448 | default: | |
449 | IMPL_WARNING(("modarith: bignum package returned unexpected " | |
450 | "error %d", bigcode)); | |
451 | return (MOD_ARITH_DONE); | |
452 | } | |
453 | } | |
454 | ||
455 | ||
456 | /* | |
457 | * Initializes *numberp from reg. | |
458 | */ | |
459 | static BIG_ERR_CODE | |
460 | big_init2(BIGNUM *numberp, mod_arith_t *masp, uint_t reg) | |
461 | { | |
462 | int rv; | |
463 | int i; | |
464 | uint_t len64 = masp->length; /* in 64 bit words */ | |
465 | uint64_t *p = &masp->ma_mem[masp->ADDR[reg]]; | |
466 | ||
467 | ASSERT(reg < MA_N_ADDR); | |
468 | ASSERT((masp->ADDR[reg] + len64) <= MA_MEM_XWORDS); | |
469 | /* | |
470 | * This code depends on the bignum value being an array of 32 bit | |
471 | * words. Verify that this is so. | |
472 | */ | |
473 | #if BIG_CHUNK_SIZE != 32 | |
474 | #error | |
475 | #endif | |
476 | ASSERT(sizeof (numberp->value[0]) == BIG_CHUNK_SIZE / 8); | |
477 | ||
478 | rv = big_init(numberp, BIGSIZE); | |
479 | if (rv) { | |
480 | return (rv); | |
481 | } | |
482 | ||
483 | ASSERT(64 * len64 <= 4096); | |
484 | ||
485 | for (i = 0; i < len64; ++i) { | |
486 | numberp->value[2 * i] = p[i] & 0xffffffffULL; | |
487 | numberp->value[2 * i + 1] = p[i] >> 32; | |
488 | } | |
489 | numberp->len = len64 * 64 / BIG_CHUNK_SIZE; | |
490 | return (BIG_OK); | |
491 | } | |
492 | ||
493 | /* | |
494 | * Copies the value out to reg and destroys *numberp. It there is | |
495 | * insufficient room, BIG_INVALID_ARGS is returned, and *numberp is | |
496 | * destroyed anyway. This stores the result little-endian by word. | |
497 | * Thus it must not be used for the exponent. | |
498 | */ | |
499 | static BIG_ERR_CODE | |
500 | big_flush(BIGNUM *numberp, mod_arith_t *masp, uint_t reg) | |
501 | { | |
502 | ||
503 | int i; | |
504 | uint64_t *tgt = &masp->ma_mem[masp->ADDR[reg]]; | |
505 | uint_t tgtsize = masp->length; /* in words */ | |
506 | uint64_t overflow = 0; | |
507 | ||
508 | ASSERT(reg < MA_N_ADDR); | |
509 | ASSERT((masp->ADDR[reg] + tgtsize) <= MA_MEM_XWORDS); | |
510 | ||
511 | /* | |
512 | * This code depends on the bignum value being an array of 32 bit | |
513 | * words. Verify that this is so. | |
514 | */ | |
515 | #if BIG_CHUNK_SIZE != 32 | |
516 | #error | |
517 | #endif | |
518 | ASSERT(sizeof (numberp->value[0]) == BIG_CHUNK_SIZE / 8); | |
519 | ||
520 | memset(tgt, 0, 8 * tgtsize); | |
521 | for (i = 0; i < numberp->len; ++i) { | |
522 | if (i/2 < tgtsize) { | |
523 | tgt[i/2] |= ((uint64_t)(numberp->value[i])) << | |
524 | (32 * (i & 1)); | |
525 | } else { | |
526 | overflow |= !!numberp->value[i]; | |
527 | } | |
528 | } | |
529 | big_finish(numberp); | |
530 | numberp->malloced = 0; | |
531 | return (overflow ? BIG_INVALID_ARGS : BIG_OK); | |
532 | } | |
533 | ||
534 | ||
535 | /* | |
536 | * There is a gigantic hack in all the code below. The N-Prime value | |
537 | * for Niagara is the inverse of the modulus mod 2^64. But the | |
538 | * N-Prime value that is needed by the 32 bit big number package is | |
539 | * the inverse of the modulus mod 2^32. But the latter value is just | |
540 | * the 32 lower bits of the former. So we just pass the former, and | |
541 | * it gets cut down to 32 bits in the parameter passing mechanism. | |
542 | * Can you spell "sleazy hack"? We really need to convert to a 64 bit | |
543 | * bignum library so we test the upper bits. Oh well. | |
544 | */ | |
545 | ||
546 | /* | |
547 | * Montogomery multiplications, ie R = A * B * 2^-modbits mod N, where | |
548 | * modbits is rounded up to a multiple of the wordsize. | |
549 | */ | |
550 | static mod_arith_rv_t | |
551 | mod_mpy(mod_arith_t *masp) | |
552 | { | |
553 | BIG_ERR_CODE rv; | |
554 | BIGNUM A; /* multiplier */ | |
555 | BIGNUM B; /* multiplicand */ | |
556 | BIGNUM N; /* modulus */ | |
557 | BIGNUM X; /* result */ | |
558 | ||
559 | A.malloced = 0; | |
560 | B.malloced = 0; | |
561 | N.malloced = 0; | |
562 | X.malloced = 0; | |
563 | ||
564 | /* masp->lock must always be held when getting to cleanexit */ | |
565 | ||
566 | rv = big_init2(&A, masp, 0); | |
567 | if (rv) { | |
568 | goto cleanexit; | |
569 | } | |
570 | rv = big_init2(&B, masp, 1); | |
571 | if (rv) { | |
572 | goto cleanexit; | |
573 | } | |
574 | rv = big_init2(&N, masp, 2); | |
575 | if (rv) { | |
576 | goto cleanexit; | |
577 | } | |
578 | rv = big_init(&X, BIGSIZE); | |
579 | if (rv) { | |
580 | goto cleanexit; | |
581 | } | |
582 | ||
583 | /* release lock */ | |
584 | pthread_mutex_unlock(&masp->lock); | |
585 | ||
586 | rv = big_mont_mul(&X, &A, &B, &N, masp->n_prime); | |
587 | if (rv) { | |
588 | pthread_mutex_lock(&masp->lock); | |
589 | goto cleanexit; | |
590 | } | |
591 | ||
592 | /* Do calls to free with lock released */ | |
593 | big_finish(&A); | |
594 | big_finish(&B); | |
595 | big_finish(&N); | |
596 | ||
597 | pthread_mutex_lock(&masp->lock); | |
598 | ||
599 | big_flush(&X, masp, 4); | |
600 | ||
601 | ||
602 | cleanexit: | |
603 | ||
604 | big_finish(&A); /* idempotent and fast if nothing to do */ | |
605 | big_finish(&B); | |
606 | big_finish(&N); | |
607 | big_finish(&X); | |
608 | ||
609 | ASSERT((masp->ADDR[4] + (masp->length + sizeof (uint64_t) - 1) / sizeof (uint64_t)) <= MA_MEM_XWORDS); | |
610 | /* the tmp reg (X) gets destroyed; we just set it to a bogus value */ | |
611 | memset(&masp->ma_mem[masp->ADDR[4]], 0x57, masp->length); | |
612 | ||
613 | if (rv) { | |
614 | return (bigrv_print_conv(rv)); | |
615 | } else { | |
616 | return (MOD_ARITH_DONE); | |
617 | } | |
618 | } | |
619 | ||
620 | ||
621 | /* | |
622 | * mod_reduce is just a Montgomery multiply by 1, i.e. R = A * 2^ | |
623 | * -modbits mod N, where modbits is rounded up to a multpiple of the | |
624 | * wordsize. | |
625 | */ | |
626 | static mod_arith_rv_t | |
627 | mod_reduce(mod_arith_t *masp) | |
628 | { | |
629 | BIG_ERR_CODE rv; | |
630 | BIGNUM A; /* operand */ | |
631 | BIGNUM N; /* modulus */ | |
632 | BIGNUM R; /* result */ | |
633 | ||
634 | A.malloced = 0; | |
635 | N.malloced = 0; | |
636 | R.malloced = 0; | |
637 | ||
638 | rv = big_init2(&A, masp, 0); | |
639 | if (rv) { | |
640 | goto cleanexit; | |
641 | ||
642 | } | |
643 | rv = big_init2(&N, masp, 1); | |
644 | if (rv) { | |
645 | goto cleanexit; | |
646 | } | |
647 | ||
648 | pthread_mutex_unlock(&masp->lock); | |
649 | ||
650 | if (big_cmp_abs(&A, &N) < 0) { | |
651 | /* A < N; so do R = A */ | |
652 | big_copy(&R, &A); | |
653 | } else { | |
654 | /* A >= N, so do R = A - N */ | |
655 | big_sub_pos(&R, &A, &N); | |
656 | } | |
657 | ||
658 | big_finish(&A); | |
659 | big_finish(&N); | |
660 | ||
661 | pthread_mutex_lock(&masp->lock); | |
662 | ||
663 | rv = big_flush(&R, masp, 2); | |
664 | ||
665 | cleanexit: | |
666 | big_finish(&A); /* only do work in error cases */ | |
667 | big_finish(&N); | |
668 | big_finish(&R); | |
669 | ||
670 | /* masp->lock must be held when we get here */ | |
671 | if (rv) { | |
672 | return (bigrv_print_conv(rv)); | |
673 | } else { | |
674 | return (MOD_ARITH_DONE); | |
675 | } | |
676 | } | |
677 | ||
/*
 * Return bit number `bit' of the exponent, counting from the most
 * significant bit of exponent[0]: bit 0 is the msb of the first 64-bit
 * word, bit 64 the msb of the second, and so on.  exponentsize is not
 * consulted.
 */
static int
exponentbit(uint64_t *exponent, int exponentsize, int bit)
{
	uint64_t word = exponent[bit / 64];
	int shift = 63 - bit % 64;	/* distance of the bit from the lsb */

	return ((word >> shift) & 1);
}
685 | ||
686 | ||
687 | static mod_arith_rv_t | |
688 | mod_exp(mod_arith_t *masp) | |
689 | { | |
690 | BIG_ERR_CODE rv; | |
691 | int i; | |
692 | int explen = 8 * (masp->ADDR[5] + 1); | |
693 | uint64_t *exponentp = &masp->ma_mem[masp->ADDR[4]]; | |
694 | int masplocked = 1; | |
695 | BIGNUM A; /* base */ | |
696 | BIGNUM N; /* modulus */ | |
697 | BIGNUM X; /* result */ | |
698 | ||
699 | ASSERT((masp->ADDR[4] + (explen / 8)) <= MA_MEM_XWORDS); | |
700 | ||
701 | A.malloced = 0; /* make safe to call big_finish */ | |
702 | N.malloced = 0; | |
703 | X.malloced = 0; | |
704 | ||
705 | rv = big_init2(&A, masp, 0); | |
706 | if (rv) { | |
707 | goto cleanexit; | |
708 | } | |
709 | ||
710 | rv = big_init2(&N, masp, 2); | |
711 | if (rv) { | |
712 | goto cleanexit; | |
713 | } | |
714 | ||
715 | rv = big_init2(&X, masp, 3); | |
716 | if (rv) { | |
717 | goto cleanexit; | |
718 | } | |
719 | ||
720 | pthread_mutex_unlock(&masp->lock); | |
721 | masplocked = 0; | |
722 | ||
723 | for (i = 0; i < explen; ++i) { | |
724 | rv = big_mont_mul(&X, &X, &X, &N, masp->n_prime); | |
725 | if (rv) { | |
726 | goto cleanexit; | |
727 | } | |
728 | if (exponentbit(exponentp, explen, i)) { | |
729 | rv = big_mont_mul(&X, &X, &A, &N, masp->n_prime); | |
730 | if (rv) { | |
731 | goto cleanexit; | |
732 | } | |
733 | } | |
734 | } | |
735 | ||
736 | big_finish(&A); | |
737 | big_finish(&N); | |
738 | ||
739 | pthread_mutex_lock(&masp->lock); | |
740 | masplocked = 1; | |
741 | ||
742 | ASSERT((masp->ADDR[1] + masp->length) <= MA_MEM_XWORDS); | |
743 | /* | |
744 | * The tmp reg (M) gets destroyed; we just set it to an | |
745 | * intentional bogus value, every byte 0x87. | |
746 | */ | |
747 | memset(&masp->ma_mem[masp->ADDR[1]], 0x87, 8 * masp->length); | |
748 | ||
749 | rv = big_flush(&X, masp, 3); /* copies out and finishes X */ | |
750 | ||
751 | cleanexit: | |
752 | ||
753 | big_finish(&A); /* these only do work in error branches */ | |
754 | big_finish(&N); | |
755 | big_finish(&X); | |
756 | ||
757 | if (!masplocked) { | |
758 | pthread_mutex_lock(&masp->lock); | |
759 | } | |
760 | if (rv) { | |
761 | return (bigrv_print_conv(rv)); | |
762 | } else { | |
763 | return (MOD_ARITH_DONE); | |
764 | } | |
765 | } |