/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: sparcv9native.S
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"@(#)sparcv9native.S	1.25	07/02/20 SMI"

#include <sys/asm_linkage.h>
#include "assembly.h"


	/*
	 * Assembly support functions required by the simulator
	 */


	.section ".text"

	ENTRY(sim_atomic_add_32)
	ALTENTRY(sim_atomic_add_32_nv)
	ALTENTRY(sim_atomic_add_long)
	ALTENTRY(sim_atomic_add_long_nv)
	ld	[%o0], %o2
1:
	add	%o2, %o1, %o3
	cas	[%o0], %o2, %o3
	cmp	%o2, %o3
	bne,a,pn %icc, 1b
	ld	[%o0], %o2
	retl
	add	%o2, %o1, %o0		! return new value
	SET_SIZE(sim_atomic_add_32_nv)
	SET_SIZE(sim_atomic_add_32)
	SET_SIZE(sim_atomic_add_long_nv)
	SET_SIZE(sim_atomic_add_long)

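	/*
	 * Illustrative only (not part of the original source): a C-level
	 * sketch of the compare-and-swap retry loop above, expressed in
	 * terms of the host_cas32 helper defined below (which has the same
	 * semantics as the cas instruction: it returns the old memory value).
	 * The function name is hypothetical.
	 *
	 *	uint32_t sim_atomic_add_32_sketch(uint32_t *p, uint32_t v)
	 *	{
	 *		uint32_t old, seen;
	 *		do {
	 *			old  = *p;
	 *			seen = host_cas32(p, old, old + v);
	 *		} while (seen != old);	// someone else updated *p; retry
	 *		return old + v;		// the new value is returned
	 *	}
	 */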
	/*
	 * o0 = pointer to memory location (byte)
	 * returns:
	 * o0 = original contents of memory location (0xff is stored)
	 */
	.global host_ldstub
host_ldstub:
	ldstub	[%o0], %o2
	retl
	mov	%o2, %o0

	/*
	 * o0 = pointer to memory location
	 * o1 = value to compare with
	 * o2 = value to swap in if equal
	 * returns:
	 * o0 = original contents of memory location
	 */
	.global host_cas32
host_cas32:
	cas	[%o0], %o1, %o2
	retl
	mov	%o2, %o0

	.global host_cas64
host_cas64:
	casx	[%o0], %o1, %o2
	retl
	mov	%o2, %o0

	.global host_swap
host_swap:
	swap	[%o0], %o1
	retl
	mov	%o1, %o0

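	/*
	 * Illustrative only: how the helpers above would plausibly be
	 * declared on the C side. The exact header and prototypes are an
	 * assumption, not taken from this file.
	 *
	 *	extern uint8_t  host_ldstub(uint8_t *p);   // stores 0xff, returns old byte
	 *	extern uint32_t host_cas32(uint32_t *p, uint32_t cmp, uint32_t swap);
	 *	extern uint64_t host_cas64(uint64_t *p, uint64_t cmp, uint64_t swap);
	 *	extern uint32_t host_swap(uint32_t *p, uint32_t val); // swaps val in, returns old word
	 */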

	/*
	 * Atomic load of 128 bits (big endian) into two 64-bit registers.
	 * We have to resort to this kludge because SPARC doesn't provide a
	 * 128-bit atomic fetch that executes at user level, so we just have
	 * to hope that the 64-byte block load is atomic on all host
	 * implementations.
	 * %o0 points to the memory location (128-bit aligned).
	 * %o1 points to the high 64 bits of the result (big endian).
	 * %o2 points to the low 64 bits of the result (big endian).
	 */

#define	ASI_BLK_P	0xf0	/* VIS 1.0 block load from primary AS */

	.global host_atomic_get128be
host_atomic_get128be:
	/* align the memory address for a block load */
	andn	%o0, 0x3f, %o3
	membar	#Sync
	ldda	[%o3]ASI_BLK_P, %f0
	membar	#Sync		/* ensure the data is present */
	and	%o0, 0x30, %o4	/* figure out which of the 4 128-bit blocks we want */
_base:
	rd	%pc, %o0
	add	%o4, %o0, %o0
	jmpl	%o0 + (_xxword0 - _base), %g0
_xxword0:
	std	%f0, [ %o1 ]
	retl
	std	%f2, [ %o2 ]
	nop
_xxword1:
	std	%f4, [ %o1 ]
	retl
	std	%f6, [ %o2 ]
	nop
_xxword2:
	std	%f8, [ %o1 ]
	retl
	std	%f10, [ %o2 ]
	nop
_xxword3:
	std	%f12, [ %o1 ]
	retl
	std	%f14, [ %o2 ]
	nop

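	/*
	 * Illustrative only: the intended effect of host_atomic_get128be,
	 * ignoring atomicity (which is exactly what the block load above is
	 * there to provide). Note that each _xxword case above is four
	 * instructions (16 bytes) including the trailing nop, so the
	 * computed jump lands on one case per 128-bit chunk. Names below
	 * are hypothetical.
	 *
	 *	void host_atomic_get128be_sketch(uint64_t *src,   // 128-bit aligned
	 *	                                 uint64_t *hi, uint64_t *lo)
	 *	{
	 *		*hi = src[0];	// high 64 bits, big-endian order
	 *		*lo = src[1];	// low 64 bits
	 *	}
	 */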


	/*
	 * Assembly version of certain simulator instruction implementations.
	 */


#define	ld_simm16( _Rxip, _offset, _Rdest ) \
	ldsh	[ _Rxip + _offset ], _Rdest	/* 1 instn only !! */

#define	ld_simm32( _Rxip, _offset, _Rdest ) \
	ldsw	[ _Rxip + _offset ], _Rdest	/* 1 instn only !! */

#define	ldx_ireg( _Rcpup, _Rxip, _offset, _Rdest, _Rscratch ) \
	lduh	[ _Rxip + _offset ], _Rscratch	NL\
	ldx	[ _Rcpup + _Rscratch ], _Rdest

#define	stx_ireg( _Rcpup, _Rxip, _offset, _Rval, _Rscratch ) \
	lduh	[ _Rxip + _offset ], _Rscratch	NL\
	stx	_Rval, [ _Rcpup + _Rscratch ]

#define	ld_fpreg( _ldt, _Rcpup, _Rxip, _Rfprp, _offset, _Rdest, _Rscratch ) \
	lduh	[ _Rxip + _offset ], _Rscratch	NL\
	_ldt	[ _Rfprp + _Rscratch ], _Rdest

#define	st_fpreg( _stt, _Rcpup, _Rxip, _Rfprp, _offset, _Rdest, _Rscratch ) \
	lduh	[ _Rxip + _offset ], _Rscratch	NL\
	_stt	_Rdest, [ _Rfprp + _Rscratch ]

	/*
	 * %o0 = simcpu_t *
	 * %o1 = xicache_instn_t *
	 * %g1 is used as a scratch register by these macros
	 */

#define	ldx_Rsrc1( _r )	ldx_ireg( %o0, %o1, XIC_INTREG_SRC1_OFFSET, _r, %g1 )
#define	ldx_Rsrc2( _r )	ldx_ireg( %o0, %o1, XIC_INTREG_SRC2_OFFSET, _r, %g1 )
#define	ld_Simm16( _r )	ld_simm16( %o1, XIC_SIMM16_OFFSET, _r )

#define	ld_BrOff32( _r )	ld_simm32( %o1, XIC_BROFF32_OFFSET, _r )

#define	ld_BrRegOff32( _r )	ld_simm32( %o1, XIC_BREGOFF32_OFFSET, _r )

#define	stx_Rdest( _r )	stx_ireg( %o0, %o1, XIC_INTREG_DEST_OFFSET, _r, %g1 )

#define	ldx_Rccr( _r )	ldx	[ %o0 + SIMCPU_v9CCR_OFFSET ], _r
#define	stx_Rccr( _r )	stx	_r, [ %o0 + SIMCPU_v9CCR_OFFSET ]

#define	ldx_Rpc( _r )	ldx	[ %o0 + SIMCPU_PC_OFFSET ], _r
#define	stx_Rpc( _r )	stx	_r, [ %o0 + SIMCPU_PC_OFFSET ]
#define	ldx_Rnpc( _r )	ldx	[ %o0 + SIMCPU_NPC_OFFSET ], _r
#define	stx_Rnpc( _r )	stx	_r, [ %o0 + SIMCPU_NPC_OFFSET ]

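	/*
	 * Illustrative only: what the ldx_Rsrc1/stx_Rdest style macros above
	 * amount to in C. The decoded instruction carries a 16-bit byte
	 * offset for each register operand, which is simply added to the
	 * simcpu_t pointer. The field name used here is hypothetical.
	 *
	 *	uint64_t ldx_Rsrc1_sketch(simcpu_t *sp, xicache_instn_t *xip)
	 *	{
	 *		uint16_t off = xip->src1_offset;	// XIC_INTREG_SRC1_OFFSET
	 *		return *(uint64_t *)((char *)sp + off);
	 *	}
	 */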

	/* FP support */

#define	ld_FPsrc1( _ldt, _r )	ld_fpreg( _ldt, %o0, %o1, %o0, XIC_FPREG_SRC1_OFFSET, _r, %g1 )
#define	ld_FPsrc2( _ldt, _r )	ld_fpreg( _ldt, %o0, %o1, %o0, XIC_FPREG_SRC2_OFFSET, _r, %g1 )


#define	IMPL( _name ) \
	NL\
	.global	decoded_impl_##_name	NL\
	.align	8			NL\
decoded_impl_##_name:


#define	ENDI \
	ldx	[ %o0 + SIMCPU_NPC_OFFSET ], %o1	NL\
	add	%o1, 4, %o2				NL\
	stx	%o1, [ %o0 + SIMCPU_PC_OFFSET ]		NL\
	retl						NL\
	stx	%o2, [ %o0 + SIMCPU_NPC_OFFSET ]	NL\
	ENDINSTN

#define	ENDINSTN	/* nada */

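	/*
	 * Illustrative only: the ENDI epilogue above in C terms - retire the
	 * instruction by stepping the simulated pc/npc pair. The field names
	 * are hypothetical stand-ins for SIMCPU_PC_OFFSET / SIMCPU_NPC_OFFSET.
	 *
	 *	sp->pc  = sp->npc;
	 *	sp->npc = sp->npc + 4;
	 */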


	/*
	 * For executing floating point operations on SPARC (specifically,
	 * when emulating SPARC on SPARC) we use the host FSR on behalf of
	 * the cpu being emulated, but disable all floating point traps.
	 * After the instruction executes we test whether an exception
	 * occurred, update the simulated FSR accordingly, and finally
	 * signal a trap if the simulated machine actually wanted one.
	 *
	 * The simcpu_t simulated registers are used as follows:
	 * v9_fsr_ctrl holds the SPARC FSR control bits
	 * (condition codes, rounding mode etc.). The execution results
	 * (current and accumulated exceptions) are held in the
	 * v9_fsr_exc pseudo register, and the trap enable bits
	 * (TEM) are held in the v9_fsr_tem pseudo register.
	 *
	 * Note: we have to use the simcpu_t scratch64 value because we
	 * can't get the FSR value out of the cpu without using a
	 * store instruction. This should probably be per exec_thread, but
	 * that is harder to get at than per simcpu in here.
	 */


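	/*
	 * Illustrative only: the mask FPOP_setup_fsr below builds, in C.
	 * Only the control bits (fcc fields and rounding direction) from
	 * v9_fsr_ctrl are handed to the host %fsr; the TEM bits stay zero
	 * so the host never takes an FP trap on our behalf. Field names
	 * are hypothetical.
	 *
	 *	uint64_t mask = (0xffULL << 30)		// fcc3..fcc1 + RD
	 *	              | (0x3ULL  << 10);	// fcc0
	 *	uint64_t host_fsr = sp->v9_fsr_ctrl & mask;
	 */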
#define	FPOP_setup_fsr \
	ldx	[ %o0 + SIMCPU_v9FSR_CTRL_OFFSET ], %o4	NL\
	set	0xff, %o3				NL\
	sllx	%o3, 30, %o3	/* FCC[321] + RD mask */ NL\
	or	%o3, (3 << 10), %o3	/* | FCC0 mask */ NL\
	and	%o4, %o3, %o4				NL\
	stx	%o4, [ %o0 + SIMCPU_SCRATCH64_OFFSET ]	NL\
	ldx	[ %o0 + SIMCPU_SCRATCH64_OFFSET ], %fsr	NL\
	ldx	[ %o0 + SIMCPU_v9GSR_CTRL_OFFSET ], %o4	NL\
	wr	%o4, %gsr

#ifdef FP_DECODE_DISABLED
#define	FPOP_fpu_on_check
#else	/* FP_DECODE_DISABLED */
#define	FPOP_fpu_on_check \
	ldx	[ %o0 + SIMCPU_SPECIFICP_OFFSET ], %o4	NL\
	ld	[ %o4 + SPARCV9_FPU_ON_OFFSET ], %o4	NL\
	brz	%o4, sparcv9_deliver_fp_disabled_exception	NL\
	nop
#endif	/* FP_DECODE_DISABLED */

#define	FPOP_setup \
	FPOP_fpu_on_check	NL\
	FPOP_setup_fsr

#define	FPOP_cmp( _ldt, _fpop, _fcc ) \
	FPOP_setup		NL\
	ld_FPsrc1( _ldt, %f0 )	NL\
	ld_FPsrc2( _ldt, %f4 )	NL\
	_fpop	%_fcc, %f0, %f4	NL\
	FPOP_cleanup		NL\
	FPOP_save_fcc		NL\
	FPOP_ENDI

/* FPOP_save_fcc assumes FPOP_cleanup stored %fsr in scratch */
#define	FPOP_save_fcc \
	ldx	[ %o0 + SIMCPU_SCRATCH64_OFFSET ], %o4	NL\
	set	0x3f, %o3				NL\
	sllx	%o3, 32, %o3	/* FCC[321] mask */	NL\
	or	%o3, (3 << 10), %o3	/* | FCC0 mask */ NL\
	ldx	[ %o0 + SIMCPU_v9FSR_CTRL_OFFSET ], %o2	NL\
	andn	%o2, %o3, %o2				NL\
	and	%o4, %o3, %o4				NL\
	or	%o2, %o4, %o2				NL\
	stx	%o2, [ %o0 + SIMCPU_v9FSR_CTRL_OFFSET ]

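	/*
	 * Illustrative only: the fcc merge performed by FPOP_save_fcc above,
	 * in C. The fcc bits produced by the host compare are copied back
	 * into v9_fsr_ctrl without disturbing any other control bits.
	 * 'host_fsr' is the %fsr value saved in the scratch64 slot; field
	 * names are hypothetical.
	 *
	 *	uint64_t fcc_mask = (0x3fULL << 32)	// fcc3..fcc1
	 *	                  | (0x3ULL  << 10);	// fcc0
	 *	sp->v9_fsr_ctrl = (sp->v9_fsr_ctrl & ~fcc_mask)
	 *	                | (host_fsr & fcc_mask);
	 */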

	/*
	 * Since we're running as a user process, we're not
	 * going to see anything here other than IEEE 754 exceptions.
	 *
	 * But these have to be handled carefully, since the simulated FSR
	 * configuration may require that a proper exception is generated.
	 *
	 * Annoyingly we have to save the FSR somewhere in order to get access
	 * to the execution results; we use a per-cpu scratch area so we avoid
	 * MT conflicts.
	 *
	 * So retrieve the FSR, stash it back into ctrl sans error bits
	 * (the TEM bits should still be zero).
	 * Then look for errors from the last executed instruction: if none,
	 * do nothing; if some, accumulate or generate a trap as necessary.
	 */

	/* Must not modify %o0 or %o1 */
#define	FPOP_cleanup \
	stx	%fsr, [ %o0 + SIMCPU_SCRATCH64_OFFSET ]	NL\
	ldx	[ %o0 + SIMCPU_SCRATCH64_OFFSET ], %o4	NL\
	/* must clear cexc field if no exceptions */	NL\
	ldx	[ %o0 + SIMCPU_v9FSR_EXC_OFFSET ], %o3	NL\
	and	%o3, 0x1f<<5, %o3			NL\
	stx	%o3, [ %o0 + SIMCPU_v9FSR_EXC_OFFSET ]	NL\
	andcc	%o4, 0x1f, %o2				NL\
	bne,a,pt %xcc, sparcv9_fsr_exception_update	NL\
	rd	%pc, %o5				NL\
	/* fall through to the update part of the instruction */

#define	FPOP_ENDI \
	ENDI


	/*
	 * Hand-off routine for floating point closure.
	 * If any IEEE exception occurred, we now need to check whether the
	 * simulated FSR required a trap to be generated, or the error to be
	 * accumulated.
	 * NOTE: the error is not accumulated if a trap is to be delivered.
	 */

	.section ".text"
	.align	8
sparcv9_fsr_exception_update:
	ldx	[ %o0 + SIMCPU_v9FSR_TEM_OFFSET ], %o3
	ldx	[ %o0 + SIMCPU_v9FSR_CTRL_OFFSET ], %o4
	srlx	%o4, 14, %o4	/* FTT field - no trap if non-zero */
	and	%o4, 7, %o4
	movrnz	%o4, %g0, %o3
	andcc	%o2, %o3, %g0
	/* OK build the EXC group ... */
	sllx	%o2, 5, %o3
	/* clear the accumulation if trap to be delivered */
	movne	%xcc, %g0, %o3
	ldx	[ %o0 + SIMCPU_v9FSR_EXC_OFFSET ], %o4
	/* build error and accum bits */
	or	%o2, %o3, %o2
	/* keep only the previous accum bits */
	and	%o4, 0x1f<<5, %o4
	/* combine new error and old accum bits */
	or	%o2, %o4, %o2
	/* update the execution FSR state */
	stx	%o2, [ %o0 + SIMCPU_v9FSR_EXC_OFFSET ]
	/*
	 * now that the status is updated, branch into the
	 * C function to deliver the IEEE trap if appropriate
	 */
	bne,pn	%xcc, sparcv9_deliver_ieee_exception
	nop
	jmp	%o5 + 4		/* finish instruction */
	nop

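	/*
	 * Illustrative only: the decision sparcv9_fsr_exception_update above
	 * makes, in C. 'cexc' is the current-exception field handed over by
	 * FPOP_cleanup in %o2, 'ftt' is the trap-type field extracted from
	 * v9_fsr_ctrl; field names are hypothetical.
	 *
	 *	uint64_t tem  = ftt ? 0 : sp->v9_fsr_tem;   // no trap if ftt already set
	 *	int      trap = (cexc & tem) != 0;
	 *	uint64_t aexc = sp->v9_fsr_exc & (0x1fULL << 5);
	 *	sp->v9_fsr_exc = cexc | (trap ? 0 : cexc << 5) | aexc;
	 *	if (trap)
	 *		sparcv9_deliver_ieee_exception(...);
	 */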


	/*
	 * Instruction targets,
	 * implemented in assembly language to improve performance on
	 * certain host machines.
	 *
	 * This file is for a sparcv9 host.
	 */

	.section ".text"
	.align	8

	/* Args are: %o0 = simcpu_t*, %o1 = xicache_instn_t * */


	/*
	 * SPARC v9 add and subtract instructions
	 */

IMPL( sparcv9_add_co_imm )
	ba	internal_add_co;
	ld_Simm16(%o3)

IMPL( sparcv9_add_co_rrr )
	ldx_Rsrc2(%o3)
internal_add_co:
	ldx_Rsrc1(%o2)
	addcc	%o2, %o3, %o2
	stx_Rdest(%o2)
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI

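	/*
	 * Illustrative only: what the "_co" (condition codes out) add form
	 * above computes, in C terms; the remaining add/sub variants below
	 * follow the same pattern with addc/addccc and sub/subc/subccc.
	 * Names are hypothetical; 'ccr' stands for the value captured by
	 * rd %ccr and stored via stx_Rccr.
	 *
	 *	uint64_t s1 = src1;
	 *	uint64_t s2 = src2_or_simm16;	// sign-extended simm16 in the _imm form
	 *	uint64_t r  = s1 + s2;
	 *	dest = r;			// skipped in the _rd0 forms
	 *	ccr  = flags_of_addcc(s1, s2, r);	// hypothetical helper: icc/xcc N,Z,V,C
	 */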


IMPL( sparcv9_add_co_imm_rd0 )
	ba	internal_add_co_rd0;
	ld_Simm16(%o3)

IMPL( sparcv9_add_co_rrr_rd0 )
	ldx_Rsrc2(%o3)
internal_add_co_rd0:
	ldx_Rsrc1(%o2)
	addcc	%o2, %o3, %g0
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


IMPL( sparcv9_add_ci_imm )
	ba	internal_add_ci;
	ld_Simm16(%o3)

IMPL( sparcv9_add_ci_rrr )
	ldx_Rsrc2(%o3)
internal_add_ci:
	ldx_Rccr(%o4)
	wr	%o4, %ccr
	ldx_Rsrc1(%o2)
	addc	%o2, %o3, %o2
	stx_Rdest(%o2)
	ENDI


IMPL( sparcv9_add_cico_imm )
	ba	internal_add_cico;
	ld_Simm16(%o3)

IMPL( sparcv9_add_cico_rrr )
	ldx_Rsrc2(%o3)
internal_add_cico:
	ldx_Rccr(%o4)
	wr	%o4, %ccr
	ldx_Rsrc1(%o2)
	addccc	%o2, %o3, %o2
	stx_Rdest(%o2)
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


IMPL( sparcv9_add_cico_imm_rd0 )
	ba	internal_add_cico_rd0;
	ld_Simm16(%o3)

IMPL( sparcv9_add_cico_rrr_rd0 )
	ldx_Rsrc2(%o3)
internal_add_cico_rd0:
	ldx_Rccr(%o4)
	wr	%o4, %ccr
	ldx_Rsrc1(%o2)
	addccc	%o2, %o3, %g0
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


IMPL( sparcv9_sub_co_imm )
	ba	internal_sub_co;
	ld_Simm16(%o3)

IMPL( sparcv9_sub_co_rrr )
	ldx_Rsrc2(%o3)
internal_sub_co:
	ldx_Rsrc1(%o2)
	subcc	%o2, %o3, %o2
	stx_Rdest(%o2)
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


IMPL( sparcv9_sub_co_imm_rd0 )
	ba	internal_sub_co_rd0;
	ld_Simm16(%o3)

IMPL( sparcv9_sub_co_rrr_rd0 )
	ldx_Rsrc2(%o3)
internal_sub_co_rd0:
	ldx_Rsrc1(%o2)
	subcc	%o2, %o3, %g0
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


IMPL( sparcv9_sub_ci_imm )
	ba	internal_sub_ci;
	ld_Simm16(%o3)

IMPL( sparcv9_sub_ci_rrr )
	ldx_Rsrc2(%o3)
internal_sub_ci:
	ldx_Rccr(%o4)
	wr	%o4, %ccr
	ldx_Rsrc1(%o2)
	subc	%o2, %o3, %o2
	stx_Rdest(%o2)
	ENDI


IMPL( sparcv9_sub_cico_imm )
	ba	internal_sub_cico;
	ld_Simm16(%o3)

IMPL( sparcv9_sub_cico_rrr )
	ldx_Rsrc2(%o3)
internal_sub_cico:
	ldx_Rccr(%o4)
	wr	%o4, %ccr
	ldx_Rsrc1(%o2)
	subccc	%o2, %o3, %o2
	stx_Rdest(%o2)
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


IMPL( sparcv9_sub_cico_imm_rd0 )
	ba	internal_sub_cico_rd0;
	ld_Simm16(%o3)

IMPL( sparcv9_sub_cico_rrr_rd0 )
	ldx_Rsrc2(%o3)
internal_sub_cico_rd0:
	ldx_Rccr(%o4)
	wr	%o4, %ccr
	ldx_Rsrc1(%o2)
	subccc	%o2, %o3, %g0
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


	/*
	 * Logic CC instructions ...
	 */

IMPL( sparcv9_and_cc_imm )
	ba	internal_and_cc;
	ld_Simm16(%o3)

IMPL( sparcv9_and_cc_rrr )
	ldx_Rsrc2(%o3)
internal_and_cc:
	ldx_Rsrc1(%o2)
	andcc	%o2, %o3, %o2
	stx_Rdest(%o2)
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


IMPL( sparcv9_and_cc_imm_rd0 )
	ba	internal_and_cc_rd0;
	ld_Simm16(%o3)

IMPL( sparcv9_and_cc_rrr_rd0 )
	ldx_Rsrc2(%o3)
internal_and_cc_rd0:
	ldx_Rsrc1(%o2)
	andcc	%o2, %o3, %g0
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


	/* sparcv9_andn_cc_imm - synthesized by inverting imm for andcc */

IMPL( sparcv9_andn_cc_rrr )
	ldx_Rsrc2(%o3)
	ldx_Rsrc1(%o2)
	andncc	%o2, %o3, %o2
	stx_Rdest(%o2)
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


	/* sparcv9_andn_cc_imm_rd0 - synthesized by inverting imm for andcc */

IMPL( sparcv9_andn_cc_rrr_rd0 )
	ldx_Rsrc2(%o3)
	ldx_Rsrc1(%o2)
	andncc	%o2, %o3, %g0
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


IMPL( sparcv9_or_cc_imm )
	ba	internal_or_cc;
	ld_Simm16(%o3)

IMPL( sparcv9_or_cc_rrr )
	ldx_Rsrc2(%o3)
internal_or_cc:
	ldx_Rsrc1(%o2)
	orcc	%o2, %o3, %o2
	stx_Rdest(%o2)
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


IMPL( sparcv9_or_cc_imm_rd0 )
	ba	internal_or_cc_rd0;
	ld_Simm16(%o3)

IMPL( sparcv9_or_cc_rrr_rd0 )
	ldx_Rsrc2(%o3)
internal_or_cc_rd0:
	ldx_Rsrc1(%o2)
	orcc	%o2, %o3, %g0
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


	/* sparcv9_orn_cc_imm - synth by inverting imm field for orcc */

IMPL( sparcv9_orn_cc_rrr )
	ldx_Rsrc2(%o3)
	ldx_Rsrc1(%o2)
	orncc	%o2, %o3, %o2
	stx_Rdest(%o2)
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


	/* sparcv9_orn_cc_imm_rd0 - synth by inverting imm field for orcc */

IMPL( sparcv9_orn_cc_rrr_rd0 )
	ldx_Rsrc2(%o3)
	ldx_Rsrc1(%o2)
	orncc	%o2, %o3, %g0
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


IMPL( sparcv9_xor_cc_imm )
	ba	internal_xor_cc;
	ld_Simm16(%o3)

IMPL( sparcv9_xor_cc_rrr )
	ldx_Rsrc2(%o3)
internal_xor_cc:
	ldx_Rsrc1(%o2)
	xorcc	%o2, %o3, %o2
	stx_Rdest(%o2)
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


IMPL( sparcv9_xor_cc_imm_rd0 )
	ba	internal_xor_cc_rd0;
	ld_Simm16(%o3)

IMPL( sparcv9_xor_cc_rrr_rd0 )
	ldx_Rsrc2(%o3)
internal_xor_cc_rd0:
	ldx_Rsrc1(%o2)
	xorcc	%o2, %o3, %g0
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


	/* sparcv9_xnor_cc_imm - can synth by ~imm using xorcc imm */

IMPL( sparcv9_xnor_cc_rrr )
	ldx_Rsrc2(%o3)
	ldx_Rsrc1(%o2)
	xnorcc	%o2, %o3, %o2
	stx_Rdest(%o2)
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


	/* sparcv9_xnor_cc_imm_rd0 - can synth by ~imm using xorcc imm */

IMPL( sparcv9_xnor_cc_rrr_rd0 )
	ldx_Rsrc2(%o3)
	ldx_Rsrc1(%o2)
	xnorcc	%o2, %o3, %g0
	rd	%ccr, %o3
	stx_Rccr(%o3)
	ENDI


	/*
	 * Branch instructions change the value of npc.
	 * We could encode a mask into the xi immediate, but that would
	 * be slow to extract, and would leave us with a less than useful
	 * immediate field.
	 */

	/* delay slot:
	 * always executed - if the branch IS taken
	 * annulled - if the annul bit is set and the branch IS NOT taken
	 */

	/* SPARC branches are bloody awful - delay slots plus multiple
	 * condition variants ...
	 *
	 *	X:	br Y
	 *		br Z
	 *
	 *	Y:	slot instn
	 *
	 *	Z:
	 *
	 * ... instn @X executes instn @Y, but then X+4 in the delay slot
	 * of X causes a branch to Z
	 */


	/*
	 * Policy
	 *	annul:
	 *		not taken:		taken:
	 *		pc  = oldnpc + 4	pc  = oldnpc
	 *		npc = oldnpc + 8	npc = target
	 *
	 *	no annul:
	 *		pc  = npc
	 *		npc = target | npc + 4
	 */

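	/*
	 * Illustrative only: the "no annul" policy above in C, as implemented
	 * by the BRANCH macro below. Field and variable names are
	 * hypothetical; broff32 is the sign-extended 32-bit branch offset
	 * loaded via ld_BrOff32.
	 *
	 *	uint64_t target = sp->pc + broff32;
	 *	uint64_t oldnpc = sp->npc;
	 *	sp->pc  = oldnpc;			// delay slot always executes
	 *	sp->npc = taken ? target : oldnpc + 4;
	 */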

#define	BRANCH( _opc, _cc ) \
	ldx_Rccr(%o2)			NL\
	wr	%o2, %ccr		NL\
	ldx_Rpc(%o3)			NL\
	ldx_Rnpc(%o4)			NL\
	ld_BrOff32(%o5)			NL\
	stx_Rpc(%o4)			NL\
	add	%o3, %o5, %o5	/* branch target */		NL\
	add	%o4, 4, %o4	/* npc + 4 */			NL\
	mov##_opc _cc, %o5, %o4	/* overwrite npc if branch taken */ NL\
	retl				NL\
	stx_Rnpc(%o4)


#define	BRANCH_an( _opc, _cc ) \
	ldx_Rccr(%o2)			NL\
	wr	%o2, %ccr		NL\
	ldx_Rpc(%o3)			NL\
	ldx_Rnpc(%o4)			NL\
	ld_BrOff32(%o2)			NL\
	add	%o3, %o2, %o3	/* branch target */		NL\
	add	%o4, 4, %o5	/* oldnpc + 4 */		NL\
	add	%o4, 8, %g1	/* oldnpc + 8 */		NL\
	mov##_opc _cc, %o4, %o5	/* overwrite pc if branch taken */  NL\
	mov##_opc _cc, %o3, %g1	/* overwrite npc if branch taken */ NL\
	stx_Rpc(%o5)		/* if taken, delay slot is not squashed */ NL\
	retl				NL\
	stx_Rnpc(%g1)


	/* There has to be a better way than to enumerate every instruction form !! */

	/* icc versions */

IMPL( sparcv9_bne_icc )
	BRANCH( ne, %icc )
	ENDINSTN

IMPL( sparcv9_be_icc )
	BRANCH( e, %icc )
	ENDINSTN

IMPL( sparcv9_bg_icc )
	BRANCH( g, %icc )
	ENDINSTN

IMPL( sparcv9_ble_icc )
	BRANCH( le, %icc )
	ENDINSTN

IMPL( sparcv9_bge_icc )
	BRANCH( ge, %icc )
	ENDINSTN

IMPL( sparcv9_bl_icc )
	BRANCH( l, %icc )
	ENDINSTN

IMPL( sparcv9_bgu_icc )
	BRANCH( gu, %icc )
	ENDINSTN

IMPL( sparcv9_bleu_icc )
	BRANCH( leu, %icc )
	ENDINSTN

IMPL( sparcv9_bcc_icc )
	BRANCH( cc, %icc )
	ENDINSTN

IMPL( sparcv9_bcs_icc )
	BRANCH( cs, %icc )
	ENDINSTN

IMPL( sparcv9_bpos_icc )
	BRANCH( pos, %icc )
	ENDINSTN

IMPL( sparcv9_bneg_icc )
	BRANCH( neg, %icc )
	ENDINSTN

IMPL( sparcv9_bvc_icc )
	BRANCH( vc, %icc )
	ENDINSTN

IMPL( sparcv9_bvs_icc )
	BRANCH( vs, %icc )
	ENDINSTN


	/* xcc versions */

IMPL( sparcv9_bne_xcc )
	BRANCH( ne, %xcc )
	ENDINSTN

IMPL( sparcv9_be_xcc )
	BRANCH( e, %xcc )
	ENDINSTN

IMPL( sparcv9_bg_xcc )
	BRANCH( g, %xcc )
	ENDINSTN

IMPL( sparcv9_ble_xcc )
	BRANCH( le, %xcc )
	ENDINSTN

IMPL( sparcv9_bge_xcc )
	BRANCH( ge, %xcc )
	ENDINSTN

IMPL( sparcv9_bl_xcc )
	BRANCH( l, %xcc )
	ENDINSTN

IMPL( sparcv9_bgu_xcc )
	BRANCH( gu, %xcc )
	ENDINSTN

IMPL( sparcv9_bleu_xcc )
	BRANCH( leu, %xcc )
	ENDINSTN

IMPL( sparcv9_bcc_xcc )
	BRANCH( cc, %xcc )
	ENDINSTN

IMPL( sparcv9_bcs_xcc )
	BRANCH( cs, %xcc )
	ENDINSTN

IMPL( sparcv9_bpos_xcc )
	BRANCH( pos, %xcc )
	ENDINSTN

IMPL( sparcv9_bneg_xcc )
	BRANCH( neg, %xcc )
	ENDINSTN

IMPL( sparcv9_bvc_xcc )
	BRANCH( vc, %xcc )
	ENDINSTN

IMPL( sparcv9_bvs_xcc )
	BRANCH( vs, %xcc )
	ENDINSTN


	/*
	 * Annulled delay slot versions !!
	 */

	/* icc versions */

IMPL( sparcv9_bne_icc_an )
	BRANCH_an( ne, %icc )
	ENDINSTN

IMPL( sparcv9_be_icc_an )
	BRANCH_an( e, %icc )
	ENDINSTN

IMPL( sparcv9_bg_icc_an )
	BRANCH_an( g, %icc )
	ENDINSTN

IMPL( sparcv9_ble_icc_an )
	BRANCH_an( le, %icc )
	ENDINSTN

IMPL( sparcv9_bge_icc_an )
	BRANCH_an( ge, %icc )
	ENDINSTN

IMPL( sparcv9_bl_icc_an )
	BRANCH_an( l, %icc )
	ENDINSTN

IMPL( sparcv9_bgu_icc_an )
	BRANCH_an( gu, %icc )
	ENDINSTN

IMPL( sparcv9_bleu_icc_an )
	BRANCH_an( leu, %icc )
	ENDINSTN

IMPL( sparcv9_bcc_icc_an )
	BRANCH_an( cc, %icc )
	ENDINSTN

IMPL( sparcv9_bcs_icc_an )
	BRANCH_an( cs, %icc )
	ENDINSTN

IMPL( sparcv9_bpos_icc_an )
	BRANCH_an( pos, %icc )
	ENDINSTN

IMPL( sparcv9_bneg_icc_an )
	BRANCH_an( neg, %icc )
	ENDINSTN

IMPL( sparcv9_bvc_icc_an )
	BRANCH_an( vc, %icc )
	ENDINSTN

IMPL( sparcv9_bvs_icc_an )
	BRANCH_an( vs, %icc )
	ENDINSTN


	/* xcc versions */

IMPL( sparcv9_bne_xcc_an )
	BRANCH_an( ne, %xcc )
	ENDINSTN

IMPL( sparcv9_be_xcc_an )
	BRANCH_an( e, %xcc )
	ENDINSTN

IMPL( sparcv9_bg_xcc_an )
	BRANCH_an( g, %xcc )
	ENDINSTN

IMPL( sparcv9_ble_xcc_an )
	BRANCH_an( le, %xcc )
	ENDINSTN

IMPL( sparcv9_bge_xcc_an )
	BRANCH_an( ge, %xcc )
	ENDINSTN

IMPL( sparcv9_bl_xcc_an )
	BRANCH_an( l, %xcc )
	ENDINSTN

IMPL( sparcv9_bgu_xcc_an )
	BRANCH_an( gu, %xcc )
	ENDINSTN

IMPL( sparcv9_bleu_xcc_an )
	BRANCH_an( leu, %xcc )
	ENDINSTN

IMPL( sparcv9_bcc_xcc_an )
	BRANCH_an( cc, %xcc )
	ENDINSTN

IMPL( sparcv9_bcs_xcc_an )
	BRANCH_an( cs, %xcc )
	ENDINSTN

IMPL( sparcv9_bpos_xcc_an )
	BRANCH_an( pos, %xcc )
	ENDINSTN

IMPL( sparcv9_bneg_xcc_an )
	BRANCH_an( neg, %xcc )
	ENDINSTN

IMPL( sparcv9_bvc_xcc_an )
	BRANCH_an( vc, %xcc )
	ENDINSTN

IMPL( sparcv9_bvs_xcc_an )
	BRANCH_an( vs, %xcc )
	ENDINSTN

#undef	BRANCH
#undef	BRANCH_an


	/*
	 * Versions for the branch on register value operations
	 */

	/*
	 * Policy
	 *	no annul:
	 *		pc  = npc
	 *		npc = target | npc + 4
	 *
	 *	annul:
	 *		not taken:		taken:
	 *		pc  = oldnpc + 4	pc  = oldnpc
	 *		npc = oldnpc + 8	npc = target
	 */

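	/*
	 * Illustrative only: the annulled-delay-slot policy above in C, as
	 * implemented by the BRANCH_an macros (both the condition-code and
	 * register forms). Field and variable names are hypothetical.
	 *
	 *	uint64_t target = sp->pc + broff32;
	 *	uint64_t oldnpc = sp->npc;
	 *	if (taken) {
	 *		sp->pc  = oldnpc;		// delay slot executes
	 *		sp->npc = target;
	 *	} else {
	 *		sp->pc  = oldnpc + 4;		// delay slot annulled
	 *		sp->npc = oldnpc + 8;
	 *	}
	 */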

#define	BRANCH( _opc ) \
	ldx_Rsrc1(%o2)			NL\
	ldx_Rpc(%o3)			NL\
	ldx_Rnpc(%o4)			NL\
	ld_BrRegOff32(%o5)		NL\
	stx_Rpc(%o4)		/* pc = npc */			NL\
	add	%o3, %o5, %o5	/* branch target */		NL\
	add	%o4, 4, %o4	/* npc + 4 */			NL\
	movr##_opc %o2, %o5, %o4 /* overwrite npc if branch taken */ NL\
	retl				NL\
	stx_Rnpc(%o4)


#define	BRANCH_an( _opc ) \
	ldx_Rsrc1(%o2)			NL\
	ldx_Rpc(%o3)			NL\
	ldx_Rnpc(%o4)			NL\
	add	%o4, 4, %o5	/* oldnpc + 4 */		NL\
	movr##_opc %o2, %o4, %o5 /* overwrite pc if branch taken */ NL\
	stx_Rpc(%o5)		/* if taken, delay slot is not squashed */ NL\
	ld_BrRegOff32(%o5)		NL\
	add	%o3, %o5, %o3	/* branch target */		NL\
	add	%o4, 8, %o5	/* oldnpc + 8 */		NL\
	movr##_opc %o2, %o3, %o5 /* overwrite npc if branch taken */ NL\
	retl				NL\
	stx_Rnpc(%o5)


IMPL( sparcv9_brz )
	BRANCH( z )
	ENDINSTN

IMPL( sparcv9_brlez )
	BRANCH( lez )
	ENDINSTN

IMPL( sparcv9_brlz )
	BRANCH( lz )
	ENDINSTN

IMPL( sparcv9_brnz )
	BRANCH( nz )
	ENDINSTN

IMPL( sparcv9_brgz )
	BRANCH( gz )
	ENDINSTN

IMPL( sparcv9_brgez )
	BRANCH( gez )
	ENDINSTN

IMPL( sparcv9_brz_an )
	BRANCH_an( z )
	ENDINSTN

IMPL( sparcv9_brlez_an )
	BRANCH_an( lez )
	ENDINSTN

IMPL( sparcv9_brlz_an )
	BRANCH_an( lz )
	ENDINSTN

IMPL( sparcv9_brnz_an )
	BRANCH_an( nz )
	ENDINSTN

IMPL( sparcv9_brgz_an )
	BRANCH_an( gz )
	ENDINSTN

IMPL( sparcv9_brgez_an )
	BRANCH_an( gez )
	ENDINSTN


	/*
	 * SPARC floating point compares
	 */

IMPL( sparcv9_fcmps_fcc0 )
	FPOP_cmp( ld, fcmps, fcc0 )
	ENDINSTN

IMPL( sparcv9_fcmps_fcc1 )
	FPOP_cmp( ld, fcmps, fcc1 )
	ENDINSTN

IMPL( sparcv9_fcmps_fcc2 )
	FPOP_cmp( ld, fcmps, fcc2 )
	ENDINSTN

IMPL( sparcv9_fcmps_fcc3 )
	FPOP_cmp( ld, fcmps, fcc3 )
	ENDINSTN

IMPL( sparcv9_fcmpd_fcc0 )
	FPOP_cmp( ldd, fcmpd, fcc0 )
	ENDINSTN

IMPL( sparcv9_fcmpd_fcc1 )
	FPOP_cmp( ldd, fcmpd, fcc1 )
	ENDINSTN

IMPL( sparcv9_fcmpd_fcc2 )
	FPOP_cmp( ldd, fcmpd, fcc2 )
	ENDINSTN

IMPL( sparcv9_fcmpd_fcc3 )
	FPOP_cmp( ldd, fcmpd, fcc3 )
	ENDINSTN

IMPL( sparcv9_fcmpes_fcc0 )
	FPOP_cmp( ld, fcmpes, fcc0 )
	ENDINSTN

IMPL( sparcv9_fcmpes_fcc1 )
	FPOP_cmp( ld, fcmpes, fcc1 )
	ENDINSTN

IMPL( sparcv9_fcmpes_fcc2 )
	FPOP_cmp( ld, fcmpes, fcc2 )
	ENDINSTN

IMPL( sparcv9_fcmpes_fcc3 )
	FPOP_cmp( ld, fcmpes, fcc3 )
	ENDINSTN

IMPL( sparcv9_fcmped_fcc0 )
	FPOP_cmp( ldd, fcmped, fcc0 )
	ENDINSTN

IMPL( sparcv9_fcmped_fcc1 )
	FPOP_cmp( ldd, fcmped, fcc1 )
	ENDINSTN

IMPL( sparcv9_fcmped_fcc2 )
	FPOP_cmp( ldd, fcmped, fcc2 )
	ENDINSTN

IMPL( sparcv9_fcmped_fcc3 )
	FPOP_cmp( ldd, fcmped, fcc3 )
	ENDINSTN
