Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / obp / obp / pkg / fcode / sparc / fcode32.fth
CommitLineData
920dae64
AT
1\ ========== Copyright Header Begin ==========================================
2\
3\ Hypervisor Software File: fcode32.fth
4\
5\ Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
6\
7\ - Do no alter or remove copyright notices
8\
9\ - Redistribution and use of this software in source and binary forms, with
10\ or without modification, are permitted provided that the following
11\ conditions are met:
12\
13\ - Redistribution of source code must retain the above copyright notice,
14\ this list of conditions and the following disclaimer.
15\
16\ - Redistribution in binary form must reproduce the above copyright notice,
17\ this list of conditions and the following disclaimer in the
18\ documentation and/or other materials provided with the distribution.
19\
20\ Neither the name of Sun Microsystems, Inc. or the names of contributors
21\ may be used to endorse or promote products derived from this software
22\ without specific prior written permission.
23\
24\ This software is provided "AS IS," without a warranty of any kind.
25\ ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
26\ INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
27\ PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN
28\ MICROSYSTEMS, INC. ("SUN") AND ITS LICENSORS SHALL NOT BE LIABLE FOR
29\ ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
30\ DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. IN NO EVENT WILL SUN
31\ OR ITS LICENSORS BE LIABLE FOR ANY LOST REVENUE, PROFIT OR DATA, OR
32\ FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE
33\ DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY,
34\ ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, EVEN IF
35\ SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
36\
37\ You acknowledge that this software is not designed, licensed or
38\ intended for use in the design, construction, operation or maintenance of
39\ any nuclear facility.
40\
41\ ========== Copyright Header End ============================================
42id: @(#)fcode32.fth 1.15 06/10/26
43purpose:
44copyright: Copyright 2006 Sun Microsystems, Inc. All rights reserved.
45copyright: Use is subject to license terms.
46
47headers
48
49code 2l->n ( l1 l2 -- x1 x2 )
50 tos 0 tos sra
51 sp 0 scr ldx
52 scr 0 scr sra
53 scr sp 0 stx
54c;
55
56: l-$number ( adr len -- true | n false ) swap n->l swap $number ;
57
58: l-move ( adr1 adr2 cnt -- ) rot n->l rot n->l rot n->l move ;
59: l-fill ( adr cnt byte -- ) rot n->l rot n->l rot fill ;
60
61: lcpeek ( adr -- { byte true } | false ) n->l cpeek ;
62: lwpeek ( adr -- { byte true } | false ) n->l wpeek ;
63: llpeek ( adr -- { byte true } | false ) n->l lpeek ;
64
65: lcpoke ( byte adr -- ok? ) n->l cpoke ;
66: lwpoke ( byte adr -- ok? ) n->l wpoke ;
67: llpoke ( byte adr -- ok? ) n->l lpoke ;
68
69: lb?branch ( [ <mark ] -- [ >mark ] )
70
71 \ New feature of IEEE 1275
72 state @ 0= if ( flag )
73 l->n if get-offset drop else skip-bytes then
74 exit
75 then
76
77 compile l->n
78
79 get-offset 0< if ( )
80 \ The get-backward-mark is needed in case of the following valid
81 \ ANS Forth construct: BEGIN .. WHILE .. UNTIL .. THEN
82 get-backward-mark [compile] until
83 else
84 [compile] if
85 then
86; immediate
87
88code l+! ( n addr -- )
89 tos 0 tos srl
90 sp scr get
91\dtc tos sc1 lget
92
93\itc tos 0 sc1 lduh
94\itc tos 2 sc2 lduh
95\itc sc1 10 sc1 sll
96\itc sc1 sc2 sc1 add
97 bubble
98 sc1 scr sc1 add
99\dtc sc1 tos lput
100\itc sc1 tos 2 sth
101\itc sc1 10 sc1 srl
102\itc sc1 tos 0 sth
103 sp 1 /n* tos nget
104 sp 2 /n* sp add
105c;
106
107code l>>a ( n1 cnt -- n2 )
108 sp scr pop
109 scr tos tos sra
110c;
111code lrshift ( n1 cnt -- n2 )
112 sp scr pop
113 scr tos tos srl
114c;
115
116: lb(of) ( marks -- marks )
117 drop-offset +level compile 2l->n -level [compile] of
118; immediate
119: lb(do) ( -- )
120 drop-offset +level compile 2l->n -level [compile] do
121; immediate
122: lb(?do) ( -- )
123 drop-offset +level compile 2l->n -level [compile] ?do
124; immediate
125: lb(+loop) ( -- )
126 drop-offset +level compile l->n -level [compile] +loop
127; immediate
128
129transient
130also assembler definitions
131: compare
132 sp scr pop
133 scr tos cmp
134;
135: leaveflag ( condition -- )
136\ macro to assemble code to leave a flag on the stack
137 if ,%icc
138 0 tos move \ Delay slot
139 -1 tos move
140 then
141;
142previous definitions
143resident
144warning @ warning off
145\ Note: l0= and l= clash with the link defs in kernport.fth
146code l0= ( n -- f ) tos test 0= leaveflag c;
147code l0<> ( n -- f ) tos test 0<> leaveflag c;
148code l0< ( n -- f ) tos test 0< leaveflag c;
149code l0<= ( n -- f ) tos test <= leaveflag c;
150code l0> ( n -- f ) tos test > leaveflag c;
151code l0>= ( n -- f ) tos test 0>= leaveflag c;
152
153code l< ( n1 n2 -- f ) compare < leaveflag c;
154code l> ( n1 n2 -- f ) compare > leaveflag c;
155code l= ( n1 n2 -- f ) compare 0= leaveflag c;
156code l<> ( n1 n2 -- f ) compare <> leaveflag c;
157code lu> ( n1 n2 -- f ) compare u> leaveflag c;
158code lu<= ( n1 n2 -- f ) compare u<= leaveflag c;
159code lu< ( n1 n2 -- f ) compare u< leaveflag c;
160code lu>= ( n1 n2 -- f ) compare u>= leaveflag c;
161code l>= ( n1 n2 -- f ) compare >= leaveflag c;
162code l<= ( n1 n2 -- f ) compare <= leaveflag c;
163warning !
164
165: l-between ( n1 n2 n3 -- flag ) >r over l<= swap r> l<= and ;
166: l-within ( n1 n2 n3 -- flag ) over - >r - r> lu< ;
167: l-max ( n1 n2 -- max ) 2dup l< if swap then drop ;
168: l-min ( n1 n2 -- min ) 2dup l> if swap then drop ;
169: l-abs ( n -- |n| ) dup l0< if negate then ;
170
171code l-@ ( addr -- n )
172 tos 0 tos srl
173\dtc tos 0 tos ld
174\itc tos 2 scr lduh tos 0 tos lduh
175\itc tos h# 10 tos sll scr tos tos add
176c;
177
178code l-! ( n addr -- )
179 tos 0 tos srl
180 sp 0 scr nget
181\dtc scr tos 0 st
182\itc scr tos 2 sth
183\itc scr 10 scr srl
184\itc scr tos 0 sth
185
186 sp 1 /n* tos nget
187 sp 2 /n* sp add
188c;
189
190code l2@ ( addr -- d )
191 tos 0 tos srl
192 tos /n sc1 lduh
193 tos /n 2 + scr lduh sc1 10 sc1 sll scr sc1 sc1 add
194 tos /n 4 + scr lduh sc1 10 sc1 sllx scr sc1 sc1 add
195 tos /n 6 + scr lduh sc1 10 sc1 sllx scr sc1 scr add
196
197 scr sp push
198
199 tos 0 sc1 lduh
200 tos 2 scr lduh sc1 10 sc1 sll scr sc1 sc1 add
201 tos 4 scr lduh sc1 10 sc1 sllx scr sc1 sc1 add
202 tos 6 scr lduh sc1 10 sc1 sllx
203
204 scr sc1 tos add
205c;
206code l2! ( d addr -- )
207 tos 0 tos srl
208 sp 0 scr nget
209 bubble
210
211 scr tos 6 sth scr 10 scr srlx
212 scr tos 4 sth scr 10 scr srlx
213 scr tos 2 sth scr 10 scr srl
214 scr tos 0 sth
215
216 sp /n scr nget
217
218 bubble
219
220 scr tos /n 6 + sth scr 10 scr srlx
221 scr tos /n 4 + sth scr 10 scr srlx
222 scr tos /n 2 + sth scr 10 scr srl
223 scr tos /n 0 + sth
224
225 sp 2 /n* tos nget
226 sp 3 /n* sp add
227c;
228
229code ll@ ( addr -- l ) \ longword aligned
230 tos 0 tos srl
231 tos tos lget
232c;
233code ll! ( n addr -- )
234 tos 0 tos srl
235 sp 0 scr nget
236 bubble
237 scr tos 0 st
238 sp 1 /n* tos nget
239 sp 2 /n* sp add
240c;
241
242code l<w@ ( addr -- w )
243 tos 0 tos srl
244 tos 0 tos ldsh
245 tos 0 tos sra
246c;
247
248code lw@ ( addr -- w ) \ 16-bit word aligned
249 tos 0 tos srl
250 tos 0 tos lduh
251c;
252
253code lw! ( w addr -- )
254 tos 0 tos srl
255 sp 0 scr nget
256 bubble
257 scr tos 0 sth
258 sp 1 /n* tos nget
259 sp 2 /n* sp add
260c;
261
262code lc@ ( addr -- c )
263 tos 0 tos srl
264 tos 0 tos ldub
265c;
266code lc! ( c addr -- )
267 tos 0 tos srl
268 sp 0 scr nget
269 bubble
270 scr tos 0 stb
271 sp 1 /n* tos nget
272 sp 2 /n* sp add
273c;
274
275code lon ( addr -- )
276 tos 0 tos srl
277 -1 scr move
278\dtc scr tos 0 st
279\itc scr tos 0 sth
280\itc scr tos 2 sth
281 sp tos pop
282c;
283code loff ( addr -- )
284 tos 0 tos srl
285\dtc %g0 tos 0 st
286\itc %g0 tos 0 sth
287\itc %g0 tos 2 sth
288 sp tos pop
289c;
290
291: lbase ( -- adr ) +level compile base compile la1+ -level ; immediate
292: l#out ( -- adr ) +level compile #out compile la1+ -level ; immediate
293: l#line ( -- adr ) +level compile #line compile la1+ -level ; immediate
294: lspan ( -- adr ) +level compile span compile la1+ -level ; immediate
295
296code lrl@ ( addr -- l ) \ longword aligned
297 tos 0 tos srl
298 tos tos lget
299c;
300code lrl! ( n addr -- )
301 tos 0 tos srl
302 sp 0 scr nget
303 bubble
304 scr tos 0 st
305 sp 1 /n* tos nget
306 sp 2 /n* sp add
307c;
308
309code lrw@ ( addr -- w ) \ 16-bit word aligned
310 tos 0 tos srl
311 tos 0 tos lduh
312c;
313
314code lrw! ( w addr -- )
315 tos 0 tos srl
316 sp 0 scr nget
317 bubble
318 scr tos 0 sth
319 sp 1 /n* tos nget
320 sp 2 /n* sp add
321c;
322
323code lrb@ ( addr -- c )
324 tos 0 tos srl
325 tos 0 tos ldub
326c;
327code lrb! ( c addr -- )
328 tos 0 tos srl
329 sp 0 scr nget
330 bubble
331 scr tos 0 stb
332 sp 1 /n* tos nget
333 sp 2 /n* sp add
334c;
335
336\ Double word stuff we need to implement with 32-bit only math.
337code ld+ ( x1 x2 -- x3 )
338 sp 0 /n* sc1 nget \ x2.low
339 sp 2 /n* sc3 nget \ x1.low
340 sp 1 /n* sc2 nget \ x1.high
341 sp 2 /n* sp add \ Pop args
342 sc3 sc1 sc1 addcc \ x3.low
343 sc2 tos tos addc \ x3.high
344 sc1 sp 0 nput \ Push result (x3.high already in tos)
345c;
346code ld- ( x1 x2 -- x3 )
347 sp 0 /n* sc1 nget \ x2.low
348 sp 2 /n* sc3 nget \ x1.low
349 sp 1 /n* sc2 nget \ x1.high
350 sp 2 /n* sp add \ Pop args
351 sc3 sc1 sc1 subcc \ x3.low
352 sc2 tos tos subc \ x3.high
353 sc1 sp 0 nput \ Push result (x3.high already in tos)
354c;
355
356: l-u/mod ( u1 u2 -- u.rem u.quot ) n->l swap n->l swap u/mod ;
357: l-/mod ( n1 n2 -- n.rem n.quot ) l->n swap l->n swap /mod ;
358
359headerless
360
361\ The following is from obp/fm/kernel/dmuldiv.fth and obp/fm/kernel/dmul.fth:
362
363/l 4 * constant bits/half-l
364: l-scale-up ( n -- h ) bits/half-l << ;
365: l-scale-down ( n -- h ) bits/half-l >> ;
366alias l-split-halves lwsplit
367: l-half* ( h1 h2 -- low<< high ) * l-split-halves swap l-scale-up swap ;
368
369headers
370
371\ Implement:
372\
373\ AB * CD = BD + (BC + DA)<<bits/half-cell + AC<<bits/cell
374\
375\ Where A, B, C, D are half "l" (or 16 bit in this case) values.
376
377: lum* ( n1 n2 -- xlo xhi )
378 l-split-halves rot l-split-halves ( b a d c )
379
380 \ Easy case - high halves are both 0, so result is just BD
381 2 pick over or 0= if drop nip * 0 exit then
382
383 3 pick 2 pick * 0 2>r ( b a d c ) ( r: d.low )
384
385 \ Check for C = 0 and optimize if so
386 dup if ( b a d c ) ( r: d.low )
387 \ C is not zero, so compute and add BC<<
388 3 pick over l-half*
389 2r> ld+ 2>r ( b a d c ) ( r: d.intermed )
390
391 \ We are done with B
392 2swap nip ( d c a )
393 \ Check for A = 0 and optimize if so
394 dup if ( d c a )
395 \ A is not zero, so compute and add DA<< and AC<<<
396 rot over l-half* ( c a da.low da.high )
397 2r> ld+ 2>r ( c a ) ( r: d.intermed' )
398 * 0 swap 2r> ld+
399 else
400 \ A is zero, so we are finished
401 3drop 2r>
402 then
403 else
404 \ C is zero, so all we have to do is compute and add DA<<
405 drop rot drop ( a d ) ( r: d.low )
406 l-half* ( low1 high1 )
407 2r> ld+
408 then
409 n->l
410;
411\ This is the elementary school long-division algorithm, base 2^^16 (on a
412\ 32-bit system) or 2^32 (on a 64-bit system).
413\ It depends on the assumption that "/" can accurately divide a single-cell
414\ (i.e. 32 or 64 bit) number by a half-cell (i.e. 16 or 32 bit) number.
415\ Each "digit" is a half-cell number; thus the dividend is a 4-digit
416\ number "ABCD" and the divisor is a 2-digit number "EF".
417
418\ It would be interesting to compare the performance of this to a
419\ "bit-banging" non-restoring division loop.
420: l-um/mod ( ud u -- urem uquot )
421 2dup u>= if \ Overflow; return max-uint for quotient
422 0= if 0 / then \ Force divide by zero trap
423 2drop 0 -1 n->l exit ( 0 max-u )
424 then ( ud u )
425
426 \ Split the divisor into two 16-bit "digits"
427 dup l-split-halves ( ud u ulow uhigh )
428
429 \ If the high "digit" of the divisor is zero, we can skip a lot
430 \ of the steps. In this case, we only have to worry about the
431 \ middle two digits of the dividend in developing the quotient.
432 ?dup 0= if ( ud u ulow )
433
434 \ Approximate the high digit of the quotient by dividing the "BC"
435 \ digits by the "F" digit. The answer could by low by one, but if
436 \ so it will be fixed in the next step.
437 2over swap l-scale-down swap l-scale-up + over / l-scale-up
438 ( ud u ulow guess<< )
439
440 \ Multiply the trial quotient by the divisor
441 rot over lum* ( ud ulow guess<< udtemp )
442
443 \ Subtract the trial product from the dividend, giving the remainder
444 2swap >r >r ld- drop ( error ) ( r: guess<< ulow )
445
446 \ Divide the remainder by the divisor, giving the rest of the
447 \ quotient.
448 dup r@ l-u/mod nip ( error guess1 )
449 r> r> ( error guess1 ulow guess<< )
450
451 \ Merge the two halves of the quotient
452 2 pick + >r ( error guess1 ulow ) ( r: uquot )
453
454 \ Calculate the remainder
455 * - r> ( urem uquot )
456 exit
457 then ( ud u ulow uhigh )
458
459 \ The high divisor digit is non-zero, so we have to deal with
460 \ both digits, dividing "ABCD" by "EF".
461
462 \ Approximate the high digit of the quotient.
463 3 pick over l-u/mod nip ( ud u ulow uhigh guess )
464
465 \ Reduce guess by one if "E" = "A"
466 dup 1 l-scale-up = if 1- then ( ud u ulow uhigh guess' )
467
468 \ Multiply the trial quotient by the divisor
469 3 pick over l-scale-up lum* ( ud u ulow uhigh guess' ud.temp )
470
471 \ Subtract the trial product from the dividend, giving the remainder
472 >r >r 2rot r> r> ld- ( u ulow uhigh guess' d.resid )
473
474 \ If the remainder is negative, add the divisor and reduce the trial
475 \ quotient by one. The following loop executes at most twice.
476 begin dup 0< while ( u ulow uhigh guess' d.resid )
477 rot 1- -rot ( u ulow uhigh guess+ d.resid )
478 4 pick l-scale-up 4 pick ld+ ( u ulow uhigh guess+ d.resid' )
479 repeat ( u ulow uhigh guess+ +d.resid )
480
481 \ Now we have the correct high quotient digit; save it for later
482 rot l-scale-up >r ( u ulow uhigh +d.resid ) ( r: q.high )
483
484 \ Repeat the above process, using the partial remainder as the
485 \ dividend. Ulow is no longer needed
486 3 roll drop ( u uhigh +d.resid )
487
488 \ Trial quotient digit...
489 2dup l-scale-up swap l-scale-down + 3 roll l-u/mod nip
490 ( u +d.resid guess1 ) ( r: q.high )
491 dup 1 l-scale-up = if 1- then ( u +d.resid guess1' )
492
493 \ Trial product
494 3 pick over lum* ( u +d.resid guess1' d.err )
495
496 \ New partial remainder
497 rot >r ld- ( u d.resid' ) ( r: q.high guess1' )
498
499 \ Adjust quotient digit until partial remainder is positive
500 begin dup 0< while ( u d.resid' ) ( r: q.high guess1' )
501 r> 1- >r ( u d.resid' ) ( r: q.high guess1' )
502 \ There is no l-m+, so use "0< ld+" instead
503 2 pick dup 0< ld+ ( u d.resid'' ) ( r: q.high guess1' )
504 repeat ( u +d.resid ) ( r: q.high guess1' )
505
506 \ Discard divisior and high cell of quotient (which must be zero)
507 rot 2drop ( u.rem )
508
509 \ Merge quotient digits
510 r> r> + ( u.rem u.quot )
511;
512
513\ Need to fix # and #s
514
515: l-mu/mod (s d n1 -- rem d.quot )
516 >r 0 r@ l-um/mod r> swap >r l-um/mod r>
517;
518: l-# (s ud1 -- ud2 )
519 base @ l-mu/mod ( nrem ud2 )
520 rot >digit hold ( ud2 )
521;
522: l-#s (s ud -- 0 0 ) begin l-# 2dup or 0= until ;
523
524transient
525vocabulary fcode32
526also fcode32 definitions
527
528alias , l,
529alias /n /l
530alias na+ la+
531alias cell+ la1+
532alias cells /l*
533alias b?branch lb?branch
534alias +! l+!
535alias >>a l>>a
536alias rshift lrshift
537alias b(of) lb(of)
538alias b(do) lb(do)
539alias b(?do) lb(?do)
540alias b(+loop) lb(+loop)
541alias 0= l0=
542alias 0<> l0<>
543alias 0< l0<
544alias 0<= l0<=
545alias 0> l0>
546alias 0>= l0>=
547alias < l<
548alias > l>
549alias = l=
550alias <> l<>
551alias u> lu>
552alias u<= lu<=
553alias u< lu<
554alias u>= lu>=
555alias >= l>=
556alias <= l<=
557alias between l-between
558alias within l-within
559alias max l-max
560alias min l-min
561alias abs l-abs
562alias @ l-@
563alias ! l-!
564alias 2@ l2@
565alias 2! l2!
566alias l@ ll@
567alias l! ll!
568alias <w@ l<w@
569alias w@ lw@
570alias w! lw!
571alias c@ lc@
572alias c! lc!
573alias on lon
574alias off loff
575alias base lbase
576alias #out l#out
577alias #line l#line
578alias span lspan
579
580alias rl! lrl!
581alias rl@ lrl@
582alias rw! lrw!
583alias rw@ lrw@
584alias rb! lrb!
585alias rb@ lrb@
586
587alias $number l-$number
588alias move l-move
589alias fill l-fill
590alias cpeek lcpeek
591alias wpeek lwpeek
592alias lpeek llpeek
593alias cpoke lcpoke
594alias wpoke lwpoke
595alias lpoke llpoke
596
597alias d+ ld+
598alias d- ld-
599alias um* lum*
600alias um/mod l-um/mod
601alias u/mod l-u/mod
602alias /mod l-/mod
603
604alias # l-#
605alias #s l-#s
606
607previous definitions
608resident
609
610headerless
611variable token-table0-64
612variable token-table2-64
613variable token-table0-32
614variable token-table2-32
615
616token-tables 0 ta+ token@ token-table0-64 token!
617token-tables 0 ta+ !null-token
618
619token-tables 2 ta+ token@ token-table2-64 token!
620token-tables 2 ta+ !null-token
621
622headers
623also fcode32 definitions
624fload ${BP}/pkg/fcode/primlist.fth \ Codes for kernel primitives
625fload ${BP}/pkg/fcode/sysprims-nofb.fth \ Codes for system primitives
626fload ${BP}/pkg/fcode/obsfcod2.fth
627fload ${BP}/pkg/fcode/sysprm64.fth
628fload ${BP}/pkg/fcode/regcodes.fth
629previous definitions
630
631token-tables 0 ta+ token@ token-table0-32 token!
632token-tables 2 ta+ token@ token-table2-32 token!
633
634headers
635: fcode-32 ( -- )
636 token-table0-32 token@ token-tables 0 ta+ token!
637 token-table2-32 token@ token-tables 2 ta+ token!
638;
639
640: fcode-64 ( -- )
641 token-table0-64 token@ token-tables 0 ta+ token!
642 token-table2-64 token@ token-tables 2 ta+ token!
643;
644
645fcode-64
646
647stand-init: Chose fcode32 mode
648 fcode-32
649;