Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | # Assembler.pm |
2 | # | |
3 | # Copyright (c) 1996 Malcolm Beattie | |
4 | # | |
5 | # You may distribute under the terms of either the GNU General Public | |
6 | # License or the Artistic License, as specified in the README file. | |
7 | ||
8 | package B::Assembler; | |
9 | use Exporter; | |
10 | use B qw(ppname); | |
11 | use B::Asmdata qw(%insn_data @insn_name); | |
12 | use Config qw(%Config); | |
13 | require ByteLoader; # we just need its $VERSIOM | |
14 | ||
15 | @ISA = qw(Exporter); | |
16 | @EXPORT_OK = qw(assemble_fh newasm endasm assemble); | |
17 | $VERSION = 0.04; | |
18 | ||
19 | use strict; | |
20 | my %opnumber; | |
21 | my ($i, $opname); | |
22 | for ($i = 0; defined($opname = ppname($i)); $i++) { | |
23 | $opnumber{$opname} = $i; | |
24 | } | |
25 | ||
26 | my($linenum, $errors, $out); # global state, set up by newasm | |
27 | ||
28 | sub error { | |
29 | my $str = shift; | |
30 | warn "$linenum: $str\n"; | |
31 | $errors++; | |
32 | } | |
33 | ||
34 | my $debug = 0; | |
35 | sub debug { $debug = shift } | |
36 | ||
37 | sub limcheck($$$$){ | |
38 | my( $val, $lo, $hi, $loc ) = @_; | |
39 | if( $val < $lo || $hi < $val ){ | |
40 | error "argument for $loc outside [$lo, $hi]: $val"; | |
41 | $val = $hi; | |
42 | } | |
43 | return $val; | |
44 | } | |
45 | ||
46 | # | |
47 | # First define all the data conversion subs to which Asmdata will refer | |
48 | # | |
49 | ||
50 | sub B::Asmdata::PUT_U8 { | |
51 | my $arg = shift; | |
52 | my $c = uncstring($arg); | |
53 | if (defined($c)) { | |
54 | if (length($c) != 1) { | |
55 | error "argument for U8 is too long: $c"; | |
56 | $c = substr($c, 0, 1); | |
57 | } | |
58 | } else { | |
59 | $arg = limcheck( $arg, 0, 0xff, 'U8' ); | |
60 | $c = chr($arg); | |
61 | } | |
62 | return $c; | |
63 | } | |
64 | ||
65 | sub B::Asmdata::PUT_U16 { | |
66 | my $arg = limcheck( $_[0], 0, 0xffff, 'U16' ); | |
67 | pack("S", $arg); | |
68 | } | |
69 | sub B::Asmdata::PUT_U32 { | |
70 | my $arg = limcheck( $_[0], 0, 0xffffffff, 'U32' ); | |
71 | pack("L", $arg); | |
72 | } | |
73 | sub B::Asmdata::PUT_I32 { | |
74 | my $arg = limcheck( $_[0], -0x80000000, 0x7fffffff, 'I32' ); | |
75 | pack("l", $arg); | |
76 | } | |
77 | sub B::Asmdata::PUT_NV { sprintf("%s\0", $_[0]) } # "%lf" looses precision and pack('d',...) | |
78 | # may not even be portable between compilers | |
79 | sub B::Asmdata::PUT_objindex { # could allow names here | |
80 | my $arg = limcheck( $_[0], 0, 0xffffffff, '*index' ); | |
81 | pack("L", $arg); | |
82 | } | |
83 | sub B::Asmdata::PUT_svindex { &B::Asmdata::PUT_objindex } | |
84 | sub B::Asmdata::PUT_opindex { &B::Asmdata::PUT_objindex } | |
85 | sub B::Asmdata::PUT_pvindex { &B::Asmdata::PUT_objindex } | |
86 | ||
87 | sub B::Asmdata::PUT_strconst { | |
88 | my $arg = shift; | |
89 | my $str = uncstring($arg); | |
90 | if (!defined($str)) { | |
91 | error "bad string constant: $arg"; | |
92 | $str = ''; | |
93 | } | |
94 | if ($str =~ s/\0//g) { | |
95 | error "string constant argument contains NUL: $arg"; | |
96 | $str = ''; | |
97 | } | |
98 | return $str . "\0"; | |
99 | } | |
100 | ||
101 | sub B::Asmdata::PUT_pvcontents { | |
102 | my $arg = shift; | |
103 | error "extraneous argument: $arg" if defined $arg; | |
104 | return ""; | |
105 | } | |
106 | sub B::Asmdata::PUT_PV { | |
107 | my $arg = shift; | |
108 | my $str = uncstring($arg); | |
109 | if( ! defined($str) ){ | |
110 | error "bad string argument: $arg"; | |
111 | $str = ''; | |
112 | } | |
113 | return pack("L", length($str)) . $str; | |
114 | } | |
115 | sub B::Asmdata::PUT_comment_t { | |
116 | my $arg = shift; | |
117 | $arg = uncstring($arg); | |
118 | error "bad string argument: $arg" unless defined($arg); | |
119 | if ($arg =~ s/\n//g) { | |
120 | error "comment argument contains linefeed: $arg"; | |
121 | } | |
122 | return $arg . "\n"; | |
123 | } | |
124 | sub B::Asmdata::PUT_double { sprintf("%s\0", $_[0]) } # see PUT_NV above | |
125 | sub B::Asmdata::PUT_none { | |
126 | my $arg = shift; | |
127 | error "extraneous argument: $arg" if defined $arg; | |
128 | return ""; | |
129 | } | |
130 | sub B::Asmdata::PUT_op_tr_array { | |
131 | my $arg = shift; | |
132 | my @ary = split(/\s*,\s*/, $arg); | |
133 | if (@ary != 256) { | |
134 | error "wrong number of arguments to op_tr_array"; | |
135 | @ary = (0) x 256; | |
136 | } | |
137 | return pack("S256", @ary); | |
138 | } | |
139 | # XXX Check this works | |
140 | # Note: $arg >> 32 is a no-op on 32-bit systems | |
141 | sub B::Asmdata::PUT_IV64 { | |
142 | my $arg = shift; | |
143 | return pack("LL", ($arg >> 16) >>16 , $arg & 0xffffffff); | |
144 | } | |
145 | ||
146 | sub B::Asmdata::PUT_IV { | |
147 | $Config{ivsize} == 4 ? &B::Asmdata::PUT_I32 : &B::Asmdata::PUT_IV64; | |
148 | } | |
149 | ||
150 | my %unesc = (n => "\n", r => "\r", t => "\t", a => "\a", | |
151 | b => "\b", f => "\f", v => "\013"); | |
152 | ||
153 | sub uncstring { | |
154 | my $s = shift; | |
155 | $s =~ s/^"// and $s =~ s/"$// or return undef; | |
156 | $s =~ s/\\(\d\d\d|.)/length($1) == 3 ? chr(oct($1)) : ($unesc{$1}||$1)/eg; | |
157 | return $s; | |
158 | } | |
159 | ||
160 | sub strip_comments { | |
161 | my $stmt = shift; | |
162 | # Comments only allowed in instructions which don't take string arguments | |
163 | # Treat string as a single line so .* eats \n characters. | |
164 | $stmt =~ s{ | |
165 | ^\s* # Ignore leading whitespace | |
166 | ( | |
167 | [^"]* # A double quote '"' indicates a string argument. If we | |
168 | # find a double quote, the match fails and we strip nothing. | |
169 | ) | |
170 | \s*\# # Any amount of whitespace plus the comment marker... | |
171 | .*$ # ...which carries on to end-of-string. | |
172 | }{$1}sx; # Keep only the instruction and optional argument. | |
173 | return $stmt; | |
174 | } | |
175 | ||
176 | # create the ByteCode header: magic, archname, ByteLoader $VERSION, ivsize, | |
177 | # ptrsize, byteorder | |
178 | # nvtype is irrelevant (floats are stored as strings) | |
179 | # byteorder is strconst not U32 because of varying size issues | |
180 | ||
181 | sub gen_header { | |
182 | my $header = ""; | |
183 | ||
184 | $header .= B::Asmdata::PUT_U32(0x43424c50); # 'PLBC' | |
185 | $header .= B::Asmdata::PUT_strconst('"' . $Config{archname}. '"'); | |
186 | $header .= B::Asmdata::PUT_strconst(qq["$ByteLoader::VERSION"]); | |
187 | $header .= B::Asmdata::PUT_U32($Config{ivsize}); | |
188 | $header .= B::Asmdata::PUT_U32($Config{ptrsize}); | |
189 | $header .= B::Asmdata::PUT_strconst(sprintf(qq["0x%s"], $Config{byteorder})); | |
190 | ||
191 | $header; | |
192 | } | |
193 | ||
194 | sub parse_statement { | |
195 | my $stmt = shift; | |
196 | my ($insn, $arg) = $stmt =~ m{ | |
197 | ^\s* # allow (but ignore) leading whitespace | |
198 | (.*?) # Instruction continues up until... | |
199 | (?: # ...an optional whitespace+argument group | |
200 | \s+ # first whitespace. | |
201 | (.*) # The argument is all the rest (newlines included). | |
202 | )?$ # anchor at end-of-line | |
203 | }sx; | |
204 | if (defined($arg)) { | |
205 | if ($arg =~ s/^0x(?=[0-9a-fA-F]+$)//) { | |
206 | $arg = hex($arg); | |
207 | } elsif ($arg =~ s/^0(?=[0-7]+$)//) { | |
208 | $arg = oct($arg); | |
209 | } elsif ($arg =~ /^pp_/) { | |
210 | $arg =~ s/\s*$//; # strip trailing whitespace | |
211 | my $opnum = $opnumber{$arg}; | |
212 | if (defined($opnum)) { | |
213 | $arg = $opnum; | |
214 | } else { | |
215 | error qq(No such op type "$arg"); | |
216 | $arg = 0; | |
217 | } | |
218 | } | |
219 | } | |
220 | return ($insn, $arg); | |
221 | } | |
222 | ||
223 | sub assemble_insn { | |
224 | my ($insn, $arg) = @_; | |
225 | my $data = $insn_data{$insn}; | |
226 | if (defined($data)) { | |
227 | my ($bytecode, $putsub) = @{$data}[0, 1]; | |
228 | my $argcode = &$putsub($arg); | |
229 | return chr($bytecode).$argcode; | |
230 | } else { | |
231 | error qq(no such instruction "$insn"); | |
232 | return ""; | |
233 | } | |
234 | } | |
235 | ||
236 | sub assemble_fh { | |
237 | my ($fh, $out) = @_; | |
238 | my $line; | |
239 | my $asm = newasm($out); | |
240 | while ($line = <$fh>) { | |
241 | assemble($line); | |
242 | } | |
243 | endasm(); | |
244 | } | |
245 | ||
246 | sub newasm { | |
247 | my($outsub) = @_; | |
248 | ||
249 | die "Invalid printing routine for B::Assembler\n" unless ref $outsub eq 'CODE'; | |
250 | die <<EOD if ref $out; | |
251 | Can't have multiple byteassembly sessions at once! | |
252 | (perhaps you forgot an endasm()?) | |
253 | EOD | |
254 | ||
255 | $linenum = $errors = 0; | |
256 | $out = $outsub; | |
257 | ||
258 | $out->(gen_header()); | |
259 | } | |
260 | ||
261 | sub endasm { | |
262 | if ($errors) { | |
263 | die "There were $errors assembly errors\n"; | |
264 | } | |
265 | $linenum = $errors = $out = 0; | |
266 | } | |
267 | ||
268 | sub assemble { | |
269 | my($line) = @_; | |
270 | my ($insn, $arg); | |
271 | $linenum++; | |
272 | chomp $line; | |
273 | if ($debug) { | |
274 | my $quotedline = $line; | |
275 | $quotedline =~ s/\\/\\\\/g; | |
276 | $quotedline =~ s/"/\\"/g; | |
277 | $out->(assemble_insn("comment", qq("$quotedline"))); | |
278 | } | |
279 | if( $line = strip_comments($line) ){ | |
280 | ($insn, $arg) = parse_statement($line); | |
281 | $out->(assemble_insn($insn, $arg)); | |
282 | if ($debug) { | |
283 | $out->(assemble_insn("nop", undef)); | |
284 | } | |
285 | } | |
286 | } | |
287 | ||
288 | 1; | |
289 | ||
290 | __END__ | |
291 | ||
292 | =head1 NAME | |
293 | ||
294 | B::Assembler - Assemble Perl bytecode | |
295 | ||
296 | =head1 SYNOPSIS | |
297 | ||
298 | use B::Assembler qw(newasm endasm assemble); | |
299 | newasm(\&printsub); # sets up for assembly | |
300 | assemble($buf); # assembles one line | |
301 | endasm(); # closes down | |
302 | ||
303 | use B::Assembler qw(assemble_fh); | |
304 | assemble_fh($fh, \&printsub); # assemble everything in $fh | |
305 | ||
306 | =head1 DESCRIPTION | |
307 | ||
308 | See F<ext/B/B/Assembler.pm>. | |
309 | ||
310 | =head1 AUTHORS | |
311 | ||
312 | Malcolm Beattie, C<mbeattie@sable.ox.ac.uk> | |
313 | Per-statement interface by Benjamin Stuhl, C<sho_pi@hotmail.com> | |
314 | ||
315 | =cut |