| 1 | # Assembler.pm |
| 2 | # |
| 3 | # Copyright (c) 1996 Malcolm Beattie |
| 4 | # |
| 5 | # You may distribute under the terms of either the GNU General Public |
| 6 | # License or the Artistic License, as specified in the README file. |
| 7 | |
| 8 | package B::Assembler; |
| 9 | use Exporter; |
| 10 | use B qw(ppname); |
| 11 | use B::Asmdata qw(%insn_data @insn_name); |
| 12 | use Config qw(%Config); |
| 13 | require ByteLoader; # we just need its $VERSIOM |
| 14 | |
| 15 | @ISA = qw(Exporter); |
| 16 | @EXPORT_OK = qw(assemble_fh newasm endasm assemble); |
| 17 | $VERSION = 0.04; |
| 18 | |
| 19 | use strict; |
| 20 | my %opnumber; |
| 21 | my ($i, $opname); |
| 22 | for ($i = 0; defined($opname = ppname($i)); $i++) { |
| 23 | $opnumber{$opname} = $i; |
| 24 | } |
| 25 | |
| 26 | my($linenum, $errors, $out); # global state, set up by newasm |
| 27 | |
| 28 | sub error { |
| 29 | my $str = shift; |
| 30 | warn "$linenum: $str\n"; |
| 31 | $errors++; |
| 32 | } |
| 33 | |
| 34 | my $debug = 0; |
| 35 | sub debug { $debug = shift } |
| 36 | |
| 37 | sub limcheck($$$$){ |
| 38 | my( $val, $lo, $hi, $loc ) = @_; |
| 39 | if( $val < $lo || $hi < $val ){ |
| 40 | error "argument for $loc outside [$lo, $hi]: $val"; |
| 41 | $val = $hi; |
| 42 | } |
| 43 | return $val; |
| 44 | } |
| 45 | |
| 46 | # |
| 47 | # First define all the data conversion subs to which Asmdata will refer |
| 48 | # |
| 49 | |
| 50 | sub B::Asmdata::PUT_U8 { |
| 51 | my $arg = shift; |
| 52 | my $c = uncstring($arg); |
| 53 | if (defined($c)) { |
| 54 | if (length($c) != 1) { |
| 55 | error "argument for U8 is too long: $c"; |
| 56 | $c = substr($c, 0, 1); |
| 57 | } |
| 58 | } else { |
| 59 | $arg = limcheck( $arg, 0, 0xff, 'U8' ); |
| 60 | $c = chr($arg); |
| 61 | } |
| 62 | return $c; |
| 63 | } |
| 64 | |
| 65 | sub B::Asmdata::PUT_U16 { |
| 66 | my $arg = limcheck( $_[0], 0, 0xffff, 'U16' ); |
| 67 | pack("S", $arg); |
| 68 | } |
| 69 | sub B::Asmdata::PUT_U32 { |
| 70 | my $arg = limcheck( $_[0], 0, 0xffffffff, 'U32' ); |
| 71 | pack("L", $arg); |
| 72 | } |
| 73 | sub B::Asmdata::PUT_I32 { |
| 74 | my $arg = limcheck( $_[0], -0x80000000, 0x7fffffff, 'I32' ); |
| 75 | pack("l", $arg); |
| 76 | } |
| 77 | sub B::Asmdata::PUT_NV { sprintf("%s\0", $_[0]) } # "%lf" looses precision and pack('d',...) |
| 78 | # may not even be portable between compilers |
| 79 | sub B::Asmdata::PUT_objindex { # could allow names here |
| 80 | my $arg = limcheck( $_[0], 0, 0xffffffff, '*index' ); |
| 81 | pack("L", $arg); |
| 82 | } |
| 83 | sub B::Asmdata::PUT_svindex { &B::Asmdata::PUT_objindex } |
| 84 | sub B::Asmdata::PUT_opindex { &B::Asmdata::PUT_objindex } |
| 85 | sub B::Asmdata::PUT_pvindex { &B::Asmdata::PUT_objindex } |
| 86 | |
| 87 | sub B::Asmdata::PUT_strconst { |
| 88 | my $arg = shift; |
| 89 | my $str = uncstring($arg); |
| 90 | if (!defined($str)) { |
| 91 | error "bad string constant: $arg"; |
| 92 | $str = ''; |
| 93 | } |
| 94 | if ($str =~ s/\0//g) { |
| 95 | error "string constant argument contains NUL: $arg"; |
| 96 | $str = ''; |
| 97 | } |
| 98 | return $str . "\0"; |
| 99 | } |
| 100 | |
| 101 | sub B::Asmdata::PUT_pvcontents { |
| 102 | my $arg = shift; |
| 103 | error "extraneous argument: $arg" if defined $arg; |
| 104 | return ""; |
| 105 | } |
| 106 | sub B::Asmdata::PUT_PV { |
| 107 | my $arg = shift; |
| 108 | my $str = uncstring($arg); |
| 109 | if( ! defined($str) ){ |
| 110 | error "bad string argument: $arg"; |
| 111 | $str = ''; |
| 112 | } |
| 113 | return pack("L", length($str)) . $str; |
| 114 | } |
| 115 | sub B::Asmdata::PUT_comment_t { |
| 116 | my $arg = shift; |
| 117 | $arg = uncstring($arg); |
| 118 | error "bad string argument: $arg" unless defined($arg); |
| 119 | if ($arg =~ s/\n//g) { |
| 120 | error "comment argument contains linefeed: $arg"; |
| 121 | } |
| 122 | return $arg . "\n"; |
| 123 | } |
| 124 | sub B::Asmdata::PUT_double { sprintf("%s\0", $_[0]) } # see PUT_NV above |
| 125 | sub B::Asmdata::PUT_none { |
| 126 | my $arg = shift; |
| 127 | error "extraneous argument: $arg" if defined $arg; |
| 128 | return ""; |
| 129 | } |
| 130 | sub B::Asmdata::PUT_op_tr_array { |
| 131 | my $arg = shift; |
| 132 | my @ary = split(/\s*,\s*/, $arg); |
| 133 | if (@ary != 256) { |
| 134 | error "wrong number of arguments to op_tr_array"; |
| 135 | @ary = (0) x 256; |
| 136 | } |
| 137 | return pack("S256", @ary); |
| 138 | } |
| 139 | # XXX Check this works |
| 140 | # Note: $arg >> 32 is a no-op on 32-bit systems |
| 141 | sub B::Asmdata::PUT_IV64 { |
| 142 | my $arg = shift; |
| 143 | return pack("LL", ($arg >> 16) >>16 , $arg & 0xffffffff); |
| 144 | } |
| 145 | |
| 146 | sub B::Asmdata::PUT_IV { |
| 147 | $Config{ivsize} == 4 ? &B::Asmdata::PUT_I32 : &B::Asmdata::PUT_IV64; |
| 148 | } |
| 149 | |
| 150 | my %unesc = (n => "\n", r => "\r", t => "\t", a => "\a", |
| 151 | b => "\b", f => "\f", v => "\013"); |
| 152 | |
| 153 | sub uncstring { |
| 154 | my $s = shift; |
| 155 | $s =~ s/^"// and $s =~ s/"$// or return undef; |
| 156 | $s =~ s/\\(\d\d\d|.)/length($1) == 3 ? chr(oct($1)) : ($unesc{$1}||$1)/eg; |
| 157 | return $s; |
| 158 | } |
| 159 | |
| 160 | sub strip_comments { |
| 161 | my $stmt = shift; |
| 162 | # Comments only allowed in instructions which don't take string arguments |
| 163 | # Treat string as a single line so .* eats \n characters. |
| 164 | $stmt =~ s{ |
| 165 | ^\s* # Ignore leading whitespace |
| 166 | ( |
| 167 | [^"]* # A double quote '"' indicates a string argument. If we |
| 168 | # find a double quote, the match fails and we strip nothing. |
| 169 | ) |
| 170 | \s*\# # Any amount of whitespace plus the comment marker... |
| 171 | .*$ # ...which carries on to end-of-string. |
| 172 | }{$1}sx; # Keep only the instruction and optional argument. |
| 173 | return $stmt; |
| 174 | } |
| 175 | |
| 176 | # create the ByteCode header: magic, archname, ByteLoader $VERSION, ivsize, |
| 177 | # ptrsize, byteorder |
| 178 | # nvtype is irrelevant (floats are stored as strings) |
| 179 | # byteorder is strconst not U32 because of varying size issues |
| 180 | |
| 181 | sub gen_header { |
| 182 | my $header = ""; |
| 183 | |
| 184 | $header .= B::Asmdata::PUT_U32(0x43424c50); # 'PLBC' |
| 185 | $header .= B::Asmdata::PUT_strconst('"' . $Config{archname}. '"'); |
| 186 | $header .= B::Asmdata::PUT_strconst(qq["$ByteLoader::VERSION"]); |
| 187 | $header .= B::Asmdata::PUT_U32($Config{ivsize}); |
| 188 | $header .= B::Asmdata::PUT_U32($Config{ptrsize}); |
| 189 | $header .= B::Asmdata::PUT_strconst(sprintf(qq["0x%s"], $Config{byteorder})); |
| 190 | |
| 191 | $header; |
| 192 | } |
| 193 | |
| 194 | sub parse_statement { |
| 195 | my $stmt = shift; |
| 196 | my ($insn, $arg) = $stmt =~ m{ |
| 197 | ^\s* # allow (but ignore) leading whitespace |
| 198 | (.*?) # Instruction continues up until... |
| 199 | (?: # ...an optional whitespace+argument group |
| 200 | \s+ # first whitespace. |
| 201 | (.*) # The argument is all the rest (newlines included). |
| 202 | )?$ # anchor at end-of-line |
| 203 | }sx; |
| 204 | if (defined($arg)) { |
| 205 | if ($arg =~ s/^0x(?=[0-9a-fA-F]+$)//) { |
| 206 | $arg = hex($arg); |
| 207 | } elsif ($arg =~ s/^0(?=[0-7]+$)//) { |
| 208 | $arg = oct($arg); |
| 209 | } elsif ($arg =~ /^pp_/) { |
| 210 | $arg =~ s/\s*$//; # strip trailing whitespace |
| 211 | my $opnum = $opnumber{$arg}; |
| 212 | if (defined($opnum)) { |
| 213 | $arg = $opnum; |
| 214 | } else { |
| 215 | error qq(No such op type "$arg"); |
| 216 | $arg = 0; |
| 217 | } |
| 218 | } |
| 219 | } |
| 220 | return ($insn, $arg); |
| 221 | } |
| 222 | |
| 223 | sub assemble_insn { |
| 224 | my ($insn, $arg) = @_; |
| 225 | my $data = $insn_data{$insn}; |
| 226 | if (defined($data)) { |
| 227 | my ($bytecode, $putsub) = @{$data}[0, 1]; |
| 228 | my $argcode = &$putsub($arg); |
| 229 | return chr($bytecode).$argcode; |
| 230 | } else { |
| 231 | error qq(no such instruction "$insn"); |
| 232 | return ""; |
| 233 | } |
| 234 | } |
| 235 | |
| 236 | sub assemble_fh { |
| 237 | my ($fh, $out) = @_; |
| 238 | my $line; |
| 239 | my $asm = newasm($out); |
| 240 | while ($line = <$fh>) { |
| 241 | assemble($line); |
| 242 | } |
| 243 | endasm(); |
| 244 | } |
| 245 | |
| 246 | sub newasm { |
| 247 | my($outsub) = @_; |
| 248 | |
| 249 | die "Invalid printing routine for B::Assembler\n" unless ref $outsub eq 'CODE'; |
| 250 | die <<EOD if ref $out; |
| 251 | Can't have multiple byteassembly sessions at once! |
| 252 | (perhaps you forgot an endasm()?) |
| 253 | EOD |
| 254 | |
| 255 | $linenum = $errors = 0; |
| 256 | $out = $outsub; |
| 257 | |
| 258 | $out->(gen_header()); |
| 259 | } |
| 260 | |
| 261 | sub endasm { |
| 262 | if ($errors) { |
| 263 | die "There were $errors assembly errors\n"; |
| 264 | } |
| 265 | $linenum = $errors = $out = 0; |
| 266 | } |
| 267 | |
| 268 | sub assemble { |
| 269 | my($line) = @_; |
| 270 | my ($insn, $arg); |
| 271 | $linenum++; |
| 272 | chomp $line; |
| 273 | if ($debug) { |
| 274 | my $quotedline = $line; |
| 275 | $quotedline =~ s/\\/\\\\/g; |
| 276 | $quotedline =~ s/"/\\"/g; |
| 277 | $out->(assemble_insn("comment", qq("$quotedline"))); |
| 278 | } |
| 279 | if( $line = strip_comments($line) ){ |
| 280 | ($insn, $arg) = parse_statement($line); |
| 281 | $out->(assemble_insn($insn, $arg)); |
| 282 | if ($debug) { |
| 283 | $out->(assemble_insn("nop", undef)); |
| 284 | } |
| 285 | } |
| 286 | } |
| 287 | |
| 288 | 1; |
| 289 | |
| 290 | __END__ |
| 291 | |
| 292 | =head1 NAME |
| 293 | |
| 294 | B::Assembler - Assemble Perl bytecode |
| 295 | |
| 296 | =head1 SYNOPSIS |
| 297 | |
| 298 | use B::Assembler qw(newasm endasm assemble); |
| 299 | newasm(\&printsub); # sets up for assembly |
| 300 | assemble($buf); # assembles one line |
| 301 | endasm(); # closes down |
| 302 | |
| 303 | use B::Assembler qw(assemble_fh); |
| 304 | assemble_fh($fh, \&printsub); # assemble everything in $fh |
| 305 | |
| 306 | =head1 DESCRIPTION |
| 307 | |
| 308 | See F<ext/B/B/Assembler.pm>. |
| 309 | |
| 310 | =head1 AUTHORS |
| 311 | |
| 312 | Malcolm Beattie, C<mbeattie@sable.ox.ac.uk> |
| 313 | Per-statement interface by Benjamin Stuhl, C<sho_pi@hotmail.com> |
| 314 | |
| 315 | =cut |