Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | package Opcode; |
2 | ||
3 | use 5.006_001; | |
4 | ||
5 | use strict; | |
6 | ||
7 | our($VERSION, $XS_VERSION, @ISA, @EXPORT_OK); | |
8 | ||
9 | $VERSION = "1.06"; | |
10 | $XS_VERSION = "1.03"; | |
11 | ||
12 | use Carp; | |
13 | use Exporter (); | |
14 | use XSLoader (); | |
15 | ||
16 | BEGIN { | |
17 | @ISA = qw(Exporter); | |
18 | @EXPORT_OK = qw( | |
19 | opset ops_to_opset | |
20 | opset_to_ops opset_to_hex invert_opset | |
21 | empty_opset full_opset | |
22 | opdesc opcodes opmask define_optag | |
23 | opmask_add verify_opset opdump | |
24 | ); | |
25 | } | |
26 | ||
27 | sub opset (;@); | |
28 | sub opset_to_hex ($); | |
29 | sub opdump (;$); | |
30 | use subs @EXPORT_OK; | |
31 | ||
32 | XSLoader::load 'Opcode', $XS_VERSION; | |
33 | ||
34 | _init_optags(); | |
35 | ||
36 | sub ops_to_opset { opset @_ } # alias for old name | |
37 | ||
38 | sub opset_to_hex ($) { | |
39 | return "(invalid opset)" unless verify_opset($_[0]); | |
40 | unpack("h*",$_[0]); | |
41 | } | |
42 | ||
43 | sub opdump (;$) { | |
44 | my $pat = shift; | |
45 | # handy utility: perl -MOpcode=opdump -e 'opdump File' | |
46 | foreach(opset_to_ops(full_opset)) { | |
47 | my $op = sprintf " %12s %s\n", $_, opdesc($_); | |
48 | next if defined $pat and $op !~ m/$pat/i; | |
49 | print $op; | |
50 | } | |
51 | } | |
52 | ||
53 | ||
54 | ||
55 | sub _init_optags { | |
56 | my(%all, %seen); | |
57 | @all{opset_to_ops(full_opset)} = (); # keys only | |
58 | ||
59 | local($_); | |
60 | local($/) = "\n=cut"; # skip to optags definition section | |
61 | <DATA>; | |
62 | $/ = "\n="; # now read in 'pod section' chunks | |
63 | while(<DATA>) { | |
64 | next unless m/^item\s+(:\w+)/; | |
65 | my $tag = $1; | |
66 | ||
67 | # Split into lines, keep only indented lines | |
68 | my @lines = grep { m/^\s/ } split(/\n/); | |
69 | foreach (@lines) { s/--.*// } # delete comments | |
70 | my @ops = map { split ' ' } @lines; # get op words | |
71 | ||
72 | foreach(@ops) { | |
73 | warn "$tag - $_ already tagged in $seen{$_}\n" if $seen{$_}; | |
74 | $seen{$_} = $tag; | |
75 | delete $all{$_}; | |
76 | } | |
77 | # opset will croak on invalid names | |
78 | define_optag($tag, opset(@ops)); | |
79 | } | |
80 | close(DATA); | |
81 | warn "Untagged opnames: ".join(' ',keys %all)."\n" if %all; | |
82 | } | |
83 | ||
84 | ||
85 | 1; | |
86 | ||
87 | __DATA__ | |
88 | ||
89 | =head1 NAME | |
90 | ||
91 | Opcode - Disable named opcodes when compiling perl code | |
92 | ||
93 | =head1 SYNOPSIS | |
94 | ||
95 | use Opcode; | |
96 | ||
97 | ||
98 | =head1 DESCRIPTION | |
99 | ||
100 | Perl code is always compiled into an internal format before execution. | |
101 | ||
102 | Evaluating perl code (e.g. via "eval" or "do 'file'") causes | |
103 | the code to be compiled into an internal format and then, | |
104 | provided there was no error in the compilation, executed. | |
105 | The internal format is based on many distinct I<opcodes>. | |
106 | ||
107 | By default no opmask is in effect and any code can be compiled. | |
108 | ||
109 | The Opcode module allow you to define an I<operator mask> to be in | |
110 | effect when perl I<next> compiles any code. Attempting to compile code | |
111 | which contains a masked opcode will cause the compilation to fail | |
112 | with an error. The code will not be executed. | |
113 | ||
114 | =head1 NOTE | |
115 | ||
116 | The Opcode module is not usually used directly. See the ops pragma and | |
117 | Safe modules for more typical uses. | |
118 | ||
119 | =head1 WARNING | |
120 | ||
121 | The authors make B<no warranty>, implied or otherwise, about the | |
122 | suitability of this software for safety or security purposes. | |
123 | ||
124 | The authors shall not in any case be liable for special, incidental, | |
125 | consequential, indirect or other similar damages arising from the use | |
126 | of this software. | |
127 | ||
128 | Your mileage will vary. If in any doubt B<do not use it>. | |
129 | ||
130 | ||
131 | =head1 Operator Names and Operator Lists | |
132 | ||
133 | The canonical list of operator names is the contents of the array | |
134 | PL_op_name defined and initialised in file F<opcode.h> of the Perl | |
135 | source distribution (and installed into the perl library). | |
136 | ||
137 | Each operator has both a terse name (its opname) and a more verbose or | |
138 | recognisable descriptive name. The opdesc function can be used to | |
139 | return a list of descriptions for a list of operators. | |
140 | ||
141 | Many of the functions and methods listed below take a list of | |
142 | operators as parameters. Most operator lists can be made up of several | |
143 | types of element. Each element can be one of | |
144 | ||
145 | =over 8 | |
146 | ||
147 | =item an operator name (opname) | |
148 | ||
149 | Operator names are typically small lowercase words like enterloop, | |
150 | leaveloop, last, next, redo etc. Sometimes they are rather cryptic | |
151 | like gv2cv, i_ncmp and ftsvtx. | |
152 | ||
153 | =item an operator tag name (optag) | |
154 | ||
155 | Operator tags can be used to refer to groups (or sets) of operators. | |
156 | Tag names always begin with a colon. The Opcode module defines several | |
157 | optags and the user can define others using the define_optag function. | |
158 | ||
159 | =item a negated opname or optag | |
160 | ||
161 | An opname or optag can be prefixed with an exclamation mark, e.g., !mkdir. | |
162 | Negating an opname or optag means remove the corresponding ops from the | |
163 | accumulated set of ops at that point. | |
164 | ||
165 | =item an operator set (opset) | |
166 | ||
167 | An I<opset> as a binary string of approximately 44 bytes which holds a | |
168 | set or zero or more operators. | |
169 | ||
170 | The opset and opset_to_ops functions can be used to convert from | |
171 | a list of operators to an opset and I<vice versa>. | |
172 | ||
173 | Wherever a list of operators can be given you can use one or more opsets. | |
174 | See also Manipulating Opsets below. | |
175 | ||
176 | =back | |
177 | ||
178 | ||
179 | =head1 Opcode Functions | |
180 | ||
181 | The Opcode package contains functions for manipulating operator names | |
182 | tags and sets. All are available for export by the package. | |
183 | ||
184 | =over 8 | |
185 | ||
186 | =item opcodes | |
187 | ||
188 | In a scalar context opcodes returns the number of opcodes in this | |
189 | version of perl (around 350 for perl-5.7.0). | |
190 | ||
191 | In a list context it returns a list of all the operator names. | |
192 | (Not yet implemented, use @names = opset_to_ops(full_opset).) | |
193 | ||
194 | =item opset (OP, ...) | |
195 | ||
196 | Returns an opset containing the listed operators. | |
197 | ||
198 | =item opset_to_ops (OPSET) | |
199 | ||
200 | Returns a list of operator names corresponding to those operators in | |
201 | the set. | |
202 | ||
203 | =item opset_to_hex (OPSET) | |
204 | ||
205 | Returns a string representation of an opset. Can be handy for debugging. | |
206 | ||
207 | =item full_opset | |
208 | ||
209 | Returns an opset which includes all operators. | |
210 | ||
211 | =item empty_opset | |
212 | ||
213 | Returns an opset which contains no operators. | |
214 | ||
215 | =item invert_opset (OPSET) | |
216 | ||
217 | Returns an opset which is the inverse set of the one supplied. | |
218 | ||
219 | =item verify_opset (OPSET, ...) | |
220 | ||
221 | Returns true if the supplied opset looks like a valid opset (is the | |
222 | right length etc) otherwise it returns false. If an optional second | |
223 | parameter is true then verify_opset will croak on an invalid opset | |
224 | instead of returning false. | |
225 | ||
226 | Most of the other Opcode functions call verify_opset automatically | |
227 | and will croak if given an invalid opset. | |
228 | ||
229 | =item define_optag (OPTAG, OPSET) | |
230 | ||
231 | Define OPTAG as a symbolic name for OPSET. Optag names always start | |
232 | with a colon C<:>. | |
233 | ||
234 | The optag name used must not be defined already (define_optag will | |
235 | croak if it is already defined). Optag names are global to the perl | |
236 | process and optag definitions cannot be altered or deleted once | |
237 | defined. | |
238 | ||
239 | It is strongly recommended that applications using Opcode should use a | |
240 | leading capital letter on their tag names since lowercase names are | |
241 | reserved for use by the Opcode module. If using Opcode within a module | |
242 | you should prefix your tags names with the name of your module to | |
243 | ensure uniqueness and thus avoid clashes with other modules. | |
244 | ||
245 | =item opmask_add (OPSET) | |
246 | ||
247 | Adds the supplied opset to the current opmask. Note that there is | |
248 | currently I<no> mechanism for unmasking ops once they have been masked. | |
249 | This is intentional. | |
250 | ||
251 | =item opmask | |
252 | ||
253 | Returns an opset corresponding to the current opmask. | |
254 | ||
255 | =item opdesc (OP, ...) | |
256 | ||
257 | This takes a list of operator names and returns the corresponding list | |
258 | of operator descriptions. | |
259 | ||
260 | =item opdump (PAT) | |
261 | ||
262 | Dumps to STDOUT a two column list of op names and op descriptions. | |
263 | If an optional pattern is given then only lines which match the | |
264 | (case insensitive) pattern will be output. | |
265 | ||
266 | It's designed to be used as a handy command line utility: | |
267 | ||
268 | perl -MOpcode=opdump -e opdump | |
269 | perl -MOpcode=opdump -e 'opdump Eval' | |
270 | ||
271 | =back | |
272 | ||
273 | =head1 Manipulating Opsets | |
274 | ||
275 | Opsets may be manipulated using the perl bit vector operators & (and), | (or), | |
276 | ^ (xor) and ~ (negate/invert). | |
277 | ||
278 | However you should never rely on the numerical position of any opcode | |
279 | within the opset. In other words both sides of a bit vector operator | |
280 | should be opsets returned from Opcode functions. | |
281 | ||
282 | Also, since the number of opcodes in your current version of perl might | |
283 | not be an exact multiple of eight, there may be unused bits in the last | |
284 | byte of an upset. This should not cause any problems (Opcode functions | |
285 | ignore those extra bits) but it does mean that using the ~ operator | |
286 | will typically not produce the same 'physical' opset 'string' as the | |
287 | invert_opset function. | |
288 | ||
289 | ||
290 | =head1 TO DO (maybe) | |
291 | ||
292 | $bool = opset_eq($opset1, $opset2) true if opsets are logically eqiv | |
293 | ||
294 | $yes = opset_can($opset, @ops) true if $opset has all @ops set | |
295 | ||
296 | @diff = opset_diff($opset1, $opset2) => ('foo', '!bar', ...) | |
297 | ||
298 | =cut | |
299 | ||
300 | # the =cut above is used by _init_optags() to get here quickly | |
301 | ||
302 | =head1 Predefined Opcode Tags | |
303 | ||
304 | =over 5 | |
305 | ||
306 | =item :base_core | |
307 | ||
308 | null stub scalar pushmark wantarray const defined undef | |
309 | ||
310 | rv2sv sassign | |
311 | ||
312 | rv2av aassign aelem aelemfast aslice av2arylen | |
313 | ||
314 | rv2hv helem hslice each values keys exists delete | |
315 | ||
316 | preinc i_preinc predec i_predec postinc i_postinc postdec i_postdec | |
317 | int hex oct abs pow multiply i_multiply divide i_divide | |
318 | modulo i_modulo add i_add subtract i_subtract | |
319 | ||
320 | left_shift right_shift bit_and bit_xor bit_or negate i_negate | |
321 | not complement | |
322 | ||
323 | lt i_lt gt i_gt le i_le ge i_ge eq i_eq ne i_ne ncmp i_ncmp | |
324 | slt sgt sle sge seq sne scmp | |
325 | ||
326 | substr vec stringify study pos length index rindex ord chr | |
327 | ||
328 | ucfirst lcfirst uc lc quotemeta trans chop schop chomp schomp | |
329 | ||
330 | match split qr | |
331 | ||
332 | list lslice splice push pop shift unshift reverse | |
333 | ||
334 | cond_expr flip flop andassign orassign and or xor | |
335 | ||
336 | warn die lineseq nextstate scope enter leave setstate | |
337 | ||
338 | rv2cv anoncode prototype | |
339 | ||
340 | entersub leavesub leavesublv return method method_named -- XXX loops via recursion? | |
341 | ||
342 | leaveeval -- needed for Safe to operate, is safe without entereval | |
343 | ||
344 | =item :base_mem | |
345 | ||
346 | These memory related ops are not included in :base_core because they | |
347 | can easily be used to implement a resource attack (e.g., consume all | |
348 | available memory). | |
349 | ||
350 | concat repeat join range | |
351 | ||
352 | anonlist anonhash | |
353 | ||
354 | Note that despite the existence of this optag a memory resource attack | |
355 | may still be possible using only :base_core ops. | |
356 | ||
357 | Disabling these ops is a I<very> heavy handed way to attempt to prevent | |
358 | a memory resource attack. It's probable that a specific memory limit | |
359 | mechanism will be added to perl in the near future. | |
360 | ||
361 | =item :base_loop | |
362 | ||
363 | These loop ops are not included in :base_core because they can easily be | |
364 | used to implement a resource attack (e.g., consume all available CPU time). | |
365 | ||
366 | grepstart grepwhile | |
367 | mapstart mapwhile | |
368 | enteriter iter | |
369 | enterloop leaveloop unstack | |
370 | last next redo | |
371 | goto | |
372 | ||
373 | =item :base_io | |
374 | ||
375 | These ops enable I<filehandle> (rather than filename) based input and | |
376 | output. These are safe on the assumption that only pre-existing | |
377 | filehandles are available for use. To create new filehandles other ops | |
378 | such as open would need to be enabled. | |
379 | ||
380 | readline rcatline getc read | |
381 | ||
382 | formline enterwrite leavewrite | |
383 | ||
384 | print sysread syswrite send recv | |
385 | ||
386 | eof tell seek sysseek | |
387 | ||
388 | readdir telldir seekdir rewinddir | |
389 | ||
390 | =item :base_orig | |
391 | ||
392 | These are a hotchpotch of opcodes still waiting to be considered | |
393 | ||
394 | gvsv gv gelem | |
395 | ||
396 | padsv padav padhv padany | |
397 | ||
398 | rv2gv refgen srefgen ref | |
399 | ||
400 | bless -- could be used to change ownership of objects (reblessing) | |
401 | ||
402 | pushre regcmaybe regcreset regcomp subst substcont | |
403 | ||
404 | sprintf prtf -- can core dump | |
405 | ||
406 | crypt | |
407 | ||
408 | tie untie | |
409 | ||
410 | dbmopen dbmclose | |
411 | sselect select | |
412 | pipe_op sockpair | |
413 | ||
414 | getppid getpgrp setpgrp getpriority setpriority localtime gmtime | |
415 | ||
416 | entertry leavetry -- can be used to 'hide' fatal errors | |
417 | ||
418 | custom -- where should this go | |
419 | ||
420 | =item :base_math | |
421 | ||
422 | These ops are not included in :base_core because of the risk of them being | |
423 | used to generate floating point exceptions (which would have to be caught | |
424 | using a $SIG{FPE} handler). | |
425 | ||
426 | atan2 sin cos exp log sqrt | |
427 | ||
428 | These ops are not included in :base_core because they have an effect | |
429 | beyond the scope of the compartment. | |
430 | ||
431 | rand srand | |
432 | ||
433 | =item :base_thread | |
434 | ||
435 | These ops are related to multi-threading. | |
436 | ||
437 | lock threadsv | |
438 | ||
439 | =item :default | |
440 | ||
441 | A handy tag name for a I<reasonable> default set of ops. (The current ops | |
442 | allowed are unstable while development continues. It will change.) | |
443 | ||
444 | :base_core :base_mem :base_loop :base_io :base_orig :base_thread | |
445 | ||
446 | If safety matters to you (and why else would you be using the Opcode module?) | |
447 | then you should not rely on the definition of this, or indeed any other, optag! | |
448 | ||
449 | ||
450 | =item :filesys_read | |
451 | ||
452 | stat lstat readlink | |
453 | ||
454 | ftatime ftblk ftchr ftctime ftdir fteexec fteowned fteread | |
455 | ftewrite ftfile ftis ftlink ftmtime ftpipe ftrexec ftrowned | |
456 | ftrread ftsgid ftsize ftsock ftsuid fttty ftzero ftrwrite ftsvtx | |
457 | ||
458 | fttext ftbinary | |
459 | ||
460 | fileno | |
461 | ||
462 | =item :sys_db | |
463 | ||
464 | ghbyname ghbyaddr ghostent shostent ehostent -- hosts | |
465 | gnbyname gnbyaddr gnetent snetent enetent -- networks | |
466 | gpbyname gpbynumber gprotoent sprotoent eprotoent -- protocols | |
467 | gsbyname gsbyport gservent sservent eservent -- services | |
468 | ||
469 | gpwnam gpwuid gpwent spwent epwent getlogin -- users | |
470 | ggrnam ggrgid ggrent sgrent egrent -- groups | |
471 | ||
472 | =item :browse | |
473 | ||
474 | A handy tag name for a I<reasonable> default set of ops beyond the | |
475 | :default optag. Like :default (and indeed all the other optags) its | |
476 | current definition is unstable while development continues. It will change. | |
477 | ||
478 | The :browse tag represents the next step beyond :default. It it a | |
479 | superset of the :default ops and adds :filesys_read the :sys_db. | |
480 | The intent being that scripts can access more (possibly sensitive) | |
481 | information about your system but not be able to change it. | |
482 | ||
483 | :default :filesys_read :sys_db | |
484 | ||
485 | =item :filesys_open | |
486 | ||
487 | sysopen open close | |
488 | umask binmode | |
489 | ||
490 | open_dir closedir -- other dir ops are in :base_io | |
491 | ||
492 | =item :filesys_write | |
493 | ||
494 | link unlink rename symlink truncate | |
495 | ||
496 | mkdir rmdir | |
497 | ||
498 | utime chmod chown | |
499 | ||
500 | fcntl -- not strictly filesys related, but possibly as dangerous? | |
501 | ||
502 | =item :subprocess | |
503 | ||
504 | backtick system | |
505 | ||
506 | fork | |
507 | ||
508 | wait waitpid | |
509 | ||
510 | glob -- access to Cshell via <`rm *`> | |
511 | ||
512 | =item :ownprocess | |
513 | ||
514 | exec exit kill | |
515 | ||
516 | time tms -- could be used for timing attacks (paranoid?) | |
517 | ||
518 | =item :others | |
519 | ||
520 | This tag holds groups of assorted specialist opcodes that don't warrant | |
521 | having optags defined for them. | |
522 | ||
523 | SystemV Interprocess Communications: | |
524 | ||
525 | msgctl msgget msgrcv msgsnd | |
526 | ||
527 | semctl semget semop | |
528 | ||
529 | shmctl shmget shmread shmwrite | |
530 | ||
531 | =item :still_to_be_decided | |
532 | ||
533 | chdir | |
534 | flock ioctl | |
535 | ||
536 | socket getpeername ssockopt | |
537 | bind connect listen accept shutdown gsockopt getsockname | |
538 | ||
539 | sleep alarm -- changes global timer state and signal handling | |
540 | sort -- assorted problems including core dumps | |
541 | tied -- can be used to access object implementing a tie | |
542 | pack unpack -- can be used to create/use memory pointers | |
543 | ||
544 | entereval -- can be used to hide code from initial compile | |
545 | require dofile | |
546 | ||
547 | caller -- get info about calling environment and args | |
548 | ||
549 | reset | |
550 | ||
551 | dbstate -- perl -d version of nextstate(ment) opcode | |
552 | ||
553 | =item :dangerous | |
554 | ||
555 | This tag is simply a bucket for opcodes that are unlikely to be used via | |
556 | a tag name but need to be tagged for completeness and documentation. | |
557 | ||
558 | syscall dump chroot | |
559 | ||
560 | ||
561 | =back | |
562 | ||
563 | =head1 SEE ALSO | |
564 | ||
565 | ops(3) -- perl pragma interface to Opcode module. | |
566 | ||
567 | Safe(3) -- Opcode and namespace limited execution compartments | |
568 | ||
569 | =head1 AUTHORS | |
570 | ||
571 | Originally designed and implemented by Malcolm Beattie, | |
572 | mbeattie@sable.ox.ac.uk as part of Safe version 1. | |
573 | ||
574 | Split out from Safe module version 1, named opcode tags and other | |
575 | changes added by Tim Bunce. | |
576 | ||
577 | =cut | |
578 |