Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "PERLFILTER 1" | |
132 | .TH PERLFILTER 1 "2006-01-07" "perl v5.8.8" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | perlfilter \- Source Filters | |
135 | .SH "DESCRIPTION" | |
136 | .IX Header "DESCRIPTION" | |
137 | This article is about a little-known feature of Perl called | |
138 | \&\fIsource filters\fR. Source filters alter the program text of a module | |
139 | before Perl sees it, much as a C preprocessor alters the source text of | |
140 | a C program before the compiler sees it. This article tells you more | |
141 | about what source filters are, how they work, and how to write your | |
142 | own. | |
143 | .PP | |
144 | The original purpose of source filters was to let you encrypt your | |
145 | program source to prevent casual piracy. This isn't all they can do, as | |
146 | you'll soon learn. But first, the basics. | |
147 | .SH "CONCEPTS" | |
148 | .IX Header "CONCEPTS" | |
149 | Before the Perl interpreter can execute a Perl script, it must first | |
150 | read it from a file into memory for parsing and compilation. If that | |
151 | script itself includes other scripts with a \f(CW\*(C`use\*(C'\fR or \f(CW\*(C`require\*(C'\fR | |
152 | statement, then each of those scripts will have to be read from their | |
153 | respective files as well. | |
154 | .PP | |
155 | Now think of each logical connection between the Perl parser and an | |
156 | individual file as a \fIsource stream\fR. A source stream is created when | |
157 | the Perl parser opens a file, it continues to exist as the source code | |
158 | is read into memory, and it is destroyed when Perl is finished parsing | |
159 | the file. If the parser encounters a \f(CW\*(C`require\*(C'\fR or \f(CW\*(C`use\*(C'\fR statement in | |
160 | a source stream, a new and distinct stream is created just for that | |
161 | file. | |
162 | .PP | |
163 | The diagram below represents a single source stream, with the flow of | |
164 | source from a Perl script file on the left into the Perl parser on the | |
165 | right. This is how Perl normally operates. | |
166 | .PP | |
167 | .Vb 1 | |
168 | \& file -------> parser | |
169 | .Ve | |
170 | .PP | |
171 | There are two important points to remember: | |
172 | .IP "1." 5 | |
173 | Although there can be any number of source streams in existence at any | |
174 | given time, only one will be active. | |
175 | .IP "2." 5 | |
176 | Every source stream is associated with only one file. | |
177 | .PP | |
178 | A source filter is a special kind of Perl module that intercepts and | |
179 | modifies a source stream before it reaches the parser. A source filter | |
180 | changes our diagram like this: | |
181 | .PP | |
182 | .Vb 1 | |
183 | \& file ----> filter ----> parser | |
184 | .Ve | |
185 | .PP | |
186 | If that doesn't make much sense, consider the analogy of a command | |
187 | pipeline. Say you have a shell script stored in the compressed file | |
188 | \&\fItrial.gz\fR. The simple pipeline command below runs the script without | |
189 | needing to create a temporary file to hold the uncompressed file. | |
190 | .PP | |
191 | .Vb 1 | |
192 | \& gunzip -c trial.gz | sh | |
193 | .Ve | |
194 | .PP | |
195 | In this case, the data flow from the pipeline can be represented as follows: | |
196 | .PP | |
197 | .Vb 1 | |
198 | \& trial.gz ----> gunzip ----> sh | |
199 | .Ve | |
200 | .PP | |
201 | With source filters, you can store the text of your script compressed and use a source filter to uncompress it for Perl's parser: | |
202 | .PP | |
203 | .Vb 2 | |
204 | \& compressed gunzip | |
205 | \& Perl program ---> source filter ---> parser | |
206 | .Ve | |
207 | .SH "USING FILTERS" | |
208 | .IX Header "USING FILTERS" | |
209 | So how do you use a source filter in a Perl script? Above, I said that | |
210 | a source filter is just a special kind of module. Like all Perl | |
211 | modules, a source filter is invoked with a use statement. | |
212 | .PP | |
213 | Say you want to pass your Perl source through the C preprocessor before | |
214 | execution. You could use the existing \f(CW\*(C`\-P\*(C'\fR command line option to do | |
215 | this, but as it happens, the source filters distribution comes with a C | |
216 | preprocessor filter module called Filter::cpp. Let's use that instead. | |
217 | .PP | |
218 | Below is an example program, \f(CW\*(C`cpp_test\*(C'\fR, which makes use of this filter. | |
219 | Line numbers have been added to allow specific lines to be referenced | |
220 | easily. | |
221 | .PP | |
222 | .Vb 4 | |
223 | \& 1: use Filter::cpp; | |
224 | \& 2: #define TRUE 1 | |
225 | \& 3: $a = TRUE; | |
226 | \& 4: print "a = $a\en"; | |
227 | .Ve | |
228 | .PP | |
229 | When you execute this script, Perl creates a source stream for the | |
230 | file. Before the parser processes any of the lines from the file, the | |
231 | source stream looks like this: | |
232 | .PP | |
233 | .Vb 1 | |
234 | \& cpp_test ---------> parser | |
235 | .Ve | |
236 | .PP | |
237 | Line 1, \f(CW\*(C`use Filter::cpp\*(C'\fR, includes and installs the \f(CW\*(C`cpp\*(C'\fR filter | |
238 | module. All source filters work this way. The use statement is compiled | |
239 | and executed at compile time, before any more of the file is read, and | |
240 | it attaches the cpp filter to the source stream behind the scenes. Now | |
241 | the data flow looks like this: | |
242 | .PP | |
243 | .Vb 1 | |
244 | \& cpp_test ----> cpp filter ----> parser | |
245 | .Ve | |
246 | .PP | |
247 | As the parser reads the second and subsequent lines from the source | |
248 | stream, it feeds those lines through the \f(CW\*(C`cpp\*(C'\fR source filter before | |
249 | processing them. The \f(CW\*(C`cpp\*(C'\fR filter simply passes each line through the | |
250 | real C preprocessor. The output from the C preprocessor is then | |
251 | inserted back into the source stream by the filter. | |
252 | .PP | |
253 | .Vb 5 | |
254 | \& .-> cpp --. | |
255 | \& | | | |
256 | \& | | | |
257 | \& | <-' | |
258 | \& cpp_test ----> cpp filter ----> parser | |
259 | .Ve | |
260 | .PP | |
261 | The parser then sees the following code: | |
262 | .PP | |
263 | .Vb 3 | |
264 | \& use Filter::cpp; | |
265 | \& $a = 1; | |
266 | \& print "a = $a\en"; | |
267 | .Ve | |
268 | .PP | |
269 | Let's consider what happens when the filtered code includes another | |
270 | module with use: | |
271 | .PP | |
272 | .Vb 5 | |
273 | \& 1: use Filter::cpp; | |
274 | \& 2: #define TRUE 1 | |
275 | \& 3: use Fred; | |
276 | \& 4: $a = TRUE; | |
277 | \& 5: print "a = $a\en"; | |
278 | .Ve | |
279 | .PP | |
280 | The \f(CW\*(C`cpp\*(C'\fR filter does not apply to the text of the Fred module, only | |
281 | to the text of the file that used it (\f(CW\*(C`cpp_test\*(C'\fR). Although the use | |
282 | statement on line 3 will pass through the cpp filter, the module that | |
283 | gets included (\f(CW\*(C`Fred\*(C'\fR) will not. The source streams look like this | |
284 | after line 3 has been parsed and before line 4 is parsed: | |
285 | .PP | |
286 | .Vb 1 | |
287 | \& cpp_test ---> cpp filter ---> parser (INACTIVE) | |
288 | .Ve | |
289 | .PP | |
290 | .Vb 1 | |
291 | \& Fred.pm ----> parser | |
292 | .Ve | |
293 | .PP | |
294 | As you can see, a new stream has been created for reading the source | |
295 | from \f(CW\*(C`Fred.pm\*(C'\fR. This stream will remain active until all of \f(CW\*(C`Fred.pm\*(C'\fR | |
296 | has been parsed. The source stream for \f(CW\*(C`cpp_test\*(C'\fR will still exist, | |
297 | but is inactive. Once the parser has finished reading Fred.pm, the | |
298 | source stream associated with it will be destroyed. The source stream | |
299 | for \f(CW\*(C`cpp_test\*(C'\fR then becomes active again and the parser reads line 4 | |
300 | and subsequent lines from \f(CW\*(C`cpp_test\*(C'\fR. | |
301 | .PP | |
302 | You can use more than one source filter on a single file. Similarly, | |
303 | you can reuse the same filter in as many files as you like. | |
304 | .PP | |
305 | For example, if you have a uuencoded and compressed source file, it is | |
306 | possible to stack a uudecode filter and an uncompression filter like | |
307 | this: | |
308 | .PP | |
309 | .Vb 4 | |
310 | \& use Filter::uudecode; use Filter::uncompress; | |
311 | \& M'XL(".H<US4''V9I;F%L')Q;>7/;1I;_>_I3=&E=%:F*I"T?22Q/ | |
312 | \& M6]9*<IQCO*XFT"0[PL%%'Y+IG?WN^ZYN-$'J.[.JE$,20/?K=_[> | |
313 | \& ... | |
314 | .Ve | |
315 | .PP | |
316 | Once the first line has been processed, the flow will look like this: | |
317 | .PP | |
318 | .Vb 2 | |
319 | \& file ---> uudecode ---> uncompress ---> parser | |
320 | \& filter filter | |
321 | .Ve | |
322 | .PP | |
323 | Data flows through filters in the same order they appear in the source | |
324 | file. The uudecode filter appeared before the uncompress filter, so the | |
325 | source file will be uudecoded before it's uncompressed. | |
326 | .SH "WRITING A SOURCE FILTER" | |
327 | .IX Header "WRITING A SOURCE FILTER" | |
328 | There are three ways to write your own source filter. You can write it | |
329 | in C, use an external program as a filter, or write the filter in Perl. | |
330 | I won't cover the first two in any great detail, so I'll get them out | |
331 | of the way first. Writing the filter in Perl is most convenient, so | |
332 | I'll devote the most space to it. | |
333 | .SH "WRITING A SOURCE FILTER IN C" | |
334 | .IX Header "WRITING A SOURCE FILTER IN C" | |
335 | The first of the three available techniques is to write the filter | |
336 | completely in C. The external module you create interfaces directly | |
337 | with the source filter hooks provided by Perl. | |
338 | .PP | |
339 | The advantage of this technique is that you have complete control over | |
340 | the implementation of your filter. The big disadvantage is the | |
341 | increased complexity required to write the filter \- not only do you | |
342 | need to understand the source filter hooks, but you also need a | |
343 | reasonable knowledge of Perl guts. One of the few times it is worth | |
344 | going to this trouble is when writing a source scrambler. The | |
345 | \&\f(CW\*(C`decrypt\*(C'\fR filter (which unscrambles the source before Perl parses it) | |
346 | included with the source filter distribution is an example of a C | |
347 | source filter (see Decryption Filters, below). | |
348 | .IP "\fBDecryption Filters\fR" 5 | |
349 | .IX Item "Decryption Filters" | |
350 | All decryption filters work on the principle of \*(L"security through | |
351 | obscurity.\*(R" Regardless of how well you write a decryption filter and | |
352 | how strong your encryption algorithm, anyone determined enough can | |
353 | retrieve the original source code. The reason is quite simple \- once | |
354 | the decryption filter has decrypted the source back to its original | |
355 | form, fragments of it will be stored in the computer's memory as Perl | |
356 | parses it. The source might only be in memory for a short period of | |
357 | time, but anyone possessing a debugger, skill, and lots of patience can | |
358 | eventually reconstruct your program. | |
359 | .Sp | |
360 | That said, there are a number of steps that can be taken to make life | |
361 | difficult for the potential cracker. The most important: Write your | |
362 | decryption filter in C and statically link the decryption module into | |
363 | the Perl binary. For further tips to make life difficult for the | |
364 | potential cracker, see the file \fIdecrypt.pm\fR in the source filters | |
365 | module. | |
366 | .SH "CREATING A SOURCE FILTER AS A SEPARATE EXECUTABLE" | |
367 | .IX Header "CREATING A SOURCE FILTER AS A SEPARATE EXECUTABLE" | |
368 | An alternative to writing the filter in C is to create a separate | |
369 | executable in the language of your choice. The separate executable | |
370 | reads from standard input, does whatever processing is necessary, and | |
371 | writes the filtered data to standard output. \f(CW\*(C`Filter::cpp\*(C'\fR is an | |
372 | example of a source filter implemented as a separate executable \- the | |
373 | executable is the C preprocessor bundled with your C compiler. | |
374 | .PP | |
375 | The source filter distribution includes two modules that simplify this | |
376 | task: \f(CW\*(C`Filter::exec\*(C'\fR and \f(CW\*(C`Filter::sh\*(C'\fR. Both allow you to run any | |
377 | external executable. Both use a coprocess to control the flow of data | |
378 | into and out of the external executable. (For details on coprocesses, | |
379 | see Stevens, W.R. \*(L"Advanced Programming in the \s-1UNIX\s0 Environment.\*(R" | |
380 | Addison\-Wesley, \s-1ISBN\s0 0\-201\-56317\-7, pages 441\-445.) The difference | |
381 | between them is that \f(CW\*(C`Filter::exec\*(C'\fR spawns the external command | |
382 | directly, while \f(CW\*(C`Filter::sh\*(C'\fR spawns a shell to execute the external | |
383 | command. (Unix uses the Bourne shell; \s-1NT\s0 uses the cmd shell.) Spawning | |
384 | a shell allows you to make use of the shell metacharacters and | |
385 | redirection facilities. | |
386 | .PP | |
387 | Here is an example script that uses \f(CW\*(C`Filter::sh\*(C'\fR: | |
388 | .PP | |
389 | .Vb 3 | |
390 | \& use Filter::sh 'tr XYZ PQR'; | |
391 | \& $a = 1; | |
392 | \& print "XYZ a = $a\en"; | |
393 | .Ve | |
394 | .PP | |
395 | The output you'll get when the script is executed: | |
396 | .PP | |
397 | .Vb 1 | |
398 | \& PQR a = 1 | |
399 | .Ve | |
400 | .PP | |
401 | Writing a source filter as a separate executable works fine, but a | |
402 | small performance penalty is incurred. For example, if you execute the | |
403 | small example above, a separate subprocess will be created to run the | |
404 | Unix \f(CW\*(C`tr\*(C'\fR command. Each use of the filter requires its own subprocess. | |
405 | If creating subprocesses is expensive on your system, you might want to | |
406 | consider one of the other options for creating source filters. | |
407 | .SH "WRITING A SOURCE FILTER IN PERL" | |
408 | .IX Header "WRITING A SOURCE FILTER IN PERL" | |
409 | The easiest and most portable option available for creating your own | |
410 | source filter is to write it completely in Perl. To distinguish this | |
411 | from the previous two techniques, I'll call it a Perl source filter. | |
412 | .PP | |
413 | To help understand how to write a Perl source filter we need an example | |
414 | to study. Here is a complete source filter that performs rot13 | |
415 | decoding. (Rot13 is a very simple encryption scheme used in Usenet | |
416 | postings to hide the contents of offensive posts. It moves every letter | |
417 | forward thirteen places, so that A becomes N, B becomes O, and Z | |
418 | becomes M.) | |
419 | .PP | |
420 | .Vb 1 | |
421 | \& package Rot13; | |
422 | .Ve | |
423 | .PP | |
424 | .Vb 1 | |
425 | \& use Filter::Util::Call; | |
426 | .Ve | |
427 | .PP | |
428 | .Vb 5 | |
429 | \& sub import { | |
430 | \& my ($type) = @_; | |
431 | \& my ($ref) = []; | |
432 | \& filter_add(bless $ref); | |
433 | \& } | |
434 | .Ve | |
435 | .PP | |
436 | .Vb 3 | |
437 | \& sub filter { | |
438 | \& my ($self) = @_; | |
439 | \& my ($status); | |
440 | .Ve | |
441 | .PP | |
442 | .Vb 4 | |
443 | \& tr/n-za-mN-ZA-M/a-zA-Z/ | |
444 | \& if ($status = filter_read()) > 0; | |
445 | \& $status; | |
446 | \& } | |
447 | .Ve | |
448 | .PP | |
449 | .Vb 1 | |
450 | \& 1; | |
451 | .Ve | |
452 | .PP | |
453 | All Perl source filters are implemented as Perl classes and have the | |
454 | same basic structure as the example above. | |
455 | .PP | |
456 | First, we include the \f(CW\*(C`Filter::Util::Call\*(C'\fR module, which exports a | |
457 | number of functions into your filter's namespace. The filter shown | |
458 | above uses two of these functions, \f(CW\*(C`filter_add()\*(C'\fR and | |
459 | \&\f(CW\*(C`filter_read()\*(C'\fR. | |
460 | .PP | |
461 | Next, we create the filter object and associate it with the source | |
462 | stream by defining the \f(CW\*(C`import\*(C'\fR function. If you know Perl well | |
463 | enough, you know that \f(CW\*(C`import\*(C'\fR is called automatically every time a | |
464 | module is included with a use statement. This makes \f(CW\*(C`import\*(C'\fR the ideal | |
465 | place to both create and install a filter object. | |
466 | .PP | |
467 | In the example filter, the object (\f(CW$ref\fR) is blessed just like any | |
468 | other Perl object. Our example uses an anonymous array, but this isn't | |
469 | a requirement. Because this example doesn't need to store any context | |
470 | information, we could have used a scalar or hash reference just as | |
471 | well. The next section demonstrates context data. | |
472 | .PP | |
473 | The association between the filter object and the source stream is made | |
474 | with the \f(CW\*(C`filter_add()\*(C'\fR function. This takes a filter object as a | |
475 | parameter (\f(CW$ref\fR in this case) and installs it in the source stream. | |
476 | .PP | |
477 | Finally, there is the code that actually does the filtering. For this | |
478 | type of Perl source filter, all the filtering is done in a method | |
479 | called \f(CW\*(C`filter()\*(C'\fR. (It is also possible to write a Perl source filter | |
480 | using a closure. See the \f(CW\*(C`Filter::Util::Call\*(C'\fR manual page for more | |
481 | details.) It's called every time the Perl parser needs another line of | |
482 | source to process. The \f(CW\*(C`filter()\*(C'\fR method, in turn, reads lines from | |
483 | the source stream using the \f(CW\*(C`filter_read()\*(C'\fR function. | |
484 | .PP | |
485 | If a line was available from the source stream, \f(CW\*(C`filter_read()\*(C'\fR | |
486 | returns a status value greater than zero and appends the line to \f(CW$_\fR. | |
487 | A status value of zero indicates end\-of\-file, less than zero means an | |
488 | error. The filter function itself is expected to return its status in | |
489 | the same way, and put the filtered line it wants written to the source | |
490 | stream in \f(CW$_\fR. The use of \f(CW$_\fR accounts for the brevity of most Perl | |
491 | source filters. | |
492 | .PP | |
493 | In order to make use of the rot13 filter we need some way of encoding | |
494 | the source file in rot13 format. The script below, \f(CW\*(C`mkrot13\*(C'\fR, does | |
495 | just that. | |
496 | .PP | |
497 | .Vb 5 | |
498 | \& die "usage mkrot13 filename\en" unless @ARGV; | |
499 | \& my $in = $ARGV[0]; | |
500 | \& my $out = "$in.tmp"; | |
501 | \& open(IN, "<$in") or die "Cannot open file $in: $!\en"; | |
502 | \& open(OUT, ">$out") or die "Cannot open file $out: $!\en"; | |
503 | .Ve | |
504 | .PP | |
505 | .Vb 5 | |
506 | \& print OUT "use Rot13;\en"; | |
507 | \& while (<IN>) { | |
508 | \& tr/a-zA-Z/n-za-mN-ZA-M/; | |
509 | \& print OUT; | |
510 | \& } | |
511 | .Ve | |
512 | .PP | |
513 | .Vb 4 | |
514 | \& close IN; | |
515 | \& close OUT; | |
516 | \& unlink $in; | |
517 | \& rename $out, $in; | |
518 | .Ve | |
519 | .PP | |
520 | If we encrypt this with \f(CW\*(C`mkrot13\*(C'\fR: | |
521 | .PP | |
522 | .Vb 1 | |
523 | \& print " hello fred \en"; | |
524 | .Ve | |
525 | .PP | |
526 | the result will be this: | |
527 | .PP | |
528 | .Vb 2 | |
529 | \& use Rot13; | |
530 | \& cevag " uryyb serq \ea"; | |
531 | .Ve | |
532 | .PP | |
533 | Running it produces this output: | |
534 | .PP | |
535 | .Vb 1 | |
536 | \& hello fred | |
537 | .Ve | |
538 | .SH "USING CONTEXT: THE DEBUG FILTER" | |
539 | .IX Header "USING CONTEXT: THE DEBUG FILTER" | |
540 | The rot13 example was a trivial example. Here's another demonstration | |
541 | that shows off a few more features. | |
542 | .PP | |
543 | Say you wanted to include a lot of debugging code in your Perl script | |
544 | during development, but you didn't want it available in the released | |
545 | product. Source filters offer a solution. In order to keep the example | |
546 | simple, let's say you wanted the debugging output to be controlled by | |
547 | an environment variable, \f(CW\*(C`DEBUG\*(C'\fR. Debugging code is enabled if the | |
548 | variable exists, otherwise it is disabled. | |
549 | .PP | |
550 | Two special marker lines will bracket debugging code, like this: | |
551 | .PP | |
552 | .Vb 5 | |
553 | \& ## DEBUG_BEGIN | |
554 | \& if ($year > 1999) { | |
555 | \& warn "Debug: millennium bug in year $year\en"; | |
556 | \& } | |
557 | \& ## DEBUG_END | |
558 | .Ve | |
559 | .PP | |
560 | The filter ensures that Perl parses the code between the | |
561 | \&\f(CW\*(C`DEBUG_BEGIN\*(C'\fR and \f(CW\*(C`DEBUG_END\*(C'\fR markers only when the \f(CW\*(C`DEBUG\*(C'\fR | |
562 | environment variable exists. That means that when \f(CW\*(C`DEBUG\*(C'\fR does exist, the code above | |
563 | should be passed through the filter unchanged. The marker lines can | |
564 | also be passed through as\-is, because the Perl parser will see them as | |
565 | comment lines. When \f(CW\*(C`DEBUG\*(C'\fR isn't set, we need a way to disable the | |
566 | debug code. A simple way to achieve that is to convert the lines | |
567 | between the two markers into comments: | |
568 | .PP | |
569 | .Vb 5 | |
570 | \& ## DEBUG_BEGIN | |
571 | \& #if ($year > 1999) { | |
572 | \& # warn "Debug: millennium bug in year $year\en"; | |
573 | \& #} | |
574 | \& ## DEBUG_END | |
575 | .Ve | |
576 | .PP | |
577 | Here is the complete Debug filter: | |
578 | .PP | |
579 | .Vb 1 | |
580 | \& package Debug; | |
581 | .Ve | |
582 | .PP | |
583 | .Vb 3 | |
584 | \& use strict; | |
585 | \& use warnings; | |
586 | \& use Filter::Util::Call; | |
587 | .Ve | |
588 | .PP | |
589 | .Vb 2 | |
590 | \& use constant TRUE => 1; | |
591 | \& use constant FALSE => 0; | |
592 | .Ve | |
593 | .PP | |
594 | .Vb 11 | |
595 | \& sub import { | |
596 | \& my ($type) = @_; | |
597 | \& my (%context) = ( | |
598 | \& Enabled => defined $ENV{DEBUG}, | |
599 | \& InTraceBlock => FALSE, | |
600 | \& Filename => (caller)[1], | |
601 | \& LineNo => 0, | |
602 | \& LastBegin => 0, | |
603 | \& ); | |
604 | \& filter_add(bless \e%context); | |
605 | \& } | |
606 | .Ve | |
607 | .PP | |
608 | .Vb 6 | |
609 | \& sub Die { | |
610 | \& my ($self) = shift; | |
611 | \& my ($message) = shift; | |
612 | \& my ($line_no) = shift || $self->{LastBegin}; | |
613 | \& die "$message at $self->{Filename} line $line_no.\en" | |
614 | \& } | |
615 | .Ve | |
616 | .PP | |
617 | .Vb 5 | |
618 | \& sub filter { | |
619 | \& my ($self) = @_; | |
620 | \& my ($status); | |
621 | \& $status = filter_read(); | |
622 | \& ++ $self->{LineNo}; | |
623 | .Ve | |
624 | .PP | |
625 | .Vb 6 | |
626 | \& # deal with EOF/error first | |
627 | \& if ($status <= 0) { | |
628 | \& $self->Die("DEBUG_BEGIN has no DEBUG_END") | |
629 | \& if $self->{InTraceBlock}; | |
630 | \& return $status; | |
631 | \& } | |
632 | .Ve | |
633 | .PP | |
634 | .Vb 6 | |
635 | \& if ($self->{InTraceBlock}) { | |
636 | \& if (/^\es*##\es*DEBUG_BEGIN/ ) { | |
637 | \& $self->Die("Nested DEBUG_BEGIN", $self->{LineNo}) | |
638 | \& } elsif (/^\es*##\es*DEBUG_END/) { | |
639 | \& $self->{InTraceBlock} = FALSE; | |
640 | \& } | |
641 | .Ve | |
642 | .PP | |
643 | .Vb 10 | |
644 | \& # comment out the debug lines when the filter is disabled | |
645 | \& s/^/#/ if ! $self->{Enabled}; | |
646 | \& } elsif ( /^\es*##\es*DEBUG_BEGIN/ ) { | |
647 | \& $self->{InTraceBlock} = TRUE; | |
648 | \& $self->{LastBegin} = $self->{LineNo}; | |
649 | \& } elsif ( /^\es*##\es*DEBUG_END/ ) { | |
650 | \& $self->Die("DEBUG_END has no DEBUG_BEGIN", $self->{LineNo}); | |
651 | \& } | |
652 | \& return $status; | |
653 | \& } | |
654 | .Ve | |
655 | .PP | |
656 | .Vb 1 | |
657 | \& 1; | |
658 | .Ve | |
659 | .PP | |
660 | The big difference between this filter and the previous example is the | |
661 | use of context data in the filter object. The filter object is based on | |
662 | a hash reference, and is used to keep various pieces of context | |
663 | information between calls to the filter function. All but two of the | |
664 | hash fields are used for error reporting. The first of those two, | |
665 | Enabled, is used by the filter to determine whether the debugging code | |
666 | should be given to the Perl parser. The second, InTraceBlock, is true | |
667 | when the filter has encountered a \f(CW\*(C`DEBUG_BEGIN\*(C'\fR line, but has not yet | |
668 | encountered the following \f(CW\*(C`DEBUG_END\*(C'\fR line. | |
669 | .PP | |
670 | If you ignore all the error checking that most of the code does, the | |
671 | essence of the filter is as follows: | |
672 | .PP | |
673 | .Vb 4 | |
674 | \& sub filter { | |
675 | \& my ($self) = @_; | |
676 | \& my ($status); | |
677 | \& $status = filter_read(); | |
678 | .Ve | |
679 | .PP | |
680 | .Vb 6 | |
681 | \& # deal with EOF/error first | |
682 | \& return $status if $status <= 0; | |
683 | \& if ($self->{InTraceBlock}) { | |
684 | \& if (/^\es*##\es*DEBUG_END/) { | |
685 | \& $self->{InTraceBlock} = FALSE | |
686 | \& } | |
687 | .Ve | |
688 | .PP | |
689 | .Vb 7 | |
690 | \& # comment out debug lines when the filter is disabled | |
691 | \& s/^/#/ if ! $self->{Enabled}; | |
692 | \& } elsif ( /^\es*##\es*DEBUG_BEGIN/ ) { | |
693 | \& $self->{InTraceBlock} = TRUE; | |
694 | \& } | |
695 | \& return $status; | |
696 | \& } | |
697 | .Ve | |
698 | .PP | |
699 | Be warned: just as the C\-preprocessor doesn't know C, the Debug filter | |
700 | doesn't know Perl. It can be fooled quite easily: | |
701 | .PP | |
702 | .Vb 3 | |
703 | \& print <<EOM; | |
704 | \& ##DEBUG_BEGIN | |
705 | \& EOM | |
706 | .Ve | |
707 | .PP | |
708 | Such things aside, you can see that a lot can be achieved with a modest | |
709 | amount of code. | |
710 | .SH "CONCLUSION" | |
711 | .IX Header "CONCLUSION" | |
712 | You now have a better understanding of what a source filter is, and you | |
713 | might even have a possible use for them. If you feel like playing with | |
714 | source filters but need a bit of inspiration, here are some extra | |
715 | features you could add to the Debug filter. | |
716 | .PP | |
717 | First, an easy one. Rather than having debugging code that is | |
718 | all\-or\-nothing, it would be much more useful to be able to control | |
719 | which specific blocks of debugging code get included. Try extending the | |
720 | syntax for debug blocks to allow each to be identified. The contents of | |
721 | the \f(CW\*(C`DEBUG\*(C'\fR environment variable can then be used to control which | |
722 | blocks get included. | |
723 | .PP | |
724 | Once you can identify individual blocks, try allowing them to be | |
725 | nested. That isn't difficult either. | |
726 | .PP | |
727 | Here is an interesting idea that doesn't involve the Debug filter. | |
728 | Currently Perl subroutines have fairly limited support for formal | |
729 | parameter lists. You can specify the number of parameters and their | |
730 | type, but you still have to manually take them out of the \f(CW@_\fR array | |
731 | yourself. Write a source filter that allows you to have a named | |
732 | parameter list. Such a filter would turn this: | |
733 | .PP | |
734 | .Vb 1 | |
735 | \& sub MySub ($first, $second, @rest) { ... } | |
736 | .Ve | |
737 | .PP | |
738 | into this: | |
739 | .PP | |
740 | .Vb 6 | |
741 | \& sub MySub($$@) { | |
742 | \& my ($first) = shift; | |
743 | \& my ($second) = shift; | |
744 | \& my (@rest) = @_; | |
745 | \& ... | |
746 | \& } | |
747 | .Ve | |
748 | .PP | |
749 | Finally, if you feel like a real challenge, have a go at writing a | |
750 | full-blown Perl macro preprocessor as a source filter. Borrow the | |
751 | useful features from the C preprocessor and any other macro processors | |
752 | you know. The tricky bit will be choosing how much knowledge of Perl's | |
753 | syntax you want your filter to have. | |
754 | .SH "THINGS TO LOOK OUT FOR" | |
755 | .IX Header "THINGS TO LOOK OUT FOR" | |
756 | .ie n .IP "Some Filters Clobber the ""DATA"" Handle" 5 | |
757 | .el .IP "Some Filters Clobber the \f(CWDATA\fR Handle" 5 | |
758 | .IX Item "Some Filters Clobber the DATA Handle" | |
759 | Some source filters use the \f(CW\*(C`DATA\*(C'\fR handle to read the calling program. | |
760 | When using these source filters you cannot rely on this handle, nor expect | |
761 | any particular kind of behavior when operating on it. Filters based on | |
762 | Filter::Util::Call (and therefore Filter::Simple) do not alter the \f(CW\*(C`DATA\*(C'\fR | |
763 | filehandle. | |
764 | .SH "REQUIREMENTS" | |
765 | .IX Header "REQUIREMENTS" | |
766 | The Source Filters distribution is available on \s-1CPAN\s0, in | |
767 | .PP | |
768 | .Vb 1 | |
769 | \& CPAN/modules/by-module/Filter | |
770 | .Ve | |
771 | .PP | |
772 | Starting from Perl 5.8 Filter::Util::Call (the core part of the | |
773 | Source Filters distribution) is part of the standard Perl distribution. | |
774 | Also included is a friendlier interface called Filter::Simple, by | |
775 | Damian Conway. | |
776 | .SH "AUTHOR" | |
777 | .IX Header "AUTHOR" | |
778 | Paul Marquess <Paul.Marquess@btinternet.com> | |
779 | .SH "COPYRIGHTS" | |
780 | .IX Header "COPYRIGHTS" | |
781 | This article originally appeared in The Perl Journal #11, and is | |
782 | copyright 1998 The Perl Journal. It appears courtesy of Jon Orwant and | |
783 | The Perl Journal. This document may be distributed under the same terms | |
784 | as Perl itself. |