Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | .\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "PERLFAQ9 1" | |
132 | .TH PERLFAQ9 1 "2002-06-08" "perl v5.8.0" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | perlfaq9 \- Networking ($Revision: 1.9 $, $Date: 2002/04/07 18:46:13 $) | |
135 | .SH "DESCRIPTION" | |
136 | .IX Header "DESCRIPTION" | |
137 | This section deals with questions related to networking, the internet, | |
138 | and a few on the web. | |
139 | .Sh "What is the correct form of response from a \s-1CGI\s0 script?" | |
140 | .IX Subsection "What is the correct form of response from a CGI script?" | |
141 | (Alan Flavell <flavell+www@a5.ph.gla.ac.uk> answers...) | |
142 | .PP | |
143 | The Common Gateway Interface (\s-1CGI\s0) specifies a software interface between | |
144 | a program (\*(L"\s-1CGI\s0 script\*(R") and a web server (\s-1HTTPD\s0). It is not specific | |
145 | to Perl, and has its own FAQs and tutorials, and usenet group, | |
146 | comp.infosystems.www.authoring.cgi | |
147 | .PP | |
148 | The original \s-1CGI\s0 specification is at: http://hoohoo.ncsa.uiuc.edu/cgi/ | |
149 | .PP | |
150 | Current best-practice \s-1RFC\s0 draft at: http://CGI\-Spec.Golux.Com/ | |
151 | .PP | |
152 | Other relevant documentation listed in: http://www.perl.org/CGI_MetaFAQ.html | |
153 | .PP | |
154 | These Perl FAQs very selectively cover some \s-1CGI\s0 issues. However, Perl | |
155 | programmers are strongly advised to use the \s-1CGI\s0.pm module, to take care | |
156 | of the details for them. | |
157 | .PP | |
158 | The similarity between \s-1CGI\s0 response headers (defined in the \s-1CGI\s0 | |
159 | specification) and \s-1HTTP\s0 response headers (defined in the \s-1HTTP\s0 | |
160 | specification, \s-1RFC2616\s0) is intentional, but can sometimes be confusing. | |
161 | .PP | |
162 | The \s-1CGI\s0 specification defines two kinds of script: the \*(L"Parsed Header\*(R" | |
163 | script, and the \*(L"Non Parsed Header\*(R" (\s-1NPH\s0) script. Check your server | |
164 | documentation to see what it supports. \*(L"Parsed Header\*(R" scripts are | |
165 | simpler in various respects. The \s-1CGI\s0 specification allows any of the | |
166 | usual newline representations in the \s-1CGI\s0 response (it's the server's | |
167 | job to create an accurate \s-1HTTP\s0 response based on it). So \*(L"\en\*(R" written in | |
168 | text mode is technically correct, and recommended. \s-1NPH\s0 scripts are more | |
169 | tricky: they must put out a complete and accurate set of \s-1HTTP\s0 | |
170 | transaction response headers; the \s-1HTTP\s0 specification calls for records | |
171 | to be terminated with carriage-return and line\-feed, i.e. \s-1ASCII\s0 \e015\e012 | |
172 | written in binary mode. | |
173 | .PP | |
174 | Using \s-1CGI\s0.pm gives excellent platform independence, including \s-1EBCDIC\s0 | |
175 | systems. \s-1CGI\s0.pm selects an appropriate newline representation | |
176 | ($CGI::CRLF) and sets binmode as appropriate. | |
177 | .Sh "My \s-1CGI\s0 script runs from the command line but not the browser. (500 Server Error)" | |
178 | .IX Subsection "My CGI script runs from the command line but not the browser. (500 Server Error)" | |
179 | Several things could be wrong. You can go through the \*(L"Troubleshooting | |
180 | Perl \s-1CGI\s0 scripts\*(R" guide at | |
181 | .PP | |
182 | .Vb 1 | |
183 | \& http://www.perl.org/troubleshooting_CGI.html | |
184 | .Ve | |
185 | .PP | |
186 | If, after that, you can demonstrate that you've read the FAQs and that | |
187 | your problem isn't something simple that can be easily answered, you'll | |
188 | probably receive a courteous and useful reply to your question if you | |
189 | post it on comp.infosystems.www.authoring.cgi (if it's something to do | |
190 | with \s-1HTTP\s0 or the \s-1CGI\s0 protocols). Questions that appear to be Perl | |
191 | questions but are really \s-1CGI\s0 ones that are posted to comp.lang.perl.misc | |
192 | are not so well received. | |
193 | .PP | |
194 | The useful FAQs, related documents, and troubleshooting guides are | |
195 | listed in the \s-1CGI\s0 Meta \s-1FAQ:\s0 | |
196 | .PP | |
197 | .Vb 1 | |
198 | \& http://www.perl.org/CGI_MetaFAQ.html | |
199 | .Ve | |
200 | .Sh "How can I get better error messages from a \s-1CGI\s0 program?" | |
201 | .IX Subsection "How can I get better error messages from a CGI program?" | |
202 | Use the CGI::Carp module. It replaces \f(CW\*(C`warn\*(C'\fR and \f(CW\*(C`die\*(C'\fR, plus the | |
203 | normal Carp module's \f(CW\*(C`carp\*(C'\fR, \f(CW\*(C`croak\*(C'\fR, and \f(CW\*(C`confess\*(C'\fR functions with | |
204 | more verbose and safer versions. It still sends them to the normal | |
205 | server error log. | |
206 | .PP | |
207 | .Vb 3 | |
208 | \& use CGI::Carp; | |
209 | \& warn "This is a complaint"; | |
210 | \& die "But this one is serious"; | |
211 | .Ve | |
212 | .PP | |
213 | The following use of CGI::Carp also redirects errors to a file of your choice, | |
214 | placed in a \s-1BEGIN\s0 block to catch compile-time warnings as well: | |
215 | .PP | |
216 | .Vb 6 | |
217 | \& BEGIN { | |
218 | \& use CGI::Carp qw(carpout); | |
219 | \& open(LOG, ">>/var/local/cgi-logs/mycgi-log") | |
220 | \& or die "Unable to append to mycgi-log: $!\en"; | |
221 | \& carpout(*LOG); | |
222 | \& } | |
223 | .Ve | |
224 | .PP | |
225 | You can even arrange for fatal errors to go back to the client browser, | |
226 | which is nice for your own debugging, but might confuse the end user. | |
227 | .PP | |
228 | .Vb 2 | |
229 | \& use CGI::Carp qw(fatalsToBrowser); | |
230 | \& die "Bad error here"; | |
231 | .Ve | |
232 | .PP | |
233 | Even if the error happens before you get the \s-1HTTP\s0 header out, the module | |
234 | will try to take care of this to avoid the dreaded server 500 errors. | |
235 | Normal warnings still go out to the server error log (or wherever | |
236 | you've sent them with \f(CW\*(C`carpout\*(C'\fR) with the application name and date | |
237 | stamp prepended. | |
238 | .Sh "How do I remove \s-1HTML\s0 from a string?" | |
239 | .IX Subsection "How do I remove HTML from a string?" | |
240 | The most correct way (albeit not the fastest) is to use HTML::Parser | |
241 | from \s-1CPAN\s0. Another mostly correct | |
242 | way is to use HTML::FormatText which not only removes \s-1HTML\s0 but also | |
243 | attempts to do a little simple formatting of the resulting plain text. | |
244 | .PP | |
245 | Many folks attempt a simple-minded regular expression approach, like | |
246 | \&\f(CW\*(C`s/<.*?>//g\*(C'\fR, but that fails in many cases because the tags | |
247 | may continue over line breaks, they may contain quoted angle\-brackets, | |
248 | or \s-1HTML\s0 comments may be present. Plus, folks forget to convert | |
249 | entities\*(--like \f(CW\*(C`&lt;\*(C'\fR for example. | |
250 | .PP | |
251 | Here's one \*(L"simple\-minded\*(R" approach, that works for most files: | |
252 | .PP | |
253 | .Vb 2 | |
254 | \& #!/usr/bin/perl -p0777 | |
255 | \& s/<(?:[^>'"]*|(['"]).*?\e1)*>//gs | |
256 | .Ve | |
257 | .PP | |
258 | If you want a more complete solution, see the 3\-stage striphtml | |
259 | program in | |
260 | http://www.cpan.org/authors/Tom_Christiansen/scripts/striphtml.gz | |
261 | \&. | |
262 | .PP | |
263 | Here are some tricky cases that you should think about when picking | |
264 | a solution: | |
265 | .PP | |
266 | .Vb 1 | |
267 | \& <IMG SRC = "foo.gif" ALT = "A > B"> | |
268 | .Ve | |
269 | .PP | |
270 | .Vb 2 | |
271 | \& <IMG SRC = "foo.gif" | |
272 | \& ALT = "A > B"> | |
273 | .Ve | |
274 | .PP | |
275 | .Vb 1 | |
276 | \& <!-- <A comment> --> | |
277 | .Ve | |
278 | .PP | |
279 | .Vb 1 | |
280 | \& <script>if (a<b && a>c)</script> | |
281 | .Ve | |
282 | .PP | |
283 | .Vb 1 | |
284 | \& <# Just data #> | |
285 | .Ve | |
286 | .PP | |
287 | .Vb 1 | |
288 | \& <![INCLUDE CDATA [ >>>>>>>>>>>> ]]> | |
289 | .Ve | |
290 | .PP | |
291 | If \s-1HTML\s0 comments include other tags, those solutions would also break | |
292 | on text like this: | |
293 | .PP | |
294 | .Vb 3 | |
295 | \& <!-- This section commented out. | |
296 | \& <B>You can't see me!</B> | |
297 | \& --> | |
298 | .Ve | |
299 | .Sh "How do I extract URLs?" | |
300 | .IX Subsection "How do I extract URLs?" | |
301 | You can easily extract all sorts of URLs from \s-1HTML\s0 with | |
302 | \&\f(CW\*(C`HTML::SimpleLinkExtor\*(C'\fR which handles anchors, images, objects, | |
303 | frames, and many other tags that can contain a \s-1URL\s0. If you need | |
304 | anything more complex, you can create your own subclass of | |
305 | \&\f(CW\*(C`HTML::LinkExtor\*(C'\fR or \f(CW\*(C`HTML::Parser\*(C'\fR. You might even use | |
306 | \&\f(CW\*(C`HTML::SimpleLinkExtor\*(C'\fR as an example for something specifically | |
307 | suited to your needs. | |
308 | .PP | |
309 | Less complete solutions involving regular expressions can save | |
310 | you a lot of processing time if you know that the input is simple. One | |
311 | solution from Tom Christiansen runs 100 times faster than most | |
312 | module based approaches but only extracts URLs from anchors where the first | |
313 | attribute is \s-1HREF\s0 and there are no other attributes. | |
314 | .PP | |
315 | .Vb 7 | |
316 | \& #!/usr/bin/perl -n00 | |
317 | \& # qxurl - tchrist@perl.com | |
318 | \& print "$2\en" while m{ | |
319 | \& < \es* | |
320 | \& A \es+ HREF \es* = \es* (["']) (.*?) \e1 | |
321 | \& \es* > | |
322 | \& }gsix; | |
323 | .Ve | |
324 | .Sh "How do I download a file from the user's machine? How do I open a file on another machine?" | |
325 | .IX Subsection "How do I download a file from the user's machine? How do I open a file on another machine?" | |
326 | In the context of an \s-1HTML\s0 form, you can use what's known as | |
327 | \&\fBmultipart/form\-data\fR encoding. The \s-1CGI\s0.pm module (available from | |
328 | \&\s-1CPAN\s0) supports this in the \fIstart_multipart_form()\fR method, which isn't | |
329 | the same as the \fIstartform()\fR method. | |
330 | .Sh "How do I make a pop-up menu in \s-1HTML\s0?" | |
331 | .IX Subsection "How do I make a pop-up menu in HTML?" | |
332 | Use the \fB<\s-1SELECT\s0>\fR and \fB<\s-1OPTION\s0>\fR tags. The \s-1CGI\s0.pm | |
333 | module (available from \s-1CPAN\s0) supports this widget, as well as many | |
334 | others, including some that it cleverly synthesizes on its own. | |
335 | .Sh "How do I fetch an \s-1HTML\s0 file?" | |
336 | .IX Subsection "How do I fetch an HTML file?" | |
337 | One approach, if you have the lynx text-based \s-1HTML\s0 browser installed | |
338 | on your system, is this: | |
339 | .PP | |
340 | .Vb 2 | |
341 | \& $html_code = `lynx -source $url`; | |
342 | \& $text_data = `lynx -dump $url`; | |
343 | .Ve | |
344 | .PP | |
345 | The libwww-perl (\s-1LWP\s0) modules from \s-1CPAN\s0 provide a more powerful way | |
346 | to do this. They don't require lynx, but like lynx, can still work | |
347 | through proxies: | |
348 | .PP | |
349 | .Vb 3 | |
350 | \& # simplest version | |
351 | \& use LWP::Simple; | |
352 | \& $content = get($URL); | |
353 | .Ve | |
354 | .PP | |
355 | .Vb 3 | |
356 | \& # or print HTML from a URL | |
357 | \& use LWP::Simple; | |
358 | \& getprint "http://www.linpro.no/lwp/"; | |
359 | .Ve | |
360 | .PP | |
361 | .Vb 11 | |
362 | \& # or print ASCII from HTML from a URL | |
363 | \& # also need HTML-Tree package from CPAN | |
364 | \& use LWP::Simple; | |
365 | \& use HTML::Parser; | |
366 | \& use HTML::FormatText; | |
367 | \& my ($html, $ascii); | |
368 | \& $html = get("http://www.perl.com/"); | |
369 | \& defined $html | |
370 | \& or die "Can't fetch HTML from http://www.perl.com/"; | |
371 | \& $ascii = HTML::FormatText->new->format(parse_html($html)); | |
372 | \& print $ascii; | |
373 | .Ve | |
374 | .Sh "How do I automate an \s-1HTML\s0 form submission?" | |
375 | .IX Subsection "How do I automate an HTML form submission?" | |
376 | If you're submitting values using the \s-1GET\s0 method, create a \s-1URL\s0 and encode | |
377 | the form using the \f(CW\*(C`query_form\*(C'\fR method: | |
378 | .PP | |
379 | .Vb 2 | |
380 | \& use LWP::Simple; | |
381 | \& use URI::URL; | |
382 | .Ve | |
383 | .PP | |
384 | .Vb 3 | |
385 | \& my $url = url('http://www.perl.com/cgi-bin/cpan_mod'); | |
386 | \& $url->query_form(module => 'DB_File', readme => 1); | |
387 | \& $content = get($url); | |
388 | .Ve | |
389 | .PP | |
390 | If you're using the \s-1POST\s0 method, create your own user agent and encode | |
391 | the content appropriately. | |
392 | .PP | |
393 | .Vb 2 | |
394 | \& use HTTP::Request::Common qw(POST); | |
395 | \& use LWP::UserAgent; | |
396 | .Ve | |
397 | .PP | |
398 | .Vb 4 | |
399 | \& $ua = LWP::UserAgent->new(); | |
400 | \& my $req = POST 'http://www.perl.com/cgi-bin/cpan_mod', | |
401 | \& [ module => 'DB_File', readme => 1 ]; | |
402 | \& $content = $ua->request($req)->as_string; | |
403 | .Ve | |
404 | .Sh "How do I decode or create those %\-encodings on the web?" | |
405 | .IX Subsection "How do I decode or create those %-encodings on the web?" | |
406 | If you are writing a \s-1CGI\s0 script, you should be using the \s-1CGI\s0.pm module | |
407 | that comes with perl, or some other equivalent module. The \s-1CGI\s0 module | |
408 | automatically decodes queries for you, and provides an \fIescape()\fR | |
409 | function to handle encoding. | |
410 | .PP | |
411 | The best source of detailed information on \s-1URI\s0 encoding is \s-1RFC\s0 2396. | |
412 | Basically, the following substitutions do it: | |
413 | .PP | |
414 | .Vb 1 | |
415 | \& s/([^\ew()'*~!.-])/sprintf '%%%02x', ord $1/eg; # encode | |
416 | .Ve | |
417 | .PP | |
418 | .Vb 1 | |
419 | \& s/%([A-Fa-f\ed]{2})/chr hex $1/eg; # decode | |
420 | .Ve | |
421 | .PP | |
422 | However, you should only apply them to individual \s-1URI\s0 components, not | |
423 | the entire \s-1URI\s0, otherwise you'll lose information and generally mess | |
424 | things up. If that didn't explain it, don't worry. Just go read | |
425 | section 2 of the \s-1RFC\s0, it's probably the best explanation there is. | |
426 | .PP | |
427 | \&\s-1RFC\s0 2396 also contains a lot of other useful information, including a | |
428 | regexp for breaking any arbitrary \s-1URI\s0 into components (Appendix B). | |
429 | .Sh "How do I redirect to another page?" | |
430 | .IX Subsection "How do I redirect to another page?" | |
431 | Specify the complete \s-1URL\s0 of the destination (even if it is on the same | |
432 | server). This is one of the two different kinds of \s-1CGI\s0 \*(L"Location:\*(R" | |
433 | responses which are defined in the \s-1CGI\s0 specification for a Parsed Header | |
434 | script. The other kind (an absolute URLpath) is resolved internally to | |
435 | the server without any \s-1HTTP\s0 redirection. The \s-1CGI\s0 specifications do not | |
436 | allow relative URLs in either case. | |
437 | .PP | |
438 | Use of \s-1CGI\s0.pm is strongly recommended. This example shows redirection | |
439 | with a complete \s-1URL\s0. This redirection is handled by the web browser. | |
440 | .PP | |
441 | .Vb 1 | |
442 | \& use CGI qw/:standard/; | |
443 | .Ve | |
444 | .PP | |
445 | .Vb 2 | |
446 | \& my $url = 'http://www.cpan.org/'; | |
447 | \& print redirect($url); | |
448 | .Ve | |
449 | .PP | |
450 | This example shows a redirection with an absolute URLpath. This | |
451 | redirection is handled by the local web server. | |
452 | .PP | |
453 | .Vb 2 | |
454 | \& my $url = '/CPAN/index.html'; | |
455 | \& print redirect($url); | |
456 | .Ve | |
457 | .PP | |
458 | But if coded directly, it could be as follows (the final \*(L"\en\*(R" is | |
459 | shown separately, for clarity), using either a complete \s-1URL\s0 or | |
460 | an absolute URLpath. | |
461 | .PP | |
462 | .Vb 2 | |
463 | \& print "Location: $url\en"; # CGI response header | |
464 | \& print "\en"; # end of headers | |
465 | .Ve | |
466 | .Sh "How do I put a password on my web pages?" | |
467 | .IX Subsection "How do I put a password on my web pages?" | |
468 | That depends. You'll need to read the documentation for your web | |
469 | server, or perhaps check some of the other FAQs referenced above. | |
470 | .Sh "How do I edit my .htpasswd and .htgroup files with Perl?" | |
471 | .IX Subsection "How do I edit my .htpasswd and .htgroup files with Perl?" | |
472 | The HTTPD::UserAdmin and HTTPD::GroupAdmin modules provide a | |
473 | consistent \s-1OO\s0 interface to these files, regardless of how they're | |
474 | stored. Databases may be text, dbm, Berkeley \s-1DB\s0 or any database with | |
475 | a \s-1DBI\s0 compatible driver. HTTPD::UserAdmin supports files used by the | |
476 | `Basic' and `Digest' authentication schemes. Here's an example: | |
477 | .PP | |
478 | .Vb 4 | |
479 | \& use HTTPD::UserAdmin (); | |
480 | \& HTTPD::UserAdmin | |
481 | \& ->new(DB => "/foo/.htpasswd") | |
482 | \& ->add($username => $password); | |
483 | .Ve | |
484 | .Sh "How do I make sure users can't enter values into a form that cause my \s-1CGI\s0 script to do bad things?" | |
485 | .IX Subsection "How do I make sure users can't enter values into a form that cause my CGI script to do bad things?" | |
486 | See the security references listed in the \s-1CGI\s0 Meta \s-1FAQ:\s0 | |
487 | .PP | |
488 | .Vb 1 | |
489 | \& http://www.perl.org/CGI_MetaFAQ.html | |
490 | .Ve | |
491 | .Sh "How do I parse a mail header?" | |
492 | .IX Subsection "How do I parse a mail header?" | |
493 | For a quick-and-dirty solution, try this solution derived | |
494 | from \*(L"split\*(R" in perlfunc: | |
495 | .PP | |
496 | .Vb 4 | |
497 | \& $/ = ''; | |
498 | \& $header = <MSG>; | |
499 | \& $header =~ s/\en\es+/ /g; # merge continuation lines | |
500 | \& %head = ( UNIX_FROM_LINE, split /^([-\ew]+):\es*/m, $header ); | |
501 | .Ve | |
502 | .PP | |
503 | That solution doesn't do well if, for example, you're trying to | |
504 | maintain all the Received lines. A more complete approach is to use | |
505 | the Mail::Header module from \s-1CPAN\s0 (part of the MailTools package). | |
506 | .Sh "How do I decode a \s-1CGI\s0 form?" | |
507 | .IX Subsection "How do I decode a CGI form?" | |
508 | You use a standard module, probably \s-1CGI\s0.pm. Under no circumstances | |
509 | should you attempt to do so by hand! | |
510 | .PP | |
511 | You'll see a lot of \s-1CGI\s0 programs that blindly read from \s-1STDIN\s0 the number | |
512 | of bytes equal to \s-1CONTENT_LENGTH\s0 for POSTs, or grab \s-1QUERY_STRING\s0 for | |
513 | decoding GETs. These programs are very poorly written. They only work | |
514 | sometimes. They typically forget to check the return value of the \fIread()\fR | |
515 | system call, which is a cardinal sin. They don't handle \s-1HEAD\s0 requests. | |
516 | They don't handle multipart forms used for file uploads. They don't deal | |
517 | with \s-1GET/POST\s0 combinations where query fields are in more than one place. | |
518 | They don't deal with keywords in the query string. | |
519 | .PP | |
520 | In short, they're bad hacks. Resist them at all costs. Please do not be | |
521 | tempted to reinvent the wheel. Instead, use the \s-1CGI\s0.pm or CGI_Lite.pm | |
522 | (available from \s-1CPAN\s0), or if you're trapped in the module-free land | |
523 | of perl1 .. perl4, you might look into cgi\-lib.pl (available from | |
524 | http://cgi\-lib.stanford.edu/cgi\-lib/ ). | |
525 | .PP | |
526 | Make sure you know whether to use a \s-1GET\s0 or a \s-1POST\s0 in your form. | |
527 | GETs should only be used for something that doesn't update the server. | |
528 | Otherwise you can get mangled databases and repeated feedback mail | |
529 | messages. The fancy word for this is ``idempotency''. This simply | |
530 | means that there should be no difference between making a \s-1GET\s0 request | |
531 | for a particular \s-1URL\s0 once or multiple times. This is because the | |
532 | \&\s-1HTTP\s0 protocol definition says that a \s-1GET\s0 request may be cached by the | |
533 | browser, or server, or an intervening proxy. \s-1POST\s0 requests cannot be | |
534 | cached, because each request is independent and matters. Typically, | |
535 | \&\s-1POST\s0 requests change or depend on state on the server (query or update | |
536 | a database, send mail, or purchase a computer). | |
537 | .Sh "How do I check a valid mail address?" | |
538 | .IX Subsection "How do I check a valid mail address?" | |
539 | You can't, at least, not in real time. Bummer, eh? | |
540 | .PP | |
541 | Without sending mail to the address and seeing whether there's a human | |
542 | on the other end to answer you, you cannot determine whether a mail | |
543 | address is valid. Even if you apply the mail header standard, you | |
544 | can have problems, because there are deliverable addresses that aren't | |
545 | \&\s-1RFC\-822\s0 (the mail header standard) compliant, and addresses that aren't | |
546 | deliverable which are compliant. | |
547 | .PP | |
548 | Many are tempted to try to eliminate many frequently-invalid | |
549 | mail addresses with a simple regex, such as | |
550 | \&\f(CW\*(C`/^[\ew.\-]+\e@(?:[\ew\-]+\e.)+\ew+$/\*(C'\fR. However, it's a very bad idea: | |
551 | this also throws out many valid ones, and says nothing about | |
552 | potential deliverability, so it is not suggested. Instead, see | |
553 | http://www.cpan.org/authors/Tom_Christiansen/scripts/ckaddr.gz , | |
554 | which actually checks against the full \s-1RFC\s0 spec (except for nested | |
555 | comments), looks for addresses you may not wish to accept mail to | |
556 | (say, Bill Clinton or your postmaster), and then makes sure that the | |
557 | hostname given can be looked up in the \s-1DNS\s0 \s-1MX\s0 records. It's not fast, | |
558 | but it works for what it tries to do. | |
559 | .PP | |
560 | Our best advice for verifying a person's mail address is to have them | |
561 | enter their address twice, just as you normally do to change a password. | |
562 | This usually weeds out typos. If both versions match, send | |
563 | mail to that address with a personal message that looks somewhat like: | |
564 | .PP | |
565 | .Vb 1 | |
566 | \& Dear someuser@host.com, | |
567 | .Ve | |
568 | .PP | |
569 | .Vb 5 | |
570 | \& Please confirm the mail address you gave us Wed May 6 09:38:41 | |
571 | \& MDT 1998 by replying to this message. Include the string | |
572 | \& "Rumpelstiltskin" in that reply, but spelled in reverse; that is, | |
573 | \& start with "Nik...". Once this is done, your confirmed address will | |
574 | \& be entered into our records. | |
575 | .Ve | |
576 | .PP | |
577 | If you get the message back and they've followed your directions, | |
578 | you can be reasonably assured that it's real. | |
579 | .PP | |
580 | A related strategy that's less open to forgery is to give them a \s-1PIN\s0 | |
581 | (personal \s-1ID\s0 number). Record the address and \s-1PIN\s0 (best that it be a | |
582 | random one) for later processing. In the mail you send, ask them to | |
583 | include the \s-1PIN\s0 in their reply. But if it bounces, or the message is | |
584 | included via a ``vacation'' script, it'll be there anyway. So it's | |
585 | best to ask them to mail back a slight alteration of the \s-1PIN\s0, such as | |
586 | with the characters reversed, one added or subtracted to each digit, etc. | |
587 | .Sh "How do I decode a \s-1MIME/BASE64\s0 string?" | |
588 | .IX Subsection "How do I decode a MIME/BASE64 string?" | |
589 | The MIME\-Base64 package (available from \s-1CPAN\s0) handles this as well as | |
590 | the \s-1MIME/QP\s0 encoding. Decoding \s-1BASE64\s0 becomes as simple as: | |
591 | .PP | |
592 | .Vb 2 | |
593 | \& use MIME::Base64; | |
594 | \& $decoded = decode_base64($encoded); | |
595 | .Ve | |
596 | .PP | |
597 | The MIME-Tools package (available from \s-1CPAN\s0) supports extraction with | |
598 | decoding of \s-1BASE64\s0 encoded attachments and content directly from email | |
599 | messages. | |
600 | .PP | |
601 | If the string to decode is short (less than 84 bytes long) | |
602 | a more direct approach is to use the \fIunpack()\fR function's \*(L"u\*(R" | |
603 | format after minor transliterations: | |
604 | .PP | |
605 | .Vb 4 | |
606 | \& tr#A-Za-z0-9+/##cd; # remove non-base64 chars | |
607 | \& tr#A-Za-z0-9+/# -_#; # convert to uuencoded format | |
608 | \& $len = pack("c", 32 + 0.75*length); # compute length byte | |
609 | \& print unpack("u", $len . $_); # uudecode and print | |
610 | .Ve | |
611 | .Sh "How do I return the user's mail address?" | |
612 | .IX Subsection "How do I return the user's mail address?" | |
613 | On systems that support getpwuid, the $< variable, and the | |
614 | Sys::Hostname module (which is part of the standard perl distribution), | |
615 | you can probably try using something like this: | |
616 | .PP | |
617 | .Vb 2 | |
618 | \& use Sys::Hostname; | |
619 | \& $address = sprintf('%s@%s', scalar getpwuid($<), hostname); | |
620 | .Ve | |
621 | .PP | |
622 | Company policies on mail addresses can mean that this generates addresses | |
623 | that the company's mail system will not accept, so you should ask for | |
624 | users' mail addresses when this matters. Furthermore, not all systems | |
625 | on which Perl runs are so forthcoming with this information as is Unix. | |
626 | .PP | |
627 | The Mail::Util module from \s-1CPAN\s0 (part of the MailTools package) provides a | |
628 | \&\fImailaddress()\fR function that tries to guess the mail address of the user. | |
629 | It makes a more intelligent guess than the code above, using information | |
630 | given when the module was installed, but it could still be incorrect. | |
631 | Again, the best way is often just to ask the user. | |
632 | .Sh "How do I send mail?" | |
633 | .IX Subsection "How do I send mail?" | |
634 | Use the \f(CW\*(C`sendmail\*(C'\fR program directly: | |
635 | .PP | |
636 | .Vb 6 | |
637 | \& open(SENDMAIL, "|/usr/lib/sendmail -oi -t -odq") | |
638 | \& or die "Can't fork for sendmail: $!\en"; | |
639 | \& print SENDMAIL <<"EOF"; | |
640 | \& From: User Originating Mail <me\e@host> | |
641 | \& To: Final Destination <you\e@otherhost> | |
642 | \& Subject: A relevant subject line | |
643 | .Ve | |
644 | .PP | |
645 | .Vb 4 | |
646 | \& Body of the message goes here after the blank line | |
647 | \& in as many lines as you like. | |
648 | \& EOF | |
649 | \& close(SENDMAIL) or warn "sendmail didn't close nicely"; | |
650 | .Ve | |
651 | .PP | |
652 | The \fB\-oi\fR option prevents sendmail from interpreting a line consisting | |
653 | of a single dot as \*(L"end of message\*(R". The \fB\-t\fR option says to use the | |
654 | headers to decide who to send the message to, and \fB\-odq\fR says to put | |
655 | the message into the queue. This last option means your message won't | |
656 | be immediately delivered, so leave it out if you want immediate | |
657 | delivery. | |
658 | .PP | |
659 | Alternate, less convenient approaches include calling mail (sometimes | |
660 | called mailx) directly or simply opening up port 25 and having an | |
661 | intimate conversation between just you and the remote \s-1SMTP\s0 daemon, | |
662 | probably sendmail. | |
663 | .PP | |
664 | Or you might be able to use the \s-1CPAN\s0 module Mail::Mailer: | |
665 | .PP | |
666 | .Vb 1 | |
667 | \& use Mail::Mailer; | |
668 | .Ve | |
669 | .PP | |
670 | .Vb 8 | |
671 | \& $mailer = Mail::Mailer->new(); | |
672 | \& $mailer->open({ From => $from_address, | |
673 | \& To => $to_address, | |
674 | \& Subject => $subject, | |
675 | \& }) | |
676 | \& or die "Can't open: $!\en"; | |
677 | \& print $mailer $body; | |
678 | \& $mailer->close(); | |
679 | .Ve | |
680 | .PP | |
681 | The Mail::Internet module uses Net::SMTP which is less Unix-centric than | |
682 | Mail::Mailer, but less reliable. Avoid raw \s-1SMTP\s0 commands. There | |
683 | are many reasons to use a mail transport agent like sendmail. These | |
684 | include queuing, \s-1MX\s0 records, and security. | |
685 | .Sh "How do I use \s-1MIME\s0 to make an attachment to a mail message?" | |
686 | .IX Subsection "How do I use MIME to make an attachment to a mail message?" | |
687 | This answer is extracted directly from the MIME::Lite documentation. | |
688 | Create a multipart message (i.e., one with attachments). | |
689 | .PP | |
690 | .Vb 1 | |
691 | \& use MIME::Lite; | |
692 | .Ve | |
693 | .PP | |
694 | .Vb 8 | |
695 | \& ### Create a new multipart message: | |
696 | \& $msg = MIME::Lite->new( | |
697 | \& From =>'me@myhost.com', | |
698 | \& To =>'you@yourhost.com', | |
699 | \& Cc =>'some@other.com, some@more.com', | |
700 | \& Subject =>'A message with 2 parts...', | |
701 | \& Type =>'multipart/mixed' | |
702 | \& ); | |
703 | .Ve | |
704 | .PP | |
705 | .Vb 8 | |
706 | \& ### Add parts (each "attach" has same arguments as "new"): | |
707 | \& $msg->attach(Type =>'TEXT', | |
708 | \& Data =>"Here's the GIF file you wanted" | |
709 | \& ); | |
710 | \& $msg->attach(Type =>'image/gif', | |
711 | \& Path =>'aaa000123.gif', | |
712 | \& Filename =>'logo.gif' | |
713 | \& ); | |
714 | .Ve | |
715 | .PP | |
716 | .Vb 1 | |
717 | \& $text = $msg->as_string; | |
718 | .Ve | |
719 | .PP | |
720 | MIME::Lite also includes a method for sending these things. | |
721 | .PP | |
722 | .Vb 1 | |
723 | \& $msg->send; | |
724 | .Ve | |
725 | .PP | |
726 | This defaults to using \fIsendmail\fR\|(1) but can be customized to use | |
727 | \&\s-1SMTP\s0 via Net::SMTP. | |
728 | .Sh "How do I read mail?" | |
729 | .IX Subsection "How do I read mail?" | |
730 | While you could use the Mail::Folder module from \s-1CPAN\s0 (part of the | |
731 | MailFolder package) or the Mail::Internet module from \s-1CPAN\s0 (also part | |
732 | of the MailTools package), often a module is overkill. Here's a | |
733 | mail sorter. | |
734 | .PP | |
735 | .Vb 15 | |
736 | \& #!/usr/bin/perl | |
737 | \& # bysub1 - simple sort by subject | |
738 | \& my(@msgs, @sub); | |
739 | \& my $msgno = -1; | |
740 | \& $/ = ''; # paragraph reads | |
741 | \& while (<>) { | |
742 | \& if (/^From/m) { | |
743 | \& /^Subject:\es*(?:Re:\es*)*(.*)/mi; | |
744 | \& $sub[++$msgno] = lc($1) || ''; | |
745 | \& } | |
746 | \& $msgs[$msgno] .= $_; | |
747 | \& } | |
748 | \& for my $i (sort { $sub[$a] cmp $sub[$b] || $a <=> $b } (0 .. $#msgs)) { | |
749 | \& print $msgs[$i]; | |
750 | \& } | |
751 | .Ve | |
752 | .PP | |
753 | Or more succinctly, | |
754 | .PP | |
755 | .Vb 6 | |
756 | \& #!/usr/bin/perl -n00 | |
757 | \& # bysub2 - awkish sort-by-subject | |
758 | \& BEGIN { $msgno = -1 } | |
759 | \& $sub[++$msgno] = (/^Subject:\es*(?:Re:\es*)*(.*)/mi)[0] if /^From/m; | |
760 | \& $msg[$msgno] .= $_; | |
761 | \& END { print @msg[ sort { $sub[$a] cmp $sub[$b] || $a <=> $b } (0 .. $#msg) ] } | |
762 | .Ve | |
763 | .Sh "How do I find out my hostname/domainname/IP address?" | |
764 | .IX Subsection "How do I find out my hostname/domainname/IP address?" | |
765 | The normal way to find your own hostname is to call the \f(CW`hostname`\fR | |
766 | program. While sometimes expedient, this has some problems, such as | |
767 | not knowing whether you've got the canonical name or not. It's one of | |
768 | those tradeoffs of convenience versus portability. | |
769 | .PP | |
770 | The Sys::Hostname module (part of the standard perl distribution) will | |
771 | give you the hostname after which you can find out the \s-1IP\s0 address | |
772 | (assuming you have working \s-1DNS\s0) with a \fIgethostbyname()\fR call. | |
773 | .PP | |
774 | .Vb 4 | |
775 | \& use Socket; | |
776 | \& use Sys::Hostname; | |
777 | \& my $host = hostname(); | |
778 | \& my $addr = inet_ntoa(scalar gethostbyname($host || 'localhost')); | |
779 | .Ve | |
780 | .PP | |
781 | Probably the simplest way to learn your \s-1DNS\s0 domain name is to grok | |
782 | it out of /etc/resolv.conf, at least under Unix. Of course, this | |
783 | assumes several things about your resolv.conf configuration, including | |
784 | that it exists. | |
785 | .PP | |
786 | (We still need a good \s-1DNS\s0 domain name-learning method for non-Unix | |
787 | systems.) | |
788 | .Sh "How do I fetch a news article or the active newsgroups?" | |
789 | .IX Subsection "How do I fetch a news article or the active newsgroups?" | |
790 | Use the Net::NNTP or News::NNTPClient modules, both available from \s-1CPAN\s0. | |
791 | This can make tasks like fetching the newsgroup list as simple as | |
792 | .PP | |
793 | .Vb 2 | |
794 | \& perl -MNews::NNTPClient | |
795 | \& -e 'print News::NNTPClient->new->list("newsgroups")' | |
796 | .Ve | |
797 | .Sh "How do I fetch/put an \s-1FTP\s0 file?" | |
798 | .IX Subsection "How do I fetch/put an FTP file?" | |
799 | LWP::Simple (available from \s-1CPAN\s0) can fetch but not put. Net::FTP (also | |
800 | available from \s-1CPAN\s0) is more complex but can put as well as fetch. | |
801 | .Sh "How can I do \s-1RPC\s0 in Perl?" | |
802 | .IX Subsection "How can I do RPC in Perl?" | |
803 | A \s-1DCE::RPC\s0 module is being developed (but is not yet available) and | |
804 | will be released as part of the DCE-Perl package (available from | |
805 | \&\s-1CPAN\s0). The rpcgen suite, available from CPAN/authors/id/JAKE/, is | |
806 | an \s-1RPC\s0 stub generator and includes an \s-1RPC::ONC\s0 module. | |
807 | .SH "AUTHOR AND COPYRIGHT" | |
808 | .IX Header "AUTHOR AND COPYRIGHT" | |
809 | Copyright (c) 1997\-2002 Tom Christiansen and Nathan Torkington. | |
810 | All rights reserved. | |
811 | .PP | |
812 | This documentation is free; you can redistribute it and/or modify it | |
813 | under the same terms as Perl itself. | |
814 | .PP | |
815 | Irrespective of its distribution, all code examples in this file | |
816 | are hereby placed into the public domain. You are permitted and | |
817 | encouraged to use this code in your own programs for fun | |
818 | or for profit as you see fit. A simple comment in the code giving | |
819 | credit would be courteous but is not required. |