Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / site_perl / 5.8.0 / rfc822.pl
CommitLineData
86530b38
AT
1# rfc822.pl -- A perl package to manipulate RFC822 mail headers
2# A. P. Barrett <barrett@ee.und.ac.za>, June 1993
3# $Revision: 1.1 $$Date: 1996/09/17 13:37:40 $
4
5# Synopsis:
6# require 'rfc822.pl';
7#
8# # sample input
9# $string = 'Joe (Random) User <@route:"j.r.l"@host.com>';
10#
11# @toks = &rfc822'tokenise($string);
12# # Convert string to tokens.
13# # In an array context, returns:
14# # ('Joe', '(Random)', 'User', '<', '@', 'route', ':',
15# # '"j.r.l"', '@', 'host', '.', 'com', '>')
16# # Not intended for use in a scalar context, but would return:
17# # 'Joe(Random)User<@route:"j.r.l"@host.com>'
18#
19# $newstring = &rfc822'untokenise(@toks);
20# # Convert tokens to string with minimum white space.
21# # Not intended for use in an array context.
22# # In a scalar context, returns:
23# # 'Joe(Random)User<@route:"j.r.l"@host.com>'
24#
25# @newtoks = &rfc822'uncomment($string);
26# @newtoks = &rfc822'uncomment(@toks);
27# $newstring = &rfc822'uncomment($string);
28# $newstring = &rfc822'uncomment(@toks);
29# # Remove comments.
30# # In an array context, returns:
31# # ('Joe', 'User', '<', '@', 'route', ':',
32# # '"j.r.l"', '@', 'host', '.', 'com', '>')
33# # In a scalar context, returns:
34# # 'Joe User<@route:"j.r.l"@host.com>'
35#
36# @newtoks = &rfc822'first_route_addr($string);
37# @newtoks = &rfc822'first_route_addr(@toks);
38# $newstring = &rfc822'first_route_addr($string);
39# $newstring = &rfc822'first_route_addr(@toks);
40# # Obtain first route-addr or addr-spec.
41# # In an array context, returns:
42# # ('<', '@', 'route', ':',
43# # '"j.r.l"', '@', 'host', '.', 'com', '>')
44# # In a scalar context, returns:
45# # '<@route:"j.r.l"@host.com>'
46#
47# @newtoks = &rfc822'first_addr_spec($string);
48# @newtoks = &rfc822'first_addr_spec(@toks);
49# $newstring = &rfc822'first_addr_spec($string);
50# $newstring = &rfc822'first_addr_spec(@toks);
51# # Obtain first addr-spec.
52# # In an array context, returns:
53# # ('"j.r.l"', '@', 'host', '.', 'com')
54# # In a scalar context, returns:
55# # '"j.r.l"@host.com'
56
57package rfc822;
58
59# Define some variables to help us write regexps.
60$self_delimiters = '<>@,;:.'; # use /[$self_delimiters]/
61$specials = $self_delimiters.'()\\\\"\\[\\]'; # use /[$specials]/
62$quoted_pair = '\\\\.'; # use /$quoted_pair/
63$qp_or_bs_end = $quoted_pair.'|\\\\$'; # use /$qp_or_bs_end/
64
65# Tokenise, per RFC 822.
66#
67# As an extension, allows atoms to contain quoted pairs.
68# The last output token might contain an unterminated quoted pair,
69# comment, domain literal or quoted string.
70# Other output tokens might contain solitary unmatched special characters.
71#
72# Input is a single string.
73# In an array context, output is a list of tokens.
74# In a scalar context, output is a single string (not very useful).
75sub tokenise
76{
77 local ($_) = @_;
78 local (@outtoks);
79 local ($firstchar);
80 local ($comment, $comment_depth);
81
82 while (s/^\s*(\S)/$firstchar = $1/e) {
83 if ($firstchar =~ /[$self_delimiters]/o) {
84 # a special character as a self-delimiting token.
85 s/^(.)//;
86 push (@outtoks, $1);
87 } elsif ($firstchar eq '"') {
88 # a quoted string.
89 # XXX we don't prohibit bare CR.
90 s/^(\"($qp_or_bs_end|[^\\"])*\")//o;
91 push (@outtoks, $1);
92 } elsif ($firstchar eq '[') {
93 # a domain literal.
94 # XXX we don't prohibit bare CR or '['.
95 s/^(\[($qp_or_bs_end|[^\\\]])*(\]|$))//o;
96 push (@outtoks, $1);
97 } elsif ($firstchar eq '(') {
98 # a comment.
99 do {
100 s/^([^()]*([()]|$))//;
101 $comment .= $1;
102 $comment_depth++ if $2 eq '(';
103 $comment_depth-- if $2 eq ')';
104 do {
105 # XXX error recovery for unterminated comment
106 $comment_depth = 0;
107 } if $2 eq '';
108 } until ($comment_depth == 0);
109 push (@outtoks, $comment);
110 } elsif ($firstchar ne '\\' && $firstchar =~ /[$specials]/o) {
111 # an illegal special character.
112 s/^(.)//;
113 push (@outtoks, $1);
114 } else {
115 # should be an atom, which is not allowed to contain
116 # special characters or control characters.
117 # we have already checked for all special chars except
118 # controls and backslash.
119 # XXX we don't check for controls.
120 # XXX we allow a quoted-pair as part of an atom.
121 s/^(($qp_or_bs_end|[^\s$specials])+)//o;
122 push (@outtoks, $1);
123 }
124 }
125
126 # return result
127 wantarray ? @outtoks : &untokenise(@outtoks);
128}
129
130# Convert a list of tokens to a single string.
131#
132# Just pastes the tokens together, with blanks where they are essential.
133#
134# Input is a list of tokens.
135# Output is a single string.
136sub untokenise
137{
138 local ($token, $prevtok);
139 local ($result);
140 local ($prev, $this);
141
142 foreach $token (@_) {
143 # Do we need a space?
144 # A space is essential when both the left and right tokens
145 # are either atoms or quoted strings.
146 # XXX - Spaces are desirable in some other places, but for
147 # now it's too difficult to worry about that. It's
148 # context-dependent anyway -- for example, we sometimes
149 # want spaces after ':' and ',', but not when they appear
150 # inside a route-addr. The tokener has no business knowing
151 # about such details.
152 if ($result ne '') {
153 $prev = substr($prevtok, $[, 1);
154 $this = substr($token, $[, 1);
155 if ( ($this eq '"' || $this !~ /[$specials]/o)
156 && ($prev eq '"' || $prev !~ /[$specials]/o))
157 {
158 $result .= ' ';
159 }
160 }
161 $result .= $token;
162 $prevtok = $token;
163 }
164
165 # return result
166 $result;
167}
168
169# Delete comments.
170#
171# Input can be a single string or a list of tokens.
172# In an array context, output is a list of tokens.
173# In a scalar context, output is a single string.
174sub uncomment
175{
176 local (@intoks) = @_;
177 local (@outtoks);
178 local ($token);
179
180 # tokenise the input if we were given a single string
181 @intoks = &tokenise($intoks[$[]) if $#intoks le $[;
182
183 # delete comment tokens
184 @outtoks = grep (/^[^(]/, @intoks);
185
186 # return result
187 wantarray ? @outtoks : &untokenise(@outtoks);
188}
189
190# Try to extract a single RFC-822 route-addr or addr-spec from a
191# list of addresses.
192#
193# Returns the first route-addr or addr-spec if there are several
194# (for example, if the input is a comma-separated list)..
195# Garbage in, garbage out.
196#
197# Input can be a single string or a list of tokens.
198# In an array context, output is a list of tokens.
199# In a scalar context, output is a single string.
200sub first_route_addr
201{
202 local (@intoks) = @_;
203 local (@outtoks);
204 local ($token, $firstchar);
205 local ($state) = 'start';
206
207 # tokenise the input if we were given a single string
208 @intoks = &tokenise($intoks[$[]) if $#intoks le $[;
209
210 foreach $token (@intoks) {
211 $firstchar = substr($token,0,1);
212 if ($firstchar eq '(') {
213 # ignore comments
214 next;
215 } elsif ($firstchar eq '<') {
216 # '<' is start of route-addr.
217 # discard what came before.
218 $state = 'routeaddr';
219 @outtoks = ($token);
220 } elsif ($firstchar eq ':') {
221 # ':' might be end of phrase for a group,
222 # or might be end of route and start of addr-spec in route-addr.
223 if ($state eq 'routeaddr') {
224 push (@outtoks, $token);
225 } else {
226 $state = 'start';
227 @outtoks = ();
228 }
229 } elsif ($firstchar eq ',') {
230 # ',' might be a separator between addresses
231 # or might be part of a route inside a route-addr.
232 if ($state eq 'routeaddr') {
233 push (@outtoks, $token);
234 } else {
235 $state = 'start';
236 last if $#outtoks ge $[; # we got what we wanted
237 }
238 } elsif ($firstchar eq '>') {
239 # '>' is end of route-addr
240 push (@outtoks, $token);
241 $state = 'end';
242 last; # we got what we wanted
243 } elsif ($firstchar eq ';') {
244 # ';' is end of group
245 $state = 'end';
246 last if $#outtoks ge $[; # we got what we wanted
247 } else {
248 # accumulate valid tokens.
249 push (@outtoks, $token);
250 }
251 }
252
253 # return result
254 wantarray ? @outtoks : &untokenise(@outtoks);
255}
256
257# Try to extract a single RFC-822 addr-spec from a list of addresses.
258#
259# Returns the first addr-spec if there are several.
260# Garbage in, garbage out.
261#
262# Input can be a single string or a list of tokens.
263# In an array context, output is a list of tokens.
264# In a scalar context, output is a single string.
265sub first_addr_spec
266{
267 local (@intoks) = @_;
268 local ($token);
269 local ($i, $startpos, $endpos);
270
271 # Get the first route-addr or addr-spec
272 @intoks = &first_route_addr(@intoks);
273
274 # if starts with '<' then it was a route-addr.
275 # Keep the stuff between the last ':' (if any) and the first '>'.
276 if ($intoks[$[] eq '<') {
277 $startpos = $[+1; # skip the initial '<'
278 $endpos = $#intoks; # don't yet know if there is a final '>'
279 foreach $i ($startpos..$endpos) {
280 $token = $intoks[$i];
281 if ($token eq '>') {
282 $endpos = $i - 1;
283 last;
284 } elsif ($token eq ':') {
285 $startpos = $i + 1;
286 }
287 }
288 }
289 # if it didn't start with '<' then it was an addr-spec
290 else {
291 $startpos = $[;
292 $endpos = $#intoks;
293 }
294
295 # return result
296 wantarray ? @intoks[$startpos..$endpos]
297 : &untokenise(@intoks[$startpos..$endpos]);
298}
299
300# Lame attempt at some standalone test code.
301# I don't know a good way to tell if we were called from 'require'
302# or as a standalone program, so we guess by examining $0.
303if ($0 =~ /(^|\/)rfc822\.pl$/) {
304
305 package main;
306 while (<>) {
307 $string = $_;
308 print "input:\t$string";
309 @toks = &rfc822'tokenise($string);
310 print "tokenise:\n\t", join("\n\t", @toks), "\n";
311 print "untokenise: ", &rfc822'untokenise(@toks), "\n";
312 foreach $op ('uncomment', 'first_route_addr', 'first_addr_spec') {
313 ## just test the scalar to scalar version
314 eval qq[
315 \$newstring = &rfc822'$op(\$string);
316 print "$op:\t", \$newstring, "\n";
317 ];
318 ## test all four permutations
319 ## of scalar and array inputs and outputs
320 # eval qq[
321 # print "$op:\n";
322 # \@newtoks = &rfc822'$op(\$string);
323 # print " s-->a:\n\t", join("\n\t", \@newtoks), "\n";
324 # \$newstring = &rfc822'$op(\$string);
325 # print " s-->s:\t", \$newstring, "\n";
326 # \@newtoks = &rfc822'$op(\@toks);
327 # print " a-->a:\n\t", join("\n\t", \@newtoks), "\n";
328 # \$newstring = &rfc822'$op(\@toks);
329 # print " a-->s:\t", \$newstring, "\n";
330 # ];
331 }
332 }
333 exit 0;
334
335}
336
3371; # for require