Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | |
2 | package Locale::Maketext::Guts; | |
3 | BEGIN { *zorp = sub { return scalar @_ } unless defined &zorp; } | |
4 | # Just so we're nice and define SOMETHING in "our" package. | |
5 | ||
6 | package Locale::Maketext; | |
7 | use strict; | |
8 | use vars qw($USE_LITERALS $GUTSPATH); | |
9 | ||
10 | BEGIN { | |
11 | $GUTSPATH = __FILE__; | |
12 | *DEBUG = sub () {0} unless defined &DEBUG; | |
13 | } | |
14 | ||
15 | use utf8; | |
16 | ||
17 | sub _compile { | |
18 | # This big scary routine compiles an entry. | |
19 | # It returns either a coderef if there's brackety bits in this, or | |
20 | # otherwise a ref to a scalar. | |
21 | ||
22 | my $target = ref($_[0]) || $_[0]; | |
23 | ||
24 | my(@code); | |
25 | my(@c) = (''); # "chunks" -- scratch. | |
26 | my $call_count = 0; | |
27 | my $big_pile = ''; | |
28 | { | |
29 | my $in_group = 0; # start out outside a group | |
30 | my($m, @params); # scratch | |
31 | ||
32 | while($_[1] =~ # Iterate over chunks. | |
33 | m<\G( | |
34 | [^\~\[\]]+ # non-~[] stuff | |
35 | | | |
36 | ~. # ~[, ~], ~~, ~other | |
37 | | | |
38 | \[ # [ presumably opening a group | |
39 | | | |
40 | \] # ] presumably closing a group | |
41 | | | |
42 | ~ # terminal ~ ? | |
43 | | | |
44 | $ | |
45 | )>xgs | |
46 | ) { | |
47 | print " \"$1\"\n" if DEBUG > 2; | |
48 | ||
49 | if($1 eq '[' or $1 eq '') { # "[" or end | |
50 | # Whether this is "[" or end, force processing of any | |
51 | # preceding literal. | |
52 | if($in_group) { | |
53 | if($1 eq '') { | |
54 | $target->_die_pointing($_[1], "Unterminated bracket group"); | |
55 | } else { | |
56 | $target->_die_pointing($_[1], "You can't nest bracket groups"); | |
57 | } | |
58 | } else { | |
59 | if($1 eq '') { | |
60 | print " [end-string]\n" if DEBUG > 2; | |
61 | } else { | |
62 | $in_group = 1; | |
63 | } | |
64 | die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity | |
65 | if(length $c[-1]) { | |
66 | # Now actually processing the preceding literal | |
67 | $big_pile .= $c[-1]; | |
68 | if($USE_LITERALS and ( | |
69 | (ord('A') == 65) | |
70 | ? $c[-1] !~ m<[^\x20-\x7E]>s | |
71 | # ASCII very safe chars | |
72 | : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s | |
73 | # EBCDIC very safe chars | |
74 | )) { | |
75 | # normal case -- all very safe chars | |
76 | $c[-1] =~ s/'/\\'/g; | |
77 | push @code, q{ '} . $c[-1] . "',\n"; | |
78 | $c[-1] = ''; # reuse this slot | |
79 | } else { | |
80 | push @code, ' $c[' . $#c . "],\n"; | |
81 | push @c, ''; # new chunk | |
82 | } | |
83 | } | |
84 | # else just ignore the empty string. | |
85 | } | |
86 | ||
87 | } elsif($1 eq ']') { # "]" | |
88 | # close group -- go back in-band | |
89 | if($in_group) { | |
90 | $in_group = 0; | |
91 | ||
92 | print " --Closing group [$c[-1]]\n" if DEBUG > 2; | |
93 | ||
94 | # And now process the group... | |
95 | ||
96 | if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) { | |
97 | DEBUG > 2 and print " -- (Ignoring)\n"; | |
98 | $c[-1] = ''; # reset out chink | |
99 | next; | |
100 | } | |
101 | ||
102 | #$c[-1] =~ s/^\s+//s; | |
103 | #$c[-1] =~ s/\s+$//s; | |
104 | ($m,@params) = split(",", $c[-1], -1); # was /\s*,\s*/ | |
105 | ||
106 | # A bit of a hack -- we've turned "~,"'s into DELs, so turn | |
107 | # 'em into real commas here. | |
108 | if (ord('A') == 65) { # ASCII, etc | |
109 | foreach($m, @params) { tr/\x7F/,/ } | |
110 | } else { # EBCDIC (1047, 0037, POSIX-BC) | |
111 | # Thanks to Peter Prymmer for the EBCDIC handling | |
112 | foreach($m, @params) { tr/\x07/,/ } | |
113 | } | |
114 | ||
115 | # Special-case handling of some method names: | |
116 | if($m eq '_*' or $m =~ m<^_(-?\d+)$>s) { | |
117 | # Treat [_1,...] as [,_1,...], etc. | |
118 | unshift @params, $m; | |
119 | $m = ''; | |
120 | } elsif($m eq '*') { | |
121 | $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars" | |
122 | } elsif($m eq '#') { | |
123 | $m = 'numf'; # "#" for "number": [#,_1] for "the number _1" | |
124 | } | |
125 | ||
126 | # Most common case: a simple, legal-looking method name | |
127 | if($m eq '') { | |
128 | # 0-length method name means to just interpolate: | |
129 | push @code, ' ('; | |
130 | } elsif($m =~ m<^\w+(?:\:\:\w+)*$>s | |
131 | and $m !~ m<(?:^|\:)\d>s | |
132 | # exclude starting a (sub)package or symbol with a digit | |
133 | ) { | |
134 | # Yes, it even supports the demented (and undocumented?) | |
135 | # $obj->Foo::bar(...) syntax. | |
136 | $target->_die_pointing( | |
137 | $_[1], "Can't (yet?) use \"SUPER::\" in a bracket-group method", | |
138 | 2 + length($c[-1]) | |
139 | ) | |
140 | if $m =~ m/^SUPER::/s; | |
141 | # Because for SUPER:: to work, we'd have to compile this into | |
142 | # the right package, and that seems just not worth the bother, | |
143 | # unless someone convinces me otherwise. | |
144 | ||
145 | push @code, ' $_[0]->' . $m . '('; | |
146 | } else { | |
147 | # TODO: implement something? or just too icky to consider? | |
148 | $target->_die_pointing( | |
149 | $_[1], | |
150 | "Can't use \"$m\" as a method name in bracket group", | |
151 | 2 + length($c[-1]) | |
152 | ); | |
153 | } | |
154 | ||
155 | pop @c; # we don't need that chunk anymore | |
156 | ++$call_count; | |
157 | ||
158 | foreach my $p (@params) { | |
159 | if($p eq '_*') { | |
160 | # Meaning: all parameters except $_[0] | |
161 | $code[-1] .= ' @_[1 .. $#_], '; | |
162 | # and yes, that does the right thing for all @_ < 3 | |
163 | } elsif($p =~ m<^_(-?\d+)$>s) { | |
164 | # _3 meaning $_[3] | |
165 | $code[-1] .= '$_[' . (0 + $1) . '], '; | |
166 | } elsif($USE_LITERALS and ( | |
167 | (ord('A') == 65) | |
168 | ? $p !~ m<[^\x20-\x7E]>s | |
169 | # ASCII very safe chars | |
170 | : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s | |
171 | # EBCDIC very safe chars | |
172 | )) { | |
173 | # Normal case: a literal containing only safe characters | |
174 | $p =~ s/'/\\'/g; | |
175 | $code[-1] .= q{'} . $p . q{', }; | |
176 | } else { | |
177 | # Stow it on the chunk-stack, and just refer to that. | |
178 | push @c, $p; | |
179 | push @code, ' $c[' . $#c . "], "; | |
180 | } | |
181 | } | |
182 | $code[-1] .= "),\n"; | |
183 | ||
184 | push @c, ''; | |
185 | } else { | |
186 | $target->_die_pointing($_[1], "Unbalanced ']'"); | |
187 | } | |
188 | ||
189 | } elsif(substr($1,0,1) ne '~') { | |
190 | # it's stuff not containing "~" or "[" or "]" | |
191 | # i.e., a literal blob | |
192 | $c[-1] .= $1; | |
193 | ||
194 | } elsif($1 eq '~~') { # "~~" | |
195 | $c[-1] .= '~'; | |
196 | ||
197 | } elsif($1 eq '~[') { # "~[" | |
198 | $c[-1] .= '['; | |
199 | ||
200 | } elsif($1 eq '~]') { # "~]" | |
201 | $c[-1] .= ']'; | |
202 | ||
203 | } elsif($1 eq '~,') { # "~," | |
204 | if($in_group) { | |
205 | # This is a hack, based on the assumption that no-one will actually | |
206 | # want a DEL inside a bracket group. Let's hope that's it's true. | |
207 | if (ord('A') == 65) { # ASCII etc | |
208 | $c[-1] .= "\x7F"; | |
209 | } else { # EBCDIC (cp 1047, 0037, POSIX-BC) | |
210 | $c[-1] .= "\x07"; | |
211 | } | |
212 | } else { | |
213 | $c[-1] .= '~,'; | |
214 | } | |
215 | ||
216 | } elsif($1 eq '~') { # possible only at string-end, it seems. | |
217 | $c[-1] .= '~'; | |
218 | ||
219 | } else { | |
220 | # It's a "~X" where X is not a special character. | |
221 | # Consider it a literal ~ and X. | |
222 | $c[-1] .= $1; | |
223 | } | |
224 | } | |
225 | } | |
226 | ||
227 | if($call_count) { | |
228 | undef $big_pile; # Well, nevermind that. | |
229 | } else { | |
230 | # It's all literals! Ahwell, that can happen. | |
231 | # So don't bother with the eval. Return a SCALAR reference. | |
232 | return \$big_pile; | |
233 | } | |
234 | ||
235 | die "Last chunk isn't null??" if @c and length $c[-1]; # sanity | |
236 | print scalar(@c), " chunks under closure\n" if DEBUG; | |
237 | if(@code == 0) { # not possible? | |
238 | print "Empty code\n" if DEBUG; | |
239 | return \''; | |
240 | } elsif(@code > 1) { # most cases, presumably! | |
241 | unshift @code, "join '',\n"; | |
242 | } | |
243 | unshift @code, "use strict; sub {\n"; | |
244 | push @code, "}\n"; | |
245 | ||
246 | print @code if DEBUG; | |
247 | my $sub = eval(join '', @code); | |
248 | die "$@ while evalling" . join('', @code) if $@; # Should be impossible. | |
249 | return $sub; | |
250 | } | |
251 | ||
252 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
253 | ||
254 | sub _die_pointing { | |
255 | # This is used by _compile to throw a fatal error | |
256 | my $target = shift; # class name | |
257 | # ...leaving $_[0] the error-causing text, and $_[1] the error message | |
258 | ||
259 | my $i = index($_[0], "\n"); | |
260 | ||
261 | my $pointy; | |
262 | my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1; | |
263 | if($pos < 1) { | |
264 | $pointy = "^=== near there\n"; | |
265 | } else { # we need to space over | |
266 | my $first_tab = index($_[0], "\t"); | |
267 | if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) { | |
268 | # No tabs, or the first tab is harmlessly after where we will point to, | |
269 | # AND we're far enough from the margin that we can draw a proper arrow. | |
270 | $pointy = ('=' x $pos) . "^ near there\n"; | |
271 | } else { | |
272 | # tabs screw everything up! | |
273 | $pointy = substr($_[0],0,$pos); | |
274 | $pointy =~ tr/\t //cd; | |
275 | # make everything into whitespace, but preseving tabs | |
276 | $pointy .= "^=== near there\n"; | |
277 | } | |
278 | } | |
279 | ||
280 | my $errmsg = "$_[1], in\:\n$_[0]"; | |
281 | ||
282 | if($i == -1) { | |
283 | # No newline. | |
284 | $errmsg .= "\n" . $pointy; | |
285 | } elsif($i == (length($_[0]) - 1) ) { | |
286 | # Already has a newline at end. | |
287 | $errmsg .= $pointy; | |
288 | } else { | |
289 | # don't bother with the pointy bit, I guess. | |
290 | } | |
291 | Carp::croak( "$errmsg via $target, as used" ); | |
292 | } | |
293 | ||
294 | 1; | |
295 |