Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | package ExtUtils::Constant::Base; |
2 | ||
3 | use strict; | |
4 | use vars qw($VERSION $is_perl56); | |
5 | use Carp; | |
6 | use Text::Wrap; | |
7 | use ExtUtils::Constant::Utils qw(C_stringify perl_stringify); | |
8 | ||
9 | $VERSION = '0.01'; | |
10 | ||
11 | $is_perl56 = ($] < 5.007 && $] > 5.005_50); | |
12 | ||
13 | ||
14 | =head1 NAME | |
15 | ||
16 | ExtUtils::Constant::Base - base class for ExtUtils::Constant objects | |
17 | ||
18 | =head1 SYNOPSIS | |
19 | ||
20 | require ExtUtils::Constant::Base; | |
21 | @ISA = 'ExtUtils::Constant::Base'; | |
22 | ||
23 | =head1 DESCRIPTION | |
24 | ||
25 | ExtUtils::Constant::Base provides a base implementation of methods to | |
26 | generate C code to give fast constant value lookup by named string. Currently | |
27 | it's mostly used ExtUtils::Constant::XS, which generates the lookup code | |
28 | for the constant() subroutine found in many XS modules. | |
29 | ||
30 | =head1 USAGE | |
31 | ||
32 | ExtUtils::Constant::Base exports no subroutines. The following methods are | |
33 | available | |
34 | ||
35 | =over 4 | |
36 | ||
37 | =cut | |
38 | ||
39 | sub valid_type { | |
40 | # Default to assuming that you don't need different types of return data. | |
41 | 1; | |
42 | } | |
43 | sub default_type { | |
44 | ''; | |
45 | } | |
46 | ||
47 | =item header | |
48 | ||
49 | A method returning a scalar containing definitions needed, typically for a | |
50 | C header file. | |
51 | ||
52 | =cut | |
53 | ||
54 | sub header { | |
55 | '' | |
56 | } | |
57 | ||
58 | # This might actually be a return statement. Note that you are responsible | |
59 | # for any space you might need before your value, as it lets to perform | |
60 | # "tricks" such as "return KEY_" and have strings appended. | |
61 | sub assignment_clause_for_type; | |
62 | # In which case this might be an empty string | |
63 | sub return_statement_for_type {undef}; | |
64 | sub return_statement_for_notdef; | |
65 | sub return_statement_for_notfound; | |
66 | ||
67 | # "#if 1" is true to a C pre-processor | |
68 | sub macro_from_name { | |
69 | 1; | |
70 | } | |
71 | ||
72 | sub name_param { | |
73 | 'name'; | |
74 | } | |
75 | ||
76 | # This is possibly buggy, in that it's not mandatory (below, in the main | |
77 | # C_constant parameters, but is expected to exist here, if it's needed) | |
78 | # Buggy because if you're definitely pure 8 bit only, and will never be | |
79 | # presented with your constants in utf8, the default form of C_constant can't | |
80 | # be told not to do the utf8 version. | |
81 | ||
82 | sub is_utf8_param { | |
83 | 'utf8'; | |
84 | } | |
85 | ||
86 | sub memEQ { | |
87 | "!memcmp"; | |
88 | } | |
89 | ||
90 | =item memEQ_clause args_hashref | |
91 | ||
92 | A method to return a suitable C C<if> statement to check whether I<name> | |
93 | is equal to the C variable C<name>. If I<checked_at> is defined, then it | |
94 | is used to avoid C<memEQ> for short names, or to generate a comment to | |
95 | highlight the position of the character in the C<switch> statement. | |
96 | ||
97 | If i<checked_at> is a reference to a scalar, then instead it gives | |
98 | the characters pre-checked at the beginning, (and the number of chars by | |
99 | which the C variable name has been advanced. These need to be chopped from | |
100 | the front of I<name>). | |
101 | ||
102 | =cut | |
103 | ||
104 | sub memEQ_clause { | |
105 | # if (memEQ(name, "thingy", 6)) { | |
106 | # Which could actually be a character comparison or even "" | |
107 | my ($self, $args) = @_; | |
108 | my ($name, $checked_at, $indent) = @{$args}{qw(name checked_at indent)}; | |
109 | $indent = ' ' x ($indent || 4); | |
110 | my $front_chop; | |
111 | if (ref $checked_at) { | |
112 | # regexp won't work on 5.6.1 without use utf8; in turn that won't work | |
113 | # on 5.005_03. | |
114 | substr ($name, 0, length $$checked_at,) = ''; | |
115 | $front_chop = C_stringify ($$checked_at); | |
116 | undef $checked_at; | |
117 | } | |
118 | my $len = length $name; | |
119 | ||
120 | if ($len < 2) { | |
121 | return $indent . "{\n" | |
122 | if (defined $checked_at and $checked_at == 0) or $len == 0; | |
123 | # We didn't switch, drop through to the code for the 2 character string | |
124 | $checked_at = 1; | |
125 | } | |
126 | ||
127 | my $name_param = $self->name_param; | |
128 | ||
129 | if ($len < 3 and defined $checked_at) { | |
130 | my $check; | |
131 | if ($checked_at == 1) { | |
132 | $check = 0; | |
133 | } elsif ($checked_at == 0) { | |
134 | $check = 1; | |
135 | } | |
136 | if (defined $check) { | |
137 | my $char = C_stringify (substr $name, $check, 1); | |
138 | # Placate 5.005 with a break in the string. I can't see a good way of | |
139 | # getting it to not take [ as introducing an array lookup, even with | |
140 | # ${name_param}[$check] | |
141 | return $indent . "if ($name_param" . "[$check] == '$char') {\n"; | |
142 | } | |
143 | } | |
144 | if (($len == 2 and !defined $checked_at) | |
145 | or ($len == 3 and defined ($checked_at) and $checked_at == 2)) { | |
146 | my $char1 = C_stringify (substr $name, 0, 1); | |
147 | my $char2 = C_stringify (substr $name, 1, 1); | |
148 | return $indent . | |
149 | "if ($name_param" . "[0] == '$char1' && $name_param" . "[1] == '$char2') {\n"; | |
150 | } | |
151 | if (($len == 3 and defined ($checked_at) and $checked_at == 1)) { | |
152 | my $char1 = C_stringify (substr $name, 0, 1); | |
153 | my $char2 = C_stringify (substr $name, 2, 1); | |
154 | return $indent . | |
155 | "if ($name_param" . "[0] == '$char1' && $name_param" . "[2] == '$char2') {\n"; | |
156 | } | |
157 | ||
158 | my $pointer = '^'; | |
159 | my $have_checked_last = defined ($checked_at) && $len == $checked_at + 1; | |
160 | if ($have_checked_last) { | |
161 | # Checked at the last character, so no need to memEQ it. | |
162 | $pointer = C_stringify (chop $name); | |
163 | $len--; | |
164 | } | |
165 | ||
166 | $name = C_stringify ($name); | |
167 | my $memEQ = $self->memEQ(); | |
168 | my $body = $indent . "if ($memEQ($name_param, \"$name\", $len)) {\n"; | |
169 | # Put a little ^ under the letter we checked at | |
170 | # Screws up for non printable and non-7 bit stuff, but that's too hard to | |
171 | # get right. | |
172 | if (defined $checked_at) { | |
173 | $body .= $indent . "/* " . (' ' x length $memEQ) | |
174 | . (' ' x length $name_param) | |
175 | . (' ' x $checked_at) . $pointer | |
176 | . (' ' x ($len - $checked_at + length $len)) . " */\n"; | |
177 | } elsif (defined $front_chop) { | |
178 | $body .= $indent . "/* $front_chop" | |
179 | . (' ' x ($len + 1 + length $len)) . " */\n"; | |
180 | } | |
181 | return $body; | |
182 | } | |
183 | ||
184 | =item dump_names arg_hashref, ITEM... | |
185 | ||
186 | An internal function to generate the embedded perl code that will regenerate | |
187 | the constant subroutines. I<default_type>, I<types> and I<ITEM>s are the | |
188 | same as for C_constant. I<indent> is treated as number of spaces to indent | |
189 | by. If C<declare_types> is true a C<$types> is always declared in the perl | |
190 | code generated, if defined and false never declared, and if undefined C<$types> | |
191 | is only declared if the values in I<types> as passed in cannot be inferred from | |
192 | I<default_types> and the I<ITEM>s. | |
193 | ||
194 | =cut | |
195 | ||
196 | sub dump_names { | |
197 | my ($self, $args, @items) = @_; | |
198 | my ($default_type, $what, $indent, $declare_types) | |
199 | = @{$args}{qw(default_type what indent declare_types)}; | |
200 | $indent = ' ' x ($indent || 0); | |
201 | ||
202 | my $result; | |
203 | my (@simple, @complex, %used_types); | |
204 | foreach (@items) { | |
205 | my $type; | |
206 | if (ref $_) { | |
207 | $type = $_->{type} || $default_type; | |
208 | if ($_->{utf8}) { | |
209 | # For simplicity always skip the bytes case, and reconstitute this entry | |
210 | # from its utf8 twin. | |
211 | next if $_->{utf8} eq 'no'; | |
212 | # Copy the hashref, as we don't want to mess with the caller's hashref. | |
213 | $_ = {%$_}; | |
214 | unless ($is_perl56) { | |
215 | utf8::decode ($_->{name}); | |
216 | } else { | |
217 | $_->{name} = pack 'U*', unpack 'U0U*', $_->{name}; | |
218 | } | |
219 | delete $_->{utf8}; | |
220 | } | |
221 | } else { | |
222 | $_ = {name=>$_}; | |
223 | $type = $default_type; | |
224 | } | |
225 | $used_types{$type}++; | |
226 | if ($type eq $default_type | |
227 | # grr 5.6.1 | |
228 | and length $_->{name} | |
229 | and length $_->{name} == ($_->{name} =~ tr/A-Za-z0-9_//) | |
230 | and !defined ($_->{macro}) and !defined ($_->{value}) | |
231 | and !defined ($_->{default}) and !defined ($_->{pre}) | |
232 | and !defined ($_->{post}) and !defined ($_->{def_pre}) | |
233 | and !defined ($_->{def_post}) and !defined ($_->{weight})) { | |
234 | # It's the default type, and the name consists only of A-Za-z0-9_ | |
235 | push @simple, $_->{name}; | |
236 | } else { | |
237 | push @complex, $_; | |
238 | } | |
239 | } | |
240 | ||
241 | if (!defined $declare_types) { | |
242 | # Do they pass in any types we weren't already using? | |
243 | foreach (keys %$what) { | |
244 | next if $used_types{$_}; | |
245 | $declare_types++; # Found one in $what that wasn't used. | |
246 | last; # And one is enough to terminate this loop | |
247 | } | |
248 | } | |
249 | if ($declare_types) { | |
250 | $result = $indent . 'my $types = {map {($_, 1)} qw(' | |
251 | . join (" ", sort keys %$what) . ")};\n"; | |
252 | } | |
253 | local $Text::Wrap::huge = 'overflow'; | |
254 | local $Text::Wrap::columns = 80; | |
255 | $result .= wrap ($indent . "my \@names = (qw(", | |
256 | $indent . " ", join (" ", sort @simple) . ")"); | |
257 | if (@complex) { | |
258 | foreach my $item (sort {$a->{name} cmp $b->{name}} @complex) { | |
259 | my $name = perl_stringify $item->{name}; | |
260 | my $line = ",\n$indent {name=>\"$name\""; | |
261 | $line .= ", type=>\"$item->{type}\"" if defined $item->{type}; | |
262 | foreach my $thing (qw (macro value default pre post def_pre def_post)) { | |
263 | my $value = $item->{$thing}; | |
264 | if (defined $value) { | |
265 | if (ref $value) { | |
266 | $line .= ", $thing=>[\"" | |
267 | . join ('", "', map {perl_stringify $_} @$value) . '"]'; | |
268 | } else { | |
269 | $line .= ", $thing=>\"" . perl_stringify($value) . "\""; | |
270 | } | |
271 | } | |
272 | } | |
273 | $line .= "}"; | |
274 | # Ensure that the enclosing C comment doesn't end | |
275 | # by turning */ into *" . "/ | |
276 | $line =~ s!\*\/!\*" . "/!gs; | |
277 | # gcc -Wall doesn't like finding /* inside a comment | |
278 | $line =~ s!\/\*!/" . "\*!gs; | |
279 | $result .= $line; | |
280 | } | |
281 | } | |
282 | $result .= ");\n"; | |
283 | ||
284 | $result; | |
285 | } | |
286 | ||
287 | =item assign arg_hashref, VALUE... | |
288 | ||
289 | A method to return a suitable assignment clause. If I<type> is aggregate | |
290 | (eg I<PVN> expects both pointer and length) then there should be multiple | |
291 | I<VALUE>s for the components. I<pre> and I<post> if defined give snippets | |
292 | of C code to proceed and follow the assignment. I<pre> will be at the start | |
293 | of a block, so variables may be defined in it. | |
294 | ||
295 | =cut | |
296 | # Hmm. value undef to to NOTDEF? value () to do NOTFOUND? | |
297 | ||
298 | sub assign { | |
299 | my $self = shift; | |
300 | my $args = shift; | |
301 | my ($indent, $type, $pre, $post, $item) | |
302 | = @{$args}{qw(indent type pre post item)}; | |
303 | $post ||= ''; | |
304 | my $clause; | |
305 | my $close; | |
306 | if ($pre) { | |
307 | chomp $pre; | |
308 | $close = "$indent}\n"; | |
309 | $clause = $indent . "{\n"; | |
310 | $indent .= " "; | |
311 | $clause .= "$indent$pre"; | |
312 | $clause .= ";" unless $pre =~ /;$/; | |
313 | $clause .= "\n"; | |
314 | } | |
315 | confess "undef \$type" unless defined $type; | |
316 | confess "Can't generate code for type $type" | |
317 | unless $self->valid_type($type); | |
318 | ||
319 | $clause .= join '', map {"$indent$_\n"} | |
320 | $self->assignment_clause_for_type({type=>$type,item=>$item}, @_); | |
321 | chomp $post; | |
322 | if (length $post) { | |
323 | $clause .= "$post"; | |
324 | $clause .= ";" unless $post =~ /;$/; | |
325 | $clause .= "\n"; | |
326 | } | |
327 | my $return = $self->return_statement_for_type($type); | |
328 | $clause .= "$indent$return\n" if defined $return; | |
329 | $clause .= $close if $close; | |
330 | return $clause; | |
331 | } | |
332 | ||
333 | =item return_clause arg_hashref, ITEM | |
334 | ||
335 | A method to return a suitable C<#ifdef> clause. I<ITEM> is a hashref | |
336 | (as passed to C<C_constant> and C<match_clause>. I<indent> is the number | |
337 | of spaces to indent, defaulting to 6. | |
338 | ||
339 | =cut | |
340 | ||
341 | sub return_clause { | |
342 | ||
343 | ##ifdef thingy | |
344 | # *iv_return = thingy; | |
345 | # return PERL_constant_ISIV; | |
346 | ##else | |
347 | # return PERL_constant_NOTDEF; | |
348 | ##endif | |
349 | my ($self, $args, $item) = @_; | |
350 | my $indent = $args->{indent}; | |
351 | ||
352 | my ($name, $value, $macro, $default, $pre, $post, $def_pre, $def_post, $type) | |
353 | = @$item{qw (name value macro default pre post def_pre def_post type)}; | |
354 | $value = $name unless defined $value; | |
355 | $macro = $self->macro_from_name($item) unless defined $macro; | |
356 | # "#if 1" is true to a C pre-processor | |
357 | $macro = 1 if !defined $macro or $macro eq ''; | |
358 | $indent = ' ' x ($indent || 6); | |
359 | unless (defined $type) { | |
360 | # use Data::Dumper; print STDERR Dumper ($item); | |
361 | confess "undef \$type"; | |
362 | } | |
363 | ||
364 | my $clause; | |
365 | ||
366 | ##ifdef thingy | |
367 | if (ref $macro) { | |
368 | $clause = $macro->[0]; | |
369 | } elsif ($macro ne "1") { | |
370 | $clause = "#ifdef $macro\n"; | |
371 | } | |
372 | ||
373 | # *iv_return = thingy; | |
374 | # return PERL_constant_ISIV; | |
375 | $clause | |
376 | .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, post=>$post, | |
377 | item=>$item}, ref $value ? @$value : $value); | |
378 | ||
379 | if (ref $macro or $macro ne "1") { | |
380 | ##else | |
381 | $clause .= "#else\n"; | |
382 | ||
383 | # return PERL_constant_NOTDEF; | |
384 | if (!defined $default) { | |
385 | my $notdef = $self->return_statement_for_notdef(); | |
386 | $clause .= "$indent$notdef\n" if defined $notdef; | |
387 | } else { | |
388 | my @default = ref $default ? @$default : $default; | |
389 | $type = shift @default; | |
390 | $clause .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, | |
391 | post=>$post, item=>$item}, @default); | |
392 | } | |
393 | ||
394 | ##endif | |
395 | if (ref $macro) { | |
396 | $clause .= $macro->[1]; | |
397 | } else { | |
398 | $clause .= "#endif\n"; | |
399 | } | |
400 | } | |
401 | return $clause; | |
402 | } | |
403 | ||
404 | sub match_clause { | |
405 | # $offset defined if we have checked an offset. | |
406 | my ($self, $args, $item) = @_; | |
407 | my ($offset, $indent) = @{$args}{qw(checked_at indent)}; | |
408 | $indent = ' ' x ($indent || 4); | |
409 | my $body = ''; | |
410 | my ($no, $yes, $either, $name, $inner_indent); | |
411 | if (ref $item eq 'ARRAY') { | |
412 | ($yes, $no) = @$item; | |
413 | $either = $yes || $no; | |
414 | confess "$item is $either expecting hashref in [0] || [1]" | |
415 | unless ref $either eq 'HASH'; | |
416 | $name = $either->{name}; | |
417 | } else { | |
418 | confess "$item->{name} has utf8 flag '$item->{utf8}', should be false" | |
419 | if $item->{utf8}; | |
420 | $name = $item->{name}; | |
421 | $inner_indent = $indent; | |
422 | } | |
423 | ||
424 | $body .= $self->memEQ_clause ({name => $name, checked_at => $offset, | |
425 | indent => length $indent}); | |
426 | # If we've been presented with an arrayref for $item, then the user string | |
427 | # contains in the range 128-255, and we need to check whether it was utf8 | |
428 | # (or not). | |
429 | # In the worst case we have two named constants, where one's name happens | |
430 | # encoded in UTF8 happens to be the same byte sequence as the second's | |
431 | # encoded in (say) ISO-8859-1. | |
432 | # In this case, $yes and $no both have item hashrefs. | |
433 | if ($yes) { | |
434 | $body .= $indent . " if (" . $self->is_utf8_param . ") {\n"; | |
435 | } elsif ($no) { | |
436 | $body .= $indent . " if (!" . $self->is_utf8_param . ") {\n"; | |
437 | } | |
438 | if ($either) { | |
439 | $body .= $self->return_clause ({indent=>4 + length $indent}, $either); | |
440 | if ($yes and $no) { | |
441 | $body .= $indent . " } else {\n"; | |
442 | $body .= $self->return_clause ({indent=>4 + length $indent}, $no); | |
443 | } | |
444 | $body .= $indent . " }\n"; | |
445 | } else { | |
446 | $body .= $self->return_clause ({indent=>2 + length $indent}, $item); | |
447 | } | |
448 | $body .= $indent . "}\n"; | |
449 | } | |
450 | ||
451 | ||
452 | =item switch_clause arg_hashref, NAMELEN, ITEMHASH, ITEM... | |
453 | ||
454 | An internal method to generate a suitable C<switch> clause, called by | |
455 | C<C_constant> I<ITEM>s are in the hash ref format as given in the description | |
456 | of C<C_constant>, and must all have the names of the same length, given by | |
457 | I<NAMELEN>. I<ITEMHASH> is a reference to a hash, keyed by name, values being | |
458 | the hashrefs in the I<ITEM> list. (No parameters are modified, and there can | |
459 | be keys in the I<ITEMHASH> that are not in the list of I<ITEM>s without | |
460 | causing problems - the hash is passed in to save generating it afresh for | |
461 | each call). | |
462 | ||
463 | =cut | |
464 | ||
465 | sub switch_clause { | |
466 | my ($self, $args, $namelen, $items, @items) = @_; | |
467 | my ($indent, $comment) = @{$args}{qw(indent comment)}; | |
468 | $indent = ' ' x ($indent || 2); | |
469 | ||
470 | local $Text::Wrap::huge = 'overflow'; | |
471 | local $Text::Wrap::columns = 80; | |
472 | ||
473 | my @names = sort map {$_->{name}} @items; | |
474 | my $leader = $indent . '/* '; | |
475 | my $follower = ' ' x length $leader; | |
476 | my $body = $indent . "/* Names all of length $namelen. */\n"; | |
477 | if (defined $comment) { | |
478 | $body = wrap ($leader, $follower, $comment) . "\n"; | |
479 | $leader = $follower; | |
480 | } | |
481 | my @safe_names = @names; | |
482 | foreach (@safe_names) { | |
483 | confess sprintf "Name '$_' is length %d, not $namelen", length | |
484 | unless length == $namelen; | |
485 | # Argh. 5.6.1 | |
486 | # next unless tr/A-Za-z0-9_//c; | |
487 | next if tr/A-Za-z0-9_// == length; | |
488 | $_ = '"' . perl_stringify ($_) . '"'; | |
489 | # Ensure that the enclosing C comment doesn't end | |
490 | # by turning */ into *" . "/ | |
491 | s!\*\/!\*"."/!gs; | |
492 | # gcc -Wall doesn't like finding /* inside a comment | |
493 | s!\/\*!/"."\*!gs; | |
494 | } | |
495 | $body .= wrap ($leader, $follower, join (" ", @safe_names) . " */") . "\n"; | |
496 | # Figure out what to switch on. | |
497 | # (RMS, Spread of jump table, Position, Hashref) | |
498 | my @best = (1e38, ~0); | |
499 | # Prefer the last character over the others. (As it lets us shorten the | |
500 | # memEQ clause at no cost). | |
501 | foreach my $i ($namelen - 1, 0 .. ($namelen - 2)) { | |
502 | my ($min, $max) = (~0, 0); | |
503 | my %spread; | |
504 | if ($is_perl56) { | |
505 | # Need proper Unicode preserving hash keys for bytes in range 128-255 | |
506 | # here too, for some reason. grr 5.6.1 yet again. | |
507 | tie %spread, 'ExtUtils::Constant::Aaargh56Hash'; | |
508 | } | |
509 | foreach (@names) { | |
510 | my $char = substr $_, $i, 1; | |
511 | my $ord = ord $char; | |
512 | confess "char $ord is out of range" if $ord > 255; | |
513 | $max = $ord if $ord > $max; | |
514 | $min = $ord if $ord < $min; | |
515 | push @{$spread{$char}}, $_; | |
516 | # warn "$_ $char"; | |
517 | } | |
518 | # I'm going to pick the character to split on that minimises the root | |
519 | # mean square of the number of names in each case. Normally this should | |
520 | # be the one with the most keys, but it may pick a 7 where the 8 has | |
521 | # one long linear search. I'm not sure if RMS or just sum of squares is | |
522 | # actually better. | |
523 | # $max and $min are for the tie-breaker if the root mean squares match. | |
524 | # Assuming that the compiler may be building a jump table for the | |
525 | # switch() then try to minimise the size of that jump table. | |
526 | # Finally use < not <= so that if it still ties the earliest part of | |
527 | # the string wins. Because if that passes but the memEQ fails, it may | |
528 | # only need the start of the string to bin the choice. | |
529 | # I think. But I'm micro-optimising. :-) | |
530 | # OK. Trump that. Now favour the last character of the string, before the | |
531 | # rest. | |
532 | my $ss; | |
533 | $ss += @$_ * @$_ foreach values %spread; | |
534 | my $rms = sqrt ($ss / keys %spread); | |
535 | if ($rms < $best[0] || ($rms == $best[0] && ($max - $min) < $best[1])) { | |
536 | @best = ($rms, $max - $min, $i, \%spread); | |
537 | } | |
538 | } | |
539 | confess "Internal error. Failed to pick a switch point for @names" | |
540 | unless defined $best[2]; | |
541 | # use Data::Dumper; print Dumper (@best); | |
542 | my ($offset, $best) = @best[2,3]; | |
543 | $body .= $indent . "/* Offset $offset gives the best switch position. */\n"; | |
544 | ||
545 | my $do_front_chop = $offset == 0 && $namelen > 2; | |
546 | if ($do_front_chop) { | |
547 | $body .= $indent . "switch (*" . $self->name_param() . "++) {\n"; | |
548 | } else { | |
549 | $body .= $indent . "switch (" . $self->name_param() . "[$offset]) {\n"; | |
550 | } | |
551 | foreach my $char (sort keys %$best) { | |
552 | confess sprintf "'$char' is %d bytes long, not 1", length $char | |
553 | if length ($char) != 1; | |
554 | confess sprintf "char %#X is out of range", ord $char if ord ($char) > 255; | |
555 | $body .= $indent . "case '" . C_stringify ($char) . "':\n"; | |
556 | foreach my $thisone (sort { | |
557 | # Deal with the case of an item actually being an array ref to 1 or 2 | |
558 | # hashrefs. Don't assign to $a or $b, as they're aliases to the orignal | |
559 | my $l = ref $a eq 'ARRAY' ? ($a->[0] || $->[1]) : $a; | |
560 | my $r = ref $b eq 'ARRAY' ? ($b->[0] || $->[1]) : $b; | |
561 | # Sort by weight first | |
562 | ($r->{weight} || 0) <=> ($l->{weight} || 0) | |
563 | # Sort equal weights by name | |
564 | or $l->{name} cmp $r->{name}} | |
565 | # If this looks evil, maybe it is. $items is a | |
566 | # hashref, and we're doing a hash slice on it | |
567 | @{$items}{@{$best->{$char}}}) { | |
568 | # warn "You are here"; | |
569 | if ($do_front_chop) { | |
570 | $body .= $self->match_clause ({indent => 2 + length $indent, | |
571 | checked_at => \$char}, $thisone); | |
572 | } else { | |
573 | $body .= $self->match_clause ({indent => 2 + length $indent, | |
574 | checked_at => $offset}, $thisone); | |
575 | } | |
576 | } | |
577 | $body .= $indent . " break;\n"; | |
578 | } | |
579 | $body .= $indent . "}\n"; | |
580 | return $body; | |
581 | } | |
582 | ||
583 | sub C_constant_return_type { | |
584 | "static int"; | |
585 | } | |
586 | ||
587 | sub C_constant_prefix_param { | |
588 | ''; | |
589 | } | |
590 | ||
591 | sub C_constant_prefix_param_defintion { | |
592 | ''; | |
593 | } | |
594 | ||
595 | sub name_param_definition { | |
596 | "const char *" . $_[0]->name_param; | |
597 | } | |
598 | ||
599 | sub namelen_param { | |
600 | 'len'; | |
601 | } | |
602 | ||
603 | sub namelen_param_definition { | |
604 | 'size_t ' . $_[0]->namelen_param; | |
605 | } | |
606 | ||
607 | sub C_constant_other_params { | |
608 | ''; | |
609 | } | |
610 | ||
611 | sub C_constant_other_params_defintion { | |
612 | ''; | |
613 | } | |
614 | ||
615 | =item params WHAT | |
616 | ||
617 | An "internal" method, subject to change, currently called to allow an | |
618 | overriding class to cache information that will then be passed into all | |
619 | the C<*param*> calls. (Yes, having to read the source to make sense of this is | |
620 | considered a known bug). I<WHAT> is be a hashref of types the constant | |
621 | function will return. In ExtUtils::Constant::XS this method is used to | |
622 | returns a hashref keyed IV NV PV SV to show which combination of pointers will | |
623 | be needed in the C argument list generated by | |
624 | C_constant_other_params_definition and C_constant_other_params | |
625 | ||
626 | =cut | |
627 | ||
628 | sub params { | |
629 | ''; | |
630 | } | |
631 | ||
632 | ||
633 | =item dogfood arg_hashref, ITEM... | |
634 | ||
635 | An internal function to generate the embedded perl code that will regenerate | |
636 | the constant subroutines. Parameters are the same as for C_constant. | |
637 | ||
638 | Currently the base class does nothing and returns an empty string. | |
639 | ||
640 | =cut | |
641 | ||
642 | sub dogfood { | |
643 | '' | |
644 | } | |
645 | ||
646 | =item C_constant arg_hashref, ITEM... | |
647 | ||
648 | A function that returns a B<list> of C subroutine definitions that return | |
649 | the value and type of constants when passed the name by the XS wrapper. | |
650 | I<ITEM...> gives a list of constant names. Each can either be a string, | |
651 | which is taken as a C macro name, or a reference to a hash with the following | |
652 | keys | |
653 | ||
654 | =over 8 | |
655 | ||
656 | =item name | |
657 | ||
658 | The name of the constant, as seen by the perl code. | |
659 | ||
660 | =item type | |
661 | ||
662 | The type of the constant (I<IV>, I<NV> etc) | |
663 | ||
664 | =item value | |
665 | ||
666 | A C expression for the value of the constant, or a list of C expressions if | |
667 | the type is aggregate. This defaults to the I<name> if not given. | |
668 | ||
669 | =item macro | |
670 | ||
671 | The C pre-processor macro to use in the C<#ifdef>. This defaults to the | |
672 | I<name>, and is mainly used if I<value> is an C<enum>. If a reference an | |
673 | array is passed then the first element is used in place of the C<#ifdef> | |
674 | line, and the second element in place of the C<#endif>. This allows | |
675 | pre-processor constructions such as | |
676 | ||
677 | #if defined (foo) | |
678 | #if !defined (bar) | |
679 | ... | |
680 | #endif | |
681 | #endif | |
682 | ||
683 | to be used to determine if a constant is to be defined. | |
684 | ||
685 | A "macro" 1 signals that the constant is always defined, so the C<#if>/C<#endif> | |
686 | test is omitted. | |
687 | ||
688 | =item default | |
689 | ||
690 | Default value to use (instead of C<croak>ing with "your vendor has not | |
691 | defined...") to return if the macro isn't defined. Specify a reference to | |
692 | an array with type followed by value(s). | |
693 | ||
694 | =item pre | |
695 | ||
696 | C code to use before the assignment of the value of the constant. This allows | |
697 | you to use temporary variables to extract a value from part of a C<struct> | |
698 | and return this as I<value>. This C code is places at the start of a block, | |
699 | so you can declare variables in it. | |
700 | ||
701 | =item post | |
702 | ||
703 | C code to place between the assignment of value (to a temporary) and the | |
704 | return from the function. This allows you to clear up anything in I<pre>. | |
705 | Rarely needed. | |
706 | ||
707 | =item def_pre | |
708 | ||
709 | =item def_post | |
710 | ||
711 | Equivalents of I<pre> and I<post> for the default value. | |
712 | ||
713 | =item utf8 | |
714 | ||
715 | Generated internally. Is zero or undefined if name is 7 bit ASCII, | |
716 | "no" if the name is 8 bit (and so should only match if SvUTF8() is false), | |
717 | "yes" if the name is utf8 encoded. | |
718 | ||
719 | The internals automatically clone any name with characters 128-255 but none | |
720 | 256+ (ie one that could be either in bytes or utf8) into a second entry | |
721 | which is utf8 encoded. | |
722 | ||
723 | =item weight | |
724 | ||
725 | Optional sorting weight for names, to determine the order of | |
726 | linear testing when multiple names fall in the same case of a switch clause. | |
727 | Higher comes earlier, undefined defaults to zero. | |
728 | ||
729 | =back | |
730 | ||
731 | In the argument hashref, I<package> is the name of the package, and is only | |
732 | used in comments inside the generated C code. I<subname> defaults to | |
733 | C<constant> if undefined. | |
734 | ||
735 | I<default_type> is the type returned by C<ITEM>s that don't specify their | |
736 | type. It defaults to the value of C<default_type()>. I<types> should be given | |
737 | either as a comma separated list of types that the C subroutine I<subname> | |
738 | will generate or as a reference to a hash. I<default_type> will be added to | |
739 | the list if not present, as will any types given in the list of I<ITEM>s. The | |
740 | resultant list should be the same list of types that C<XS_constant> is | |
741 | given. [Otherwise C<XS_constant> and C<C_constant> may differ in the number of | |
742 | parameters to the constant function. I<indent> is currently unused and | |
743 | ignored. In future it may be used to pass in information used to change the C | |
744 | indentation style used.] The best way to maintain consistency is to pass in a | |
745 | hash reference and let this function update it. | |
746 | ||
747 | I<breakout> governs when child functions of I<subname> are generated. If there | |
748 | are I<breakout> or more I<ITEM>s with the same length of name, then the code | |
749 | to switch between them is placed into a function named I<subname>_I<len>, for | |
750 | example C<constant_5> for names 5 characters long. The default I<breakout> is | |
751 | 3. A single C<ITEM> is always inlined. | |
752 | ||
753 | =cut | |
754 | ||
755 | # The parameter now BREAKOUT was previously documented as: | |
756 | # | |
757 | # I<NAMELEN> if defined signals that all the I<name>s of the I<ITEM>s are of | |
758 | # this length, and that the constant name passed in by perl is checked and | |
759 | # also of this length. It is used during recursion, and should be C<undef> | |
760 | # unless the caller has checked all the lengths during code generation, and | |
761 | # the generated subroutine is only to be called with a name of this length. | |
762 | # | |
763 | # As you can see it now performs this function during recursion by being a | |
764 | # scalar reference. | |
765 | ||
766 | sub C_constant { | |
767 | my ($self, $args, @items) = @_; | |
768 | my ($package, $subname, $default_type, $what, $indent, $breakout) = | |
769 | @{$args}{qw(package subname default_type types indent breakout)}; | |
770 | $package ||= 'Foo'; | |
771 | $subname ||= 'constant'; | |
772 | # I'm not using this. But a hashref could be used for full formatting without | |
773 | # breaking this API | |
774 | # $indent ||= 0; | |
775 | ||
776 | my ($namelen, $items); | |
777 | if (ref $breakout) { | |
778 | # We are called recursively. We trust @items to be normalised, $what to | |
779 | # be a hashref, and pinch %$items from our parent to save recalculation. | |
780 | ($namelen, $items) = @$breakout; | |
781 | } else { | |
782 | if ($is_perl56) { | |
783 | # Need proper Unicode preserving hash keys. | |
784 | require ExtUtils::Constant::Aaargh56Hash; | |
785 | $items = {}; | |
786 | tie %$items, 'ExtUtils::Constant::Aaargh56Hash'; | |
787 | } | |
788 | $breakout ||= 3; | |
789 | $default_type ||= $self->default_type(); | |
790 | if (!ref $what) { | |
791 | # Convert line of the form IV,UV,NV to hash | |
792 | $what = {map {$_ => 1} split /,\s*/, ($what || '')}; | |
793 | # Figure out what types we're dealing with, and assign all unknowns to the | |
794 | # default type | |
795 | } | |
796 | my @new_items; | |
797 | foreach my $orig (@items) { | |
798 | my ($name, $item); | |
799 | if (ref $orig) { | |
800 | # Make a copy which is a normalised version of the ref passed in. | |
801 | $name = $orig->{name}; | |
802 | my ($type, $macro, $value) = @$orig{qw (type macro value)}; | |
803 | $type ||= $default_type; | |
804 | $what->{$type} = 1; | |
805 | $item = {name=>$name, type=>$type}; | |
806 | ||
807 | undef $macro if defined $macro and $macro eq $name; | |
808 | $item->{macro} = $macro if defined $macro; | |
809 | undef $value if defined $value and $value eq $name; | |
810 | $item->{value} = $value if defined $value; | |
811 | foreach my $key (qw(default pre post def_pre def_post weight)) { | |
812 | my $value = $orig->{$key}; | |
813 | $item->{$key} = $value if defined $value; | |
814 | # warn "$key $value"; | |
815 | } | |
816 | } else { | |
817 | $name = $orig; | |
818 | $item = {name=>$name, type=>$default_type}; | |
819 | $what->{$default_type} = 1; | |
820 | } | |
821 | warn +(ref ($self) || $self) | |
822 | . "doesn't know how to handle values of type $_ used in macro $name" | |
823 | unless $self->valid_type ($item->{type}); | |
824 | # tr///c is broken on 5.6.1 for utf8, so my original tr/\0-\177//c | |
825 | # doesn't work. Upgrade to 5.8 | |
826 | # if ($name !~ tr/\0-\177//c || $] < 5.005_50) { | |
827 | if ($name =~ tr/\0-\177// == length $name || $] < 5.005_50) { | |
828 | # No characters outside 7 bit ASCII. | |
829 | if (exists $items->{$name}) { | |
830 | die "Multiple definitions for macro $name"; | |
831 | } | |
832 | $items->{$name} = $item; | |
833 | } else { | |
834 | # No characters outside 8 bit. This is hardest. | |
835 | if (exists $items->{$name} and ref $items->{$name} ne 'ARRAY') { | |
836 | confess "Unexpected ASCII definition for macro $name"; | |
837 | } | |
838 | # Again, 5.6.1 tr broken, so s/5\.6.*/5\.8\.0/; | |
839 | # if ($name !~ tr/\0-\377//c) { | |
840 | if ($name =~ tr/\0-\377// == length $name) { | |
841 | # if ($] < 5.007) { | |
842 | # $name = pack "C*", unpack "U*", $name; | |
843 | # } | |
844 | $item->{utf8} = 'no'; | |
845 | $items->{$name}[1] = $item; | |
846 | push @new_items, $item; | |
847 | # Copy item, to create the utf8 variant. | |
848 | $item = {%$item}; | |
849 | } | |
850 | # Encode the name as utf8 bytes. | |
851 | unless ($is_perl56) { | |
852 | utf8::encode($name); | |
853 | } else { | |
854 | # warn "Was >$name< " . length ${name}; | |
855 | $name = pack 'C*', unpack 'C*', $name . pack 'U*'; | |
856 | # warn "Now '${name}' " . length ${name}; | |
857 | } | |
858 | if ($items->{$name}[0]) { | |
859 | die "Multiple definitions for macro $name"; | |
860 | } | |
861 | $item->{utf8} = 'yes'; | |
862 | $item->{name} = $name; | |
863 | $items->{$name}[0] = $item; | |
864 | # We have need for the utf8 flag. | |
865 | $what->{''} = 1; | |
866 | } | |
867 | push @new_items, $item; | |
868 | } | |
869 | @items = @new_items; | |
870 | # use Data::Dumper; print Dumper @items; | |
871 | } | |
872 | my $params = $self->params ($what); | |
873 | ||
874 | # Probably "static int" | |
875 | my ($body, @subs); | |
876 | $body = $self->C_constant_return_type($params) . "\n$subname (" | |
877 | # Eg "pTHX_ " | |
878 | . $self->C_constant_prefix_param_defintion($params) | |
879 | # Probably "const char *name" | |
880 | . $self->name_param_definition($params); | |
881 | # Something like ", STRLEN len" | |
882 | $body .= ", " . $self->namelen_param_definition($params) | |
883 | unless defined $namelen; | |
884 | $body .= $self->C_constant_other_params_defintion($params); | |
885 | $body .= ") {\n"; | |
886 | ||
887 | if (defined $namelen) { | |
888 | # We are a child subroutine. Print the simple description | |
889 | my $comment = 'When generated this function returned values for the list' | |
890 | . ' of names given here. However, subsequent manual editing may have' | |
891 | . ' added or removed some.'; | |
892 | $body .= $self->switch_clause ({indent=>2, comment=>$comment}, | |
893 | $namelen, $items, @items); | |
894 | } else { | |
895 | # We are the top level. | |
896 | $body .= " /* Initially switch on the length of the name. */\n"; | |
897 | $body .= $self->dogfood ({package => $package, subname => $subname, | |
898 | default_type => $default_type, what => $what, | |
899 | indent => $indent, breakout => $breakout}, | |
900 | @items); | |
901 | $body .= ' switch ('.$self->namelen_param().") {\n"; | |
902 | # Need to group names of the same length | |
903 | my @by_length; | |
904 | foreach (@items) { | |
905 | push @{$by_length[length $_->{name}]}, $_; | |
906 | } | |
907 | foreach my $i (0 .. $#by_length) { | |
908 | next unless $by_length[$i]; # None of this length | |
909 | $body .= " case $i:\n"; | |
910 | if (@{$by_length[$i]} == 1) { | |
911 | my $only_thing = $by_length[$i]->[0]; | |
912 | if ($only_thing->{utf8}) { | |
913 | if ($only_thing->{utf8} eq 'yes') { | |
914 | # With utf8 on flag item is passed in element 0 | |
915 | $body .= $self->match_clause (undef, [$only_thing]); | |
916 | } else { | |
917 | # With utf8 off flag item is passed in element 1 | |
918 | $body .= $self->match_clause (undef, [undef, $only_thing]); | |
919 | } | |
920 | } else { | |
921 | $body .= $self->match_clause (undef, $only_thing); | |
922 | } | |
923 | } elsif (@{$by_length[$i]} < $breakout) { | |
924 | $body .= $self->switch_clause ({indent=>4}, | |
925 | $i, $items, @{$by_length[$i]}); | |
926 | } else { | |
927 | # Only use the minimal set of parameters actually needed by the types | |
928 | # of the names of this length. | |
929 | my $what = {}; | |
930 | foreach (@{$by_length[$i]}) { | |
931 | $what->{$_->{type}} = 1; | |
932 | $what->{''} = 1 if $_->{utf8}; | |
933 | } | |
934 | $params = $self->params ($what); | |
935 | push @subs, $self->C_constant ({package=>$package, | |
936 | subname=>"${subname}_$i", | |
937 | default_type => $default_type, | |
938 | types => $what, indent => $indent, | |
939 | breakout => [$i, $items]}, | |
940 | @{$by_length[$i]}); | |
941 | $body .= " return ${subname}_$i (" | |
942 | # Eg "aTHX_ " | |
943 | . $self->C_constant_prefix_param($params) | |
944 | # Probably "name" | |
945 | . $self->name_param($params); | |
946 | $body .= $self->C_constant_other_params($params); | |
947 | $body .= ");\n"; | |
948 | } | |
949 | $body .= " break;\n"; | |
950 | } | |
951 | $body .= " }\n"; | |
952 | } | |
953 | my $notfound = $self->return_statement_for_notfound(); | |
954 | $body .= " $notfound\n" if $notfound; | |
955 | $body .= "}\n"; | |
956 | return (@subs, $body); | |
957 | } | |
958 | ||
959 | 1; | |
960 | __END__ | |
961 | ||
962 | =back | |
963 | ||
964 | =head1 BUGS | |
965 | ||
966 | Not everything is documented yet. | |
967 | ||
968 | Probably others. | |
969 | ||
970 | =head1 AUTHOR | |
971 | ||
972 | Nicholas Clark <nick@ccl4.org> based on the code in C<h2xs> by Larry Wall and | |
973 | others |