Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | package Inline::C::ParseRegExp; |
2 | use strict; | |
3 | use Carp; | |
4 | ||
5 | sub register { | |
6 | { | |
7 | extends => [qw(C)], | |
8 | overrides => [qw(get_parser)], | |
9 | } | |
10 | } | |
11 | ||
12 | sub get_parser { | |
13 | bless {}, 'Inline::C::ParseRegExp' | |
14 | } | |
15 | ||
16 | sub code { | |
17 | my($self,$code) = @_; | |
18 | ||
19 | # These regular expressions were derived from Regexp::Common v0.01. | |
20 | my $RE_comment_C = q{(?:(?:\/\*)(?:(?:(?!\*\/)[\s\S])*)(?:\*\/))}; | |
21 | my $RE_comment_Cpp = q{(?:\/\*(?:(?!\*\/)[\s\S])*\*\/|\/\/[^\n]*\n)}; | |
22 | my $RE_quoted = (q{(?:(?:\")(?:[^\\\"]*(?:\\.[^\\\"]*)*)(?:\")} | |
23 | .q{|(?:\')(?:[^\\\']*(?:\\.[^\\\']*)*)(?:\'))}); | |
24 | our $RE_balanced_brackets = | |
25 | qr'(?:[{]((?:(?>[^{}]+)|(??{$RE_balanced_brackets}))*)[}])'; | |
26 | our $RE_balanced_parens = | |
27 | qr'(?:[(]((?:(?>[^()]+)|(??{$RE_balanced_parens}))*)[)])'; | |
28 | ||
29 | # First, we crush out anything potentially confusing. | |
30 | # The order of these _does_ matter. | |
31 | $code =~ s/$RE_comment_C/ /go; | |
32 | $code =~ s/$RE_comment_Cpp/ /go; | |
33 | $code =~ s/^\#.*(\\\n.*)*//mgo; | |
34 | $code =~ s/$RE_quoted/\"\"/go; | |
35 | $code =~ s/$RE_balanced_brackets/{ }/go; | |
36 | ||
37 | $self->{_the_code_most_recently_parsed} = $code; # Simplifies debugging. | |
38 | ||
39 | my $normalize_type = sub { | |
40 | # Normalize a type for lookup in a typemap. | |
41 | my($type) = @_; | |
42 | ||
43 | # Remove "extern". | |
44 | # But keep "static", "inline", "typedef", etc, | |
45 | # to cause desirable typemap misses. | |
46 | $type =~ s/\bextern\b//g; | |
47 | ||
48 | # Whitespace: only single spaces, none leading or trailing. | |
49 | $type =~ s/\s+/ /g; | |
50 | $type =~ s/^\s//; $type =~ s/\s$//; | |
51 | ||
52 | # Adjacent "derivative characters" are not separated by whitespace, | |
53 | # but _are_ separated from the adjoining text. | |
54 | # [ Is really only * (and not ()[]) needed??? ] | |
55 | $type =~ s/\*\s\*/\*\*/g; | |
56 | $type =~ s/(?<=[^ \*])\*/ \*/g; | |
57 | ||
58 | return $type; | |
59 | }; | |
60 | ||
61 | # The decision of what is an acceptable declaration was originally | |
62 | # derived from Inline::C::grammar.pm version 0.30 (Inline 0.43). | |
63 | ||
64 | my $re_plausible_place_to_begin_a_declaration = qr { | |
65 | # The beginning of a line, possibly indented. | |
66 | # (Accepting indentation allows for C code to be aligned with | |
67 | # its surrounding perl, and for backwards compatibility with | |
68 | # Inline 0.43). | |
69 | (?m: ^ ) \s* | |
70 | }xo; | |
71 | ||
72 | # Instead of using \s , we dont tolerate blank lines. | |
73 | # This matches user expectation better than allowing arbitrary | |
74 | # vertical whitespace. | |
75 | my $sp = qr{[ \t]|\n(?![ \t]*\n)}; | |
76 | ||
77 | my $re_type = qr {( | |
78 | (?: \w+ $sp* )+? # words | |
79 | (?: \* $sp* )* # stars | |
80 | )}xo; | |
81 | ||
82 | my $re_identifier = qr{ (\w+) $sp* }xo; | |
83 | ||
84 | while($code =~ m{ | |
85 | $re_plausible_place_to_begin_a_declaration | |
86 | ( $re_type $re_identifier $RE_balanced_parens $sp* (\;|\{) ) | |
87 | }xgo) | |
88 | { | |
89 | my($type, $identifier, $args, $what) = ($2,$3,$4,$5); | |
90 | $args = "" if $args =~ /^\s+$/; | |
91 | ||
92 | my $is_decl = $what eq ';'; | |
93 | my $function = $identifier; | |
94 | my $return_type = &$normalize_type($type); | |
95 | my @arguments = split ',', $args; | |
96 | ||
97 | goto RESYNC if $is_decl && !$self->{data}{AUTOWRAP}; | |
98 | goto RESYNC if $self->{data}{done}{$function}; | |
99 | goto RESYNC if !defined | |
100 | $self->{data}{typeconv}{valid_rtypes}{$return_type}; | |
101 | ||
102 | my(@arg_names,@arg_types); | |
103 | my $dummy_name = 'arg1'; | |
104 | ||
105 | foreach my $arg (@arguments) { | |
106 | ||
107 | if(my($type, $identifier) = | |
108 | $arg =~ /^\s*$re_type(?:$re_identifier)?\s*$/o) | |
109 | { | |
110 | my $arg_name = $identifier; | |
111 | my $arg_type = &$normalize_type($type); | |
112 | ||
113 | if(!defined $arg_name) { | |
114 | goto RESYNC if !$is_decl; | |
115 | $arg_name = $dummy_name++; | |
116 | } | |
117 | goto RESYNC if !defined | |
118 | $self->{data}{typeconv}{valid_types}{$arg_type}; | |
119 | ||
120 | push(@arg_names,$arg_name); | |
121 | push(@arg_types,$arg_type); | |
122 | } | |
123 | elsif($arg =~ /^\s*\.\.\.\s*$/) { | |
124 | push(@arg_names,'...'); | |
125 | push(@arg_types,'...'); | |
126 | } | |
127 | else { | |
128 | goto RESYNC; | |
129 | } | |
130 | } | |
131 | ||
132 | # Commit. | |
133 | push @{$self->{data}{functions}}, $function; | |
134 | $self->{data}{function}{$function}{return_type}= $return_type; | |
135 | $self->{data}{function}{$function}{arg_names} = [@arg_names]; | |
136 | $self->{data}{function}{$function}{arg_types} = [@arg_types]; | |
137 | $self->{data}{done}{$function} = 1; | |
138 | ||
139 | next; | |
140 | ||
141 | RESYNC: # Skip the rest of the current line, and continue. | |
142 | $code =~ /\G[^\n]*\n/gc; | |
143 | } | |
144 | ||
145 | return 1; # We never fail. | |
146 | } | |
147 | ||
148 | 1; | |
149 | ||
150 | __DATA__ | |
151 | ||
152 | =head1 NAME | |
153 | ||
154 | Inline::C::ParseRegExp - The New and Improved Inline::C Parser | |
155 | ||
156 | =head1 SYNOPSIS | |
157 | ||
158 | use Inline C => DATA => | |
159 | USING => ParseRegExp; | |
160 | ||
161 | =head1 DESCRIPTION | |
162 | ||
163 | This module is a much faster version of Inline::C's Parse::RecDescent | |
164 | parser. It is based on regular expressions instead. | |
165 | ||
166 | =head2 AUTHOR | |
167 | ||
168 | Mitchell N Charity <mcharity@vendian.org> | |
169 | ||
170 | =head1 COPYRIGHT | |
171 | ||
172 | Copyright (c) 2002. Brian Ingerson. All rights reserved. | |
173 | ||
174 | This program is free software; you can redistribute it and/or modify it | |
175 | under the same terms as Perl itself. | |
176 | ||
177 | See http://www.perl.com/perl/misc/Artistic.html | |
178 | ||
179 | =cut |