[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / site_perl / 5.8.0 / Inline / C / ParseRegExp.pm

package Inline::C::ParseRegExp;
use strict;
use Carp;

sub register {
    {
     extends => [qw(C)],
     overrides => [qw(get_parser)],
    }
}

sub get_parser {
    bless {}, 'Inline::C::ParseRegExp'
}

sub code {
    my($self,$code) = @_;
    
    # These regular expressions were derived from Regexp::Common v0.01.
    my $RE_comment_C   = q{(?:(?:\/\*)(?:(?:(?!\*\/)[\s\S])*)(?:\*\/))};
    my $RE_comment_Cpp = q{(?:\/\*(?:(?!\*\/)[\s\S])*\*\/|\/\/[^\n]*\n)};
    my $RE_quoted      = (q{(?:(?:\")(?:[^\\\"]*(?:\\.[^\\\"]*)*)(?:\")}
                         .q{|(?:\')(?:[^\\\']*(?:\\.[^\\\']*)*)(?:\'))});
    our $RE_balanced_brackets =
        qr'(?:[{]((?:(?>[^{}]+)|(??{$RE_balanced_brackets}))*)[}])';
    our $RE_balanced_parens   =
        qr'(?:[(]((?:(?>[^()]+)|(??{$RE_balanced_parens}))*)[)])';

    # First, we crush out anything potentially confusing.
    # The order of these _does_ matter.
    $code =~ s/$RE_comment_C/ /go;
    $code =~ s/$RE_comment_Cpp/ /go;
    $code =~ s/^\#.*(\\\n.*)*//mgo;
    $code =~ s/$RE_quoted/\"\"/go;
    $code =~ s/$RE_balanced_brackets/{ }/go;
    
    $self->{_the_code_most_recently_parsed} = $code; # Simplifies debugging.

    my $normalize_type = sub {
	# Normalize a type for lookup in a typemap.
        my($type) = @_;

        # Remove "extern".
        # But keep "static", "inline", "typedef", etc,
        #  to cause desirable typemap misses.
        $type =~ s/\bextern\b//g;

        # Whitespace: only single spaces, none leading or trailing.
        $type =~ s/\s+/ /g;
        $type =~ s/^\s//; $type =~ s/\s$//;

        # Adjacent "derivative characters" are not separated by whitespace,
        # but _are_ separated from the adjoining text.
        # [ Is really only * (and not ()[]) needed??? ]
        $type =~ s/\*\s\*/\*\*/g;
        $type =~ s/(?<=[^ \*])\*/ \*/g;

        return $type;
    };

    # The decision of what is an acceptable declaration was originally
    # derived from Inline::C::grammar.pm version 0.30 (Inline 0.43).

    my $re_plausible_place_to_begin_a_declaration = qr {
	# The beginning of a line, possibly indented.
	# (Accepting indentation allows for C code to be aligned with
	#  its surrounding perl, and for backwards compatibility with
	#  Inline 0.43).
	(?m: ^ ) \s*
    }xo;

    # Instead of using \s , we dont tolerate blank lines.
    # This matches user expectation better than allowing arbitrary
    # vertical whitespace.
    my $sp = qr{[ \t]|\n(?![ \t]*\n)};

    my $re_type = qr {( 
			(?: \w+ $sp* )+? # words
			(?: \*  $sp* )*  # stars
			)}xo;

    my $re_identifier = qr{ (\w+) $sp* }xo;

    while($code =~ m{
	$re_plausible_place_to_begin_a_declaration
        ( $re_type $re_identifier $RE_balanced_parens $sp* (\;|\{) )
       }xgo)
    {
        my($type, $identifier, $args, $what) = ($2,$3,$4,$5);
        $args = "" if $args =~ /^\s+$/;

        my $is_decl     = $what eq ';';
        my $function    = $identifier;
        my $return_type = &$normalize_type($type);
	my @arguments   = split ',', $args;

        goto RESYNC if $is_decl && !$self->{data}{AUTOWRAP};
        goto RESYNC if $self->{data}{done}{$function};
        goto RESYNC if !defined
            $self->{data}{typeconv}{valid_rtypes}{$return_type};
        
        my(@arg_names,@arg_types);
	my $dummy_name = 'arg1';

	foreach my $arg (@arguments) {

	    if(my($type, $identifier) =
	       $arg =~ /^\s*$re_type(?:$re_identifier)?\s*$/o)
	    {
		my $arg_name = $identifier;
		my $arg_type = &$normalize_type($type);

		if(!defined $arg_name) {
		    goto RESYNC if !$is_decl;
		    $arg_name = $dummy_name++;
		}
		goto RESYNC if !defined
		    $self->{data}{typeconv}{valid_types}{$arg_type};
		
		push(@arg_names,$arg_name);
		push(@arg_types,$arg_type);
	    }
	    elsif($arg =~ /^\s*\.\.\.\s*$/) {
		push(@arg_names,'...');
		push(@arg_types,'...');
	    }
	    else {
		goto RESYNC;
	    }
	}

        # Commit.
        push @{$self->{data}{functions}}, $function;
        $self->{data}{function}{$function}{return_type}= $return_type; 
        $self->{data}{function}{$function}{arg_names} = [@arg_names];
        $self->{data}{function}{$function}{arg_types} = [@arg_types];
        $self->{data}{done}{$function} = 1;

        next;

      RESYNC:  # Skip the rest of the current line, and continue.
        $code =~ /\G[^\n]*\n/gc;
    }
 
   return 1;  # We never fail.
}

1;

__DATA__

=head1 NAME

Inline::C::ParseRegExp - The New and Improved Inline::C Parser

=head1 SYNOPSIS

    use Inline C => DATA =>
               USING => ParseRegExp;

=head1 DESCRIPTION

This module is a much faster version of Inline::C's Parse::RecDescent
parser. It is based on regular expressions instead.

=head2 AUTHOR

Mitchell N Charity <mcharity@vendian.org>

=head1 COPYRIGHT

Copyright (c) 2002. Brian Ingerson. All rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

See http://www.perl.com/perl/misc/Artistic.html

=cut
Commit	Line	Data
86530b38 AT	1	package Inline::C::ParseRegExp;
	2	use strict;
	3	use Carp;
	4
	5	sub register {
	6	{
	7	extends => [qw(C)],
	8	overrides => [qw(get_parser)],
	9	}
	10	}
	11
	12	sub get_parser {
	13	bless {}, 'Inline::C::ParseRegExp'
	14	}
	15
	16	sub code {
	17	my($self,$code) = @_;
	18
	19	# These regular expressions were derived from Regexp::Common v0.01.
	20	my $RE_comment_C = q{(?:(?:\/\)(?:(?:(?!\\/)[\s\S]))(?:\\/))};
	21	my $RE_comment_Cpp = q{(?:\/\(?:(?!\\/)[\s\S])\\/\|\/\/[^\n]*\n)};
	22	my $RE_quoted = (q{(?:(?:\")(?:[^\\\"](?:\\.[^\\\"])*)(?:\")}
	23	.q{\|(?:\')(?:[^\\\'](?:\\.[^\\\'])*)(?:\'))});
	24	our $RE_balanced_brackets =
	25	qr'(?:[{]((?:(?>[^{}]+)\|(??{$RE_balanced_brackets}))*)[}])';
	26	our $RE_balanced_parens =
	27	qr'(?:[(]((?:(?>[^()]+)\|(??{$RE_balanced_parens}))*)[)])';
	28
	29	# First, we crush out anything potentially confusing.
	30	# The order of these _does_ matter.
	31	$code =~ s/$RE_comment_C/ /go;
	32	$code =~ s/$RE_comment_Cpp/ /go;
	33	$code =~ s/^\#.(\\\n.)*//mgo;
	34	$code =~ s/$RE_quoted/\"\"/go;
	35	$code =~ s/$RE_balanced_brackets/{ }/go;
	36
	37	$self->{_the_code_most_recently_parsed} = $code; # Simplifies debugging.
	38
	39	my $normalize_type = sub {
	40	# Normalize a type for lookup in a typemap.
	41	my($type) = @_;
	42
	43	# Remove "extern".
	44	# But keep "static", "inline", "typedef", etc,
	45	# to cause desirable typemap misses.
	46	$type =~ s/\bextern\b//g;
	47
	48	# Whitespace: only single spaces, none leading or trailing.
	49	$type =~ s/\s+/ /g;
	50	$type =~ s/^\s//; $type =~ s/\s$//;
	51
	52	# Adjacent "derivative characters" are not separated by whitespace,
	53	# but _are_ separated from the adjoining text.
	54	# [ Is really only * (and not ()[]) needed??? ]
	55	$type =~ s/\\s\/\\/g;
	56	$type =~ s/(?<=[^ \])\/ \*/g;
	57
	58	return $type;
	59	};
	60
	61	# The decision of what is an acceptable declaration was originally
	62	# derived from Inline::C::grammar.pm version 0.30 (Inline 0.43).
	63
	64	my $re_plausible_place_to_begin_a_declaration = qr {
65	# The beginning of a line, possibly indented.
66	# (Accepting indentation allows for C code to be aligned with
67	# its surrounding perl, and for backwards compatibility with
68	# Inline 0.43).
69	(?m: ^ ) \s*
70	}xo;
71
72	# Instead of using \s , we dont tolerate blank lines.
73	# This matches user expectation better than allowing arbitrary
74	# vertical whitespace.
75	my $sp = qr{[ \t]\|\n(?![ \t]*\n)};
76
77	my $re_type = qr {(
78	(?: \w+ $sp* )+? # words
79	(?: \* $sp* )* # stars
80	)}xo;
81
82	my $re_identifier = qr{ (\w+) $sp* }xo;
83
84	while($code =~ m{
85	$re_plausible_place_to_begin_a_declaration
86	( $re_type $re_identifier $RE_balanced_parens $sp* (\;\|\{) )
87	}xgo)
88	{
89	my($type, $identifier, $args, $what) = ($2,$3,$4,$5);
90	$args = "" if $args =~ /^\s+$/;
91
92	my $is_decl = $what eq ';';
93	my $function = $identifier;
94	my $return_type = &$normalize_type($type);
95	my @arguments = split ',', $args;
96
97	goto RESYNC if $is_decl && !$self->{data}{AUTOWRAP};
98	goto RESYNC if $self->{data}{done}{$function};
99	goto RESYNC if !defined
100	$self->{data}{typeconv}{valid_rtypes}{$return_type};
101
102	my(@arg_names,@arg_types);
103	my $dummy_name = 'arg1';
104
105	foreach my $arg (@arguments) {
106
107	if(my($type, $identifier) =
108	$arg =~ /^\s$re_type(?:$re_identifier)?\s$/o)
109	{
110	my $arg_name = $identifier;
111	my $arg_type = &$normalize_type($type);
112
113	if(!defined $arg_name) {
114	goto RESYNC if !$is_decl;
115	$arg_name = $dummy_name++;
116	}
117	goto RESYNC if !defined
118	$self->{data}{typeconv}{valid_types}{$arg_type};
119
120	push(@arg_names,$arg_name);
121	push(@arg_types,$arg_type);
122	}
123	elsif($arg =~ /^\s\.\.\.\s$/) {
124	push(@arg_names,'...');
125	push(@arg_types,'...');
126	}
127	else {
128	goto RESYNC;
129	}
130	}
131
132	# Commit.
133	push @{$self->{data}{functions}}, $function;
134	$self->{data}{function}{$function}{return_type}= $return_type;
135	$self->{data}{function}{$function}{arg_names} = [@arg_names];
136	$self->{data}{function}{$function}{arg_types} = [@arg_types];
137	$self->{data}{done}{$function} = 1;
138
139	next;
140
141	RESYNC: # Skip the rest of the current line, and continue.
142	$code =~ /\G[^\n]*\n/gc;
143	}
144
145	return 1; # We never fail.
146	}
147
148	1;
149
150	__DATA__
151
152	=head1 NAME
153
154	Inline::C::ParseRegExp - The New and Improved Inline::C Parser
155
156	=head1 SYNOPSIS
157
158	use Inline C => DATA =>
159	USING => ParseRegExp;
160
161	=head1 DESCRIPTION
162
163	This module is a much faster version of Inline::C's Parse::RecDescent
164	parser. It is based on regular expressions instead.
165
166	=head2 AUTHOR
167
168	Mitchell N Charity <mcharity@vendian.org>
169
170	=head1 COPYRIGHT
171
172	Copyright (c) 2002. Brian Ingerson. All rights reserved.
173
174	This program is free software; you can redistribute it and/or modify it
175	under the same terms as Perl itself.
176
177	See http://www.perl.com/perl/misc/Artistic.html
178
179	=cut