Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /* Definitions for data structures callers pass the regex library. |
2 | ||
3 | Copyright (C) 1985, 1989-90 Free Software Foundation, Inc. | |
4 | ||
5 | This program is free software; you can redistribute it and/or modify | |
6 | it under the terms of the GNU General Public License as published by | |
7 | the Free Software Foundation; either version 2, or (at your option) | |
8 | any later version. | |
9 | ||
10 | This program is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | GNU General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU General Public License | |
16 | along with this program; if not, write to the Free Software | |
17 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ | |
18 | ||
19 | ||
20 | #ifndef __REGEXP_LIBRARY | |
21 | #define __REGEXP_LIBRARY | |
22 | ||
23 | /* Define number of parens for which we record the beginnings and ends. | |
24 | This affects how much space the `struct re_registers' type takes up. */ | |
25 | #ifndef RE_NREGS | |
26 | #define RE_NREGS 10 | |
27 | #endif | |
28 | ||
29 | #define BYTEWIDTH 8 | |
30 | ||
31 | ||
32 | /* Maximum number of duplicates an interval can allow. */ | |
33 | #define RE_DUP_MAX ((1 << 15) - 1) | |
34 | ||
35 | ||
36 | /* This defines the various regexp syntaxes. */ | |
37 | extern int obscure_syntax; | |
38 | ||
39 | ||
40 | /* The following bits are used in the obscure_syntax variable to choose among | |
41 | alternative regexp syntaxes. */ | |
42 | ||
43 | /* If this bit is set, plain parentheses serve as grouping, and backslash | |
44 | parentheses are needed for literal searching. | |
45 | If not set, backslash-parentheses are grouping, and plain parentheses | |
46 | are for literal searching. */ | |
47 | #define RE_NO_BK_PARENS 1 | |
48 | ||
49 | /* If this bit is set, plain | serves as the `or'-operator, and \| is a | |
50 | literal. | |
51 | If not set, \| serves as the `or'-operator, and | is a literal. */ | |
52 | #define RE_NO_BK_VBAR (1 << 1) | |
53 | ||
54 | /* If this bit is not set, plain + or ? serves as an operator, and \+, \? are | |
55 | literals. | |
56 | If set, \+, \? are operators and plain +, ? are literals. */ | |
57 | #define RE_BK_PLUS_QM (1 << 2) | |
58 | ||
59 | /* If this bit is set, | binds tighter than ^ or $. | |
60 | If not set, the contrary. */ | |
61 | #define RE_TIGHT_VBAR (1 << 3) | |
62 | ||
63 | /* If this bit is set, then treat newline as an OR operator. | |
64 | If not set, treat it as a normal character. */ | |
65 | #define RE_NEWLINE_OR (1 << 4) | |
66 | ||
67 | /* If this bit is not set, then special characters may act as normal | |
68 | characters in some contexts. Specifically, this applies to: | |
69 | ^ -- only special at the beginning, or after ( or |; | |
70 | $ -- only special at the end, or before ) or |; | |
71 | *, +, ? -- only special when not after the beginning, (, or |. | |
72 | If this bit is set, special characters (such as *, ^, and $) | |
73 | always have their special meaning regardless of the surrounding | |
74 | context. */ | |
75 | #define RE_CONTEXT_INDEP_OPS (1 << 5) | |
76 | ||
77 | /* If this bit is not set, then \ before anything inside [ and ] is taken as | |
78 | a real \. | |
79 | If set, then such a \ escapes the following character. This is a | |
80 | special case for awk. */ | |
81 | #define RE_AWK_CLASS_HACK (1 << 6) | |
82 | ||
83 | /* If this bit is set, then \{ and \} or { and } serve as interval operators. | |
84 | If not set, then \{ and \} and { and } are treated as literals. */ | |
85 | #define RE_INTERVALS (1 << 7) | |
86 | ||
87 | /* If this bit is not set, then \{ and \} serve as interval operators and | |
88 | { and } are literals. | |
89 | If set, then { and } serve as interval operators and \{ and \} are | |
90 | literals. */ | |
91 | #define RE_NO_BK_CURLY_BRACES (1 << 8) | |
92 | ||
93 | /* If this bit is set, then character classes are supported; they are: | |
94 | [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], | |
95 | [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. | |
96 | If not set, then character classes are not supported. */ | |
97 | #define RE_CHAR_CLASSES (1 << 9) | |
98 | ||
99 | /* If this bit is set, then the dot re doesn't match a null byte. | |
100 | If not set, it does. */ | |
101 | #define RE_DOT_NOT_NULL (1 << 10) | |
102 | ||
103 | /* If this bit is set, then [^...] doesn't match a newline. | |
104 | If not set, it does. */ | |
105 | #define RE_HAT_NOT_NEWLINE (1 << 11) | |
106 | ||
107 | /* If this bit is set, back references are not recognized. | |
108 | If not set, they are. */ | |
109 | #define RE_NO_BK_REFS (1 << 12) | |
110 | ||
111 | /* If this bit is set, back references must refer to a preceding | |
112 | subexpression. If not set, a back reference to a nonexistent | |
113 | subexpression is treated as literal characters. */ | |
114 | #define RE_NO_EMPTY_BK_REF (1 << 13) | |
115 | ||
116 | /* If this bit is set, bracket expressions can't be empty. | |
117 | If it is not set, they can be empty. */ | |
118 | #define RE_NO_EMPTY_BRACKETS (1 << 14) | |
119 | ||
120 | /* If this bit is set, then *, +, ? and { cannot be first in an re or | |
121 | immediately after a |, or a (. Furthermore, a | cannot be first or | |
122 | last in an re, or immediately follow another | or a (. Also, a ^ | |
123 | cannot appear in a nonleading position and a $ cannot appear in a | |
124 | nontrailing position (outside of bracket expressions, that is). */ | |
125 | #define RE_CONTEXTUAL_INVALID_OPS (1 << 15) | |
126 | ||
127 | /* If this bit is set, then +, ? and | aren't recognized as operators. | |
128 | If it's not, they are. */ | |
129 | #define RE_LIMITED_OPS (1 << 16) | |
130 | ||
131 | /* If this bit is set, then an ending range point has to collate higher | |
132 | or equal to the starting range point. | |
133 | If it's not set, then when the ending range point collates lower | |
134 | than the starting range point, the range is just considered empty. */ | |
135 | #define RE_NO_EMPTY_RANGES (1 << 17) | |
136 | ||
137 | /* If this bit is set, then a hyphen (-) can't be an ending range point. | |
138 | If it isn't, then it can. */ | |
139 | #define RE_NO_HYPHEN_RANGE_END (1 << 18) | |
140 | ||
141 | ||
142 | /* Define combinations of bits for the standard possibilities. */ | |
143 | #define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \ | |
144 | | RE_CONTEXT_INDEP_OPS) | |
145 | #define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \ | |
146 | | RE_CONTEXT_INDEP_OPS | RE_AWK_CLASS_HACK) | |
147 | #define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \ | |
148 | | RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR) | |
149 | #define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR) | |
150 | #define RE_SYNTAX_EMACS 0 | |
151 | #define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM \ | |
152 | | RE_CHAR_CLASSES | RE_DOT_NOT_NULL \ | |
153 | | RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF \ | |
154 | | RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS \ | |
155 | | RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END) | |
156 | ||
157 | #define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES \ | |
158 | | RE_NO_BK_VBAR | RE_NO_BK_PARENS \ | |
159 | | RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES \ | |
160 | | RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \ | |
161 | | RE_NO_BK_REFS | RE_NO_EMPTY_RANGES \ | |
162 | | RE_NO_HYPHEN_RANGE_END) | |
163 | ||
164 | ||
165 | /* This data structure is used to represent a compiled pattern. */ | |
166 | ||
167 | struct re_pattern_buffer | |
168 | { | |
169 | char *buffer; /* Space holding the compiled pattern commands. */ | |
170 | long allocated; /* Size of space that `buffer' points to. */ | |
171 | long used; /* Length of portion of buffer actually occupied */ | |
172 | char *fastmap; /* Pointer to fastmap, if any, or zero if none. */ | |
173 | /* re_search uses the fastmap, if there is one, | |
174 | to skip over totally implausible characters. */ | |
175 | char *translate; /* Translate table to apply to all characters before | |
176 | comparing, or zero for no translation. | |
177 | The translation is applied to a pattern when it is | |
178 | compiled and to data when it is matched. */ | |
179 | char fastmap_accurate; | |
180 | /* Set to zero when a new pattern is stored, | |
181 | set to one when the fastmap is updated from it. */ | |
182 | char can_be_null; /* Set to one by compiling fastmap | |
183 | if this pattern might match the null string. | |
184 | It does not necessarily match the null string | |
185 | in that case, but if this is zero, it cannot. | |
186 | 2 as value means can match null string | |
187 | but at end of range or before a character | |
188 | listed in the fastmap. */ | |
189 | }; | |
190 | ||
191 | ||
192 | /* search.c (search_buffer) needs this one value. It is defined both in | |
193 | regex.c and here. */ | |
194 | #define RE_EXACTN_VALUE 1 | |
195 | ||
196 | ||
197 | /* Structure to store register contents data in. | |
198 | ||
199 | Pass the address of such a structure as an argument to re_match, etc., | |
200 | if you want this information back. | |
201 | ||
202 | For i from 1 to RE_NREGS - 1, start[i] records the starting index in | |
203 | the string of where the ith subexpression matched, and end[i] records | |
204 | one after the ending index. start[0] and end[0] are analogous, for | |
205 | the entire pattern. */ | |
206 | ||
207 | struct re_registers | |
208 | { | |
209 | int start[RE_NREGS]; | |
210 | int end[RE_NREGS]; | |
211 | }; | |
212 | ||
213 | ||
214 | ||
215 | #ifdef __STDC__ | |
216 | ||
217 | extern char *re_compile_pattern (char *, int, struct re_pattern_buffer *); | |
218 | /* Is this really advertised? */ | |
219 | extern void re_compile_fastmap (struct re_pattern_buffer *); | |
220 | extern int re_search (struct re_pattern_buffer *, char*, int, int, int, | |
221 | struct re_registers *); | |
222 | extern int re_search_2 (struct re_pattern_buffer *, char *, int, | |
223 | char *, int, int, int, | |
224 | struct re_registers *, int); | |
225 | extern int re_match (struct re_pattern_buffer *, char *, int, int, | |
226 | struct re_registers *); | |
227 | extern int re_match_2 (struct re_pattern_buffer *, char *, int, | |
228 | char *, int, int, struct re_registers *, int); | |
229 | ||
230 | /* 4.2 bsd compatibility. */ | |
231 | extern char *re_comp (char *); | |
232 | extern int re_exec (char *); | |
233 | ||
234 | #else /* !__STDC__ */ | |
235 | ||
236 | extern char *re_compile_pattern (); | |
237 | /* Is this really advertised? */ | |
238 | extern void re_compile_fastmap (); | |
239 | extern int re_search (), re_search_2 (); | |
240 | extern int re_match (), re_match_2 (); | |
241 | ||
242 | /* 4.2 bsd compatibility. */ | |
243 | extern char *re_comp (); | |
244 | extern int re_exec (); | |
245 | ||
246 | #endif /* __STDC__ */ | |
247 | ||
248 | ||
249 | #ifdef SYNTAX_TABLE | |
250 | extern char *re_syntax_table; | |
251 | #endif | |
252 | ||
253 | #endif /* !__REGEXP_LIBRARY */ |