Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /* |
2 | * Copyright (c) 1989 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Michael Rendell of Memorial University of Newfoundland. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * 3. All advertising materials mentioning features or use of this software | |
17 | * must display the following acknowledgement: | |
18 | * This product includes software developed by the University of | |
19 | * California, Berkeley and its contributors. | |
20 | * 4. Neither the name of the University nor the names of its contributors | |
21 | * may be used to endorse or promote products derived from this software | |
22 | * without specific prior written permission. | |
23 | * | |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
34 | * SUCH DAMAGE. | |
35 | */ | |
36 | ||
37 | #ifndef lint | |
38 | char copyright[] = | |
39 | "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ | |
40 | All rights reserved.\n"; | |
41 | #endif /* not lint */ | |
42 | ||
43 | #ifndef lint | |
44 | static char sccsid[] = "@(#)tsort.c 5.3 (Berkeley) 6/1/90"; | |
45 | #endif /* not lint */ | |
46 | ||
47 | #include <sys/types.h> | |
48 | #include <errno.h> | |
49 | #include <stdio.h> | |
50 | #include <ctype.h> | |
51 | #include <string.h> | |
52 | ||
53 | /* | |
54 | * Topological sort. Input is a list of pairs of strings seperated by | |
55 | * white space (spaces, tabs, and/or newlines); strings are written to | |
56 | * standard output in sorted order, one per line. | |
57 | * | |
58 | * usage: | |
59 | * tsort [inputfile] | |
60 | * If no input file is specified, standard input is read. | |
61 | * | |
62 | * Should be compatable with AT&T tsort HOWEVER the output is not identical | |
63 | * (i.e. for most graphs there is more than one sorted order, and this tsort | |
64 | * usually generates a different one then the AT&T tsort). Also, cycle | |
65 | * reporting seems to be more accurate in this version (the AT&T tsort | |
66 | * sometimes says a node is in a cycle when it isn't). | |
67 | * | |
68 | * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90 | |
69 | */ | |
70 | #define HASHSIZE 53 /* doesn't need to be big */ | |
71 | #define NF_MARK 0x1 /* marker for cycle detection */ | |
72 | #define NF_ACYCLIC 0x2 /* this node is cycle free */ | |
73 | ||
74 | typedef struct node_str NODE; | |
75 | ||
76 | struct node_str { | |
77 | char *n_name; /* name of this node */ | |
78 | NODE **n_prevp; /* pointer to previous node's n_next */ | |
79 | NODE *n_next; /* next node in graph */ | |
80 | NODE *n_hash; /* next node in hash table */ | |
81 | int n_narcs; /* number of arcs in n_arcs[] */ | |
82 | int n_arcsize; /* size of n_arcs[] array */ | |
83 | NODE **n_arcs; /* array of arcs to other nodes */ | |
84 | int n_refcnt; /* # of arcs pointing to this node */ | |
85 | int n_flags; /* NF_* */ | |
86 | }; | |
87 | ||
88 | typedef struct _buf { | |
89 | char *b_buf; | |
90 | int b_bsize; | |
91 | } BUF; | |
92 | ||
93 | NODE *add_node(), *find_node(); | |
94 | void add_arc(), no_memory(), remove_node(), tsort(); | |
95 | char *grow_buf(), *malloc(); | |
96 | ||
97 | extern int errno; | |
98 | NODE *graph; | |
99 | NODE *hashtable[HASHSIZE]; | |
100 | NODE **cycle_buf; | |
101 | NODE **longest_cycle; | |
102 | ||
103 | main(argc, argv) | |
104 | int argc; | |
105 | char **argv; | |
106 | { | |
107 | register BUF *b; | |
108 | register int c, n; | |
109 | FILE *fp; | |
110 | int bsize, nused; | |
111 | BUF bufs[2]; | |
112 | ||
113 | if (argc < 2) | |
114 | fp = stdin; | |
a6fe0c40 | 115 | else if (argc > 2) { |
15637ed4 RG |
116 | (void)fprintf(stderr, "usage: tsort [ inputfile ]\n"); |
117 | exit(1); | |
118 | } else if (!(fp = fopen(argv[1], "r"))) { | |
119 | (void)fprintf(stderr, "tsort: %s.\n", strerror(errno)); | |
120 | exit(1); | |
121 | } | |
122 | ||
123 | for (b = bufs, n = 2; --n >= 0; b++) | |
124 | b->b_buf = grow_buf((char *)NULL, b->b_bsize = 1024); | |
125 | ||
126 | /* parse input and build the graph */ | |
127 | for (n = 0, c = getc(fp);;) { | |
128 | while (c != EOF && isspace(c)) | |
129 | c = getc(fp); | |
130 | if (c == EOF) | |
131 | break; | |
132 | ||
133 | nused = 0; | |
134 | b = &bufs[n]; | |
135 | bsize = b->b_bsize; | |
136 | do { | |
137 | b->b_buf[nused++] = c; | |
138 | if (nused == bsize) { | |
139 | bsize *= 2; | |
140 | b->b_buf = grow_buf(b->b_buf, bsize); | |
141 | } | |
142 | c = getc(fp); | |
143 | } while (c != EOF && !isspace(c)); | |
144 | ||
145 | b->b_buf[nused] = '\0'; | |
146 | b->b_bsize = bsize; | |
147 | if (n) | |
148 | add_arc(bufs[0].b_buf, bufs[1].b_buf); | |
149 | n = !n; | |
150 | } | |
151 | (void)fclose(fp); | |
152 | if (n) { | |
153 | (void)fprintf(stderr, "tsort: odd data count.\n"); | |
154 | exit(1); | |
155 | } | |
156 | ||
157 | /* do the sort */ | |
158 | tsort(); | |
159 | exit(0); | |
160 | } | |
161 | ||
162 | /* double the size of oldbuf and return a pointer to the new buffer. */ | |
163 | char * | |
164 | grow_buf(bp, size) | |
165 | char *bp; | |
166 | int size; | |
167 | { | |
168 | char *realloc(); | |
169 | ||
170 | if (!(bp = realloc(bp, (u_int)size))) | |
171 | no_memory(); | |
172 | return(bp); | |
173 | } | |
174 | ||
175 | /* | |
176 | * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in | |
177 | * the graph, then add them. | |
178 | */ | |
179 | void | |
180 | add_arc(s1, s2) | |
181 | char *s1, *s2; | |
182 | { | |
183 | register NODE *n1; | |
184 | NODE *n2; | |
185 | int bsize; | |
186 | ||
187 | n1 = find_node(s1); | |
188 | if (!n1) | |
189 | n1 = add_node(s1); | |
190 | ||
191 | if (!strcmp(s1, s2)) | |
192 | return; | |
193 | ||
194 | n2 = find_node(s2); | |
195 | if (!n2) | |
196 | n2 = add_node(s2); | |
197 | ||
198 | /* | |
199 | * could check to see if this arc is here already, but it isn't | |
200 | * worth the bother -- there usually isn't and it doesn't hurt if | |
201 | * there is (I think :-). | |
202 | */ | |
203 | if (n1->n_narcs == n1->n_arcsize) { | |
204 | if (!n1->n_arcsize) | |
205 | n1->n_arcsize = 10; | |
206 | bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; | |
207 | n1->n_arcs = (NODE **)grow_buf((char *)n1->n_arcs, bsize); | |
208 | n1->n_arcsize = bsize / sizeof(*n1->n_arcs); | |
209 | } | |
210 | n1->n_arcs[n1->n_narcs++] = n2; | |
211 | ++n2->n_refcnt; | |
212 | } | |
213 | ||
214 | hash_string(s) | |
215 | char *s; | |
216 | { | |
217 | register int hash, i; | |
218 | ||
219 | for (hash = 0, i = 1; *s; s++, i++) | |
220 | hash += *s * i; | |
221 | return(hash % HASHSIZE); | |
222 | } | |
223 | ||
224 | /* | |
225 | * find a node in the graph and return a pointer to it - returns null if not | |
226 | * found. | |
227 | */ | |
228 | NODE * | |
229 | find_node(name) | |
230 | char *name; | |
231 | { | |
232 | register NODE *n; | |
233 | ||
234 | for (n = hashtable[hash_string(name)]; n; n = n->n_hash) | |
235 | if (!strcmp(n->n_name, name)) | |
236 | return(n); | |
237 | return((NODE *)NULL); | |
238 | } | |
239 | ||
240 | /* Add a node to the graph and return a pointer to it. */ | |
241 | NODE * | |
242 | add_node(name) | |
243 | char *name; | |
244 | { | |
245 | register NODE *n; | |
246 | int hash; | |
247 | ||
248 | if (!(n = (NODE *)malloc(sizeof(NODE))) || !(n->n_name = strdup(name))) | |
249 | no_memory(); | |
250 | ||
251 | n->n_narcs = 0; | |
252 | n->n_arcsize = 0; | |
253 | n->n_arcs = (NODE **)NULL; | |
254 | n->n_refcnt = 0; | |
255 | n->n_flags = 0; | |
256 | ||
257 | /* add to linked list */ | |
258 | if (n->n_next = graph) | |
259 | graph->n_prevp = &n->n_next; | |
260 | n->n_prevp = &graph; | |
261 | graph = n; | |
262 | ||
263 | /* add to hash table */ | |
264 | hash = hash_string(name); | |
265 | n->n_hash = hashtable[hash]; | |
266 | hashtable[hash] = n; | |
267 | return(n); | |
268 | } | |
269 | ||
270 | /* do topological sort on graph */ | |
271 | void | |
272 | tsort() | |
273 | { | |
439c584a | 274 | register NODE *n, *m, *next; |
15637ed4 RG |
275 | register int cnt; |
276 | ||
277 | while (graph) { | |
278 | /* | |
279 | * keep getting rid of simple cases until there are none left, | |
280 | * if there are any nodes still in the graph, then there is | |
281 | * a cycle in it. | |
282 | */ | |
283 | do { | |
284 | for (cnt = 0, n = graph; n; n = next) { | |
285 | next = n->n_next; | |
286 | if (n->n_refcnt == 0) { | |
287 | remove_node(n); | |
288 | ++cnt; | |
289 | } | |
290 | } | |
291 | } while (graph && cnt); | |
292 | ||
293 | if (!graph) | |
294 | break; | |
295 | ||
296 | if (!cycle_buf) { | |
297 | /* | |
298 | * allocate space for two cycle logs - one to be used | |
299 | * as scratch space, the other to save the longest | |
300 | * cycle. | |
301 | */ | |
302 | for (cnt = 0, n = graph; n; n = n->n_next) | |
303 | ++cnt; | |
304 | cycle_buf = | |
305 | (NODE **)malloc((u_int)sizeof(NODE *) * cnt); | |
306 | longest_cycle = | |
307 | (NODE **)malloc((u_int)sizeof(NODE *) * cnt); | |
308 | if (!cycle_buf || !longest_cycle) | |
309 | no_memory(); | |
310 | } | |
311 | for (n = graph; n; n = n->n_next) | |
312 | if (!(n->n_flags & NF_ACYCLIC)) { | |
439c584a JH |
313 | for (m=graph; m; m=m->n_next) |
314 | m->n_flags &= ~NF_MARK; | |
15637ed4 RG |
315 | if (cnt = find_cycle(n, n, 0, 0)) { |
316 | register int i; | |
317 | ||
318 | (void)fprintf(stderr, | |
319 | "tsort: cycle in data.\n"); | |
320 | for (i = 0; i < cnt; i++) | |
321 | (void)fprintf(stderr, | |
322 | "tsort: %s.\n", longest_cycle[i]->n_name); | |
323 | remove_node(n); | |
324 | break; | |
325 | } else | |
326 | /* to avoid further checks */ | |
327 | n->n_flags = NF_ACYCLIC; | |
328 | } | |
329 | ||
330 | if (!n) { | |
331 | (void)fprintf(stderr, | |
332 | "tsort: internal error -- could not find cycle.\n"); | |
333 | exit(1); | |
334 | } | |
335 | } | |
336 | } | |
337 | ||
338 | /* print node and remove from graph (does not actually free node) */ | |
339 | void | |
340 | remove_node(n) | |
341 | register NODE *n; | |
342 | { | |
343 | register NODE **np; | |
344 | register int i; | |
345 | ||
346 | (void)printf("%s\n", n->n_name); | |
347 | for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++) | |
348 | --(*np)->n_refcnt; | |
349 | n->n_narcs = 0; | |
350 | *n->n_prevp = n->n_next; | |
351 | if (n->n_next) | |
352 | n->n_next->n_prevp = n->n_prevp; | |
353 | } | |
354 | ||
439c584a | 355 | /* look for a path from node from to node to. */ |
15637ed4 RG |
356 | find_cycle(from, to, longest_len, depth) |
357 | NODE *from, *to; | |
358 | int depth, longest_len; | |
359 | { | |
360 | register NODE **np; | |
361 | register int i, len; | |
362 | ||
363 | /* | |
364 | * avoid infinite loops and ignore portions of the graph known | |
365 | * to be acyclic | |
366 | */ | |
367 | if (from->n_flags & (NF_MARK|NF_ACYCLIC)) | |
368 | return(0); | |
369 | from->n_flags = NF_MARK; | |
370 | ||
371 | for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) { | |
372 | cycle_buf[depth] = *np; | |
373 | if (*np == to) { | |
374 | if (depth + 1 > longest_len) { | |
375 | longest_len = depth + 1; | |
376 | (void)memcpy((char *)longest_cycle, | |
377 | (char *)cycle_buf, | |
378 | longest_len * sizeof(NODE *)); | |
379 | } | |
380 | } else { | |
381 | len = find_cycle(*np, to, longest_len, depth + 1); | |
439c584a | 382 | if (len > longest_len) { |
15637ed4 | 383 | longest_len = len; |
439c584a JH |
384 | break; |
385 | } | |
15637ed4 RG |
386 | } |
387 | } | |
15637ed4 RG |
388 | return(longest_len); |
389 | } | |
390 | ||
391 | void | |
392 | no_memory() | |
393 | { | |
394 | (void)fprintf(stderr, "tsort: %s.\n", strerror(ENOMEM)); | |
395 | exit(1); | |
396 | } |