Commit | Line | Data |
---|---|---|
3ff38436 KB |
1 | /* |
2 | * Copyright (c) 1989 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Michael Rendell of Memorial University of Newfoundland. | |
7 | * | |
8 | * Redistribution and use in source and binary forms are permitted | |
9 | * provided that the above copyright notice and this paragraph are | |
10 | * duplicated in all such forms and that any documentation, | |
11 | * advertising materials, and other materials related to such | |
12 | * distribution and use acknowledge that the software was developed | |
13 | * by the University of California, Berkeley. The name of the | |
14 | * University may not be used to endorse or promote products derived | |
15 | * from this software without specific prior written permission. | |
16 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR | |
17 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED | |
18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. | |
19 | */ | |
20 | ||
21 | #ifndef lint | |
22 | char copyright[] = | |
23 | "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ | |
24 | All rights reserved.\n"; | |
25 | #endif /* not lint */ | |
26 | ||
27 | #ifndef lint | |
28 | static char sccsid[] = "@(#)tsort.c 5.1 (Berkeley) %G%"; | |
29 | #endif /* not lint */ | |
30 | ||
31 | #include <sys/types.h> | |
32 | #include <errno.h> | |
4a5cb176 | 33 | #include <stdio.h> |
3ff38436 KB |
34 | #include <ctype.h> |
35 | #include <strings.h> | |
dc5d5eb8 | 36 | |
3ff38436 KB |
37 | /* |
38 | * Topological sort. Input is a list of pairs of strings seperated by | |
39 | * white space (spaces, tabs, and/or newlines); strings are written to | |
40 | * standard output in sorted order, one per line. | |
41 | * | |
42 | * usage: | |
43 | * tsort [inputfile] | |
44 | * If no input file is specified, standard input is read. | |
45 | * | |
46 | * Should be compatable with AT&T tsort HOWEVER the output is not identical | |
47 | * (i.e. for most graphs there is more than one sorted order, and this tsort | |
48 | * usually generates a different one then the AT&T tsort). Also, cycle | |
49 | * reporting seems to be more accurate in this version (the AT&T tsort | |
50 | * sometimes says a node is in a cycle when it isn't). | |
51 | * | |
52 | * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90 | |
53 | */ | |
54 | #define HASHSIZE 53 /* doesn't need to be big */ | |
55 | #define NF_MARK 0x1 /* marker for cycle detection */ | |
56 | #define NF_ACYCLIC 0x2 /* this node is cycle free */ | |
dc5d5eb8 | 57 | |
3ff38436 KB |
58 | typedef struct node_str NODE; |
59 | ||
60 | struct node_str { | |
61 | char *n_name; /* name of this node */ | |
62 | NODE **n_prevp; /* pointer to previous node's n_next */ | |
63 | NODE *n_next; /* next node in graph */ | |
64 | NODE *n_hash; /* next node in hash table */ | |
65 | int n_narcs; /* number of arcs in n_arcs[] */ | |
66 | int n_arcsize; /* size of n_arcs[] array */ | |
67 | NODE **n_arcs; /* array of arcs to other nodes */ | |
68 | int n_refcnt; /* # of arcs pointing to this node */ | |
69 | int n_flags; /* NF_* */ | |
dc5d5eb8 BJ |
70 | }; |
71 | ||
3ff38436 KB |
72 | typedef struct _buf { |
73 | char *b_buf; | |
74 | int b_bsize; | |
75 | } BUF; | |
76 | ||
77 | NODE *add_node(), *find_node(); | |
78 | void add_arc(), no_memory(), remove_node(), tsort(); | |
79 | char *grow_buf(), *malloc(); | |
80 | ||
81 | extern int errno; | |
82 | NODE *graph; | |
83 | NODE *hashtable[HASHSIZE]; | |
84 | NODE **cycle_buf; | |
85 | NODE **longest_cycle; | |
86 | ||
87 | main(argc, argv) | |
88 | int argc; | |
89 | char **argv; | |
dc5d5eb8 | 90 | { |
3ff38436 KB |
91 | register BUF *b; |
92 | register int c, n; | |
93 | FILE *fp; | |
94 | int bsize, nused; | |
95 | BUF bufs[2]; | |
96 | ||
97 | if (argc < 2) | |
98 | fp = stdin; | |
99 | else if (argc == 2) { | |
100 | (void)fprintf(stderr, "usage: tsort [ inputfile ]\n"); | |
101 | exit(1); | |
102 | } else if (!(fp = fopen(argv[1], "r"))) { | |
103 | (void)fprintf(stderr, "tsort: %s.\n", strerror(errno)); | |
104 | exit(1); | |
dc5d5eb8 | 105 | } |
3ff38436 KB |
106 | |
107 | for (b = bufs, n = 2; --n >= 0; b++) | |
108 | b->b_buf = grow_buf((char *)NULL, b->b_bsize = 1024); | |
109 | ||
110 | /* parse input and build the graph */ | |
111 | for (n = 0, c = getc(fp);;) { | |
112 | while (c != EOF && isspace(c)) | |
113 | c = getc(fp); | |
114 | if (c == EOF) | |
dc5d5eb8 | 115 | break; |
3ff38436 KB |
116 | |
117 | nused = 0; | |
118 | b = &bufs[n]; | |
119 | bsize = b->b_bsize; | |
120 | do { | |
121 | b->b_buf[nused++] = c; | |
122 | if (nused == bsize) { | |
123 | bsize *= 2; | |
124 | b->b_buf = grow_buf(b->b_buf, bsize); | |
dc5d5eb8 | 125 | } |
3ff38436 KB |
126 | c = getc(fp); |
127 | } while (c != EOF && !isspace(c)); | |
128 | ||
129 | b->b_buf[nused] = '\0'; | |
130 | b->b_bsize = bsize; | |
131 | if (n) | |
132 | add_arc(bufs[0].b_buf, bufs[1].b_buf); | |
133 | n = !n; | |
134 | } | |
135 | (void)fclose(fp); | |
136 | if (n) { | |
137 | (void)fprintf(stderr, "tsort: odd data count.\n"); | |
138 | exit(1); | |
dc5d5eb8 | 139 | } |
3ff38436 KB |
140 | |
141 | /* do the sort */ | |
142 | tsort(); | |
2afd1306 | 143 | exit(0); |
dc5d5eb8 BJ |
144 | } |
145 | ||
3ff38436 KB |
146 | /* double the size of oldbuf and return a pointer to the new buffer. */ |
147 | char * | |
148 | grow_buf(bp, size) | |
149 | char *bp; | |
150 | int size; | |
dc5d5eb8 | 151 | { |
3ff38436 KB |
152 | char *realloc(); |
153 | ||
154 | if (!(bp = realloc(bp, (u_int)size))) | |
155 | no_memory(); | |
156 | return(bp); | |
dc5d5eb8 BJ |
157 | } |
158 | ||
3ff38436 KB |
159 | /* |
160 | * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in | |
161 | * the graph, then add them. | |
162 | */ | |
163 | void | |
164 | add_arc(s1, s2) | |
165 | char *s1, *s2; | |
dc5d5eb8 | 166 | { |
3ff38436 KB |
167 | register NODE *n1; |
168 | NODE *n2; | |
169 | int bsize; | |
170 | ||
171 | n1 = find_node(s1); | |
172 | if (!n1) | |
173 | n1 = add_node(s1); | |
174 | ||
175 | if (!strcmp(s1, s2)) | |
176 | return; | |
177 | ||
178 | n2 = find_node(s2); | |
179 | if (!n2) | |
180 | n2 = add_node(s2); | |
181 | ||
182 | /* | |
183 | * could check to see if this arc is here already, but it isn't | |
184 | * worth the bother -- there usually isn't and it doesn't hurt if | |
185 | * there is (I think :-). | |
186 | */ | |
187 | if (n1->n_narcs == n1->n_arcsize) { | |
188 | if (!n1->n_arcsize) | |
189 | n1->n_arcsize = 10; | |
190 | bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; | |
191 | n1->n_arcs = (NODE **)grow_buf((char *)n1->n_arcs, bsize); | |
192 | n1->n_arcsize = bsize / sizeof(*n1->n_arcs); | |
193 | } | |
194 | n1->n_arcs[n1->n_narcs++] = n2; | |
195 | ++n2->n_refcnt; | |
dc5d5eb8 BJ |
196 | } |
197 | ||
3ff38436 KB |
198 | hash_string(s) |
199 | char *s; | |
dc5d5eb8 | 200 | { |
3ff38436 KB |
201 | register int hash, i; |
202 | ||
203 | for (hash = 0, i = 1; *s; s++, i++) | |
204 | hash += *s * i; | |
205 | return(hash % HASHSIZE); | |
dc5d5eb8 BJ |
206 | } |
207 | ||
3ff38436 KB |
208 | /* |
209 | * find a node in the graph and return a pointer to it - returns null if not | |
210 | * found. | |
211 | */ | |
212 | NODE * | |
213 | find_node(name) | |
214 | char *name; | |
dc5d5eb8 | 215 | { |
3ff38436 KB |
216 | register NODE *n; |
217 | ||
218 | for (n = hashtable[hash_string(name)]; n; n = n->n_hash) | |
219 | if (!strcmp(n->n_name, name)) | |
220 | return(n); | |
221 | return((NODE *)NULL); | |
dc5d5eb8 BJ |
222 | } |
223 | ||
3ff38436 KB |
224 | /* Add a node to the graph and return a pointer to it. */ |
225 | NODE * | |
226 | add_node(name) | |
227 | char *name; | |
dc5d5eb8 | 228 | { |
3ff38436 KB |
229 | register NODE *n; |
230 | int hash; | |
231 | ||
232 | if (!(n = (NODE *)malloc(sizeof(NODE))) || !(n->n_name = strdup(name))) | |
233 | no_memory(); | |
234 | ||
235 | n->n_narcs = 0; | |
236 | n->n_arcsize = 0; | |
237 | n->n_arcs = (NODE **)NULL; | |
238 | n->n_refcnt = 0; | |
239 | n->n_flags = 0; | |
240 | ||
241 | /* add to linked list */ | |
242 | if (n->n_next = graph) | |
243 | graph->n_prevp = &n->n_next; | |
244 | n->n_prevp = &graph; | |
245 | graph = n; | |
246 | ||
247 | /* add to hash table */ | |
248 | hash = hash_string(name); | |
249 | n->n_hash = hashtable[hash]; | |
250 | hashtable[hash] = n; | |
251 | return(n); | |
dc5d5eb8 BJ |
252 | } |
253 | ||
3ff38436 KB |
254 | /* do topological sort on graph */ |
255 | void | |
256 | tsort() | |
dc5d5eb8 | 257 | { |
3ff38436 KB |
258 | register NODE *n, *next; |
259 | register int cnt; | |
260 | ||
261 | while (graph) { | |
262 | /* | |
263 | * keep getting rid of simple cases until there are none left, | |
264 | * if there are any nodes still in the graph, then there is | |
265 | * a cycle in it. | |
266 | */ | |
267 | do { | |
268 | for (cnt = 0, n = graph; n; n = next) { | |
269 | next = n->n_next; | |
270 | if (n->n_refcnt == 0) { | |
271 | remove_node(n); | |
272 | ++cnt; | |
273 | } | |
274 | } | |
275 | } while (graph && cnt); | |
276 | ||
277 | if (!graph) | |
278 | break; | |
279 | ||
280 | if (!cycle_buf) { | |
281 | /* | |
282 | * allocate space for two cycle logs - one to be used | |
283 | * as scratch space, the other to save the longest | |
284 | * cycle. | |
285 | */ | |
286 | for (cnt = 0, n = graph; n; n = n->n_next) | |
287 | ++cnt; | |
288 | cycle_buf = | |
289 | (NODE **)malloc((u_int)sizeof(NODE *) * cnt); | |
290 | longest_cycle = | |
291 | (NODE **)malloc((u_int)sizeof(NODE *) * cnt); | |
292 | if (!cycle_buf || !longest_cycle) | |
293 | no_memory(); | |
294 | } | |
295 | for (n = graph; n; n = n->n_next) | |
296 | if (!(n->n_flags & NF_ACYCLIC)) { | |
297 | if (cnt = find_cycle(n, n, 0, 0)) { | |
298 | register int i; | |
299 | ||
300 | (void)fprintf(stderr, | |
301 | "tsort: cycle in data.\n"); | |
302 | for (i = 0; i < cnt; i++) | |
303 | (void)fprintf(stderr, | |
304 | "tsort: %s.\n", longest_cycle[i]->n_name); | |
305 | remove_node(n); | |
306 | break; | |
307 | } else | |
308 | /* to avoid further checks */ | |
309 | n->n_flags = NF_ACYCLIC; | |
310 | } | |
311 | ||
312 | if (!n) { | |
313 | (void)fprintf(stderr, | |
314 | "tsort: internal error -- could not find cycle.\n"); | |
315 | exit(1); | |
316 | } | |
317 | } | |
dc5d5eb8 BJ |
318 | } |
319 | ||
3ff38436 KB |
320 | /* print node and remove from graph (does not actually free node) */ |
321 | void | |
322 | remove_node(n) | |
323 | register NODE *n; | |
dc5d5eb8 | 324 | { |
3ff38436 KB |
325 | register NODE **np; |
326 | register int i; | |
327 | ||
328 | (void)printf("%s\n", n->n_name); | |
329 | for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++) | |
330 | --(*np)->n_refcnt; | |
331 | n->n_narcs = 0; | |
332 | *n->n_prevp = n->n_next; | |
333 | if (n->n_next) | |
334 | n->n_next->n_prevp = n->n_prevp; | |
dc5d5eb8 BJ |
335 | } |
336 | ||
3ff38436 KB |
337 | /* look for the longest cycle from node from to node to. */ |
338 | find_cycle(from, to, longest_len, depth) | |
339 | NODE *from, *to; | |
340 | int depth, longest_len; | |
dc5d5eb8 | 341 | { |
3ff38436 KB |
342 | register NODE **np; |
343 | register int i, len; | |
344 | ||
345 | /* | |
346 | * avoid infinite loops and ignore portions of the graph known | |
347 | * to be acyclic | |
348 | */ | |
349 | if (from->n_flags & (NF_MARK|NF_ACYCLIC)) | |
350 | return(0); | |
351 | from->n_flags = NF_MARK; | |
352 | ||
353 | for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) { | |
354 | cycle_buf[depth] = *np; | |
355 | if (*np == to) { | |
356 | if (depth + 1 > longest_len) { | |
357 | longest_len = depth + 1; | |
358 | (void)memcpy((char *)longest_cycle, | |
359 | (char *)cycle_buf, | |
360 | longest_len * sizeof(NODE *)); | |
361 | } | |
362 | } else { | |
363 | len = find_cycle(*np, to, longest_len, depth + 1); | |
364 | if (len > longest_len) | |
365 | longest_len = len; | |
dc5d5eb8 BJ |
366 | } |
367 | } | |
3ff38436 KB |
368 | from->n_flags &= ~NF_MARK; |
369 | return(longest_len); | |
370 | } | |
371 | ||
372 | void | |
373 | no_memory() | |
374 | { | |
375 | (void)fprintf(stderr, "tsort: %s.\n", strerror(ENOMEM)); | |
376 | exit(1); | |
dc5d5eb8 | 377 | } |