Commit | Line | Data |
---|---|---|
3ff38436 KB |
1 | /* |
2 | * Copyright (c) 1989 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Michael Rendell of Memorial University of Newfoundland. | |
7 | * | |
6ecf3d85 | 8 | * %sccs.include.redist.c% |
3ff38436 KB |
9 | */ |
10 | ||
11 | #ifndef lint | |
12 | char copyright[] = | |
13 | "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ | |
14 | All rights reserved.\n"; | |
15 | #endif /* not lint */ | |
16 | ||
17 | #ifndef lint | |
a9e5048e | 18 | static char sccsid[] = "@(#)tsort.c 5.8 (Berkeley) %G%"; |
3ff38436 KB |
19 | #endif /* not lint */ |
20 | ||
21 | #include <sys/types.h> | |
22 | #include <errno.h> | |
6b044e40 KB |
23 | #include <fcntl.h> |
24 | #include <db.h> | |
4a5cb176 | 25 | #include <stdio.h> |
471f7899 | 26 | #include <stdlib.h> |
3ff38436 | 27 | #include <ctype.h> |
6ebcb998 | 28 | #include <string.h> |
dc5d5eb8 | 29 | |
3ff38436 | 30 | /* |
39b3d945 | 31 | * Topological sort. Input is a list of pairs of strings separated by |
3ff38436 KB |
32 | * white space (spaces, tabs, and/or newlines); strings are written to |
33 | * standard output in sorted order, one per line. | |
34 | * | |
35 | * usage: | |
36 | * tsort [inputfile] | |
37 | * If no input file is specified, standard input is read. | |
38 | * | |
39 | * Should be compatable with AT&T tsort HOWEVER the output is not identical | |
40 | * (i.e. for most graphs there is more than one sorted order, and this tsort | |
41 | * usually generates a different one then the AT&T tsort). Also, cycle | |
42 | * reporting seems to be more accurate in this version (the AT&T tsort | |
43 | * sometimes says a node is in a cycle when it isn't). | |
44 | * | |
45 | * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90 | |
46 | */ | |
47 | #define HASHSIZE 53 /* doesn't need to be big */ | |
48 | #define NF_MARK 0x1 /* marker for cycle detection */ | |
49 | #define NF_ACYCLIC 0x2 /* this node is cycle free */ | |
dc5d5eb8 | 50 | |
3ff38436 KB |
51 | typedef struct node_str NODE; |
52 | ||
53 | struct node_str { | |
3ff38436 KB |
54 | NODE **n_prevp; /* pointer to previous node's n_next */ |
55 | NODE *n_next; /* next node in graph */ | |
6b044e40 | 56 | NODE **n_arcs; /* array of arcs to other nodes */ |
3ff38436 KB |
57 | int n_narcs; /* number of arcs in n_arcs[] */ |
58 | int n_arcsize; /* size of n_arcs[] array */ | |
3ff38436 KB |
59 | int n_refcnt; /* # of arcs pointing to this node */ |
60 | int n_flags; /* NF_* */ | |
6b044e40 | 61 | char n_name[1]; /* name of this node */ |
dc5d5eb8 BJ |
62 | }; |
63 | ||
3ff38436 KB |
64 | typedef struct _buf { |
65 | char *b_buf; | |
66 | int b_bsize; | |
67 | } BUF; | |
68 | ||
6b044e40 | 69 | DB *db; |
3ff38436 | 70 | NODE *graph; |
3ff38436 KB |
71 | NODE **cycle_buf; |
72 | NODE **longest_cycle; | |
73 | ||
471f7899 | 74 | void add_arc __P((char *, char *)); |
471f7899 KB |
75 | void err __P((const char *, ...)); |
76 | int find_cycle __P((NODE *, NODE *, int, int)); | |
6b044e40 | 77 | NODE *get_node __P((char *)); |
471f7899 | 78 | void *grow_buf __P((void *, int)); |
471f7899 KB |
79 | void remove_node __P((NODE *)); |
80 | void tsort __P((void)); | |
81 | void usage __P((void)); | |
82 | ||
83 | int | |
3ff38436 KB |
84 | main(argc, argv) |
85 | int argc; | |
471f7899 | 86 | char *argv[]; |
dc5d5eb8 | 87 | { |
3ff38436 KB |
88 | register BUF *b; |
89 | register int c, n; | |
90 | FILE *fp; | |
471f7899 | 91 | int bsize, ch, nused; |
3ff38436 KB |
92 | BUF bufs[2]; |
93 | ||
471f7899 KB |
94 | while ((ch = getopt(argc, argv, "")) != EOF) |
95 | switch(ch) { | |
96 | case '?': | |
97 | default: | |
98 | usage(); | |
99 | } | |
100 | argc -= optind; | |
101 | argv += optind; | |
102 | ||
103 | switch(argc) { | |
104 | case 0: | |
3ff38436 | 105 | fp = stdin; |
471f7899 KB |
106 | break; |
107 | case 1: | |
108 | if ((fp = fopen(*argv, "r")) == NULL) | |
109 | err("%s: %s", *argv, strerror(errno)); | |
110 | break; | |
111 | default: | |
112 | usage(); | |
dc5d5eb8 | 113 | } |
3ff38436 KB |
114 | |
115 | for (b = bufs, n = 2; --n >= 0; b++) | |
471f7899 | 116 | b->b_buf = grow_buf(NULL, b->b_bsize = 1024); |
3ff38436 KB |
117 | |
118 | /* parse input and build the graph */ | |
119 | for (n = 0, c = getc(fp);;) { | |
120 | while (c != EOF && isspace(c)) | |
121 | c = getc(fp); | |
122 | if (c == EOF) | |
dc5d5eb8 | 123 | break; |
3ff38436 KB |
124 | |
125 | nused = 0; | |
126 | b = &bufs[n]; | |
127 | bsize = b->b_bsize; | |
128 | do { | |
129 | b->b_buf[nused++] = c; | |
471f7899 KB |
130 | if (nused == bsize) |
131 | b->b_buf = grow_buf(b->b_buf, bsize *= 2); | |
3ff38436 KB |
132 | c = getc(fp); |
133 | } while (c != EOF && !isspace(c)); | |
134 | ||
135 | b->b_buf[nused] = '\0'; | |
136 | b->b_bsize = bsize; | |
137 | if (n) | |
138 | add_arc(bufs[0].b_buf, bufs[1].b_buf); | |
139 | n = !n; | |
140 | } | |
141 | (void)fclose(fp); | |
471f7899 KB |
142 | if (n) |
143 | err("odd data count"); | |
3ff38436 KB |
144 | |
145 | /* do the sort */ | |
146 | tsort(); | |
2afd1306 | 147 | exit(0); |
dc5d5eb8 BJ |
148 | } |
149 | ||
3ff38436 | 150 | /* double the size of oldbuf and return a pointer to the new buffer. */ |
471f7899 | 151 | void * |
3ff38436 | 152 | grow_buf(bp, size) |
471f7899 | 153 | void *bp; |
3ff38436 | 154 | int size; |
dc5d5eb8 | 155 | { |
471f7899 KB |
156 | if ((bp = realloc(bp, (u_int)size)) == NULL) |
157 | err("%s", strerror(errno)); | |
158 | return (bp); | |
dc5d5eb8 BJ |
159 | } |
160 | ||
3ff38436 KB |
161 | /* |
162 | * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in | |
163 | * the graph, then add them. | |
164 | */ | |
165 | void | |
166 | add_arc(s1, s2) | |
167 | char *s1, *s2; | |
dc5d5eb8 | 168 | { |
3ff38436 KB |
169 | register NODE *n1; |
170 | NODE *n2; | |
5b453b35 | 171 | int bsize, i; |
3ff38436 | 172 | |
6b044e40 | 173 | n1 = get_node(s1); |
3ff38436 KB |
174 | |
175 | if (!strcmp(s1, s2)) | |
176 | return; | |
177 | ||
6b044e40 | 178 | n2 = get_node(s2); |
3ff38436 KB |
179 | |
180 | /* | |
5b453b35 MT |
181 | * Check if this arc is already here. |
182 | */ | |
183 | for (i = 0; i < n1->n_narcs; i++) | |
184 | if (n1->n_arcs[i] == n2) | |
185 | return; | |
186 | /* | |
187 | * Add it. | |
3ff38436 KB |
188 | */ |
189 | if (n1->n_narcs == n1->n_arcsize) { | |
190 | if (!n1->n_arcsize) | |
191 | n1->n_arcsize = 10; | |
192 | bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; | |
471f7899 | 193 | n1->n_arcs = grow_buf(n1->n_arcs, bsize); |
3ff38436 KB |
194 | n1->n_arcsize = bsize / sizeof(*n1->n_arcs); |
195 | } | |
196 | n1->n_arcs[n1->n_narcs++] = n2; | |
197 | ++n2->n_refcnt; | |
dc5d5eb8 BJ |
198 | } |
199 | ||
6b044e40 | 200 | /* Find a node in the graph (insert if not found) and return a pointer to it. */ |
3ff38436 | 201 | NODE * |
6b044e40 | 202 | get_node(name) |
3ff38436 | 203 | char *name; |
dc5d5eb8 | 204 | { |
6b044e40 KB |
205 | DBT data, key; |
206 | NODE *n; | |
3ff38436 | 207 | |
6b044e40 KB |
208 | if (db == NULL && |
209 | (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL) | |
210 | err("db: open: %s", name, strerror(errno)); | |
dc5d5eb8 | 211 | |
6b044e40 KB |
212 | key.data = name; |
213 | key.size = strlen(name) + 1; | |
214 | ||
215 | switch((*db->get)(db, &key, &data, 0)) { | |
216 | case 0: | |
a9e5048e KB |
217 | bcopy(data.data, &n, sizeof(n)); |
218 | return (n); | |
6b044e40 KB |
219 | case 1: |
220 | break; | |
221 | default: | |
222 | case -1: | |
223 | err("db: get %s: %s", name, strerror(errno)); | |
224 | } | |
3ff38436 | 225 | |
6b044e40 | 226 | if ((n = malloc(sizeof(NODE) + key.size)) == NULL) |
471f7899 | 227 | err("%s", strerror(errno)); |
3ff38436 KB |
228 | |
229 | n->n_narcs = 0; | |
230 | n->n_arcsize = 0; | |
471f7899 | 231 | n->n_arcs = NULL; |
3ff38436 KB |
232 | n->n_refcnt = 0; |
233 | n->n_flags = 0; | |
6b044e40 | 234 | bcopy(name, n->n_name, key.size); |
3ff38436 | 235 | |
6b044e40 | 236 | /* Add to linked list. */ |
3ff38436 KB |
237 | if (n->n_next = graph) |
238 | graph->n_prevp = &n->n_next; | |
239 | n->n_prevp = &graph; | |
240 | graph = n; | |
241 | ||
6b044e40 KB |
242 | /* Add to hash table. */ |
243 | data.data = &n; | |
244 | data.size = sizeof(n); | |
245 | if ((*db->put)(db, &key, &data, 0)) | |
246 | err("db: put %s: %s", name, strerror(errno)); | |
471f7899 | 247 | return (n); |
dc5d5eb8 BJ |
248 | } |
249 | ||
3ff38436 KB |
250 | /* do topological sort on graph */ |
251 | void | |
252 | tsort() | |
dc5d5eb8 | 253 | { |
3ff38436 KB |
254 | register NODE *n, *next; |
255 | register int cnt; | |
256 | ||
257 | while (graph) { | |
258 | /* | |
39b3d945 | 259 | * Keep getting rid of simple cases until there are none left, |
3ff38436 KB |
260 | * if there are any nodes still in the graph, then there is |
261 | * a cycle in it. | |
262 | */ | |
263 | do { | |
264 | for (cnt = 0, n = graph; n; n = next) { | |
265 | next = n->n_next; | |
266 | if (n->n_refcnt == 0) { | |
267 | remove_node(n); | |
268 | ++cnt; | |
269 | } | |
270 | } | |
271 | } while (graph && cnt); | |
272 | ||
273 | if (!graph) | |
274 | break; | |
275 | ||
276 | if (!cycle_buf) { | |
277 | /* | |
5b453b35 | 278 | * Allocate space for two cycle logs - one to be used |
3ff38436 KB |
279 | * as scratch space, the other to save the longest |
280 | * cycle. | |
281 | */ | |
282 | for (cnt = 0, n = graph; n; n = n->n_next) | |
283 | ++cnt; | |
39b3d945 KB |
284 | cycle_buf = malloc((u_int)sizeof(NODE *) * cnt); |
285 | longest_cycle = malloc((u_int)sizeof(NODE *) * cnt); | |
286 | if (cycle_buf == NULL || longest_cycle == NULL) | |
471f7899 | 287 | err("%s", strerror(errno)); |
3ff38436 KB |
288 | } |
289 | for (n = graph; n; n = n->n_next) | |
290 | if (!(n->n_flags & NF_ACYCLIC)) { | |
291 | if (cnt = find_cycle(n, n, 0, 0)) { | |
292 | register int i; | |
293 | ||
294 | (void)fprintf(stderr, | |
471f7899 | 295 | "tsort: cycle in data\n"); |
3ff38436 KB |
296 | for (i = 0; i < cnt; i++) |
297 | (void)fprintf(stderr, | |
471f7899 | 298 | "tsort: %s\n", longest_cycle[i]->n_name); |
3ff38436 KB |
299 | remove_node(n); |
300 | break; | |
301 | } else | |
302 | /* to avoid further checks */ | |
303 | n->n_flags = NF_ACYCLIC; | |
304 | } | |
305 | ||
471f7899 KB |
306 | if (!n) |
307 | err("internal error -- could not find cycle"); | |
3ff38436 | 308 | } |
dc5d5eb8 BJ |
309 | } |
310 | ||
3ff38436 KB |
311 | /* print node and remove from graph (does not actually free node) */ |
312 | void | |
313 | remove_node(n) | |
314 | register NODE *n; | |
dc5d5eb8 | 315 | { |
3ff38436 KB |
316 | register NODE **np; |
317 | register int i; | |
318 | ||
319 | (void)printf("%s\n", n->n_name); | |
320 | for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++) | |
321 | --(*np)->n_refcnt; | |
322 | n->n_narcs = 0; | |
323 | *n->n_prevp = n->n_next; | |
324 | if (n->n_next) | |
325 | n->n_next->n_prevp = n->n_prevp; | |
dc5d5eb8 BJ |
326 | } |
327 | ||
3ff38436 | 328 | /* look for the longest cycle from node from to node to. */ |
471f7899 | 329 | int |
3ff38436 KB |
330 | find_cycle(from, to, longest_len, depth) |
331 | NODE *from, *to; | |
332 | int depth, longest_len; | |
dc5d5eb8 | 333 | { |
3ff38436 KB |
334 | register NODE **np; |
335 | register int i, len; | |
336 | ||
337 | /* | |
338 | * avoid infinite loops and ignore portions of the graph known | |
339 | * to be acyclic | |
340 | */ | |
341 | if (from->n_flags & (NF_MARK|NF_ACYCLIC)) | |
471f7899 | 342 | return (0); |
3ff38436 KB |
343 | from->n_flags = NF_MARK; |
344 | ||
345 | for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) { | |
346 | cycle_buf[depth] = *np; | |
347 | if (*np == to) { | |
348 | if (depth + 1 > longest_len) { | |
349 | longest_len = depth + 1; | |
350 | (void)memcpy((char *)longest_cycle, | |
351 | (char *)cycle_buf, | |
352 | longest_len * sizeof(NODE *)); | |
353 | } | |
354 | } else { | |
355 | len = find_cycle(*np, to, longest_len, depth + 1); | |
356 | if (len > longest_len) | |
357 | longest_len = len; | |
dc5d5eb8 BJ |
358 | } |
359 | } | |
3ff38436 | 360 | from->n_flags &= ~NF_MARK; |
471f7899 | 361 | return (longest_len); |
3ff38436 KB |
362 | } |
363 | ||
364 | void | |
471f7899 KB |
365 | usage() |
366 | { | |
367 | (void)fprintf(stderr, "usage: tsort [file]\n"); | |
368 | exit(1); | |
369 | } | |
370 | ||
371 | #if __STDC__ | |
372 | #include <stdarg.h> | |
373 | #else | |
374 | #include <varargs.h> | |
375 | #endif | |
376 | ||
377 | void | |
378 | #if __STDC__ | |
379 | err(const char *fmt, ...) | |
380 | #else | |
381 | err(fmt, va_alist) | |
382 | char *fmt; | |
383 | va_dcl | |
384 | #endif | |
3ff38436 | 385 | { |
471f7899 KB |
386 | va_list ap; |
387 | #if __STDC__ | |
388 | va_start(ap, fmt); | |
389 | #else | |
390 | va_start(ap); | |
391 | #endif | |
392 | (void)fprintf(stderr, "tsort: "); | |
393 | (void)vfprintf(stderr, fmt, ap); | |
394 | va_end(ap); | |
395 | (void)fprintf(stderr, "\n"); | |
3ff38436 | 396 | exit(1); |
471f7899 | 397 | /* NOTREACHED */ |
dc5d5eb8 | 398 | } |