Commit | Line | Data |
---|---|---|
7eeb782e AT |
1 | #!/usr/bin/env pike |
2 | ||
// Diagnostic switches. When `debug' is non-zero the dialogue with the
// engine is traced on stderr; when `verbose' is non-zero every test
// result is printed, not only the unexpected ones.
static int debug = 0, verbose = 0;

// Names of the test suite files waiting to be run. Worker threads stop
// when they read an empty string from it.
Thread.Queue testsuite_queue;
6 | ||
/* General class to manage a high-score list (e.g. of slow tests, tests
 * with many nodes, ..)
 *
 * At most `max' entries are kept. `scores' and `names' are parallel
 * arrays sorted by ascending score, so index 0 always holds the lowest
 * retained score, which is the one replaced when a higher score arrives.
 */
class Highscorelist
{
  array(float) scores;
  array(string) names;
  int max;

  // Create an empty list that retains at most m entries.
  void create(int m)
  {
    max = m;
    scores = ({});
    names = ({});
  }

  // Record `score' under `name' if there is still room or if it beats
  // the lowest score currently retained.
  void add_score(float score, string name)
  {
    int num = sizeof(scores);
    if (num != sizeof(names)) {
      write("This should not happen!!");
      return;
    }

    // A list without capacity retains nothing. Without this guard the
    // replacement branch below would index an empty array.
    if (max <= 0)
      return;

    if (num < max) {
      scores += ({score});
      names += ({name});
      sort(scores, names);
    }
    else if (scores[0] < score) {
      // Overwrite the lowest entry, then restore the ascending order.
      scores[0] = score;
      names[0] = name;
      sort(scores, names);
    }
  }

  // Print every entry, lowest score first. `s' is used as a format
  // string that receives the name and then the score.
  void report(string s)
  {
    for (int i = 0; i < sizeof(scores); i++)
      write(s, names[i], scores[i]);
  }
}
49 | ||
// Non-zero when the slowest tests are to be collected and listed in the
// final report.
int report_slow = 0;

// High-score list of the slowest tests. It is only consulted when
// report_slow is set.
Highscorelist slow_moves;
52 | ||
/* Runs one test suite file against an engine process and collects the
 * results.
 *
 * Three threads cooperate per suite: a writer feeding the engine's
 * stdin from write_queue, a reader parsing the engine's stdout, and a
 * monitor that aborts the whole run if the engine dies before "quit"
 * has been sent.
 *
 * Command ids 10000-10005 are reserved for the bookkeeping commands
 * this class sends itself (node counters, cputime, move uncertainty and
 * the end-of-suite marker).
 */
class Testsuite
{
  string name;
  mapping(int:string) correct_results = ([]);
  multiset expected_failures = (<>);
  int reading_nodes;
  int owl_nodes;
  int connection_nodes;
  float walltime;
  float cputime;
  float uncertainty;
  Process.create_process engine;
  int quit_has_been_sent;

  // Test numbers by outcome. Lower case means the outcome was the
  // expected one, upper case that it was unexpected.
  array(int) pass;
  array(int) fail;
  array(int) PASS;
  array(int) FAIL;

  Thread.Queue writing_finished;
  Thread.Queue reading_finished;
  Thread.Queue write_queue;
  int timebase;
  float last_time;

  // s is the suite name used as prefix in all reports.
  void create(string s)
  {
    name = s;
    reading_nodes = 0;
    owl_nodes = 0;
    connection_nodes = 0;
    uncertainty = 0.0;
    walltime = 0.0;
    cputime = 0.0;
    pass = ({});
    fail = ({});
    PASS = ({});
    FAIL = ({});
  }

  // Print the per-suite summary line.
  static void finish()
  {
    // Write nothing if no test from the file was run at all.
    if (sizeof(pass) + sizeof(fail) + sizeof(PASS) + sizeof(FAIL) > 0)
      write("%-37s %7.2f %9d %7d %8d\n", name, cputime, reading_nodes,
            owl_nodes, connection_nodes);
  }

  // Reader thread: parse the engine's responses from f, classify each
  // test result and accumulate the statistics. It stops at end of file
  // or when the response to command 10004 arrives.
  static void program_reader(object f)
  {
    int test_number;

    if (debug)
      werror("Waiting for writing to be finished.\n");
    writing_finished->read();
    if (debug)
      werror("Finished waiting for writing to be finished.\n");

    while (1) {
      string s = f->gets();
      float current_time = time(timebase);
      // gets() yields 0 at end of file, so test for that before
      // touching the line contents.
      if (!s)
        break;
      s -= "\r";
      if (debug)
        werror("Recv: " + s + "\n");

      int number;
      string answer;
      if (sscanf(s, "=%d %s", number, answer)) {
        if (number < 10000 || number > 10005) {
          test_number = (int) number;
          string correct = correct_results[test_number];
          if (!correct) {
            correct = "correct result missing, check the test suite";
            correct_results[test_number] = correct;
          }
          // A leading '!' inverts the sense of the match. has_prefix
          // is used so that an empty expected result is not indexed.
          int negate = 0;
          if (has_prefix(correct, "!")) {
            correct = correct[1..];
            negate = 1;
          }
          correct = "^" + correct + "$";
          object re = Regexp(correct);
          string result = (negate ^ re->match(answer)) ? "pass" : "fail";

          if (result == "pass" && expected_failures[test_number]) {
            result = "PASS";
          }
          if (result == "fail" && !expected_failures[test_number]) {
            result = "FAIL";
          }
          // The result string doubles as the name of the array that
          // collects the test number.
          this_object()[result] += ({test_number});
          walltime += (current_time - last_time);
          if (report_slow)
            slow_moves->add_score(current_time - last_time,
                                  name + ":" + test_number);

          if (result == "PASS" || result == "FAIL" || verbose)
            write("%-15s %s %s [%s]\n", name + ":" + test_number,
                  result, answer, correct_results[test_number]);
          last_time = current_time;
        }
        else if (number == 10000)
          reading_nodes += (int) answer;
        else if (number == 10001)
          owl_nodes += (int) answer;
        else if (number == 10002)
          connection_nodes += (int) answer;
        else if (number == 10003)
          cputime = (float) answer;
        else if (number == 10005)
          uncertainty += (float) answer;
        else if (number == 10004)
          break;
      }
      else if (sscanf(s, "?%s", answer)) {
        // Error responses from the engine are reported verbatim.
        write("%-15s ?%s\n", name + ":", answer);
      }
    }
    if (debug)
      werror("Reader closing down.\n");
    finish();
    f->close();
    reading_finished->write("\n");
  }

  // Writer thread: forward queued lines to the engine through f until
  // the empty-string sentinel is read, then close f.
  static void program_writer(object f)
  {
    while (1) {
      string s = write_queue->read();
      if (has_value(s, "quit"))
        quit_has_been_sent = 1;
      if (s == "")
        break;
      f->write(s);
    }
    f->close();
    if (debug)
      werror("Writer closed down\n");
  }

  // Monitor thread: poll the engine once per second and abort the
  // whole run if it has terminated before "quit" was sent.
  static void program_monitor()
  {
    while (!quit_has_been_sent) {
      sleep(1);
      if (engine->status() != 0 && !quit_has_been_sent) {
        write("engine crashed in test suite %s.\n", name);
        exit(1);
      }
    }
  }

  // Queue the line s (a newline is appended) for the engine. Called
  // without argument it queues the empty-string sentinel that makes
  // the writer thread close the engine's stdin and terminate.
  void send(string|void s)
  {
    if (!s) {
      if (debug)
        werror("Finishing sending.\n");
      write_queue->write("");
    }
    else {
      if (debug)
        werror("Sent: " + s + "\n");
      write_queue->write(s + "\n");
    }
  }

  // Run the suite in the file suite_name.
  //
  // command and engine_options form the engine's command line. The
  // options "valgrind" and "check-unoccupied-answers" are honoured.
  // If test_numbers is given and non-empty, only those tests are run.
  // Returns once the reader thread has seen the end-of-suite marker.
  void run_testsuite(string suite_name, string command,
                     array(string) engine_options,
                     mapping(string:mixed) options,
                     array(int)|void test_numbers)
  {
    array(string) program_start_array = ({command}) + engine_options;

    string testsuite = Stdio.read_file(suite_name);
    if (!testsuite) {
      werror("Couldn't find " + suite_name + "\n");
      exit(1);
    }

    if (options["valgrind"])
      program_start_array = ({"valgrind"}) + program_start_array;

    if (options["check-unoccupied-answers"])
      testsuite = modify_testsuite(testsuite);

    writing_finished = Thread.Queue();
    reading_finished = Thread.Queue();
    write_queue = Thread.Queue();
    object f1 = Stdio.File();
    object pipe1 = f1->pipe();
    object f2 = Stdio.FILE();
    object pipe2 = f2->pipe();
    engine = Process.create_process(program_start_array,
                                    (["stdin":pipe1, "stdout":pipe2]));
    thread_create(program_reader, f2);
    thread_create(program_writer, f1);
    thread_create(program_monitor);

    int number;
    string answer;
    string expected;

    timebase = time();
    last_time = time(timebase);

    correct_results = ([]);
    expected_failures = (<>);

    // An empty selection means "run everything".
    if (test_numbers && sizeof(test_numbers) == 0)
      test_numbers = 0;

    foreach (testsuite/"\n", string s) {
      string gtp_command;
      if (sscanf(s, "%d %s", number, gtp_command) == 2) {
        gtp_command = (gtp_command / " ")[0];
        // Skip every id the reader treats as a bookkeeping response,
        // otherwise a test numbered 10004 would end the reader early.
        if (number >= 10000 && number <= 10005)
          continue;
        if (test_numbers && !has_value(test_numbers, number))
          continue;
        if (sizeof(allowed_commands) > 0 && !allowed_commands[gtp_command])
          continue;
        if (correct_results[(int) number])
          write("Repeated test number " + number + ".\n");
        send("reset_reading_node_counter");
        send("reset_owl_node_counter");
        send("reset_connection_node_counter");
        send(s);
        if (sscanf(s, "%*sreg_genmove%*s") == 2)
          send("10005 move_uncertainty");
        send("10000 get_reading_node_counter");
        send("10001 get_owl_node_counter");
        send("10002 get_connection_node_counter");
        send("10003 cputime");
      }
      else if (sscanf(s, "#? [%[^]]]%s", answer, expected)) {
        // The expected result belongs to the most recent numbered line.
        correct_results[(int)number] = answer;
        if (expected == "*")
          expected_failures[(int)number] = 1;
      }
      else
        send(s);
    }

    if (debug)
      werror("Signalling finish of writing\n");
    writing_finished->write("\n");
    send("10004 cputime");
    reading_finished->read();
    send("10004 quit");
    // Let the writer thread close the engine's stdin and terminate.
    send();
  }
}
304 | ||
// GTP commands that tests are restricted to. An empty set means that
// no restriction applies.
multiset(string) allowed_commands = (<>);

// Every Testsuite started so far; summed up by final_report().
array(Testsuite) testsuites = ({});
307 | ||
// Replace all tests in the testsuite with new tests checking whether
// the given answers are unoccupied vertices.
//
// Numbered lines are dropped but their number is remembered. Every
// vertex found on a following "#?" line becomes a "color" query that
// expects "empty", numbered 100 * test + 11 and upwards. All other
// lines are copied through unchanged.
string modify_testsuite(string testsuite)
{
  array(string) pieces = ({});
  int current_test = 0;
  object vertex_re = Regexp("[^A-T]([A-T][0-9]+)(.*)");

  foreach (testsuite / "\n", string line) {
    int leading_number = (int) line;
    if (leading_number != 0) {
      current_test = leading_number;
      continue;
    }

    if (line[0..1] != "#?") {
      pieces += ({line + "\n"});
      continue;
    }

    // Peel off one vertex at a time; the second submatch is the rest
    // of the line that still has to be scanned.
    int id = 100 * current_test + 11;
    array(string) parts;
    while ((parts = vertex_re->split(line))) {
      pieces += ({sprintf("%d color %s\n", id, parts[0]) + "#? [empty]\n"});
      line = parts[1];
      id++;
    }
  }

  return pieces * "";
}
333 | ||
// Print the totals over all test suites that have been run: node
// counters, cpu and wall time, uncertainty, the number of unexpected
// results and, if requested, the list of slowest tests.
void final_report()
{
  array(Testsuite) all = testsuites;

  // Indexing the array yields one value per suite; the leading zero
  // keeps each sum well defined when no suite was run.
  float total_time = `+(0.0, @all->walltime);
  float total_cputime = `+(0.0, @all->cputime);
  float total_uncertainty = `+(0.0, @all->uncertainty);
  int reading_nodes = `+(0, @all->reading_nodes);
  int owl_nodes = `+(0, @all->owl_nodes);
  int connection_nodes = `+(0, @all->connection_nodes);
  int number_unexpected_pass = `+(0, @map(all->PASS, sizeof));
  int number_unexpected_fail = `+(0, @map(all->FAIL, sizeof));

  write("Total nodes: %d %d %d\n", reading_nodes, owl_nodes,
        connection_nodes);
  write("Total time: %.2f (%.2f)\n", total_cputime, total_time);
  write("Total uncertainty: %.2f\n", total_uncertainty);
  if (number_unexpected_pass > 0)
    write("%d PASS\n", number_unexpected_pass);
  if (number_unexpected_fail > 0)
    write("%d FAIL\n", number_unexpected_fail);

  if (!report_slow)
    return;
  write("Slowest moves:\n");
  slow_moves->report("%s: %f seconds\n");
}
368 | ||
// Parse one test specification of the form "suite", "suite.tst" or
// "suite:4,17,30-50" and record the selection in partial_testsuites.
//
// An empty array stored for a suite means that all its tests are to be
// run; once that has been recorded, later number lists for the same
// suite do not narrow it down again.
//
// Returns the suite file name (with ".tst" suffix), or "" if the
// specification contains a space in the suite part.
string parse_tests(mapping(string:array(int)) partial_testsuites,
                   string tests)
{
  string suite, numbers;
  int split_ok = (sscanf(tests, "%[^ :]:%s", suite, numbers) == 2);
  if (!split_ok) {
    suite = tests;
    numbers = "";
  }

  if (has_value(suite, " "))
    return "";

  if (!has_suffix(suite, ".tst"))
    suite += ".tst";

  // No number list: select the whole suite.
  if (numbers == "") {
    partial_testsuites[suite] = ({});
    return suite;
  }

  array(int) selected = partial_testsuites[suite];
  if (!selected)
    selected = ({});
  else if (sizeof(selected) == 0)
    return suite;

  foreach (numbers / ",", string item) {
    int first, last;
    if (sscanf(item, "%d-%d", first, last) == 2) {
      int k = first;
      while (k <= last) {
        selected |= ({k});
        k++;
      }
    }
    else
      selected |= ({(int) item});
  }

  partial_testsuites[suite] = selected;
  return suite;
}
404 | ||
// Program entry point.
//
// Parses the command line, determines which test suites to run (from
// the arguments, from a --file list, or from Makefile.am when nothing
// was given), runs them on `jobs' worker threads and prints the final
// report. Returns 0 on completion and 1 if Makefile.am is needed but
// cannot be read.
int main(int argc, array(string) argv)
{
  // Named suite_names so that it does not hide the file-level array of
  // Testsuite objects.
  array(string) suite_names = ({});
  mapping(string:array(int)) partial_testsuites = ([]);

  array(array(mixed)) all_options;

  all_options = ({ ({"help", Getopt.NO_ARG, ({"-h", "--help"})}),
                   ({"verbose", Getopt.NO_ARG, ({"-v", "--verbose"})}),
                   ({"valgrind", Getopt.NO_ARG, "--valgrind"}),
                   ({"check-unoccupied-answers", Getopt.NO_ARG,
                     "--check-unoccupied"}),
                   ({"slow_moves", Getopt.HAS_ARG, ({"-s", "--slow-moves"})}),
                   ({"engine", Getopt.HAS_ARG, ({"-e", "--engine"})}),
                   ({"options", Getopt.HAS_ARG, ({"-o", "--options"})}),
                   ({"file", Getopt.HAS_ARG, ({"-f", "--file"})}),
                   ({"jobs", Getopt.HAS_ARG, ({"-j", "--jobs"})}),
                   ({"limit-commands", Getopt.HAS_ARG,
                     ({"-l", "--limit-commands"})})});

  mapping(string:mixed) options = ([]);
  string engine = "";
  array(string) engine_options = ({});
  int jobs = 1;

  foreach (Getopt.find_all_options(argv, all_options), array(mixed) option) {
    [string name, mixed value] = option;
    switch (name) {
    case "help":
      write(help_message, basename(argv[0]));
      return 0;

    case "valgrind":
      options["valgrind"] = 1;
      break;

    case "check-unoccupied-answers":
      options["check-unoccupied-answers"] = 1;
      break;

    case "verbose":
      verbose = 1;
      break;

    case "engine":
      engine = value;
      break;

    case "options":
      engine_options += value / " ";
      break;

    case "slow_moves":
      report_slow = 1;
      slow_moves = Highscorelist((int) value);
      break;

    case "file":
      string testlist = Stdio.read_file(value);
      if (!testlist) {
        // An unreadable list is reported and otherwise ignored.
        werror("Couldn't find %s\n", value);
        continue;
      }
      foreach ((testlist / "\n") - ({""}), string tests)
        suite_names |= ({parse_tests(partial_testsuites, tests)});
      break;

    case "jobs":
      jobs = (int) value;
      break;

    case "limit-commands":
      foreach (value / ",", string command)
        allowed_commands[command] = 1;
      break;
    }
  }

  if (engine == "") {
    engine = "../interface/gnugo";
    engine_options |= "--quiet --mode gtp" / " ";
  }

  argv = Getopt.get_args(argv)[1..];
  foreach (argv, string tests)
    suite_names |= ({parse_tests(partial_testsuites, tests)});

  // parse_tests() returns "" for specifications it rejects.
  suite_names -= ({""});

  if (sizeof(suite_names) == 0) {
    string makefile = Stdio.read_file("Makefile.am");
    if (!makefile) {
      // read_file() returns 0 for an unreadable file; splitting that
      // value would fail with a runtime error instead of a message.
      werror("Couldn't find Makefile.am\n");
      return 1;
    }
    foreach (makefile / "\n", string s) {
      string filename;
      if (sscanf(s, "%*sregress.sh $(srcdir) %s ", filename) == 2)
        suite_names += ({filename});
    }
  }

  if (jobs < 1)
    jobs = 1;

  testsuite_queue = Thread.Queue();

  for (int j = 0; j < jobs; j++)
    thread_create(run_testsuites, engine, engine_options,
                  options, partial_testsuites);

  foreach (suite_names, string testsuite)
    testsuite_queue->write(testsuite);

  // One empty string per worker tells it to stop. A worker reads its
  // stop marker only after finishing its current suite, so an empty
  // queue means that all suites are done.
  for (int j = 0; j < jobs; j++)
    testsuite_queue->write("");

  while (testsuite_queue->size() > 0)
    sleep(1);

  final_report();
  return 0;
}
523 | ||
// Worker thread body: take suite file names from testsuite_queue and
// run them one at a time until an empty string is read. Every suite
// started is also appended to the file-level testsuites array.
void run_testsuites(string engine, array(string) engine_options,
                    mapping(string:mixed) options,
                    mapping(string:array(int)) partial_testsuites)
{
  string suite_name;
  while ((suite_name = testsuite_queue->read()) != "") {
    Testsuite suite = Testsuite(suite_name - ".tst");
    testsuites += ({suite});

    array(int) selection = partial_testsuites[suite_name];
    suite->run_testsuite(suite_name, engine, engine_options,
                         options, selection);
  }
}
540 | ||
// Usage text printed for --help. It is used as a format string whose
// single %s receives the program name.
string help_message =
  "Usage: %s [OPTIONS]... [TESTS]...\n"
  "\n"
  "Run all regressions or a selection of them.\n"
  "Options:\n"
  " -h, --help Display this help and exit.\n"
  " -v, --verbose Show also expected results.\n"
  " --valgrind Run regressions under valgrind (very slow).\n"
  " --check-unoccupied Do not run regressions. Instead check that\n"
  " the listed answers are not occupied.\n"
  " -s, --slow-moves=N Also report the N slowest tests.\n"
  " -e, --engine=ENGINE Engine to run regressions on. Default is\n"
  " ../interface/gnugo.\n"
  " -o, --options=OPTIONS Options passed to the engine.\n"
  " -f, --file=FILE File containing a list of tests to run.\n"
  " -j, --jobs=JOBS Number of testsuites being run in parallel.\n"
  " -l, --limit-commands=COMMANDS Only run tests having certain GTP commands.\n"
  "\n"
  "Tests are listed on the command line in one of the following forms:\n"
  "reading Run all tests in the testsuite reading.tst.\n"
  "reading:4 Run test number 4 in reading.tst.\n"
  "reading:4,17,30 Run tests with numbers 4, 17, and 30 in reading.tst\n"
  "reading:4-17 Run tests with numbers between 4 and 17 in reading.tst\n"
  "\n"
  "It is also allowed to include the suffix \".tst\" above and more complex\n"
  "lists like \"reading.tst:1-3,15,17,30-50,52\" are also understood.\n"
  "The format of files used with --file is the same, with one testsuite on\n"
  "each line.\n"
  "\n"
  "If no test suite is listed on the command line or read from file, then all\n"
  "regressions listed in Makefile.am will be run.\n"
  "\n"
  "The --limit-commands option takes a comma separated list of GTP commands,\n"
  "e.g. '--limit-commands=attack,defend' to only run tactical reading tests.\n";
574 | ||
575 | /* | |
576 | * Local Variables: | |
577 | * tab-width: 8 | |
578 | * c-basic-offset: 2 | |
579 | * End: | |
580 | */ |