| 1 | #!/usr/bin/env pike |
| 2 | |
// Queue of testsuite file names handed from main() to the
// run_testsuites() worker threads; an empty string tells a worker to stop.
Thread.Queue testsuite_queue;
// Nonzero makes every test result be printed, not only the unexpected ones.
static int verbose = 0;
// Nonzero traces the traffic with the engine on stderr.
static int debug = 0;
| 6 | |
| 7 | /* General class to manage a high-score list (e.g. of slow tests, tests |
| 8 | * with many nodes, ..) |
| 9 | */ |
class Highscorelist
{
  // Scores currently on the list, kept sorted in ascending order, so
  // scores[0] is always the entry that gets evicted first.
  array(float) scores;
  // names[i] is the label that belongs to scores[i].
  array(string) names;
  // Maximum number of entries kept on the list.
  int max;

  // Create an empty list that keeps at most m entries.
  void create(int m)
  {
    max = m;
    scores = ({});
    names = ({});
  }

  // Offer a new (score, name) pair to the list. The pair is stored if
  // the list is not yet full, or if it beats the lowest stored score,
  // which it then replaces.
  void add_score(float score, string name)
  {
    int num = sizeof(scores);
    if (num != sizeof(names)) {
      write("This should not happen!!\n");
      return;
    }

    // A list without capacity can never hold an entry. Without this
    // guard the eviction branch below would index an empty array.
    if (max <= 0)
      return;

    if (num < max) {
      scores += ({score});
      names += ({name});
      sort(scores, names);
    }
    else if (scores[0] < score) {
      scores[0] = score;
      names[0] = name;
      sort(scores, names);
    }
  }

  // Print every entry, lowest score first, using s as a format string
  // that receives the name and then the score.
  void report(string s)
  {
    for (int i = 0; i < sizeof(scores); i++)
      write(s, names[i], scores[i]);
  }
}
| 49 | |
// Nonzero when the slowest tests should be listed in the final report.
int report_slow = 0;
// High-score list of the slowest tests; created in main() when the
// slow_moves option is given.
Highscorelist slow_moves;
| 52 | |
/* Runs one testsuite file against an engine process and collects the
 * results.
 *
 * Commands are sent to the engine through a writer thread and the
 * responses are evaluated by a reader thread. The ids 10000-10005 are
 * used for bookkeeping commands added by this class:
 *   10000 get_reading_node_counter
 *   10001 get_owl_node_counter
 *   10002 get_connection_node_counter
 *   10003 cputime
 *   10004 end-of-suite marker (cputime, later quit)
 *   10005 move_uncertainty
 */
class Testsuite
{
  string name;
  // Expected answer (a regexp, optionally prefixed with '!') per test.
  mapping(int:string) correct_results = ([]);
  // Tests that are marked as expected to fail.
  multiset expected_failures = (<>);
  int reading_nodes;
  int owl_nodes;
  int connection_nodes;
  float walltime;
  float cputime;
  float uncertainty;
  Process.create_process engine;
  int quit_has_been_sent;

  // Test numbers by outcome. Lower case means the outcome was the
  // expected one, upper case means it was unexpected.
  array(int) pass;
  array(int) fail;
  array(int) PASS;
  array(int) FAIL;

  Thread.Queue writing_finished;
  Thread.Queue reading_finished;
  Thread.Queue write_queue;
  int timebase;
  float last_time;

  // Create an empty result record for the testsuite called s.
  void create(string s)
  {
    name = s;
    reading_nodes = 0;
    owl_nodes = 0;
    connection_nodes = 0;
    uncertainty = 0.0;
    walltime = 0.0;
    cputime = 0.0;
    pass = ({});
    fail = ({});
    PASS = ({});
    FAIL = ({});
  }

  // Print the one-line summary for this testsuite.
  static void finish()
  {
    // Write nothing if no test from the file was run at all.
    if (sizeof(pass) + sizeof(fail) + sizeof(PASS) + sizeof(FAIL) > 0)
      write("%-37s %7.2f %9d %7d %8d\n", name, cputime, reading_nodes,
	    owl_nodes, connection_nodes);
  }

  // Reader thread: evaluate the engine's responses read from f until
  // the response with id 10004 arrives or the input ends.
  static void program_reader(object f)
  {
    int test_number;

    if (debug)
      werror("Waiting for writing to be finished.\n");
    writing_finished->read();
    if (debug)
      werror("Finished waiting for writing to be finished.\n");

    while (1) {
      string s = f->gets();
      float current_time = time(timebase);
      // gets() returns 0 at end of input. This must be tested before
      // the carriage returns are stripped, since subtracting a string
      // from 0 is an error.
      if (!s)
	break;
      s -= "\r";
      if (debug)
	werror("Recv: " + s + "\n");

      int number;
      string answer;
      if (sscanf(s, "=%d %s", number, answer)) {
	if (number < 10000 || number > 10005) {
	  // Response to an actual test.
	  test_number = number;
	  string correct = correct_results[test_number];
	  if (!correct) {
	    correct = "correct result missing, check the test suite";
	    correct_results[test_number] = correct;
	  }
	  // A leading '!' inverts the meaning of the expected answer.
	  // has_prefix() also copes with an empty expected answer,
	  // where indexing the first character would fail.
	  int negate = 0;
	  if (has_prefix(correct, "!")) {
	    correct = correct[1..];
	    negate = 1;
	  }
	  correct = "^" + correct + "$";
	  object re = Regexp(correct);
	  string result = (negate ^ re->match(answer)) ? "pass" : "fail";

	  // Upper case marks an unexpected outcome.
	  if (result == "pass" && expected_failures[test_number]) {
	    result = "PASS";
	  }
	  if (result == "fail" && !expected_failures[test_number]) {
	    result = "FAIL";
	  }
	  // The outcome doubles as the name of the member array
	  // that collects the test number.
	  this_object()[result] += ({test_number});
	  walltime += (current_time - last_time);
	  if (report_slow)
	    slow_moves->add_score(current_time - last_time,
				  name + ":" + test_number);

	  if (result == "PASS" || result == "FAIL" || verbose)
	    write("%-15s %s %s [%s]\n", name + ":" + test_number,
		  result, answer, correct_results[test_number]);
	  last_time = current_time;
	}
	else if (number == 10000)
	  reading_nodes += (int) answer;
	else if (number == 10001)
	  owl_nodes += (int) answer;
	else if (number == 10002)
	  connection_nodes += (int) answer;
	else if (number == 10003)
	  cputime = (float) answer;
	else if (number == 10005)
	  uncertainty += (float) answer;
	else if (number == 10004)
	  break;
      }
      else if (sscanf(s, "?%s", answer)) {
	// Error response from the engine, reported verbatim.
	write("%-15s ?%s\n", name + ":", answer);
      }
    }
    if (debug)
      werror("Reader closing down.\n");
    finish();
    f->close();
    reading_finished->write("\n");
  }

  // Writer thread: forward queued strings to the engine through f
  // until an empty string is read from the queue.
  static void program_writer(object f)
  {
    while (1) {
      string s = write_queue->read();
      if (has_value(s, "quit"))
	quit_has_been_sent = 1;
      if (s == "")
	break;
      f->write(s);
    }
    f->close();
    if (debug)
      werror("Writer closed down\n");
  }

  // Monitor thread: abort the whole program if the engine stops
  // running before a quit command has been sent to it.
  static void program_monitor()
  {
    while (!quit_has_been_sent) {
      sleep(1);
      if (engine->status() != 0 && !quit_has_been_sent) {
	write("engine crashed in test suite %s.\n", name);
	exit(1);
      }
    }
  }

  // Queue the line s for the engine. Calling send() without argument
  // queues the empty string, which tells program_writer to close the
  // connection and terminate.
  void send(string|void s)
  {
    if (!s) {
      if (debug)
	werror("Finishing sending.\n");
      write_queue->write("");
    }
    else {
      if (debug)
	werror("Sent: " + s + "\n");
      write_queue->write(s + "\n");
    }
  }

  // Run the tests in the file suite_name on the engine started by
  // command with the arguments engine_options.
  //
  // options may contain "valgrind" (run the engine under valgrind) and
  // "check-unoccupied-answers" (replace the tests through
  // modify_testsuite()). If test_numbers is given and non-empty, only
  // the tests with those numbers are run.
  void run_testsuite(string suite_name, string command,
		     array(string) engine_options,
		     mapping(string:mixed) options,
		     array(int)|void test_numbers)
  {
    array(string) program_start_array = ({command}) + engine_options;

    string testsuite = Stdio.read_file(suite_name);
    if (!testsuite) {
      werror("Couldn't find " + suite_name + "\n");
      exit(1);
    }

    if (options["valgrind"])
      program_start_array = ({"valgrind"}) + program_start_array;

    if (options["check-unoccupied-answers"])
      testsuite = modify_testsuite(testsuite);

    writing_finished = Thread.Queue();
    reading_finished = Thread.Queue();
    write_queue = Thread.Queue();
    object f1 = Stdio.File();
    object pipe1 = f1->pipe();
    object f2 = Stdio.FILE();
    object pipe2 = f2->pipe();
    engine = Process.create_process(program_start_array,
				    (["stdin":pipe1, "stdout":pipe2]));
    thread_create(program_reader, f2);
    thread_create(program_writer, f1);
    thread_create(program_monitor);

    int number;
    string answer;
    string expected;

    timebase = time();
    last_time = time(timebase);

    correct_results = ([]);
    expected_failures = (<>);

    // An empty selection means that all tests are to be run.
    if (test_numbers && sizeof(test_numbers) == 0)
      test_numbers = 0;

    foreach (testsuite/"\n", string s) {
      string gtp_command;
      if (sscanf(s, "%d %s", number, gtp_command) == 2) {
	gtp_command = (gtp_command / " ")[0];
	// These ids are reserved for the bookkeeping commands below.
	if (number >= 10000 && number <= 10003)
	  continue;
	if (test_numbers && !has_value(test_numbers, number))
	  continue;
	if (sizeof(allowed_commands) > 0 && !allowed_commands[gtp_command])
	  continue;
	if (correct_results[(int) number])
	  write("Repeated test number " + number + ".\n");
	send("reset_reading_node_counter");
	send("reset_owl_node_counter");
	send("reset_connection_node_counter");
	send(s);
	if (sscanf(s, "%*sreg_genmove%*s") == 2)
	  send("10005 move_uncertainty");
	send("10000 get_reading_node_counter");
	send("10001 get_owl_node_counter");
	send("10002 get_connection_node_counter");
	send("10003 cputime");
      }
      else if (sscanf(s, "#? [%[^]]]%s", answer, expected)) {
	// Expected answer for the most recently seen test number.
	correct_results[(int)number] = answer;
	if (expected == "*")
	  expected_failures[(int)number] = 1;
      }
      else
	send(s);
    }

    if (debug)
      werror("Signalling finish of writing\n");
    writing_finished->write("\n");
    send("10004 cputime");
    reading_finished->read();
    send("10004 quit");
    // Let the writer thread close the connection and terminate
    // instead of leaving it blocked on the queue forever.
    send();
  }
}
| 304 | |
// GTP commands that tests are limited to; when empty, no test is
// skipped because of its command.
multiset(string) allowed_commands = (<>);
// Every Testsuite object created by run_testsuites(), summed up by
// final_report().
array(Testsuite) testsuites = ({});
| 307 | |
// Build a substitute testsuite that, instead of running the original
// tests, asks for the color of every vertex named in the expected
// answers and expects each of them to be empty.
//
// Lines starting with a nonzero number (the tests) are dropped, lines
// starting with "#?" are turned into one "color" test per vertex, and
// all other lines are copied unchanged.
string modify_testsuite(string testsuite)
{
  Regexp vertex_re = Regexp("[^A-T]([A-T][0-9]+)(.*)");
  string result = "";
  int current_test = 0;

  foreach (testsuite / "\n", string line) {
    int leading_number = (int) line;
    if (leading_number != 0) {
      // A test line; only its number is remembered.
      current_test = leading_number;
      continue;
    }

    if (line[0..1] != "#?") {
      result += line + "\n";
      continue;
    }

    // Peel off one vertex at a time; the new tests are numbered
    // 100 * test number + 11, + 12, ...
    array(string) parts;
    for (int n = 11; parts = vertex_re->split(line); n++) {
      string coord = parts[0];
      line = parts[1];
      result += sprintf("%d color %s\n", 100 * current_test + n, coord);
      result += "#? [empty]\n";
    }
  }

  return result;
}
| 333 | |
// Print the totals over all testsuites that have been run: node
// counters, cpu time and wall time, uncertainty, the numbers of
// unexpected passes and failures, and, if requested, the slowest tests.
void final_report()
{
  int unexpected_passes = 0;
  int unexpected_failures = 0;
  int sum_reading = 0;
  int sum_owl = 0;
  int sum_connection = 0;
  float sum_walltime = 0.0;
  float sum_cputime = 0.0;
  float sum_uncertainty = 0.0;

  foreach (testsuites, Testsuite suite) {
    sum_walltime += suite->walltime;
    sum_cputime += suite->cputime;
    sum_uncertainty += suite->uncertainty;
    sum_reading += suite->reading_nodes;
    sum_owl += suite->owl_nodes;
    sum_connection += suite->connection_nodes;
    unexpected_passes += sizeof(suite->PASS);
    unexpected_failures += sizeof(suite->FAIL);
  }

  write("Total nodes: %d %d %d\n", sum_reading, sum_owl, sum_connection);
  write("Total time: %.2f (%.2f)\n", sum_cputime, sum_walltime);
  write("Total uncertainty: %.2f\n", sum_uncertainty);

  // The counts of unexpected results are only mentioned when nonzero.
  if (unexpected_passes > 0)
    write("%d PASS\n", unexpected_passes);
  if (unexpected_failures > 0)
    write("%d FAIL\n", unexpected_failures);

  if (report_slow) {
    write("Slowest moves:\n");
    slow_moves->report("%s: %f seconds\n");
  }
}
| 368 | |
// Parse one test specification such as "reading", "reading.tst:4" or
// "reading:1-3,15" and record the selected test numbers in
// partial_testsuites under the testsuite's file name.
//
// An empty array in partial_testsuites means that the whole testsuite
// is to be run. Once a testsuite has been selected as a whole, later
// partial selections of it are ignored.
//
// Returns the testsuite's file name (with ".tst" appended if it was
// missing), or "" if the specification contains a space.
string parse_tests(mapping(string:array(int)) partial_testsuites,
		   string tests)
{
  string suite, numbers;
  if (sscanf(tests, "%[^ :]:%s", suite, numbers) != 2) {
    suite = tests;
    numbers = "";
  }

  if (has_value(suite, " "))
    return "";

  if (!has_suffix(suite, ".tst"))
    suite += ".tst";

  if (numbers == "") {
    partial_testsuites[suite] = ({});
    return suite;
  }

  if (!partial_testsuites[suite])
    partial_testsuites[suite] = ({});
  else if (sizeof(partial_testsuites[suite]) == 0)
    return suite;

  foreach (numbers / ",", string interval) {
    int start, stop;
    if (sscanf(interval, "%d-%d", start, stop) == 2) {
      // Accept reversed intervals like "17-4" too. Otherwise they
      // would add nothing, and a selection that ends up empty is
      // treated as a request for the whole testsuite.
      if (start > stop) {
	int tmp = start;
	start = stop;
	stop = tmp;
      }
      for (int k = start; k <= stop; k++)
	partial_testsuites[suite] |= ({k});
    }
    else
      partial_testsuites[suite] |= ({(int) interval});
  }

  return suite;
}
| 404 | |
// Program entry point: parse the command line, collect the testsuites
// to run, run them in the requested number of worker threads and print
// the final report.
//
// Returns 0 on success, and 1 if no testsuite was given and
// Makefile.am cannot be read to find the default ones.
int main(int argc, array(string) argv)
{
  // File names of the testsuites to run. Named differently from the
  // global testsuites array, which holds the Testsuite objects.
  array(string) suite_names = ({});
  mapping(string:array(int)) partial_testsuites = ([]);

  array(array(mixed)) all_options;

  all_options = ({ ({"help", Getopt.NO_ARG, ({"-h", "--help"})}),
		   ({"verbose", Getopt.NO_ARG, ({"-v", "--verbose"})}),
		   ({"valgrind", Getopt.NO_ARG, "--valgrind"}),
		   ({"check-unoccupied-answers", Getopt.NO_ARG,
		     "--check-unoccupied"}),
		   ({"slow_moves", Getopt.HAS_ARG, ({"-s", "--slow-moves"})}),
		   ({"engine", Getopt.HAS_ARG, ({"-e", "--engine"})}),
		   ({"options", Getopt.HAS_ARG, ({"-o", "--options"})}),
		   ({"file", Getopt.HAS_ARG, ({"-f", "--file"})}),
		   ({"jobs", Getopt.HAS_ARG, ({"-j", "--jobs"})}),
		   ({"limit-commands", Getopt.HAS_ARG, ({"-l", "--limit-commands"})})});

  mapping(string:mixed) options = ([]);
  string engine = "";
  array(string) engine_options = ({});
  int jobs = 1;

  foreach (Getopt.find_all_options(argv, all_options), array(mixed) option) {
    [string name, mixed value] = option;
    switch (name) {
    case "help":
      write(help_message, basename(argv[0]));
      return 0;

    case "valgrind":
      options["valgrind"] = 1;
      break;

    case "check-unoccupied-answers":
      options["check-unoccupied-answers"] = 1;
      break;

    case "verbose":
      verbose = 1;
      break;

    case "engine":
      engine = value;
      break;

    case "options":
      engine_options += value / " ";
      break;

    case "slow_moves":
      report_slow = 1;
      slow_moves = Highscorelist((int) value);
      break;

    case "file":
      string testlist = Stdio.read_file(value);
      if (!testlist) {
	// An unreadable list is reported; the remaining options are
	// still processed.
	werror("Couldn't find %s\n", value);
	continue;
      }
      foreach ((testlist / "\n") - ({""}), string tests)
	suite_names |= ({parse_tests(partial_testsuites, tests)});
      break;

    case "jobs":
      jobs = (int) value;
      break;

    case "limit-commands":
      foreach (value / ",", string command)
	allowed_commands[command] = 1;
      break;
    }
  }

  // Without an explicit engine, fall back to the default one together
  // with the options it needs.
  if (engine == "") {
    engine = "../interface/gnugo";
    engine_options |= "--quiet --mode gtp" / " ";
  }

  argv = Getopt.get_args(argv)[1..];
  foreach (argv, string tests)
    suite_names |= ({parse_tests(partial_testsuites, tests)});

  // parse_tests() returns "" for specifications it rejects.
  suite_names -= ({""});

  if (sizeof(suite_names) == 0) {
    // No testsuite was named; run those listed in Makefile.am.
    string makefile = Stdio.read_file("Makefile.am");
    if (!makefile) {
      werror("Couldn't find Makefile.am\n");
      return 1;
    }
    foreach (makefile / "\n", string s) {
      string filename;
      if (sscanf(s, "%*sregress.sh $(srcdir) %s ", filename) == 2)
	suite_names += ({filename});
    }
  }

  if (jobs < 1)
    jobs = 1;

  testsuite_queue = Thread.Queue();

  for (int j = 0; j < jobs; j++)
    thread_create(run_testsuites, engine, engine_options,
		  options, partial_testsuites);

  foreach (suite_names, string testsuite)
    testsuite_queue->write(testsuite);

  // One empty string per worker tells it to stop.
  for (int j = 0; j < jobs; j++)
    testsuite_queue->write("");

  // Each worker reads its stop marker only after finishing its last
  // testsuite, so an empty queue means that all work has been done.
  while (testsuite_queue->size() > 0)
    sleep(1);

  final_report();
  return 0;
}
| 523 | |
// Worker thread body: take testsuite file names from testsuite_queue
// and run them one after another until an empty string is read.
//
// Every Testsuite object created is appended to the global testsuites
// array. partial_testsuites supplies the selected test numbers per
// testsuite file name.
void run_testsuites(string engine, array(string) engine_options,
		    mapping(string:mixed) options,
		    mapping(string:array(int)) partial_testsuites)
{
  string suite_file;
  while ((suite_file = testsuite_queue->read()) != "") {
    Testsuite suite = Testsuite(suite_file - ".tst");
    testsuites += ({suite});

    suite->run_testsuite(suite_file, engine, engine_options, options,
			 partial_testsuites[suite_file]);
  }
}
| 540 | |
// Usage text printed for --help. It is used as a format string whose
// only argument is the name of the program.
string help_message =
"Usage: %s [OPTIONS]... [TESTS]...\n"
"\n"
"Run all regressions or a selection of them.\n"
"Options:\n"
" -h, --help Display this help and exit.\n"
" -v, --verbose Show also expected results.\n"
" --valgrind Run regressions under valgrind (very slow).\n"
" --check-unoccupied Do not run regressions. Instead check that\n"
" the listed answers are not occupied.\n"
" -s, --slow-moves=N Report the N slowest tests.\n"
" -e, --engine=ENGINE Engine to run regressions on. Default is\n"
" ../interface/gnugo.\n"
" -o, --options=OPTIONS Options passed to the engine.\n"
" -f, --file=FILE File containing a list of tests to run.\n"
" -j, --jobs=JOBS Number of testsuites being run in parallel.\n"
" -l, --limit-commands=COMMANDS Only run tests having certain GTP commands.\n"
"\n"
"Tests are listed on the command line in one of the following forms:\n"
"reading Run all tests in the testsuite reading.tst.\n"
"reading:4 Run test number 4 in reading.tst.\n"
"reading:4,17,30 Run tests with numbers 4, 17, and 30 in reading.tst\n"
"reading:4-17 Run tests with numbers between 4 and 17 in reading.tst\n"
"\n"
"It is also allowed to include the suffix \".tst\" above and more complex\n"
"lists like \"reading.tst:1-3,15,17,30-50,52\" are also understood.\n"
"The format of files used with --file is the same, with one testsuite on\n"
"each line.\n"
"\n"
"If no test suite is listed on the command line or read from file, then all\n"
"regressions listed in Makefile.am will be run.\n"
"\n"
"The --limit-commands option takes a comma separated list of GTP commands,\n"
"e.g. '--limit-commands=attack,defend' to only run tactical reading tests.\n";
| 574 | |
| 575 | /* |
| 576 | * Local Variables: |
| 577 | * tab-width: 8 |
| 578 | * c-basic-offset: 2 |
| 579 | * End: |
| 580 | */ |