Updated README: Equal sign not required with `--mode` flag.
[sgk-go] / patterns / extract_fuseki.c
CommitLineData
7eeb782e
AT
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
2 * This is GNU Go, a Go program. Contact gnugo@gnu.org, or see *
3 * http://www.gnu.org/software/gnugo/ for more information. *
4 * *
5 * Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, *
6 * 2008 and 2009 by the Free Software Foundation. *
7 * *
8 * This program is free software; you can redistribute it and/or *
9 * modify it under the terms of the GNU General Public License as *
10 * published by the Free Software Foundation - version 3 or *
11 * (at your option) any later version. *
12 * *
13 * This program is distributed in the hope that it will be useful, *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16 * GNU General Public License in file COPYING for more details. *
17 * *
18 * You should have received a copy of the GNU General Public *
19 * License along with this program; if not, write to the Free *
20 * Software Foundation, Inc., 51 Franklin Street, Fifth Floor, *
21 * Boston, MA 02111, USA. *
22\* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
23
24/* Extract fuseki patterns from the initial moves of a collection
25 * of games.
26 *
27 * This program finds the most common positions from the initial moves
28 * of a collection of games, and generates patterns in patterns.db
29 * format for the most common moves in these positions.
30 *
31 * Positions are identified by Zobrist hash values, completely
32 * ignoring the risk for hash collisions. In order to take all
33 * symmetries into account, we compute 8 hash values, one for each
34 * transformation of the board. Rather than playing on 8 boards in
35 * parallel, we construct 8 transformed copies of the Zobrist hash
36 * tables and compute one hash value for each of these. To get a
37 * transformation invariant hash, we finally sort the 8 hash values.
38 *
39 * Example:
40 * extract_fuseki sgflist 9 8 400
41 *
42 * generates (up to) 400 patterns, considering the 8 first moves of
43 * the 9x9 games listed in the file sgflist, and writes the patterns
44 * to stdout. sgflist is a file containing sgf filenames, one per line.
45 *
46 * The generated patterns may look like, e.g.
47 * Pattern Fuseki33
48 * # 3/18
49 *
50 * |.........
51 * |.........
52 * |...*.X...
53 * |.........
54 * |....O....
55 * |.........
56 * |.........
57 * |.........
58 * |.........
59 * +---------
60 *
61 * :8,-,value(3)
62 *
63 * The comment line gives the information that this position has been
64 * found 18 times among the analyzed games, and 3 out of these 18 times,
65 * the move * has been played. The same number 3 is entered as pattern
66 * value on the colon line for use by the fuseki module.
67 */
68
69/*
70 * Notes on the statistics:
71 *
72 * The statistics code assumes that every input file is valid. Use
73 * the output file option to sort out which input files are valid, and
74 * check output for problems. Rerun after fixing/removing invalid files.
75 *
76 * Outcome is defined by RE in sgf. Files without a parsable RE, or which
77 * do not have a winner, are invalid and need to be excluded.
78 *
79 * Pearson chi squared at 5% is used to test significance of
80 * differences among moves at a given position. Moves excluded by
81 * popularity rules are grouped together and considered as one. A
82 * positive result means that among all possible moves in a position,
83 * there's a difference somewhere. The next question is where. One
84 * clue comes from dchisq, which is the contribution to the overall
85 * chi squared for each move, with larger meaning higher impact on
86 * significance of overall result. Another comes from post hoc tests.
87 * Each pair of moves from a position with a statistically significant
88 * impact of move choice is compared, again with Pearson chi squared
89 * at 5%, and the positive tests printed. No correction is done for
90 * multiple tests. Pairs with less than 6 total moves are not tested,
91 * so it's possible for there to be a significant overall result
92 * without any positive post hocs. In this case, the overall result is
93 * doubtful as well.
94 *
95 * If the interest is solely in statistics, using min_pos_freq to
96 * avoid positions without enough data to hope for significance makes
97 * sense: 6 at a minimum.
98 *
99 * Note that the popularity exclusion rules can result in patterns being
100 * left in the db which have no parent in the db.
101 *
102 */
103
104#include <stdlib.h>
105#include <stdio.h>
106#include <string.h>
107#include <limits.h>
108#include <math.h>
109#include "liberty.h"
110#include "gg_utils.h"
111#include "random.h"
112#include "../sgf/sgftree.h"
113
114#define USAGE "\n\
115Usage: extract_fuseki files boardsize moves patterns handicap strength half_board min_pos_freq min_move_percent min_move_freq [output file]\n\
116files: The name of a file listing sgf files to examine,\n\
117 one filename per line.\n\
118boardsize: Only consider games with this size.\n\
119moves: Number of moves considered in each game.\n\
120handicap: 0 - no handicap, 1 - any game, 2-9 - two to nine handicap stones\n\
121 10 any handicap game\n\
122strength: The lowest strength of the players (1k-30k)\n\
123half_board: 0 - full board patterns, 1 - half board patterns\n\
124min_pos_freq: how many times a position must occur before patterns\n\
125 from it are generated\n\
126min_move_percent: minimum popularity relative to most popular move \n\
127 (counted by unique players) required of a move \n\
128 in a given position before it gets a pattern\n\
129min_move_freq: minimum number of unique players who must play a move\n\
130 before it gets a pattern\n\
131output file: Optional (if this exists, extract_fuseki will sort the games instead)\n\
132"
133
134/* Maximum length of sgf filename. */
135#define BUFSIZE 1000
136
137/* Number of moves to consider in each game, given as argument.*/
138int moves_per_game;
139
140/* Flag checking the setting for generating half board patterns */
141int half_board_patterns = 0;
142
143/* Maximum number of patterns to generate */
144#define MAX_PATTERNS_TO_EXTRACT 100000
145
146
147/* Handicap value, given as argument.*/
148int handicap_value;
149
150/* Lowest strength, given as argument.*/
151int player_strength;
152
153
154/* Min # of times a position must be seen before moves from it become
155 * patterns.
156 * Might want this larger to ensure reasonable statistics, 6 or more, say
157 * or smaller to hit every move down to unique games, 2 say;
158 * or even keep churning out moves with 1.
159 *
160 * Given as argument.
161*/
162
163int min_position_freq;
164
165
166/* popularity arguments */
167double min_move_percent;
168int min_move_freq;
169
170
171/* Number of games to analyze. */
172int number_of_games;
173
174/* Dynamically allocated array marking the games that could not be
175 * used for some reason.
176 */
177int *unused_games;
178
179/* WARN 1 warns about unused games. */
180/* WARN 2 also notes assumptions about metainfo. */
181#define WARN 1
182
183
184/* Dynamically allocated list of sgf file names. */
185char **sgf_names;
186
187/* Zobrist hash tables, rotated and reflected into all 8 transformations. */
188unsigned int O_hash[8][MAX_BOARD][MAX_BOARD];
189unsigned int X_hash[8][MAX_BOARD][MAX_BOARD];
190unsigned int move_hash[8][MAX_BOARD][MAX_BOARD];
191
192/* A board is hashed 8 times, once for each transformation, and these
193 * numbers are sorted into a transformation invariant hash.
194 */
195struct invariant_hash {
196 unsigned int values[8];
197};
198
199/* This is defined in engine/matchpat.c */
200extern const int transformations[8][2][2];
201
202
203/* A situation is the combination of a board position and the move to
204 * be made. We use the invariant hashes excluding and including the move
205 * as identification. If are interested in positions, we only use the first
206 * hash value.
207 *
208 * We ignore the possibility of a hash collision.
209 *
210 * outcome is the color which won the game
211 * player is the (hashed) name of the player who made the move
212 */
213struct situation {
214 struct invariant_hash pre;
215 struct invariant_hash post;
216 int outcome;
217 unsigned int player;
218};
219
220/* Dynamically allocated table of situations encountered in the analysis. */
221struct situation *situation_table;
222int number_of_situations;
223
224/* Data type for frequencies of e.g. situations or positions. 'index'
225 * is the index into situation_table.
226 */
227struct frequency {
228 int index;
229 int n;
230 int n_win;
231 int n_player;
232};
233
234/* Position frequency table. */
235struct frequency *frequency_table;
236int number_of_distinct_positions;
237
238/* The most common situations are called winners. These are the ones
239 * we generate patterns for.
240 *
241 * 'index' is normally an index into situation_table, or -1 for
242 * special aggregate entry (with no pattern) to collect stats for
243 * unpopular moves
244 *
245 * pre is hash[0], and must be stored here for aggregate
246 */
247struct winner {
248 int index;
249 unsigned int pre;
250 int position_frequency;
251 int move_frequency;
252 int n_player;
253 int position_success;
254 int move_success;
255 char pattern[MAX_BOARD][MAX_BOARD];
256};
257
258/* Dynamically allocated table of winners. */
259struct winner *winning_moves;
260int number_of_winning_moves;
261
262/* critical values of chisquare distribution with n degrees of freedom */
263/* p < 0.05
264 */
265double chisquarecrit05[] = {
266 3.8415, 5.9915, 7.8147, 9.4877, 11.0705, 12.5916, 14.0671, 15.5073,
267 16.9190, 18.3070, 19.6751, 21.0261, 22.3620, 23.6848, 24.9958, 26.2962,
268 27.5871, 28.8693, 30.1435, 31.4104, 32.67057, 33.92444, 35.17246,
269 36.41503, 37.65248, 38.88514, 40.11327, 41.33714, 42.55697, 43.77297,
270 44.98534, 46.19426, 47.39988, 48.60237, 49.80185, 50.99846, 52.19232,
271 53.38354, 54.57223, 55.75848, 56.94239, 58.12404, 59.30351, 60.48089,
272 61.65623, 62.82962, 64.00111, 65.17077, 66.33865, 67.50481};
273
274/* p < 0.10, should be same size as 05 */
275double chisquarecrit10[] = {
276 2.7055, 4.6052, 6.2514, 7.7794, 9.2364, 10.6446, 12.0170, 13.3616,
277 14.6837, 15.9872, 17.2750, 18.5493, 19.8119, 21.0641, 22.3071, 23.5418,
278 24.7690, 25.9894, 27.2036, 28.4120, 29.61509, 30.81328, 32.00690,
279 33.19624, 34.38159, 35.56317, 36.74122, 37.91592, 39.08747, 40.25602,
280 41.42174, 42.58475, 43.74518, 44.90316, 46.05879, 47.21217, 48.36341,
281 49.51258, 50.65977, 51.80506, 52.94851, 54.09020, 55.23019, 56.36854,
282 57.50530, 58.64054, 59.77429, 60.90661, 62.03754, 63.16712};
283
284double chisquarecrit01[] = {
285 6.63489660102121, 9.21034037197618, 11.3448667301444, 13.2767041359876,
286 15.086272469389, 16.8118938297709, 18.4753069065824, 20.0902350296632,
287 21.6659943334619, 23.2092511589544, 24.7249703113183, 26.2169673055359,
288 27.6882496104570, 29.1412377406728, 30.5779141668925, 31.9999269088152,
289 33.4086636050046, 34.8053057347051, 36.1908691292701, 37.5662347866250,
290 38.9321726835161, 40.2893604375938, 41.6383981188585, 42.9798201393516,
291 44.3141048962192, 45.6416826662832, 46.9629421247514, 48.2782357703155,
292 49.5878844728988, 50.8921813115171, 52.1913948331919, 53.4857718362354,
293 54.7755397601104, 56.0609087477891, 57.3420734338592, 58.619214501687,
294 59.8925000450869, 61.1620867636897, 62.4281210161849, 63.6907397515645,
295 64.9500713352112, 66.2062362839932, 67.4593479223258, 68.7095129693454,
296 69.9568320658382, 71.2014002483115, 72.4433073765482, 73.6826385201058,
297 74.9194743084782, 76.1538912490127};
298
299double chisquarecrit001[] = {
300 10.8275661706627, 13.8155105579643, 16.2662361962381, 18.4668269529032,
301 20.5150056524329, 22.4577444848253, 24.3218863478569, 26.1244815583761,
302 27.8771648712566, 29.5882984450744, 31.26413362024, 32.9094904073602,
303 34.5281789748709, 36.1232736803981, 37.6972982183538, 39.2523547907685,
304 40.7902167069025, 42.31239633168, 43.8201959645175, 45.3147466181259,
305 46.7970380415613, 48.2679422908352, 49.7282324664315, 51.1785977773774,
306 52.6196557761728, 54.0519623885766, 55.4760202057452, 56.8922853933536,
307 58.3011734897949, 59.7030643044299, 61.0983060810581, 62.4872190570885,
308 63.870098522345, 65.2472174609424, 66.618828843701, 67.9851676260242,
309 69.3464524962412, 70.702887411505, 72.0546629519878, 73.401957518991,
310 74.7449383984238, 76.0837627077, 77.418578241314, 78.749524228043,
311 80.076732010819, 81.40032565871, 82.720422519124, 84.0371337172235,
312 85.350564608593, 86.6608151904032};
313
314/*
315 * Append the files that are sorted to a specific file
316 */
317
318static void
319write_sgf_filenames(const char *name, char *filenames[])
320{
321 int n;
322 FILE *namefile = fopen(name, "a");
323 if (!namefile) {
324 fprintf(stderr, "Fatal error, couldn't open %s.\n", name);
325 exit(EXIT_FAILURE);
326 }
327
328 for (n = 0; n < number_of_games; n++) {
329 if (unused_games[n] == 0)
330 fprintf(namefile, "%s\n", filenames[n]);
331 }
332}
333
334
335/* Read the sgf file names. These are assumed to be stored one per
336 * line in the file with the name given by 'name'. The sgf file names
337 * are copied into dynamically allocated memory by strdup() and
338 * pointers to the names are stored into the 'filenames[]' array. It
339 * is assumed that 'filenames' has been allocated sufficiently large
340 * before this this function is called. If 'filenames' is NULL, the
341 * sgf file names are only counted. The number of sgf file names is
342 * returned.
343 */
344static int
345read_sgf_filenames(const char *name, char *filenames[])
346{
347 int n;
348 char buf[BUFSIZE];
349 FILE *namefile = fopen(name, "r");
350 if (!namefile) {
351 fprintf(stderr, "Fatal error, couldn't open %s.\n", name);
352 exit(EXIT_FAILURE);
353 }
354
355 n = 0;
356 while (fgets(buf, BUFSIZE, namefile) != NULL) {
357 if (filenames != NULL) {
358 if (buf[strlen(buf) - 2] == '\r') {
359 buf[strlen(buf) - 2] = '\0';
360 /* Delete carriage return character, if any. */
361 }
362 else {
363 buf[strlen(buf) - 1] = '\0';
364 /* Delete newline character. */
365 }
366
367 filenames[n] = strdup(buf);
368 if (filenames[n] == NULL) {
369 fprintf(stderr, "Fatal error, strdup() failed.\n");
370 exit(EXIT_FAILURE);
371 }
372 }
373 n++;
374 }
375
376 return n;
377}
378
379/* Fill one of the zobrist hash tables with random numbers. */
380static void
381init_zobrist_table(unsigned int hash[8][MAX_BOARD][MAX_BOARD])
382{
383 unsigned int k;
384 int m, n;
385 int i, j;
386 int mid = board_size/2;
387
388 for (m = 0; m < board_size; m++)
389 for (n = 0; n < board_size; n++) {
390 hash[0][m][n] = 0;
391 for (k = 0; 32*k < CHAR_BIT*sizeof(hash[0][0][0]); k++)
392 hash[0][m][n] |= gg_urand() << k*32;
393 }
394
395 /* Fill in all transformations of the hash table. */
396 for (k = 1; k < 8; k++)
397 for (m = 0; m < board_size; m++)
398 for (n = 0; n < board_size; n++) {
399 TRANSFORM2(m-mid, n-mid, &i, &j, k);
400 hash[k][m][n] = hash[0][i+mid][j+mid];
401 }
402
403 /* Debug output. */
404 if (0) {
405 for (k = 0; k < 8; k++) {
406 for (m = 0; m < board_size; m++) {
407 for (n = 0; n < board_size; n++)
408 fprintf(stderr, "%8x ", hash[k][m][n]);
409 fprintf(stderr, "\n");
410 }
411 fprintf(stderr, "\n");
412 fprintf(stderr, "\n");
413 }
414 }
415}
416
417/* Initialize all Zobrist hash tables with random numbers. */
418static void
419init_zobrist_numbers(void)
420{
421 gg_srand(1);
422 init_zobrist_table(O_hash);
423 init_zobrist_table(X_hash);
424 init_zobrist_table(move_hash);
425}
426
427/* Initialize the situation_table array. */
428static void
429init_situations(void)
430{
431 situation_table = calloc(moves_per_game * number_of_games,
432 sizeof(*situation_table));
433 if (!situation_table) {
434 fprintf(stderr, "Fatal error, failed to allocate situations table.\n");
435 exit(EXIT_FAILURE);
436 }
437 number_of_situations = 0;
438}
439
440/* Compare two hash values. Used for sorting the hash values in the
441 * invariant hash.
442 */
443static int
444compare_numbers(const void *a, const void *b)
445{
446 unsigned int aa = *((const unsigned int *) a);
447 unsigned int bb = *((const unsigned int *) b);
448 if (aa > bb)
449 return 1;
450 if (aa < bb)
451 return -1;
452 return 0;
453}
454
455/* Compute hash values for all transformations of the position
456 * currently in the p[][] array. The hash values are not sorted by
457 * this function.
458 */
459static void
460common_hash_board(struct invariant_hash *hash, int color_to_play)
461{
462 int m, n;
463 int k;
464
465 for (k = 0; k < 8; k++)
466 hash->values[k] = 0;
467
468 for (m = 0; m < board_size; m++)
469 for (n = 0; n < board_size; n++) {
470 for (k = 0; k < 8; k++) {
471 if (BOARD(m, n) == color_to_play)
472 hash->values[k] ^= O_hash[k][m][n];
473 else if (BOARD(m, n) != EMPTY)
474 hash->values[k] ^= X_hash[k][m][n];
475 }
476 }
477}
478
479/* Compute invariant hash for the current position. */
480static void
481hash_board(struct invariant_hash *hash, int color_to_play)
482{
483 common_hash_board(hash, color_to_play);
484 /* Sort the 8 hash values. */
485 gg_sort(hash->values, 8, sizeof(hash->values[0]), compare_numbers);
486}
487
488/* Compute invariant hash for the current situation, i.e. position
489 * plus a move to be made.
490 */
491static void
492hash_board_and_move(struct invariant_hash *hash, int color_to_play,
493 int m, int n)
494{
495 int k;
496
497 common_hash_board(hash, color_to_play);
498
499 for (k = 0; k < 8; k++)
500 hash->values[k] ^= move_hash[k][m][n];
501
502 /* Notice that we of course must wait with sorting until we have
503 * added the move to the hash values.
504 */
505 gg_sort(hash->values, 8, sizeof(hash->values[0]), compare_numbers);
506}
507
508
509/* the so called X31 hash from gtk, for hashing strings */
510static unsigned int
511hash_string(const char *v)
512{
513 unsigned int h = 0;
514 while (*v != '\0') {
515 h = (h << 5) - h + *v;
516 v++;
517 }
518 return h;
519}
520
521/* Adapted from play_sgf_tree() in engine/sgfutils.c. Returns the
522 * next move from the game record in (*m, *n) and color in *color. If
523 * handicap stones are encountered, these are put on the board
524 * immediately. Return value is 1 if another move was found in the
525 * game record, 0 otherwise.
526 */
527static int
528get_move_from_sgf(SGFNode *node, int *m, int *n, int *color)
529{
530 SGFProperty *prop;
531 int i, j;
532
533 for (prop = node->props; prop; prop = prop->next) {
534 if (!prop || !prop->name || !node) {
535 /* something wrong with the SGF file properties */
536 if (1)
537 fprintf(stderr, "Something wrong with the SGF file properties.\n");
538 return 0;
539 }
540 switch (prop->name) {
541 case SGFAB:
542 get_moveXY(prop, &i, &j, board_size);
543 /* Put handicap stones on the board at once. */
544 add_stone(POS(i, j), BLACK);
545 break;
546
547 case SGFAW:
548 if (0)
549 fprintf(stderr, "Warning: white stone added.\n");
550 return 0;
551 break;
552
553 case SGFPL:
554 if (0)
555 fprintf(stderr, "Warning: PL property encountered.\n");
556 return 0;
557 break;
558
559 case SGFW:
560 case SGFB:
561 *color = (prop->name == SGFW) ? WHITE : BLACK;
562
563 if (!get_moveXY(prop, m, n, board_size)) {
564 if (0)
565 fprintf(stderr, "Warning: failed to get move coordinates.\n");
566 return 0;
567 }
568 return 1;
569 break;
570 }
571 }
572
573 return 0;
574}
575
576/* Add a situation to the situation_table array. */
577static void
578add_situation(struct invariant_hash *pre, struct invariant_hash *post,
579 int outcome, unsigned int player)
580{
581 situation_table[number_of_situations].pre = *pre;
582 situation_table[number_of_situations].post = *post;
583 situation_table[number_of_situations].outcome = outcome;
584 situation_table[number_of_situations].player = player;
585 number_of_situations++;
586}
587
588/* Compare two situations. Used (indirectly, see compare_situations2)
589 * for sorting the situation_table array
590 * and when building frequency tables for the different moves at the
591 * same position.
592 */
593static int
594compare_situations(const void *a, const void *b)
595{
596 const struct situation *aa = a;
597 const struct situation *bb = b;
598 int k;
599
600 for (k = 0; k < 8; k++) {
601 if (aa->pre.values[k] > bb->pre.values[k])
602 return 1;
603 if (aa->pre.values[k] < bb->pre.values[k])
604 return -1;
605 }
606
607 for (k = 0; k < 8; k++) {
608 if (aa->post.values[k] > bb->post.values[k])
609 return 1;
610 if (aa->post.values[k] < bb->post.values[k])
611 return -1;
612 }
613
614 return 0;
615}
616
617static int
618compare_situations2(const void *a, const void *b)
619{
620 const struct situation *aa = a;
621 const struct situation *bb = b;
622 int r = compare_situations(a, b);
623 if (r != 0)
624 return r;
625 if (aa->player > bb->player)
626 return 1;
627 if (aa->player < bb->player)
628 return -1;
629
630 return 0;
631}
632
633/* Compare two positions. Used when building frequency table for the
634 * different positions encountered.
635 */
636static int
637compare_positions(const void *a, const void *b)
638{
639 const struct situation *aa = a;
640 const struct situation *bb = b;
641 int k;
642
643 for (k = 0; k < 8; k++) {
644 if (aa->pre.values[k] > bb->pre.values[k])
645 return 1;
646 if (aa->pre.values[k] < bb->pre.values[k])
647 return -1;
648 }
649
650 return 0;
651}
652
653/* Compare two frequency table entries. The returned values are
654 * "backwards" because we always want to sort frequencies in falling
655 * order.
656 *
657 * The first version counts every game equally, the second version
658 * counts a game only once per unique player.
659 */
660static int
661compare_frequencies(const void *a, const void *b)
662{
663 const struct frequency *aa = a;
664 const struct frequency *bb = b;
665
666 if (aa->n < bb->n)
667 return 1;
668
669 if (aa->n > bb->n)
670 return -1;
671
672 return 0;
673}
674
675static int
676compare_frequencies2(const void *a, const void *b)
677{
678 const struct frequency *aa = a;
679 const struct frequency *bb = b;
680
681 if (aa->n_player < bb->n_player)
682 return 1;
683
684 if (aa->n_player > bb->n_player)
685 return -1;
686
687 return 0;
688}
689
690/*
691 * find_region answers in what region the move is.
692 * There are 9 regions, corners, sides and center.
693 */
694
695static int
696find_region(int m, int n)
697{
698 if (m < 7) {
699 if (n < 7)
700 return 0;
701 else if (n > 11)
702 return 1;
703 else if (n > 6 && m < 5)
704 return 6;
705 }
706 else if (m > 11) {
707 if (n < 7)
708 return 2;
709 else if (n > 11)
710 return 3;
711 else if (n > 6 && m > 13)
712 return 7;
713 }
714 else if (m > 6) {
715 if (n < 5)
716 return 4;
717 else if (n > 13)
718 return 5;
719 }
720 /* otherwise in center */
721 return 8;
722}
723
724/* If this situation is listed among the winners, fill in the pattern
725 * entry of the winner struct.
726 */
727static void
728store_pattern_if_winner(struct invariant_hash *pre,
729 struct invariant_hash *post,
730 int color, int m, int n)
731{
732 int k;
733 struct situation s;
734 int region = 8;
735 int i, j;
736 int move_number = 1;
737 s.pre = *pre;
738 s.post = *post;
739
740 for (k = 0; k < number_of_winning_moves; k++) {
741 if (winning_moves[k].index != -1
742 && compare_situations(&situation_table[winning_moves[k].index],
743 &s) == 0) {
744 /* This is a winner. Record the pattern. */
745 for (i = 0; i < board_size; i++)
746 for (j = 0; j < board_size; j++) {
747 if (BOARD(i, j) == EMPTY)
748 winning_moves[k].pattern[i][j] = '.';
749 else if (BOARD(i, j) == color) {
750 winning_moves[k].pattern[i][j] = 'O';
751 move_number++;
752 }
753 else if ((color == WHITE && BOARD(i, j) == BLACK)
754 || (color == BLACK && BOARD(i, j) == WHITE)) {
755 winning_moves[k].pattern[i][j] = 'X';
756 move_number++;
757 }
758 else { /* something is wrong */
759 fprintf(stderr, "Error in store_pattern_if_winner: %d\n", k);
760 winning_moves[k].pattern[i][j] = '.';
761 }
762 }
763 winning_moves[k].pattern[m][n] = '*';
764 /* Add ? in areas far away from the move. */
765 if (half_board_patterns == 1 && move_number > 3 && board_size == 19)
766 region = find_region(m, n);
767 if (region != 8) {
768 for (i = 0; i < board_size; i++) {
769 for (j = 0; j < board_size; j++) {
770 if (region == 0) {
771 if (i + j > 23)
772 winning_moves[k].pattern[i][j] = '?';
773 }
774 else if (region == 1) {
775 if (i - j > 5)
776 winning_moves[k].pattern[i][j] = '?';
777 }
778 else if (region == 2) {
779 if (i + board_size - j < 14)
780 winning_moves[k].pattern[i][j] = '?';
781 }
782 else if (region == 3) {
783 if (i + j < 13)
784 winning_moves[k].pattern[i][j] = '?';
785 }
786 else if (region == 4) {
787 if (j > 10)
788 winning_moves[k].pattern[i][j] = '?';
789 }
790 else if (region == 5) {
791 if (j < 8)
792 winning_moves[k].pattern[i][j] = '?';
793 }
794 else if (region == 6) {
795 if (i > 10)
796 winning_moves[k].pattern[i][j] = '?';
797 }
798 else if (region == 7) {
799 if (i < 8)
800 winning_moves[k].pattern[i][j] = '?';
801 }
802 }
803 }
804 }
805 }
806 }
807}
808
809/* Play through the initial moves of a game. If 'collect_statistics'
810 * is set, store all encountered situations in the situation_table
811 * array. 'collect_statistics' will be set to the color which won the
812 * game. Otherwise, see if there are any winners among the situations
813 * and store the corresponding pattern so that it can subsequently be
814 * printed. Return 0 if there was some problem with the game record,
815 * e.g. fewer moves than expected.
816 */
817static int
818examine_game(SGFNode *sgf, int collect_statistics)
819{
820 int k;
821 int m, n;
822 SGFNode *node = sgf;
823 struct invariant_hash prehash;
824 struct invariant_hash posthash;
825 int color;
826 char *PW, *PB;
827 unsigned int white_player, black_player;
828
829 if (!sgfGetCharProperty(sgf, "PW", &PW))
830 white_player = hash_string("");
831 else
832 white_player = hash_string(PW);
833
834 if (!sgfGetCharProperty(sgf, "PB", &PB))
835 black_player = hash_string("");
836 else
837 black_player = hash_string(PB);
838
839 /* Call the engine to clear the board. */
840 clear_board();
841
842 /* Loop through the first moves_per_game moves of each game. */
843 for (k = 0; k < moves_per_game && node != NULL; node = node->child) {
844 if (!get_move_from_sgf(node, &m, &n, &color)) {
845 if (k > 0) {
846 /* something is wrong with the file */
847 if (0)
848 fprintf(stderr, "move number:%d\n", k);
849 return 0;
850 }
851 continue;
852 }
853 gg_assert(m >= 0 && m < board_size && n >= 0 && n <= board_size);
854 hash_board(&prehash, color);
855 hash_board_and_move(&posthash, color, m, n);
856 if (collect_statistics != EMPTY)
857 add_situation(&prehash, &posthash, collect_statistics == color,
858 color == WHITE ? white_player : black_player);
859 else
860 store_pattern_if_winner(&prehash, &posthash, color, m, n);
861 play_move(POS(m, n), color);
862
863 /* Debug output. */
864 if (0) {
865 int l;
866 for (l = 0; l < 8; l++)
867 fprintf(stderr, "%8x ", prehash.values[l]);
868 fprintf(stderr, " ");
869 for (l = 0; l < 8; l++)
870 fprintf(stderr, "%8x ", posthash.values[l]);
871 fprintf(stderr, "\n");
872 showboard(0);
873 }
874 k++;
875 }
876 if (!node) {
877 if (0)
878 fprintf(stderr, "Node error\n");
879 return 0;
880 }
881
882 return 1;
883}
884
885/* Tests if the player has enough strength in the game to be interesting
886 * for the library
887 */
888
889static int
890enough_strength(char *strength)
891{
892 int length = 0;
893 int i = 0;
894 int kyu = 30;
895 if (player_strength >= 30)
896 return 1;
897
898 length = strlen(strength);
899 /* check if dan or pro player */
900 for (i = 0; i < length; i++)
901 if (strength[i] == 'd' || strength[i] == 'D'
902 || strength[i] == 'p' || strength[i] == 'P')
903 return 1;
904
905 /* get the kyu strength as an integer */
906 for (i = 0; i < length; i++) {
907 if (strength[i] == 'k')
908 strength[i] = '\0';
909 kyu = atoi(strength);
910 if (kyu == 0) {
911 if (player_strength >= 30)
912 return 1;
913 else
914 return 0;
915 }
916 }
917
918 if (kyu <= player_strength)
919 return 1;
920
921 /* not enough strength */
922 return 0;
923}
924
925
926/*
927 * used by both sort_games and collect_situations to
928 * check validity of a game record
929 * 0 means failure for any reason
930 * 1 means probably okay, without going through moves
931 */
932static int
933check_game(SGFNode *sgf, char *sgfname)
934{
935 int handicap, size;
936 char *WR, *BR; /* white rank */
937 char thirty_kyu[] = "30k";
938 char *RE;
939
940 size = 19;
941 if (!sgfGetIntProperty(sgf, "SZ", &size)) {
942 if (WARN > 1)
943 fprintf(stderr, "Warning: no SZ in sgf file %s , assuming size %d\n",
944 sgfname, size);
945 }
946 if (size != board_size) {
947 if (WARN)
948 fprintf(stderr, "Warning: wrong size of board %d in sgf file %s .\n",
949 size, sgfname);
950 return 0;
951 }
952
953 /* No handicap games */
954 if (handicap_value == 0) {
955 if (sgfGetIntProperty(sgf, "HA", &handicap) && handicap > 1) {
956 if (WARN)
957 fprintf(stderr,
958 "No handicap games allowed, sgf file %s has handicap %d\n",
959 sgfname, handicap);
960 return 0;
961 }
962 }
963
964 /* Only handicap games */
965 if (handicap_value > 1) {
966 if (!sgfGetIntProperty(sgf, "HA", &handicap)) {
967 if (WARN)
968 fprintf(stderr, "Sgf file %s is not a handicap game\n", sgfname);
969 return 0;
970 }
971
972 /* only specific handicap games */
973 if (handicap_value != 10 && handicap != handicap_value) {
974 if (WARN)
975 fprintf(stderr,
976 "Sgf file %s has wrong number of handicap stones %d\n",
977 sgfname, handicap);
978 return 0;
979 }
980
981 /* any reasonable handicap games */
982 if (handicap_value == 10 && (handicap < 2 || handicap > 9)) {
983 if (WARN)
984 fprintf(stderr,
985 "Sgf file %s has wrong/weird number of handicap stones %d\n",
986 sgfname, handicap);
987 return 0;
988 }
989 }
990
991 /* Examine strength of players in the game and compare it
992 * with minimum player strength.
993 */
994
995 BR = thirty_kyu;
996 if (!sgfGetCharProperty(sgf, "BR", &BR)) {
997 if (WARN > 1)
998 fprintf(stderr, "No black strength in sgf file %s assuming %s\n",
999 sgfname, BR);
1000 }
1001 if (!enough_strength(BR)) {
1002 if (WARN)
1003 fprintf(stderr, "Wrong black strength %s in sgf file %s\n", BR, sgfname);
1004 return 0;
1005 }
1006
1007 WR = thirty_kyu;
1008 if (!sgfGetCharProperty(sgf, "WR", &WR)) {
1009 if (WARN > 1)
1010 fprintf(stderr, "No white strength in sgf file %s assuming %s\n",
1011 sgfname, WR);
1012 }
1013 if (!enough_strength(WR)) {
1014 if (WARN)
1015 fprintf(stderr, "Wrong white strength %s in sgf file %s\n", WR, sgfname);
1016 return 0;
1017 }
1018
1019 /* Must have result. */
1020 if (!sgfGetCharProperty(sgf, "RE", &RE)) {
1021 if (WARN)
1022 fprintf(stderr, "No result in game %s\n", sgfname);
1023 return 0;
1024 }
1025
1026 if (strncmp(RE, "B+", 2) != 0 && strncmp(RE, "W+", 2) != 0) {
1027 if (WARN)
1028 fprintf(stderr, "Couldn't parse winner in result %s from game %s\n",
1029 RE, sgfname);
1030 return 0;
1031 }
1032
1033 /* Looks okay. */
1034 return 1;
1035}
1036
1037/*
1038 * Sort out the games that can be used.
1039 */
1040
1041static void
1042sort_games(void)
1043{
1044 int k;
1045
1046 for (k = 0; k < number_of_games; k++) {
1047 SGFNode *sgf;
1048
1049 /* Progress output. */
1050 if (k % 500 == 0)
1051 fprintf(stderr, "Sorting number %d, %s\n", k, sgf_names[k]);
1052
1053 sgf = readsgffilefuseki(sgf_names[k], 0);
1054
1055
1056 if (!sgf) {
1057 if (WARN)
1058 fprintf(stderr, "Warning: Couldn't open sgf file %s number %d.\n",
1059 sgf_names[k], k);
1060 unused_games[k] = 1; /* the game could not be used */
1061 continue;
1062 }
1063
1064 if (!check_game(sgf, sgf_names[k]))
1065 unused_games[k] = 1;
1066
1067 /* Free memory of SGF file */
1068 sgfFreeNode(sgf);
1069 }
1070}
1071
1072
1073/* Play through the initial moves of all games and collect hash values
1074 * for the encountered situations.
1075 */
1076static void
1077collect_situations(void)
1078{
1079 int k;
1080 int winner; /* who won the game in question */
1081
1082 init_situations();
1083 for (k = 0; k < number_of_games; k++) {
1084 SGFNode *sgf;
1085 char *RE;
1086
1087 /* Progress output. */
1088 if (k % 500 == 0)
1089 fprintf(stderr, "Reading number %d, %s\n", k, sgf_names[k]);
1090
1091 sgf = readsgffilefuseki(sgf_names[k], moves_per_game);
1092
1093 if (!sgf) {
1094 if (WARN)
1095 fprintf(stderr, "Warning: Couldn't open sgf file %s.\n", sgf_names[k]);
1096 unused_games[k] = 1; /* the game could not be used */
1097 continue;
1098 }
1099
1100 if (!check_game(sgf, sgf_names[k])) {
1101 unused_games[k] = 1;
1102 sgfFreeNode(sgf);
1103 continue;
1104 }
1105
1106 if (!sgfGetCharProperty(sgf, "RE", &RE)) {
1107 gg_assert(0);
1108 }
1109
1110 if (strncmp(RE, "B+", 2) == 0)
1111 winner = BLACK;
1112 else if (strncmp(RE, "W+", 2) == 0)
1113 winner = WHITE;
1114 else {
1115 gg_assert(0);
1116 }
1117
1118 if (!examine_game(sgf, winner)) {
1119 if (WARN)
1120 fprintf(stderr, "Warning: Problem with sgf file %s\n", sgf_names[k]);
1121 unused_games[k] = 1; /* the game could not be used */
1122 }
1123
1124 /* Free memory of SGF file */
1125 sgfFreeNode(sgf);
1126 }
1127}
1128
1129/* Find the most common positions and moves, for which we want to
1130 * generate patterns.
1131 */
1132static void
1133analyze_statistics(void)
1134{
1135 int k;
1136 /* Sort all the collected situations. */
1137 gg_sort(situation_table, number_of_situations, sizeof(*situation_table),
1138 compare_situations2);
1139
1140 /* Debug output. */
1141 if (0) {
1142 int i, k;
1143 for (i = 0; i < number_of_situations; i++) {
1144 fprintf(stderr, "%4d ", i);
1145 for (k = 0; k < 8; k++)
1146 fprintf(stderr, "%8x ", situation_table[i].pre.values[k]);
1147 fprintf(stderr, " ");
1148 for (k = 0; k < 8; k++)
1149 fprintf(stderr, "%8x ", situation_table[i].post.values[k]);
1150 fprintf(stderr, "\n");
1151 }
1152 }
1153
1154 /* Set up frequency table. */
1155 frequency_table = calloc(number_of_situations, sizeof(*frequency_table));
1156 if (!frequency_table) {
1157 fprintf(stderr, "Fatal error, failed to allocate frequency table.\n");
1158 exit(EXIT_FAILURE);
1159 }
1160 number_of_distinct_positions = 0;
1161
1162 /* Make frequency analysis of the positions before the moves. */
1163 for (k = 0; k < number_of_situations; k++) {
1164 if (k == 0 || compare_positions(&situation_table[k],
1165 &situation_table[k-1]) != 0) {
1166 frequency_table[number_of_distinct_positions].index = k;
1167 frequency_table[number_of_distinct_positions].n = 0;
1168 frequency_table[number_of_distinct_positions].n_win = 0;
1169 frequency_table[number_of_distinct_positions].n_player = 0;
1170 number_of_distinct_positions++;
1171 }
1172 frequency_table[number_of_distinct_positions-1].n++;
1173 frequency_table[number_of_distinct_positions-1].n_win += situation_table[k].outcome;
1174 if (frequency_table[number_of_distinct_positions-1].n == 1
1175 || situation_table[k].player != situation_table[k-1].player)
1176 frequency_table[number_of_distinct_positions-1].n_player++;
1177 }
1178
1179 /* Sort the frequency table, in falling order. */
1180 gg_sort(frequency_table, number_of_distinct_positions,
1181 sizeof(*frequency_table), compare_frequencies);
1182
1183 /* Debug output. */
1184 if (0) {
1185 int l;
1186 for (l = 0; l < number_of_distinct_positions; l++) {
1187 fprintf(stderr, "%4d %5d\n", frequency_table[l].n,
1188 frequency_table[l].index);
1189 }
1190 }
1191
1192 /* Set up winners array. */
1193 winning_moves = calloc(MAX_PATTERNS_TO_EXTRACT, sizeof(*winning_moves));
1194 if (!winning_moves) {
1195 fprintf(stderr, "Fatal error, failed to allocate winning moves table.\n");
1196 exit(EXIT_FAILURE);
1197 }
1198 number_of_winning_moves = 0;
1199
1200 /* Starting with the most common position, find the most common
1201 * moves for each position, until the number of patterns to be
1202 * generated is reached.
1203 */
1204 for (k = 0; k < number_of_distinct_positions; k++) {
1205 int index = frequency_table[k].index;
1206 int i;
1207
1208 /* Build a new frequency table for the different moves in this position. */
1209 struct frequency move_frequencies[MAX_BOARD * MAX_BOARD];
1210 int number_of_moves = 0;
1211
1212 /* A position must occur a minimum before we analyze and record
1213 * the moves from it.
1214 */
1215 if (frequency_table[k].n < min_position_freq)
1216 break;
1217
1218 for (i = index; ;i++) {
1219 if (i == number_of_situations
1220 || (i > index
1221 && compare_positions(&situation_table[i],
1222 &situation_table[i-1]) != 0))
1223 break;
1224
1225 if (i == index || compare_situations(&situation_table[i],
1226 &situation_table[i-1]) != 0) {
1227 move_frequencies[number_of_moves].index = i;
1228 move_frequencies[number_of_moves].n = 0;
1229 move_frequencies[number_of_moves].n_win = 0;
1230 move_frequencies[number_of_moves].n_player = 0;
1231 number_of_moves++;
1232 }
1233 move_frequencies[number_of_moves-1].n++;
1234 move_frequencies[number_of_moves-1].n_win += situation_table[i].outcome;
1235
1236 if (move_frequencies[number_of_moves-1].n == 1
1237 || situation_table[i].player != situation_table[i-1].player)
1238 move_frequencies[number_of_moves-1].n_player++;
1239 }
1240
1241 /* Sort the moves, in falling order. */
1242 gg_sort(move_frequencies, number_of_moves,
1243 sizeof(*move_frequencies), compare_frequencies2);
1244
1245 /* Debug output. */
1246 if (0) {
1247 for (i = 0; i < number_of_moves; i++) {
1248 fprintf(stderr, "%4d %3d %4d\n", index, move_frequencies[i].n,
1249 move_frequencies[i].index);
1250 }
1251 }
1252
1253 /* Add the moves to the list of winners. */
1254 for (i = 0; i < number_of_moves; i++) {
1255 /* This is where the cut-off of moves is decided
1256 * based on popularity from command line arguments.
1257 */
1258
1259 if (0.01 * min_move_percent*move_frequencies[0].n_player
1260 > move_frequencies[i].n_player
1261 || move_frequencies[i].n_player < min_move_freq) {
1262 winning_moves[number_of_winning_moves].index = -1;
1263 winning_moves[number_of_winning_moves].pre =
1264 situation_table[frequency_table[k].index].pre.values[0];
1265 winning_moves[number_of_winning_moves].position_frequency =
1266 frequency_table[k].n;
1267 winning_moves[number_of_winning_moves].n_player = 0;
1268 winning_moves[number_of_winning_moves].move_frequency = 0;
1269 winning_moves[number_of_winning_moves].position_success =
1270 frequency_table[k].n_win;
1271 winning_moves[number_of_winning_moves].move_success = 0;
1272
1273 while (i < number_of_moves) {
1274 gg_assert(0.01 * min_move_percent*move_frequencies[0].n_player
1275 > move_frequencies[i].n_player
1276 || move_frequencies[i].n_player < min_move_freq);
1277 gg_assert(situation_table[move_frequencies[i].index].pre.values[0]
1278 == winning_moves[number_of_winning_moves].pre);
1279 winning_moves[number_of_winning_moves].n_player +=
1280 move_frequencies[i].n_player;
1281 winning_moves[number_of_winning_moves].move_frequency +=
1282 move_frequencies[i].n;
1283 winning_moves[number_of_winning_moves].move_success +=
1284 move_frequencies[i].n_win;
1285 i++;
1286 }
1287 number_of_winning_moves++;
1288 continue;
1289 }
1290
1291 winning_moves[number_of_winning_moves].index = move_frequencies[i].index;
1292 winning_moves[number_of_winning_moves].pre =
1293 situation_table[frequency_table[k].index].pre.values[0];
1294 winning_moves[number_of_winning_moves].position_frequency =
1295 frequency_table[k].n;
1296 winning_moves[number_of_winning_moves].move_frequency =
1297 move_frequencies[i].n;
1298 winning_moves[number_of_winning_moves].n_player =
1299 move_frequencies[i].n_player;
1300
1301 winning_moves[number_of_winning_moves].position_success =
1302 frequency_table[k].n_win;
1303 winning_moves[number_of_winning_moves].move_success =
1304 move_frequencies[i].n_win;
1305 number_of_winning_moves++;
1306
1307 if (number_of_winning_moves == MAX_PATTERNS_TO_EXTRACT)
1308 break;
1309 }
1310
1311 if (number_of_winning_moves == MAX_PATTERNS_TO_EXTRACT)
1312 break;
1313 }
1314
1315 /* Debug output. */
1316 if (0) {
1317 int i;
1318 for (i = 0; i < number_of_winning_moves; i++) {
1319 fprintf(stderr, "%4d %3d %3d\n",
1320 winning_moves[i].index,
1321 winning_moves[i].position_frequency,
1322 winning_moves[i].move_frequency);
1323 }
1324 }
1325}
1326
1327/* Scan through the games a second time to pick up the patterns
1328 * corresponding to the winning moves.
1329 */
1330static void
1331generate_patterns(void)
1332{
1333 int k;
1334 SGFNode *sgf;
1335 for (k = 0; k < number_of_games; k++) {
1336
1337 /* Progress output. */
1338 if (k % 500 == 0)
1339 fprintf(stderr, "Generating number %d, %s\n", k, sgf_names[k]);
1340
1341 /* Check if this game is a valid game. */
1342 if (unused_games[k]) {
1343 if (0)
1344 fprintf(stderr, "Not used\n");
1345 continue;
1346 }
1347
1348 sgf = readsgffilefuseki(sgf_names[k], moves_per_game);
1349 if (!sgf) {
1350 fprintf(stderr, "Warning: Couldn't open sgf file %s.\n", sgf_names[k]);
1351 continue;
1352 }
1353
1354 examine_game(sgf, 0);
1355
1356 /* Free memory of SGF file. */
1357 sgfFreeNode(sgf);
1358 }
1359}
1360
1361/* Print the winning patterns in patterns.db format on stdout. */
1362static void
1363print_patterns(void)
1364{
1365 int k, l;
1366 int m, n;
1367 double chisq = 0.0;
1368 int df = 0;
1369 unsigned int pre = situation_table[winning_moves[0].index].pre.values[0];
1370 int first_in_set = 0;
1371 gg_assert(winning_moves[0].index != -1);
1372 l = 1;
1373 for (k = 0; k < number_of_winning_moves; k++) {
1374 /* Do not print erroneous patterns. */
1375 if (winning_moves[k].pattern[0][0] != '\0'
1376 || winning_moves[k].index == -1) {
1377 double grand_sum = winning_moves[k].position_frequency;
1378 double grand_wins = winning_moves[k].position_success;
1379#if 0
1380 double grand_losses = grand_sum - grand_wins;
1381#endif
1382 double row_sum = winning_moves[k].move_frequency;
1383 double wins = winning_moves[k].move_success;
1384 double losses = row_sum - wins;
1385 double expect_wins = row_sum*grand_wins/grand_sum;
1386 double expect_losses = row_sum - expect_wins;
1387 /* We're _not_ using a Yates corrected chisquare.
1388 * Two reasons: 1. It's never correct for > 2x2 table
1389 * 2. Our marginals are sampled, not fixed, so
1390 * Yates and usual Fisher exact are wrong distribution.
1391 * Straight chi squared is best.
1392 */
1393 double dchisq = 0.0;
1394 /* This was Yates line. It's wrong. */
1395#if 0
1396 if (expect_wins > 0.0)
1397 dchisq += pow(gg_abs(wins - expect_wins) - 0.5, 2) / expect_wins;
1398#endif
1399
1400 if (expect_wins > 0.0)
1401 dchisq += pow(wins - expect_wins, 2) / expect_wins;
1402 if (expect_losses > 0.0)
1403 dchisq += pow(losses - expect_losses, 2) / expect_losses;
1404
1405 gg_assert(winning_moves[k].index == -1
1406 || (situation_table[winning_moves[k].index].pre.values[0]
1407 == winning_moves[k].pre));
1408
1409 /* Did we just finish a set? If so, print totals and reset. */
1410 if (winning_moves[k].pre != pre) {
1411 /* p-value is 1 - incomplete gamma function(d.o.f/2, chisq/2)
1412 * variable df is number of moves, actual d.o.f is df-1
1413 * k is referring to the pattern _after_ the set we just completed.
1414 */
1415 printf("\n### Summary of pattern pre 0x%08x\n### N Chi_squared df: %d %g %d ",
1416 pre, winning_moves[k-1].position_frequency, chisq, df - 1);
1417 /* and array is indexed at zero for d.o.f = 1... */
1418 if (df-1 < 1)
1419 printf("NS\n\n");
1420 else if (df - 1 < (int) (sizeof(chisquarecrit05) / sizeof(double))
1421 && chisq > chisquarecrit05[df-2]) {
1422 /* The overall result is significant at 5%. In this case, at
1423 * least some authorities will allow us to examine several
1424 * individual contrasts w/o futher correction. We compare
1425 * every pair of moves, which is perhaps too many, but the
1426 * purpose is to give the human expert (who would in any
1427 * case be required to examine the output) some sense of
1428 * where the differences are. Something like a Bonferroni
1429 * correction could result in a significant test overall,
1430 * but no significant contrasts, which is obviously
1431 * nonsense. The significance of the overall result must
1432 * come from somewhere.
1433 */
1434 int m, n;
1435 if (chisq > chisquarecrit001[df-2])
1436 printf("!!! p < 0.001\n");
1437 else if (chisq > chisquarecrit01[df-2])
1438 printf("!!! p < 0.01\n");
1439 else
1440 printf("!!! p < 0.05\n");
1441 for (m = first_in_set; m < k; m++) {
1442 for (n = m + 1; n < k; n++) {
1443 double grand_sum = (winning_moves[m].move_frequency
1444 + winning_moves[n].move_frequency);
1445 double grand_wins = (winning_moves[m].move_success
1446 + winning_moves[n].move_success);
1447#if 0
1448 double grand_losses = grand_sum - grand_wins;
1449#endif
1450 double row_sum_m = winning_moves[m].move_frequency;
1451 double row_sum_n = winning_moves[n].move_frequency;
1452
1453 double wins_m = winning_moves[m].move_success;
1454 double losses_m = row_sum_m - wins_m;
1455 double wins_n = winning_moves[n].move_success;
1456 double losses_n = row_sum_n - wins_n;
1457
1458 double expect_wins_m = row_sum_m * grand_wins/grand_sum;
1459 double expect_losses_m = row_sum_m - expect_wins_m;
1460 double expect_wins_n = row_sum_n * grand_wins/grand_sum;
1461 double expect_losses_n = row_sum_n - expect_wins_n;
1462 double dchisq_m = 0.0;
1463 double dchisq_n = 0.0;
1464 if (expect_wins_m > 0.0)
1465 dchisq_m += pow(wins_m - expect_wins_m, 2) / expect_wins_m;
1466 if (expect_losses_m > 0.0)
1467 dchisq_m += pow(losses_m - expect_losses_m, 2) / expect_losses_m;
1468 if (expect_wins_n > 0.0)
1469 dchisq_n += pow(wins_n - expect_wins_n, 2) / expect_wins_n;
1470 if (expect_losses_n > 0.0)
1471 dchisq_n += pow(losses_n - expect_losses_n, 2) / expect_losses_n;
1472 /* We demand at least N=6. Nonsense with smaller N. */
1473 if (dchisq_m + dchisq_n > chisquarecrit05[0] && grand_sum > 5) {
1474 printf("#### 0x%08x %c 0x%08x (p < 0.05) chisq = %g N = %g\n",
1475 situation_table[winning_moves[m].index].post.values[0],
1476 (1.0 * wins_m / row_sum_m
1477 > 1.0 * wins_n / row_sum_n) ? '>' : '<',
1478 situation_table[winning_moves[n].index].post.values[0],
1479 dchisq_m + dchisq_n, grand_sum);
1480 }
1481 }
1482 }
1483 printf("\n\n");
1484 }
1485 else if (df-1 < (int) (sizeof(chisquarecrit10) / sizeof(double))
1486 && chisq > chisquarecrit10[df - 2])
1487 printf("??? p < 0.10\n\n");
1488 else if (!(df - 1 < (int) (sizeof(chisquarecrit05) / sizeof(double))))
1489 printf("df out of range...\n\n");
1490 else
1491 printf("NS\n\n");
1492
1493 pre = winning_moves[k].pre;
1494#if 0
1495 pre = situation_table[winning_moves[k].index].pre.values[0];
1496#endif
1497 first_in_set = k;
1498 chisq = 0.0;
1499 df = 0;
1500 }
1501 /* increment totals */
1502 chisq += dchisq;
1503 df++;
1504
1505 if (winning_moves[k].index == -1) {
1506 printf("# Unpopular moves\n");
1507 printf("# pre: 0x%08x\n", winning_moves[k].pre);
1508 printf("# post: could be various\n");
1509 printf("# frequency: %d/%d\n",
1510 winning_moves[k].move_frequency,
1511 winning_moves[k].position_frequency);
1512 printf("# unique players: %d\n", winning_moves[k].n_player);
1513 printf("# wins: %d/%d\n\n",
1514 winning_moves[k].move_success,
1515 winning_moves[k].position_success);
1516 printf("# success: %.1f%% vs %.1f%% for this position overall, dchisq = %g\n\n",
1517 100.0 * winning_moves[k].move_success / winning_moves[k].move_frequency,
1518 100.0 * winning_moves[k].position_success / winning_moves[k].position_frequency,
1519 dchisq);
1520 }
1521 else {
1522 printf("Pattern F-H%d-%d\n", handicap_value, l);
1523 printf("# pre : 0x%08x\n",
1524 situation_table[winning_moves[k].index].pre.values[0]);
1525 printf("# post: 0x%08x\n",
1526 situation_table[winning_moves[k].index].post.values[0]);
1527 printf("# frequency: %d/%d\n", winning_moves[k].move_frequency,
1528 winning_moves[k].position_frequency);
1529 printf("# unique players: %d\n", winning_moves[k].n_player);
1530 printf("# wins: %d/%d\n\n", winning_moves[k].move_success,
1531 winning_moves[k].position_success);
1532 printf("# success: %.1f%% vs %.1f%% for this position overall, dchisq = %g\n\n",
1533 100.0 * winning_moves[k].move_success / winning_moves[k].move_frequency,
1534 100.0 * winning_moves[k].position_success / winning_moves[k].position_frequency,
1535 dchisq);
1536
1537 printf("+");
1538 for (n = 0; n < board_size; n++)
1539 printf("-");
1540
1541 printf("+\n");
1542 for (m = 0; m < board_size; m++) {
1543 printf("|");
1544 for (n = 0; n < board_size; n++) {
1545 if (winning_moves[k].pattern[m][n] == '\0') {
1546 fprintf(stderr, "Something wrong in print pattern\n");
1547 printf(".");
1548 }
1549 else
1550 printf("%c", winning_moves[k].pattern[m][n]);
1551 }
1552 printf("|\n");
1553 }
1554
1555 printf("+");
1556 for (n = 0; n < board_size; n++)
1557 printf("-");
1558 printf("+");
1559
1560 printf("\n\n:8,-,value(%d)\n\n\n", winning_moves[k].n_player);
1561 l++;
1562 }
1563 }
1564 else {
1565 fprintf(stderr,
1566 "Skipping pattern pre 0x%08x post 0x%08x, stats may be wrong...\n",
1567 situation_table[winning_moves[k].index].pre.values[0],
1568 situation_table[winning_moves[k].index].post.values[0]);
1569 }
1570 }
1571}
1572
1573int
1574main(int argc, char *argv[])
1575{
1576 int number_of_unused_games = 0;
1577 int i = 0;
1578
1579 /* Check number of arguments. */
1580 if (argc < 10) {
1581 fprintf(stderr, USAGE);
1582 exit(EXIT_FAILURE);
1583 }
1584
1585 /* Check arguments. */
1586 board_size = atoi(argv[2]);
1587 if (board_size % 2 == 0) {
1588 fprintf(stderr, "Fatal error, only odd boardsizes supported: %d.\n",
1589 board_size);
1590 exit(EXIT_FAILURE);
1591 }
1592 if (board_size < 9 || board_size > 19)
1593 fprintf(stderr, "Warning: strange boardsize: %d.\n", board_size);
1594
1595 moves_per_game = atoi(argv[3]);
1596 if (moves_per_game < 1 || moves_per_game > 20)
1597 fprintf(stderr, "Warning: strange number of moves per game: %d.\n",
1598 moves_per_game);
1599
1600 handicap_value = atoi(argv[4]);
1601 if (handicap_value < 0 || handicap_value > 10)
1602 fprintf(stderr, "Warning: unusual handicap value: %d.\n",
1603 handicap_value);
1604
1605 player_strength = atoi(argv[5]);
1606 if (player_strength < 0 || player_strength > 30)
1607 fprintf(stderr, "Warning: wrong lowest strength: %d.\n",
1608 player_strength);
1609
1610 half_board_patterns = atoi(argv[6]);
1611 if (half_board_patterns != 0 && half_board_patterns != 1) {
1612 fprintf(stderr,
1613 "Warning: incorrect half_board_flag (0 or 1). Setting the value to 0.\n");
1614 half_board_patterns = 0;
1615 }
1616
1617 min_position_freq = atoi(argv[7]);
1618 if (min_position_freq < 1) {
1619 fprintf(stderr, "Warning: setting min_position_freq to 1\n");
1620 min_position_freq = 1;
1621 }
1622
1623 min_move_percent = atof(argv[8]);
1624 if (min_move_percent < 0. || min_move_percent > 100.) {
1625 fprintf(stderr, "Warning: strange min_move_percent %g, setting to 1%%\n",
1626 min_move_percent);
1627 min_move_percent = 1.0;
1628 }
1629
1630 min_move_freq = atoi(argv[9]);
1631 if (min_move_freq < 0)
1632 fprintf(stderr, "Warning: strange min_move_freq %d\n", min_move_freq);
1633
1634 /* Count the number of sgf files. */
1635 number_of_games = read_sgf_filenames(argv[1], NULL);
1636
1637 /* Allocate space for the list of unused files. */
1638 unused_games = calloc(number_of_games, sizeof(*unused_games));
1639 if (unused_games == NULL) {
1640 fprintf(stderr, "Fatal error, failed to allocate memory.\n");
1641 exit(EXIT_FAILURE);
1642 }
1643
1644 /* Allocate space for the list of sgf file names. */
1645 sgf_names = calloc(number_of_games, sizeof(*sgf_names));
1646 if (sgf_names == NULL) {
1647 fprintf(stderr, "Fatal error, failed to allocate memory.\n");
1648 exit(EXIT_FAILURE);
1649 }
1650
1651 /* Read the list of sgf files and store in memory. */
1652 read_sgf_filenames(argv[1], sgf_names);
1653
1654 /* Save memory by sorting out the games that can be used first */
1655 if (argv[10] != NULL) {
1656 fprintf(stderr, "Starting game sort\n");
1657 sort_games();
1658 fprintf(stderr, "Starting game writes\n");
1659 write_sgf_filenames(argv[10], sgf_names);
1660 }
1661 else {
1662 /* Build tables of random numbers for Zobrist hashing. */
1663 init_zobrist_numbers();
1664
1665 /* Play through the initial moves of all games and collect hash values
1666 * for the encountered situations.
1667 */
1668 collect_situations();
1669 fprintf(stderr, "collect OK.\n");
1670
1671 /* Find the most common positions and moves, for which we want to
1672 * generate patterns.
1673 */
1674 analyze_statistics();
1675 fprintf(stderr, "analyze OK.\n");
1676
1677 /* Generate patterns from the chosen positions and moves.
1678 */
1679 generate_patterns();
1680 fprintf(stderr, "generate OK.\n");
1681
1682 printf("attribute_map value_only\n\n\n");
1683 printf("# ");
1684 for (i = 0; i < argc; i++)
1685 printf("%s ", argv[i]);
1686 printf("\n\n\n");
1687
1688 /* Write the patterns to stdout in patterns.db format.
1689 */
1690 print_patterns();
1691
1692 /* Tell the user everything worked out fine */
1693 fprintf(stderr, "The pattern database was produced with no errors.\n");
1694
1695 for (i = 0; i < number_of_games; i++)
1696 if (unused_games[i])
1697 number_of_unused_games++;
1698
1699 fprintf(stderr, "Out of %d games, %d were not used.\n",
1700 number_of_games, number_of_unused_games);
1701 }
1702
1703 return 0;
1704}
1705
1706/*
1707 * Local Variables:
1708 * tab-width: 8
1709 * c-basic-offset: 2
1710 * End:
1711 */