Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | package Search::Dict; |
2 | require 5.000; | |
3 | require Exporter; | |
4 | ||
5 | use strict; | |
6 | ||
7 | our $VERSION = '1.02'; | |
8 | our @ISA = qw(Exporter); | |
9 | our @EXPORT = qw(look); | |
10 | ||
11 | =head1 NAME | |
12 | ||
13 | Search::Dict, look - search for key in dictionary file | |
14 | ||
15 | =head1 SYNOPSIS | |
16 | ||
17 | use Search::Dict; | |
18 | look *FILEHANDLE, $key, $dict, $fold; | |
19 | ||
20 | use Search::Dict; | |
21 | look *FILEHANDLE, $params; | |
22 | ||
23 | =head1 DESCRIPTION | |
24 | ||
25 | Sets file position in FILEHANDLE to be first line greater than or equal | |
26 | (stringwise) to I<$key>. Returns the new file position, or -1 if an error | |
27 | occurs. | |
28 | ||
29 | The flags specify dictionary order and case folding: | |
30 | ||
31 | If I<$dict> is true, search by dictionary order (ignore anything but word | |
32 | characters and whitespace). The default is honour all characters. | |
33 | ||
34 | If I<$fold> is true, ignore case. The default is to honour case. | |
35 | ||
36 | If there are only three arguments and the third argument is a hash | |
37 | reference, the keys of that hash can have values C<dict>, C<fold>, and | |
38 | C<comp> or C<xfrm> (see below), and their correponding values will be | |
39 | used as the parameters. | |
40 | ||
41 | If a comparison subroutine (comp) is defined, it must return less than zero, | |
42 | zero, or greater than zero, if the first comparand is less than, | |
43 | equal, or greater than the second comparand. | |
44 | ||
45 | If a transformation subroutine (xfrm) is defined, its value is used to | |
46 | transform the lines read from the filehandle before their comparison. | |
47 | ||
48 | =cut | |
49 | ||
50 | sub look { | |
51 | my($fh,$key,$dict,$fold) = @_; | |
52 | my ($comp, $xfrm); | |
53 | if (@_ == 3 && ref $dict eq 'HASH') { | |
54 | my $params = $dict; | |
55 | $dict = 0; | |
56 | $dict = $params->{dict} if exists $params->{dict}; | |
57 | $fold = $params->{fold} if exists $params->{fold}; | |
58 | $comp = $params->{comp} if exists $params->{comp}; | |
59 | $xfrm = $params->{xfrm} if exists $params->{xfrm}; | |
60 | } | |
61 | $comp = sub { $_[0] cmp $_[1] } unless defined $comp; | |
62 | local($_); | |
63 | my(@stat) = stat($fh) | |
64 | or return -1; | |
65 | my($size, $blksize) = @stat[7,11]; | |
66 | $blksize ||= 8192; | |
67 | $key =~ s/[^\w\s]//g if $dict; | |
68 | $key = lc $key if $fold; | |
69 | # find the right block | |
70 | my($min, $max) = (0, int($size / $blksize)); | |
71 | my $mid; | |
72 | while ($max - $min > 1) { | |
73 | $mid = int(($max + $min) / 2); | |
74 | seek($fh, $mid * $blksize, 0) | |
75 | or return -1; | |
76 | <$fh> if $mid; # probably a partial line | |
77 | $_ = <$fh>; | |
78 | $_ = $xfrm->($_) if defined $xfrm; | |
79 | chomp; | |
80 | s/[^\w\s]//g if $dict; | |
81 | $_ = lc $_ if $fold; | |
82 | if (defined($_) && $comp->($_, $key) < 0) { | |
83 | $min = $mid; | |
84 | } | |
85 | else { | |
86 | $max = $mid; | |
87 | } | |
88 | } | |
89 | # find the right line | |
90 | $min *= $blksize; | |
91 | seek($fh,$min,0) | |
92 | or return -1; | |
93 | <$fh> if $min; | |
94 | for (;;) { | |
95 | $min = tell($fh); | |
96 | defined($_ = <$fh>) | |
97 | or last; | |
98 | $_ = $xfrm->($_) if defined $xfrm; | |
99 | chomp; | |
100 | s/[^\w\s]//g if $dict; | |
101 | $_ = lc $_ if $fold; | |
102 | last if $comp->($_, $key) >= 0; | |
103 | } | |
104 | seek($fh,$min,0); | |
105 | $min; | |
106 | } | |
107 | ||
108 | 1; |