Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | """Sort performance test. |
2 | ||
3 | See main() for command line syntax. | |
4 | See tabulate() for output format. | |
5 | ||
6 | """ | |
7 | ||
8 | import sys | |
9 | import time | |
10 | import random | |
11 | import marshal | |
12 | import tempfile | |
13 | import os | |
14 | ||
15 | td = tempfile.gettempdir() | |
16 | ||
17 | def randfloats(n): | |
18 | """Return a list of n random floats in [0, 1).""" | |
19 | # Generating floats is expensive, so this writes them out to a file in | |
20 | # a temp directory. If the file already exists, it just reads them | |
21 | # back in and shuffles them a bit. | |
22 | fn = os.path.join(td, "rr%06d" % n) | |
23 | try: | |
24 | fp = open(fn, "rb") | |
25 | except IOError: | |
26 | r = random.random | |
27 | result = [r() for i in xrange(n)] | |
28 | try: | |
29 | try: | |
30 | fp = open(fn, "wb") | |
31 | marshal.dump(result, fp) | |
32 | fp.close() | |
33 | fp = None | |
34 | finally: | |
35 | if fp: | |
36 | try: | |
37 | os.unlink(fn) | |
38 | except os.error: | |
39 | pass | |
40 | except IOError, msg: | |
41 | print "can't write", fn, ":", msg | |
42 | else: | |
43 | result = marshal.load(fp) | |
44 | fp.close() | |
45 | # Shuffle it a bit... | |
46 | for i in range(10): | |
47 | i = random.randrange(n) | |
48 | temp = result[:i] | |
49 | del result[:i] | |
50 | temp.reverse() | |
51 | result.extend(temp) | |
52 | del temp | |
53 | assert len(result) == n | |
54 | return result | |
55 | ||
56 | def flush(): | |
57 | sys.stdout.flush() | |
58 | ||
59 | def doit(L): | |
60 | t0 = time.clock() | |
61 | L.sort() | |
62 | t1 = time.clock() | |
63 | print "%6.2f" % (t1-t0), | |
64 | flush() | |
65 | ||
66 | def tabulate(r): | |
67 | """Tabulate sort speed for lists of various sizes. | |
68 | ||
69 | The sizes are 2**i for i in r (the argument, a list). | |
70 | ||
71 | The output displays i, 2**i, and the time to sort arrays of 2**i | |
72 | floating point numbers with the following properties: | |
73 | ||
74 | *sort: random data | |
75 | \sort: descending data | |
76 | /sort: ascending data | |
77 | 3sort: ascending, then 3 random exchanges | |
78 | +sort: ascending, then 10 random at the end | |
79 | %sort: ascending, then randomly replace 1% of the elements w/ random values | |
80 | ~sort: many duplicates | |
81 | =sort: all equal | |
82 | !sort: worst case scenario | |
83 | ||
84 | """ | |
85 | cases = tuple([ch + "sort" for ch in r"*\/3+%~=!"]) | |
86 | fmt = ("%2s %7s" + " %6s"*len(cases)) | |
87 | print fmt % (("i", "2**i") + cases) | |
88 | for i in r: | |
89 | n = 1 << i | |
90 | L = randfloats(n) | |
91 | print "%2d %7d" % (i, n), | |
92 | flush() | |
93 | doit(L) # *sort | |
94 | L.reverse() | |
95 | doit(L) # \sort | |
96 | doit(L) # /sort | |
97 | ||
98 | # Do 3 random exchanges. | |
99 | for dummy in range(3): | |
100 | i1 = random.randrange(n) | |
101 | i2 = random.randrange(n) | |
102 | L[i1], L[i2] = L[i2], L[i1] | |
103 | doit(L) # 3sort | |
104 | ||
105 | # Replace the last 10 with random floats. | |
106 | if n >= 10: | |
107 | L[-10:] = [random.random() for dummy in range(10)] | |
108 | doit(L) # +sort | |
109 | ||
110 | # Replace 1% of the elements at random. | |
111 | for dummy in xrange(n // 100): | |
112 | L[random.randrange(n)] = random.random() | |
113 | doit(L) # %sort | |
114 | ||
115 | # Arrange for lots of duplicates. | |
116 | if n > 4: | |
117 | del L[4:] | |
118 | L = L * (n // 4) | |
119 | # Force the elements to be distinct objects, else timings can be | |
120 | # artificially low. | |
121 | L = map(lambda x: --x, L) | |
122 | doit(L) # ~sort | |
123 | del L | |
124 | ||
125 | # All equal. Again, force the elements to be distinct objects. | |
126 | L = map(abs, [-0.5] * n) | |
127 | doit(L) # =sort | |
128 | del L | |
129 | ||
130 | # This one looks like [3, 2, 1, 0, 0, 1, 2, 3]. It was a bad case | |
131 | # for an older implementation of quicksort, which used the median | |
132 | # of the first, last and middle elements as the pivot. | |
133 | half = n // 2 | |
134 | L = range(half - 1, -1, -1) | |
135 | L.extend(range(half)) | |
136 | # Force to float, so that the timings are comparable. This is | |
137 | # significantly faster if we leave tham as ints. | |
138 | L = map(float, L) | |
139 | doit(L) # !sort | |
140 | ||
141 | ||
142 | def main(): | |
143 | """Main program when invoked as a script. | |
144 | ||
145 | One argument: tabulate a single row. | |
146 | Two arguments: tabulate a range (inclusive). | |
147 | Extra arguments are used to seed the random generator. | |
148 | ||
149 | """ | |
150 | # default range (inclusive) | |
151 | k1 = 15 | |
152 | k2 = 20 | |
153 | if sys.argv[1:]: | |
154 | # one argument: single point | |
155 | k1 = k2 = int(sys.argv[1]) | |
156 | if sys.argv[2:]: | |
157 | # two arguments: specify range | |
158 | k2 = int(sys.argv[2]) | |
159 | if sys.argv[3:]: | |
160 | # derive random seed from remaining arguments | |
161 | x = 1 | |
162 | for a in sys.argv[3:]: | |
163 | x = 69069 * x + hash(a) | |
164 | random.seed(x) | |
165 | r = range(k1, k2+1) # include the end point | |
166 | tabulate(r) | |
167 | ||
168 | if __name__ == '__main__': | |
169 | main() |