| 1 | """Sort performance test. |
| 2 | |
| 3 | See main() for command line syntax. |
| 4 | See tabulate() for output format. |
| 5 | |
| 6 | """ |
| 7 | |
| 8 | import sys |
| 9 | import time |
| 10 | import random |
| 11 | import marshal |
| 12 | import tempfile |
| 13 | import os |
| 14 | |
| 15 | td = tempfile.gettempdir() |
| 16 | |
| 17 | def randfloats(n): |
| 18 | """Return a list of n random floats in [0, 1).""" |
| 19 | # Generating floats is expensive, so this writes them out to a file in |
| 20 | # a temp directory. If the file already exists, it just reads them |
| 21 | # back in and shuffles them a bit. |
| 22 | fn = os.path.join(td, "rr%06d" % n) |
| 23 | try: |
| 24 | fp = open(fn, "rb") |
| 25 | except IOError: |
| 26 | r = random.random |
| 27 | result = [r() for i in xrange(n)] |
| 28 | try: |
| 29 | try: |
| 30 | fp = open(fn, "wb") |
| 31 | marshal.dump(result, fp) |
| 32 | fp.close() |
| 33 | fp = None |
| 34 | finally: |
| 35 | if fp: |
| 36 | try: |
| 37 | os.unlink(fn) |
| 38 | except os.error: |
| 39 | pass |
| 40 | except IOError, msg: |
| 41 | print "can't write", fn, ":", msg |
| 42 | else: |
| 43 | result = marshal.load(fp) |
| 44 | fp.close() |
| 45 | # Shuffle it a bit... |
| 46 | for i in range(10): |
| 47 | i = random.randrange(n) |
| 48 | temp = result[:i] |
| 49 | del result[:i] |
| 50 | temp.reverse() |
| 51 | result.extend(temp) |
| 52 | del temp |
| 53 | assert len(result) == n |
| 54 | return result |
| 55 | |
| 56 | def flush(): |
| 57 | sys.stdout.flush() |
| 58 | |
| 59 | def doit(L): |
| 60 | t0 = time.clock() |
| 61 | L.sort() |
| 62 | t1 = time.clock() |
| 63 | print "%6.2f" % (t1-t0), |
| 64 | flush() |
| 65 | |
| 66 | def tabulate(r): |
| 67 | """Tabulate sort speed for lists of various sizes. |
| 68 | |
| 69 | The sizes are 2**i for i in r (the argument, a list). |
| 70 | |
| 71 | The output displays i, 2**i, and the time to sort arrays of 2**i |
| 72 | floating point numbers with the following properties: |
| 73 | |
| 74 | *sort: random data |
| 75 | \sort: descending data |
| 76 | /sort: ascending data |
| 77 | 3sort: ascending, then 3 random exchanges |
| 78 | +sort: ascending, then 10 random at the end |
| 79 | %sort: ascending, then randomly replace 1% of the elements w/ random values |
| 80 | ~sort: many duplicates |
| 81 | =sort: all equal |
| 82 | !sort: worst case scenario |
| 83 | |
| 84 | """ |
| 85 | cases = tuple([ch + "sort" for ch in r"*\/3+%~=!"]) |
| 86 | fmt = ("%2s %7s" + " %6s"*len(cases)) |
| 87 | print fmt % (("i", "2**i") + cases) |
| 88 | for i in r: |
| 89 | n = 1 << i |
| 90 | L = randfloats(n) |
| 91 | print "%2d %7d" % (i, n), |
| 92 | flush() |
| 93 | doit(L) # *sort |
| 94 | L.reverse() |
| 95 | doit(L) # \sort |
| 96 | doit(L) # /sort |
| 97 | |
| 98 | # Do 3 random exchanges. |
| 99 | for dummy in range(3): |
| 100 | i1 = random.randrange(n) |
| 101 | i2 = random.randrange(n) |
| 102 | L[i1], L[i2] = L[i2], L[i1] |
| 103 | doit(L) # 3sort |
| 104 | |
| 105 | # Replace the last 10 with random floats. |
| 106 | if n >= 10: |
| 107 | L[-10:] = [random.random() for dummy in range(10)] |
| 108 | doit(L) # +sort |
| 109 | |
| 110 | # Replace 1% of the elements at random. |
| 111 | for dummy in xrange(n // 100): |
| 112 | L[random.randrange(n)] = random.random() |
| 113 | doit(L) # %sort |
| 114 | |
| 115 | # Arrange for lots of duplicates. |
| 116 | if n > 4: |
| 117 | del L[4:] |
| 118 | L = L * (n // 4) |
| 119 | # Force the elements to be distinct objects, else timings can be |
| 120 | # artificially low. |
| 121 | L = map(lambda x: --x, L) |
| 122 | doit(L) # ~sort |
| 123 | del L |
| 124 | |
| 125 | # All equal. Again, force the elements to be distinct objects. |
| 126 | L = map(abs, [-0.5] * n) |
| 127 | doit(L) # =sort |
| 128 | del L |
| 129 | |
| 130 | # This one looks like [3, 2, 1, 0, 0, 1, 2, 3]. It was a bad case |
| 131 | # for an older implementation of quicksort, which used the median |
| 132 | # of the first, last and middle elements as the pivot. |
| 133 | half = n // 2 |
| 134 | L = range(half - 1, -1, -1) |
| 135 | L.extend(range(half)) |
| 136 | # Force to float, so that the timings are comparable. This is |
| 137 | # significantly faster if we leave tham as ints. |
| 138 | L = map(float, L) |
| 139 | doit(L) # !sort |
| 140 | print |
| 141 | |
| 142 | def main(): |
| 143 | """Main program when invoked as a script. |
| 144 | |
| 145 | One argument: tabulate a single row. |
| 146 | Two arguments: tabulate a range (inclusive). |
| 147 | Extra arguments are used to seed the random generator. |
| 148 | |
| 149 | """ |
| 150 | # default range (inclusive) |
| 151 | k1 = 15 |
| 152 | k2 = 20 |
| 153 | if sys.argv[1:]: |
| 154 | # one argument: single point |
| 155 | k1 = k2 = int(sys.argv[1]) |
| 156 | if sys.argv[2:]: |
| 157 | # two arguments: specify range |
| 158 | k2 = int(sys.argv[2]) |
| 159 | if sys.argv[3:]: |
| 160 | # derive random seed from remaining arguments |
| 161 | x = 1 |
| 162 | for a in sys.argv[3:]: |
| 163 | x = 69069 * x + hash(a) |
| 164 | random.seed(x) |
| 165 | r = range(k1, k2+1) # include the end point |
| 166 | tabulate(r) |
| 167 | |
| 168 | if __name__ == '__main__': |
| 169 | main() |