| 1 | """Generic output formatting. |
| 2 | |
| 3 | Formatter objects transform an abstract flow of formatting events into |
| 4 | specific output events on writer objects. Formatters manage several stack |
| 5 | structures to allow various properties of a writer object to be changed and |
| 6 | restored; writers need not be able to handle relative changes nor any sort |
| 7 | of ``change back'' operation. Specific writer properties which may be |
| 8 | controlled via formatter objects are horizontal alignment, font, and left |
| 9 | margin indentations. A mechanism is provided which supports providing |
| 10 | arbitrary, non-exclusive style settings to a writer as well. Additional |
| 11 | interfaces facilitate formatting events which are not reversible, such as |
| 12 | paragraph separation. |
| 13 | |
| 14 | Writer objects encapsulate device interfaces. Abstract devices, such as |
| 15 | file formats, are supported as well as physical devices. The provided |
| 16 | implementations all work with abstract devices. The interface makes |
| 17 | available mechanisms for setting the properties which formatter objects |
| 18 | manage and inserting data into the output. |
| 19 | """ |
| 20 | |
| 21 | import sys |
| 22 | |
| 23 | |
# Sentinel for a font component passed to AbstractFormatter.push_font():
# a component that is AS_IS is replaced by the corresponding component of
# the font currently on top of the font stack (when the stack is not empty).
AS_IS = None
| 25 | |
| 26 | |
class NullFormatter:
    """Formatter whose every formatting event is a no-op.

    When no writer is supplied, a NullWriter instance is created and
    stored as ``self.writer``.  None of the methods defined here call
    anything on the writer.

    Formatter implementations that want the interface but none of the
    behaviour of AbstractFormatter can derive from this class.

    """

    def __init__(self, writer=None):
        # Keep whatever writer the caller handed in; only build the
        # default one when nothing was given.
        self.writer = NullWriter() if writer is None else writer

    # --- irreversible formatting events -------------------------------

    def end_paragraph(self, blankline):
        pass

    def add_line_break(self):
        pass

    def add_hor_rule(self, *args, **kw):
        pass

    def add_label_data(self, format, counter, blankline=None):
        pass

    def add_flowing_data(self, data):
        pass

    def add_literal_data(self, data):
        pass

    def flush_softspace(self):
        pass

    # --- stacked writer properties ------------------------------------

    def push_alignment(self, align):
        pass

    def pop_alignment(self):
        pass

    def push_font(self, x):
        pass

    def pop_font(self):
        pass

    def push_margin(self, margin):
        pass

    def pop_margin(self):
        pass

    def set_spacing(self, spacing):
        pass

    def push_style(self, *styles):
        pass

    def pop_style(self, n=1):
        pass

    def assert_line_data(self, flag=1):
        pass
| 59 | |
| 60 | |
class AbstractFormatter:
    """The standard formatter.

    This implementation has demonstrated wide applicability to many writers,
    and may be used directly in most circumstances.  It has been used to
    implement a full-featured World Wide Web browser.

    The formatter keeps a handful of integer/boolean flags describing what
    was last sent to the writer (see __init__) and uses them to decide
    whether a line break, paragraph separation or a single space must be
    emitted before the next piece of data.

    """

    # Space handling policy:  blank spaces at the boundary between elements
    # are handled by the outermost context.  "Literal" data is not checked
    # to determine context, so spaces in literal data are handled directly
    # in all circumstances.

    def __init__(self, writer):
        """Create a formatter that sends its output events to *writer*."""
        self.writer = writer            # Output device
        self.align = None               # Current alignment
        self.align_stack = []           # Alignment stack
        self.font_stack = []            # Font state
        self.margin_stack = []          # Margin state
        self.spacing = None             # Vertical spacing state
        self.style_stack = []           # Other state, e.g. color
        self.nospace = 1                # Should leading space be suppressed
        self.softspace = 0              # Should a space be inserted
        self.para_end = 1               # Just ended a paragraph
        self.parskip = 0                # Skipped space between paragraphs?
        self.hard_break = 1             # Have a hard break
        self.have_label = 0             # Label data was just sent

    def end_paragraph(self, blankline):
        """Close the current paragraph.

        A line break is sent first unless one is already in effect.  The
        writer is then asked for paragraph separation, but only for the
        part of *blankline* not already skipped (``self.parskip``) and
        only when no label is pending.
        """
        if not self.hard_break:
            self.writer.send_line_break()
            self.have_label = 0
        if self.parskip < blankline and not self.have_label:
            self.writer.send_paragraph(blankline - self.parskip)
            self.parskip = blankline
            self.have_label = 0
        self.hard_break = self.nospace = self.para_end = 1
        self.softspace = 0

    def add_line_break(self):
        """Send a line break unless a hard break or paragraph end is active."""
        if not (self.hard_break or self.para_end):
            self.writer.send_line_break()
            self.have_label = self.parskip = 0
        self.hard_break = self.nospace = 1
        self.softspace = 0

    def add_hor_rule(self, *args, **kw):
        """Send a horizontal rule, preceded by a line break if needed.

        All positional and keyword arguments are passed through unchanged
        to the writer's send_hor_rule().
        """
        if not self.hard_break:
            self.writer.send_line_break()
        self.writer.send_hor_rule(*args, **kw)
        self.hard_break = self.nospace = 1
        self.have_label = self.para_end = self.softspace = self.parskip = 0

    def add_label_data(self, format, counter, blankline=None):
        """Send a label (e.g. a list bullet or item number) to the writer.

        If *format* is a string it is expanded with format_counter() using
        *counter*; any other object is handed to the writer as is.  A true
        *blankline* requests one unit of paragraph separation before the
        label when the previous paragraph has not just ended.
        """
        if self.have_label or not self.hard_break:
            self.writer.send_line_break()
        if not self.para_end:
            self.writer.send_paragraph(1 if blankline else 0)
        if isinstance(format, str):
            self.writer.send_label_data(self.format_counter(format, counter))
        else:
            self.writer.send_label_data(format)
        self.nospace = self.have_label = self.hard_break = self.para_end = 1
        self.softspace = self.parskip = 0

    def format_counter(self, format, counter):
        """Expand the label template *format* for the value *counter*.

        Each character of *format* is handled on its own:
        '1' becomes the decimal counter, 'a'/'A' an alphabetic label,
        'i'/'I' a roman numeral (the letter/roman forms contribute
        nothing when counter <= 0); every other character is copied.
        """
        pieces = []
        for c in format:
            if c == '1':
                pieces.append('%d' % counter)
            elif c in 'aA':
                if counter > 0:
                    pieces.append(self.format_letter(c, counter))
            elif c in 'iI':
                if counter > 0:
                    pieces.append(self.format_roman(c, counter))
            else:
                pieces.append(c)
        return ''.join(pieces)

    def format_letter(self, case, counter):
        """Return *counter* as letters: 1 -> a, 26 -> z, 27 -> aa, ...

        *case* is the first letter of the alphabet to use ('a' or 'A').
        Returns '' when counter <= 0.
        """
        label = ''
        while counter > 0:
            counter, x = divmod(counter-1, 26)
            # This makes a strong assumption that lowercase letters
            # and uppercase letters form two contiguous blocks, with
            # letters in order!
            s = chr(ord(case) + x)
            label = s + label
        return label

    def format_roman(self, case, counter):
        """Return *counter* as a roman numeral.

        The result is upper case when *case* is 'I' and lower case
        otherwise.  Returns '' when counter <= 0.  Values of 4000 and
        above have no single-letter symbol available, so the thousands
        are written as repeated 'm' (4000 -> 'mmmm') instead of failing
        with IndexError.
        """
        if counter <= 0:
            return ''
        ones = ['i', 'x', 'c', 'm']
        fives = ['v', 'l', 'd']
        # Peel off the thousands so the digit loop below never needs a
        # symbol beyond 'm' (only indexes 0..2 are visited).
        thousands, counter = divmod(counter, 1000)
        label, index = '', 0
        while counter > 0:
            counter, x = divmod(counter, 10)
            if x == 9:
                label = ones[index] + ones[index+1] + label
            elif x == 4:
                label = ones[index] + fives[index] + label
            else:
                if x >= 5:
                    s = fives[index]
                    x = x-5
                else:
                    s = ''
                s = s + ones[index]*x
                label = s + label
            index = index + 1
        label = 'm' * thousands + label
        if case == 'I':
            return label.upper()
        return label

    def add_flowing_data(self, data):
        """Send *data* as flowing text with whitespace normalised.

        Every run of whitespace is collapsed to one space.  Leading
        whitespace (or a pending soft space) becomes a single leading
        space unless spaces are currently suppressed; trailing whitespace
        is not sent but remembered in ``self.softspace``.
        """
        if not data:
            return
        prespace = data[:1].isspace()
        postspace = data[-1:].isspace()
        data = " ".join(data.split())
        if self.nospace and not data:
            return
        elif prespace or self.softspace:
            if not data:
                # Whitespace only: remember it as a soft space rather
                # than sending anything to the writer.
                if not self.nospace:
                    self.softspace = 1
                    self.parskip = 0
                return
            if not self.nospace:
                data = ' ' + data
        self.hard_break = self.nospace = self.para_end = \
                          self.parskip = self.have_label = 0
        self.softspace = postspace
        self.writer.send_flowing_data(data)

    def add_literal_data(self, data):
        """Send *data* to the writer unchanged (no whitespace handling).

        A pending soft space is sent first as flowing data.  A hard break
        is recorded when *data* ends with a newline.
        """
        if not data:
            return
        if self.softspace:
            self.writer.send_flowing_data(" ")
        self.hard_break = data[-1:] == '\n'
        self.nospace = self.para_end = self.softspace = \
                       self.parskip = self.have_label = 0
        self.writer.send_literal_data(data)

    def flush_softspace(self):
        """Send a pending soft space, if any, and suppress further spaces."""
        if self.softspace:
            self.hard_break = self.para_end = self.parskip = \
                              self.have_label = self.softspace = 0
            self.nospace = 1
            self.writer.send_flowing_data(' ')

    def push_alignment(self, align):
        """Push *align*; a false value re-pushes the current alignment.

        The writer is only notified when the alignment actually changes.
        """
        if align and align != self.align:
            self.writer.new_alignment(align)
            self.align = align
            self.align_stack.append(align)
        else:
            self.align_stack.append(self.align)

    def pop_alignment(self):
        """Restore the previous alignment (None when the stack empties)."""
        if self.align_stack:
            del self.align_stack[-1]
        if self.align_stack:
            self.align = align = self.align_stack[-1]
            self.writer.new_alignment(align)
        else:
            self.align = None
            self.writer.new_alignment(None)

    def push_font(self, font):
        """Push a new font given as a ``(size, i, b, tt)`` 4-tuple.

        Components that are AS_IS inherit the value from the font on top
        of the stack, if there is one.  A pending soft space is flushed
        first.  The merged tuple is pushed and passed to the writer.
        """
        # Unpack inside the body: tuple parameters in the signature are
        # a syntax error on Python 3.
        size, i, b, tt = font
        if self.softspace:
            self.hard_break = self.para_end = self.softspace = 0
            self.nospace = 1
            self.writer.send_flowing_data(' ')
        if self.font_stack:
            csize, ci, cb, ctt = self.font_stack[-1]
            if size is AS_IS: size = csize
            if i is AS_IS: i = ci
            if b is AS_IS: b = cb
            if tt is AS_IS: tt = ctt
        font = (size, i, b, tt)
        self.font_stack.append(font)
        self.writer.new_font(font)

    def pop_font(self):
        """Restore the previous font (None when the stack empties)."""
        if self.font_stack:
            del self.font_stack[-1]
        if self.font_stack:
            font = self.font_stack[-1]
        else:
            font = None
        self.writer.new_font(font)

    def push_margin(self, margin):
        """Push *margin* and report the effective margin to the writer.

        A false *margin* reuses the most recent true margin on the stack.
        The level reported to the writer is the number of true margins.
        """
        self.margin_stack.append(margin)
        # A real list is required: the result is indexed and measured,
        # which an iterator (Python 3 filter()) does not support.
        fstack = [m for m in self.margin_stack if m]
        if not margin and fstack:
            margin = fstack[-1]
        self.writer.new_margin(margin, len(fstack))

    def pop_margin(self):
        """Pop a margin and report the now-effective margin to the writer."""
        if self.margin_stack:
            del self.margin_stack[-1]
        fstack = [m for m in self.margin_stack if m]
        if fstack:
            margin = fstack[-1]
        else:
            margin = None
        self.writer.new_margin(margin, len(fstack))

    def set_spacing(self, spacing):
        """Record *spacing* and pass it on to the writer."""
        self.spacing = spacing
        self.writer.new_spacing(spacing)

    def push_style(self, *styles):
        """Push every value in *styles* and send the full style tuple.

        A pending soft space is flushed first.
        """
        if self.softspace:
            self.hard_break = self.para_end = self.softspace = 0
            self.nospace = 1
            self.writer.send_flowing_data(' ')
        for style in styles:
            self.style_stack.append(style)
        self.writer.new_styles(tuple(self.style_stack))

    def pop_style(self, n=1):
        """Pop the last *n* styles and send the remaining style tuple.

        Nothing is popped when n <= 0; the writer is still notified.
        """
        # Guard needed: ``del stack[-0:]`` would wipe the entire stack.
        if n > 0:
            del self.style_stack[-n:]
        self.writer.new_styles(tuple(self.style_stack))

    def assert_line_data(self, flag=1):
        """Tell the formatter that data was (or was not) put on the line.

        With a true *flag* the hard-break and no-space states are
        cleared; with a false one they are set.  The paragraph and label
        states are cleared either way.
        """
        self.nospace = self.hard_break = not flag
        self.para_end = self.parskip = self.have_label = 0
| 294 | |
| 295 | |
class NullWriter:
    """Writer that defines the interface and nothing else.

    Every method accepts its arguments and does nothing, which makes the
    class useful for testing and as the base class of writers that do not
    want to inherit any implementation.

    """

    def __init__(self):
        pass

    def flush(self):
        pass

    # --- property changes ---------------------------------------------

    def new_alignment(self, align):
        pass

    def new_font(self, font):
        pass

    def new_margin(self, margin, level):
        pass

    def new_spacing(self, spacing):
        pass

    def new_styles(self, styles):
        pass

    # --- output events ------------------------------------------------

    def send_paragraph(self, blankline):
        pass

    def send_line_break(self):
        pass

    def send_hor_rule(self, *args, **kw):
        pass

    def send_label_data(self, data):
        pass

    def send_flowing_data(self, data):
        pass

    def send_literal_data(self, data):
        pass
| 317 | |
| 318 | |
class AbstractWriter(NullWriter):
    """A writer which can be used in debugging formatters, but not much else.

    Each method simply announces itself by printing its name and
    arguments on standard output.

    The messages are printed with the single-argument ``print(...)`` form,
    which produces the same output under both the Python 2 print statement
    and the Python 3 print function.

    """

    def new_alignment(self, align):
        print("new_alignment(%r)" % (align,))

    def new_font(self, font):
        print("new_font(%r)" % (font,))

    def new_margin(self, margin, level):
        print("new_margin(%r, %d)" % (margin, level))

    def new_spacing(self, spacing):
        print("new_spacing(%r)" % (spacing,))

    def new_styles(self, styles):
        print("new_styles(%r)" % (styles,))

    def send_paragraph(self, blankline):
        print("send_paragraph(%r)" % (blankline,))

    def send_line_break(self):
        print("send_line_break()")

    def send_hor_rule(self, *args, **kw):
        # The rule's arguments are accepted but not shown.
        print("send_hor_rule()")

    def send_label_data(self, data):
        print("send_label_data(%r)" % (data,))

    def send_flowing_data(self, data):
        print("send_flowing_data(%r)" % (data,))

    def send_literal_data(self, data):
        print("send_literal_data(%r)" % (data,))
| 359 | |
| 360 | |
class DumbWriter(NullWriter):
    """Plain-text writer with simple word wrapping.

    Output goes to the file object given as *file*, or to standard output
    when *file* is omitted.  Flowing data is word-wrapped to *maxcol*
    columns, which makes the class suitable for reflowing a sequence of
    paragraphs.

    State kept between calls: ``col`` is the current output column and
    ``atbreak`` records whether the last flowing data ended in whitespace.

    """

    def __init__(self, file=None, maxcol=72):
        if not file:
            file = sys.stdout
        self.file = file
        self.maxcol = maxcol
        NullWriter.__init__(self)
        self.reset()

    def reset(self):
        """Return to column zero with no pending word separator."""
        self.col = 0
        self.atbreak = 0

    def send_paragraph(self, blankline):
        """Write *blankline* newlines and start a fresh line."""
        separator = '\n' * blankline
        self.file.write(separator)
        self.col = 0
        self.atbreak = 0

    def send_line_break(self):
        """Write one newline and start a fresh line."""
        self.file.write('\n')
        self.col = 0
        self.atbreak = 0

    def send_hor_rule(self, *args, **kw):
        """Write a rule of maxcol dashes on a line of its own."""
        emit = self.file.write
        emit('\n')
        emit('-' * self.maxcol)
        emit('\n')
        self.col = 0
        self.atbreak = 0

    def send_literal_data(self, data):
        """Write *data* verbatim and update the column bookkeeping."""
        self.file.write(data)
        # Only the text after the last newline counts towards the column.
        head, newline, tail = data.rpartition('\n')
        if newline:
            self.col = 0
        self.col = self.col + len(tail.expandtabs())
        self.atbreak = 0

    def send_flowing_data(self, data):
        """Write the words of *data*, wrapping before maxcol is reached."""
        if not data:
            return
        need_sep = self.atbreak or data[0].isspace()
        column = self.col
        limit = self.maxcol
        emit = self.file.write
        for token in data.split():
            if need_sep:
                # The separator is a space when the word still fits on
                # the current line, otherwise a newline.
                if column + len(token) < limit:
                    emit(' ')
                    column = column + 1
                else:
                    emit('\n')
                    column = 0
            emit(token)
            column = column + len(token)
            need_sep = 1
        self.col = column
        self.atbreak = data[-1].isspace()
| 426 | |
| 427 | |
def test(file=None):
    """Reflow a text file onto standard output as a quick self-test.

    Input is read from the path *file* if given, else from the path in
    ``sys.argv[1]`` if present, else from standard input.  A line
    consisting of just a newline ends the current paragraph with one
    blank line; every other line is fed to the formatter as flowing data.

    A file opened here is closed before returning, even on error;
    standard input is left open.
    """
    w = DumbWriter()
    f = AbstractFormatter(w)
    if file is not None:
        fp = open(file)
    elif sys.argv[1:]:
        fp = open(sys.argv[1])
    else:
        fp = sys.stdin
    try:
        # readline() is kept (rather than iterating fp) so interactive
        # input is processed line by line, as before.
        for line in iter(fp.readline, ''):
            if line == '\n':
                f.end_paragraph(1)
            else:
                f.add_flowing_data(line)
        f.end_paragraph(0)
    finally:
        if fp is not sys.stdin:
            fp.close()
| 446 | |
| 447 | |
# When run as a script, reflow the file named on the command line (or
# standard input) using test().
if __name__ == '__main__':
    test()