| 1 | #!/usr/local/bin/python |
| 2 | |
| 3 | ############################################################################### |
| 4 | # Takes a chapter as input and adds internal links and numbering to all |
| 5 | # of the H1, H2, H3, H4 and H5 sections. |
| 6 | # |
| 7 | # Every heading HTML tag (H1, H2 etc) is given an autogenerated name to link |
| 8 | # to. However, if the name is not an autogenerated name from a previous run, |
| 9 | # it will be kept. If it is autogenerated, it might change on subsequent runs |
| 10 | # of this program. Thus if you want to create links to one of the headings, |
| 11 | # then change the heading link name to something that does not look like an |
| 12 | # autogenerated link name. |
| 13 | ############################################################################### |
| 14 | |
| 15 | import sys |
| 16 | import re |
| 17 | import string |
| 18 | |
| 19 | ############################################################################### |
| 20 | # Functions |
| 21 | ############################################################################### |
| 22 | |
# Regexes for an existing <a name="..."></a> anchor and for the "_nn<digit>"
# marker that identifies an autogenerated anchor name.
alink = re.compile(r"<a *name *= *\"(.*)\"></a>", re.IGNORECASE)
heading = re.compile(r"(_nn\d)", re.IGNORECASE)


def getheadingname(m):
    """Return the anchor name to use for the heading matched by *m*.

    m is a regex match object whose group 1 holds the heading's existing
    ``<a name="..."></a>`` markup, or None when the heading has no anchor.

    A hand-written anchor name is returned unchanged so that links to it keep
    working.  If there is no usable anchor, or the existing name contains the
    autogenerated ``_nn<digit>`` marker, a fresh name of the form
    ``<filenamebase>_nn<nameindex>`` is built from the module-level globals
    ``filenamebase`` and ``nameindex``.
    """
    existing = m.group(1)
    if existing is not None:
        amatch = alink.match(existing)
        # search() rather than match(): generated names look like
        # "<base>_nn<N>", so the marker is not at the start of the name and
        # an anchored match would never recognise them.
        if amatch and not heading.search(amatch.group(1)):
            # A non-autogenerated heading name - keep it
            return amatch.group(1)
    # The heading name was either non-existent or autogenerated, so it is
    # safe to create a new name / replace the existing one.
    return "%s_nn%d" % (filenamebase, nameindex)
| 40 | |
| 41 | ############################################################################### |
| 42 | # Main program |
| 43 | ############################################################################### |
| 44 | |
# Usage: makechap.py filename num
#
# Rewrites `filename` in place (after saving a copy as `filename`.bak):
# every <H1>..<H5> heading line is given an anchor and a section number that
# starts with chapter number `num`, any previous index between a pair of
# "<!-- INDEX -->" lines is dropped, and a freshly built index is inserted
# after the H1 heading.  The index is also printed to stdout with its links
# made relative to `filename`.

if len(sys.argv) != 3:
    print("usage: makechap.py filename num")
    sys.exit(1)

filename = sys.argv[1]
# Everything before the first "." of the name as given on the command line;
# used as the prefix of generated anchor names.
filenamebase = filename.split(".")[0]
num = int(sys.argv[2])

# Number of headings processed so far; read by getheadingname() to build
# generated anchor names, so it must stay a module-level name.
nameindex = 0

# Number of H2, H3, H4 and H5 headings (in that order) seen so far under the
# current parent heading.  A non-zero count also means the <ul> for that
# level is currently open in the index.  All four start at zero so that a
# heading appearing before any H1 is handled instead of raising NameError.
counts = [0, 0, 0, 0]

# Text of the H1 heading, printed in the stdout summary
name = ""

# Regexes for <h1>,... <h5> sections.  Group 1 is the existing anchor markup
# (if any), any old section number is skipped, and group 2 is the heading
# text.  The anchor group is non-greedy so that a heading whose text contains
# a later "</a>" is not swallowed whole into group 1.
hpatterns = [
    re.compile(r".*?<H%d>(<a.*?a>)*[\d\.\s]*(.*?)</H%d>" % (level, level),
               re.IGNORECASE)
    for level in range(1, 6)
]

with open(filename) as infile:
    data = infile.read()            # Read data
with open(filename + ".bak", "w") as backup:
    backup.write(data)              # Make backup

lines = data.splitlines()
result = []   # This is the result of postprocessing the file
# index contains the index for adding at the top of the file. Also printed to stdout.
index = "<!-- INDEX -->\n<div class=\"sectiontoc\">\n"

skip = 0        # true while inside an old "<!-- INDEX -->" ... "<!-- INDEX -->" block
skipspace = 0   # true just after a heading, until the next non-empty line

for s in lines:
    # Drop the previously generated index, including its two marker lines
    if s == "<!-- INDEX -->":
        skip = not skip
        continue
    if skip:
        continue

    # Collapse any blank lines that follow a heading into exactly two
    if skipspace:
        if not s:
            continue
        result.append("")
        result.append("")
        skipspace = 0

    # Find the first heading level (H1 first) whose regex matches this line
    m = None
    for level, pattern in enumerate(hpatterns, 1):
        m = pattern.match(s)
        if m:
            break
    if not m:
        result.append(s)
        continue

    prevheadingtext = m.group(2)
    nameindex += 1      # must be bumped before getheadingname() reads it
    skipspace = 1

    if level == 1:
        headingname = getheadingname(m)
        result.append("""<H1><a name="%s"></a>%d %s</H1>""" % (headingname, num, prevheadingtext))
        # Placeholder that is replaced by the finished index further down
        result.append("@INDEX@")
        counts = [0, 0, 0, 0]
        name = prevheadingtext
        continue

    depth = level - 2   # position of this heading level in counts
    counts[depth] += 1
    headingname = getheadingname(m)
    numbering = ".".join([str(n) for n in [num] + counts[:depth + 1]])
    result.append("""<H%d><a name="%s"></a>%s %s</H%d>""" % (level, headingname, numbering, prevheadingtext, level))

    # Close the lists of all deeper levels (deepest first) and restart their
    # numbering.  Testing each count before clearing it keeps the <ul>/</ul>
    # pairs balanced.
    for deeper in range(len(counts) - 1, depth, -1):
        if counts[deeper]:
            index += "</ul>\n"
            counts[deeper] = 0
    # The first heading at this level under its parent opens a new list
    if counts[depth] == 1:
        index += "<ul>\n"

    index += """<li><a href="#%s">%s</a>\n""" % (headingname, prevheadingtext)

# Close whatever lists are still open, deepest first
for count in reversed(counts):
    if count:
        index += "</ul>\n"

index += "</div>\n<!-- INDEX -->\n"

data = "\n".join(result)

data = data.replace("@INDEX@", index) + "\n"

# Write the file back out
with open(filename, "w") as outfile:
    outfile.write(data)

# Print the TOC data to stdout correcting the anchor links for external referencing

index = index.replace("<li><a href=\"#", "<li><a href=\"%s#" % filename)
print("""<h3><a href="%s#%s">%d %s</a></h3>\n""" % (filename, filenamebase, num, name))
print(index)
| 212 | |