| 1 | <!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [ |
| 2 | |
| 3 | <!-- Process this file with docbook-to-man to generate an nroff manual |
| 4 | page: `docbook-to-man manpage.sgml > manpage.1'. You may view |
| 5 | the manual page with: `docbook-to-man manpage.sgml | nroff -man | |
| 6 | less'. A typical entry in a Makefile or Makefile.am is: |
| 7 | |
| 8 | manpage.1: manpage.sgml |
| 9 | docbook-to-man $< > $@ |
| 10 | --> |
| 11 | |
| 12 | <!-- Fill in your name for FIRSTNAME and SURNAME. --> |
| 13 | <!ENTITY dhfirstname "<firstname>Scott</firstname>"> |
| 14 | <!ENTITY dhsurname "<surname>Bronson</surname>"> |
| 15 | <!-- Please adjust the date whenever revising the manpage. --> |
| 16 | <!ENTITY dhdate "<date>December 5, 2001</date>"> |
| 17 | <!-- SECTION should be 1-8, maybe with a subsection; other parameters are |
| 18 | allowed: see man(7), man(1). --> |
| 19 | <!ENTITY dhsection "<manvolnum>1</manvolnum>"> |
| 20 | <!ENTITY dhemail "<email>bronson@rinspin.com</email>"> |
| 21 | <!ENTITY dhusername "Scott Bronson"> |
| 22 | <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>"> |
| 23 | <!ENTITY dhpackage "xmlwf"> |
| 24 | |
| 25 | <!ENTITY debian "<productname>Debian GNU/Linux</productname>"> |
| 26 | <!ENTITY gnu "<acronym>GNU</acronym>"> |
| 27 | ]> |
| 28 | |
| 29 | <refentry> |
| 30 | <refentryinfo> |
| 31 | <address> |
| 32 | &dhemail; |
| 33 | </address> |
| 34 | <author> |
| 35 | &dhfirstname; |
| 36 | &dhsurname; |
| 37 | </author> |
| 38 | <copyright> |
| 39 | <year>2001</year> |
| 40 | <holder>&dhusername;</holder> |
| 41 | </copyright> |
| 42 | &dhdate; |
| 43 | </refentryinfo> |
| 44 | <refmeta> |
| 45 | &dhucpackage; |
| 46 | |
| 47 | &dhsection; |
| 48 | </refmeta> |
| 49 | <refnamediv> |
| 50 | <refname>&dhpackage;</refname> |
| 51 | |
| 52 | <refpurpose>Determines if an XML document is well-formed</refpurpose> |
| 53 | </refnamediv> |
| 54 | <refsynopsisdiv> |
| 55 | <cmdsynopsis> |
| 56 | <command>&dhpackage;</command> |
| 57 | <arg><option>-s</option></arg> |
| 58 | <arg><option>-n</option></arg> |
| 59 | <arg><option>-p</option></arg> |
| 60 | <arg><option>-x</option></arg> |
| 61 | |
| 62 | <arg><option>-e <replaceable>encoding</replaceable></option></arg> |
| 63 | <arg><option>-w</option></arg> |
| 64 | |
| 65 | <arg><option>-d <replaceable>output-dir</replaceable></option></arg> |
| 66 | <arg><option>-c</option></arg> |
| 67 | <arg><option>-m</option></arg> |
| 68 | |
| 69 | <arg><option>-r</option></arg> |
| 70 | <arg><option>-t</option></arg> |
| 71 | |
| 72 | <arg><option>-v</option></arg> |
| 73 | |
| 74 | <arg>file ...</arg> |
| 75 | </cmdsynopsis> |
| 76 | </refsynopsisdiv> |
| 77 | |
| 78 | <refsect1> |
| 79 | <title>DESCRIPTION</title> |
| 80 | |
| 81 | <para> |
| 82 | <command>&dhpackage;</command> uses the Expat library to |
| 83 | determine if an XML document is well-formed. It is |
| 84 | non-validating. |
| 85 | </para> |
| 86 | |
| 87 | <para> |
| 88 | If you do not specify any files on the command-line, and you |
| 89 | have a recent version of <command>&dhpackage;</command>, the |
| 90 | input file will be read from standard input. |
| 91 | </para> |
| 92 | |
| 93 | </refsect1> |
| 94 | |
| 95 | <refsect1> |
| 96 | <title>WELL-FORMED DOCUMENTS</title> |
| 97 | |
| 98 | <para> |
| 99 | A well-formed document must adhere to the |
| 100 | following rules: |
| 101 | </para> |
| 102 | |
| 103 | <itemizedlist> |
| 104 | <listitem><para> |
| 105 | The file begins with an XML declaration. For instance, |
| 106 | <literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>. |
| 107 | <emphasis>NOTE:</emphasis> |
| 108 | <command>&dhpackage;</command> does not currently |
| 109 | check for a valid XML declaration. |
| 110 | </para></listitem> |
| 111 | <listitem><para> |
| 112 | Every start tag is either empty (&lt;tag/&gt;) |
| 113 | or has a corresponding end tag. |
| 114 | </para></listitem> |
| 115 | <listitem><para> |
| 116 | There is exactly one root element. This element must contain |
| 117 | all other elements in the document. Only comments, white |
| 118 | space, and processing instructions may come after the close |
| 119 | of the root element. |
| 120 | </para></listitem> |
| 121 | <listitem><para> |
| 122 | All elements nest properly. |
| 123 | </para></listitem> |
| 124 | <listitem><para> |
| 125 | All attribute values are enclosed in quotes (either single |
| 126 | or double). |
| 127 | </para></listitem> |
| 128 | </itemizedlist> |
| 129 | |
| 130 | <para> |
| 131 | If the document has a DTD, and it strictly complies with that |
| 132 | DTD, then the document is also considered <emphasis>valid</emphasis>. |
| 133 | <command>&dhpackage;</command> is a non-validating parser -- |
| 134 | it does not check the DTD. However, it does support |
| 135 | external entities (see the <option>-x</option> option). |
| 136 | </para> |
| 137 | </refsect1> |
| 138 | |
| 139 | <refsect1> |
| 140 | <title>OPTIONS</title> |
| 141 | |
| 142 | <para> |
| 143 | When an option includes an argument, you may specify the argument either |
| 144 | separately ("<option>-d</option> output") or concatenated with the |
| 145 | option ("<option>-d</option>output"). <command>&dhpackage;</command> |
| 146 | supports both. |
| 147 | </para> |
| 148 | |
| 149 | <variablelist> |
| 150 | |
| 151 | <varlistentry> |
| 152 | <term><option>-c</option></term> |
| 153 | <listitem> |
| 154 | <para> |
| 155 | If the input file is well-formed and <command>&dhpackage;</command> |
| 156 | doesn't encounter any errors, the input file is simply copied to |
| 157 | the output directory unchanged. |
| 158 | This implies no namespaces (turns off <option>-n</option>) and |
| 159 | requires <option>-d</option> to specify an output directory. |
| 160 | </para> |
| 161 | </listitem> |
| 162 | </varlistentry> |
| 163 | |
| 164 | <varlistentry> |
| 165 | <term><option>-d output-dir</option></term> |
| 166 | <listitem> |
| 167 | <para> |
| 168 | Specifies a directory to contain transformed |
| 169 | representations of the input files. |
| 170 | By default, <option>-d</option> outputs a canonical representation |
| 171 | (described below). |
| 172 | You can select different output formats using <option>-c</option> |
| 173 | and <option>-m</option>. |
| 174 | </para> |
| 175 | <para> |
| 176 | The output filenames will |
| 177 | be exactly the same as the input filenames or "STDIN" if the input is |
| 178 | coming from standard input. Therefore, you must be careful that the |
| 179 | output file does not go into the same directory as the input |
| 180 | file. Otherwise, <command>&dhpackage;</command> will delete the |
| 181 | input file before it generates the output file (just like running |
| 182 | <literal>cat &lt; file &gt; file</literal> in most shells). |
| 183 | </para> |
| 184 | <para> |
| 185 | Two structurally equivalent XML documents have a byte-for-byte |
| 186 | identical canonical XML representation. |
| 187 | Note that ignorable white space is considered significant and |
| 188 | is treated equivalently to data. |
| 189 | More on canonical XML can be found at |
| 190 | http://www.jclark.com/xml/canonxml.html . |
| 191 | </para> |
| 192 | </listitem> |
| 193 | </varlistentry> |
| 194 | |
| 195 | <varlistentry> |
| 196 | <term><option>-e encoding</option></term> |
| 197 | <listitem> |
| 198 | <para> |
| 199 | Specifies the character encoding for the document, overriding |
| 200 | any document encoding declaration. <command>&dhpackage;</command> |
| 201 | supports four built-in encodings: |
| 202 | <literal>US-ASCII</literal>, |
| 203 | <literal>UTF-8</literal>, |
| 204 | <literal>UTF-16</literal>, and |
| 205 | <literal>ISO-8859-1</literal>. |
| 206 | Also see the <option>-w</option> option. |
| 207 | </para> |
| 208 | </listitem> |
| 209 | </varlistentry> |
| 210 | |
| 211 | <varlistentry> |
| 212 | <term><option>-m</option></term> |
| 213 | <listitem> |
| 214 | <para> |
| 215 | Outputs some strange sort of XML file that completely |
| 216 | describes the input file, including character positions. |
| 217 | Requires <option>-d</option> to specify an output directory. |
| 218 | </para> |
| 219 | </listitem> |
| 220 | </varlistentry> |
| 221 | |
| 222 | <varlistentry> |
| 223 | <term><option>-n</option></term> |
| 224 | <listitem> |
| 225 | <para> |
| 226 | Turns on namespace processing. (describe namespaces) |
| 227 | <option>-c</option> disables namespaces. |
| 228 | </para> |
| 229 | </listitem> |
| 230 | </varlistentry> |
| 231 | |
| 232 | <varlistentry> |
| 233 | <term><option>-p</option></term> |
| 234 | <listitem> |
| 235 | <para> |
| 236 | Tells <command>&dhpackage;</command> to process external DTDs and parameter |
| 237 | entities. |
| 238 | </para> |
| 239 | <para> |
| 240 | Normally <command>&dhpackage;</command> never parses parameter |
| 241 | entities. <option>-p</option> tells it to always parse them. |
| 242 | <option>-p</option> implies <option>-x</option>. |
| 243 | </para> |
| 244 | </listitem> |
| 245 | </varlistentry> |
| 246 | |
| 247 | <varlistentry> |
| 248 | <term><option>-r</option></term> |
| 249 | <listitem> |
| 250 | <para> |
| 251 | Normally <command>&dhpackage;</command> memory-maps the XML file |
| 252 | before parsing; this can result in faster parsing on many |
| 253 | platforms. |
| 254 | <option>-r</option> turns off memory-mapping and uses normal file |
| 255 | IO calls instead. |
| 256 | Of course, memory-mapping is automatically turned off |
| 257 | when reading from standard input. |
| 258 | </para> |
| 259 | <para> |
| 260 | Use of memory-mapping can cause some platforms to report |
| 261 | substantially higher memory usage for |
| 262 | <command>&dhpackage;</command>, but this appears to be a matter of |
| 263 | the operating system reporting memory in a strange way; there is |
| 264 | not a leak in <command>&dhpackage;</command>. |
| 265 | </para> |
| 266 | </listitem> |
| 267 | </varlistentry> |
| 268 | |
| 269 | <varlistentry> |
| 270 | <term><option>-s</option></term> |
| 271 | <listitem> |
| 272 | <para> |
| 273 | Prints an error if the document is not standalone. |
| 274 | A document is standalone if it has no external subset and no |
| 275 | references to parameter entities. |
| 276 | </para> |
| 277 | </listitem> |
| 278 | </varlistentry> |
| 279 | |
| 280 | <varlistentry> |
| 281 | <term><option>-t</option></term> |
| 282 | <listitem> |
| 283 | <para> |
| 284 | Turns on timings. This tells Expat to parse the entire file, |
| 285 | but not perform any processing. |
| 286 | This gives a fairly accurate idea of the raw speed of Expat itself |
| 287 | without client overhead. |
| 288 | <option>-t</option> turns off most of the output options |
| 289 | (<option>-d</option>, <option>-m</option>, <option>-c</option>, |
| 290 | ...). |
| 291 | </para> |
| 292 | </listitem> |
| 293 | </varlistentry> |
| 294 | |
| 295 | <varlistentry> |
| 296 | <term><option>-v</option></term> |
| 297 | <listitem> |
| 298 | <para> |
| 299 | Prints the version of the Expat library being used, including some |
| 300 | information on the compile-time configuration of the library, and |
| 301 | then exits. |
| 302 | </para> |
| 303 | </listitem> |
| 304 | </varlistentry> |
| 305 | |
| 306 | <varlistentry> |
| 307 | <term><option>-w</option></term> |
| 308 | <listitem> |
| 309 | <para> |
| 310 | Enables support for Windows code pages. |
| 311 | Normally, <command>&dhpackage;</command> will throw an error if it |
| 312 | runs across an encoding that it is not equipped to handle itself. With |
| 313 | <option>-w</option>, <command>&dhpackage;</command> will try to use a Windows code |
| 314 | page. See also <option>-e</option>. |
| 315 | </para> |
| 316 | </listitem> |
| 317 | </varlistentry> |
| 318 | |
| 319 | <varlistentry> |
| 320 | <term><option>-x</option></term> |
| 321 | <listitem> |
| 322 | <para> |
| 323 | Turns on parsing external entities. |
| 324 | </para> |
| 325 | <para> |
| 326 | Non-validating parsers are not required to resolve external |
| 327 | entities, or even expand entities at all. |
| 328 | Expat always expands internal entities (?), |
| 329 | but external entity parsing must be enabled explicitly. |
| 330 | </para> |
| 331 | <para> |
| 332 | External entities are simply entities that obtain their |
| 333 | data from outside the XML file currently being parsed. |
| 334 | </para> |
| 335 | <para> |
| 336 | This is an example of an internal entity: |
| 337 | <literallayout> |
| 338 | &lt;!ENTITY vers '1.0.2'&gt; |
| 339 | </literallayout> |
| 340 | </para> |
| 341 | <para> |
| 342 | And here are some examples of external entities: |
| 343 | |
| 344 | <literallayout> |
| 345 | &lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt; (parsed) |
| 346 | &lt;!ENTITY logo SYSTEM "logo.png" PNG&gt; (unparsed) |
| 347 | </literallayout> |
| 348 | |
| 349 | </para> |
| 350 | </listitem> |
| 351 | </varlistentry> |
| 352 | |
| 353 | <varlistentry> |
| 354 | <term><option>--</option></term> |
| 355 | <listitem> |
| 356 | <para> |
| 357 | (Two hyphens.) |
| 358 | Terminates the list of options. This is only needed if a filename |
| 359 | starts with a hyphen. For example: |
| 360 | </para> |
| 361 | <literallayout> |
| 362 | &dhpackage; -- -myfile.xml |
| 363 | </literallayout> |
| 364 | <para> |
| 365 | will run <command>&dhpackage;</command> on the file |
| 366 | <filename>-myfile.xml</filename>. |
| 367 | </para> |
| 368 | </listitem> |
| 369 | </varlistentry> |
| 370 | </variablelist> |
| 371 | |
| 372 | <para> |
| 373 | Older versions of <command>&dhpackage;</command> do not support |
| 374 | reading from standard input. |
| 375 | </para> |
| 376 | </refsect1> |
| 377 | |
| 378 | <refsect1> |
| 379 | <title>OUTPUT</title> |
| 380 | <para> |
| 381 | If an input file is not well-formed, |
| 382 | <command>&dhpackage;</command> prints a single line describing |
| 383 | the problem to standard output. If a file is well-formed, |
| 384 | <command>&dhpackage;</command> outputs nothing. |
| 385 | Note that the result code is <emphasis>not</emphasis> set. |
| 386 | </para> |
| 387 | </refsect1> |
| 388 | |
| 389 | <refsect1> |
| 390 | <title>BUGS</title> |
| 391 | <para> |
| 392 | According to the W3C standard, an XML file without a |
| 393 | declaration at the beginning is not considered well-formed. |
| 394 | However, <command>&dhpackage;</command> allows this to pass. |
| 395 | </para> |
| 396 | <para> |
| 397 | <command>&dhpackage;</command> returns a result code of 0 (no error), |
| 398 | even if the file is not well-formed. There is no good way for |
| 399 | a program to use <command>&dhpackage;</command> to quickly |
| 400 | check a file -- it must parse <command>&dhpackage;</command>'s |
| 401 | standard output. |
| 402 | </para> |
| 403 | <para> |
| 404 | The errors should go to standard error, not standard output. |
| 405 | </para> |
| 406 | <para> |
| 407 | There should be a way to get <option>-d</option> to send its |
| 408 | output to standard output rather than forcing the user to send |
| 409 | it to a file. |
| 410 | </para> |
| 411 | <para> |
| 412 | I have no idea why anyone would want to use the |
| 413 | <option>-d</option>, <option>-c</option>, and |
| 414 | <option>-m</option> options. If someone could explain it to |
| 415 | me, I'd like to add this information to this manpage. |
| 416 | </para> |
| 417 | </refsect1> |
| 418 | |
| 419 | <refsect1> |
| 420 | <title>ALTERNATIVES</title> |
| 421 | <para> |
| 422 | Here are some XML validators on the web: |
| 423 | |
| 424 | <literallayout> |
| 425 | http://www.hcrc.ed.ac.uk/~richard/xml-check.html |
| 426 | http://www.stg.brown.edu/service/xmlvalid/ |
| 427 | http://www.scripting.com/frontier5/xml/code/xmlValidator.html |
| 428 | http://www.xml.com/pub/a/tools/ruwf/check.html |
| 429 | </literallayout> |
| 430 | |
| 431 | </para> |
| 432 | </refsect1> |
| 433 | |
| 434 | <refsect1> |
| 435 | <title>SEE ALSO</title> |
| 436 | <para> |
| 437 | |
| 438 | <literallayout> |
| 439 | The Expat home page: http://www.libexpat.org/ |
| 440 | The W3 XML specification: http://www.w3.org/TR/REC-xml |
| 441 | </literallayout> |
| 442 | |
| 443 | </para> |
| 444 | </refsect1> |
| 445 | |
| 446 | <refsect1> |
| 447 | <title>AUTHOR</title> |
| 448 | <para> |
| 449 | This manual page was written by &dhusername; &dhemail; for |
| 450 | the &debian; system (but may be used by others). Permission is |
| 451 | granted to copy, distribute and/or modify this document under |
| 452 | the terms of the <acronym>GNU</acronym> Free Documentation |
| 453 | License, Version 1.1. |
| 454 | </para> |
| 455 | </refsect1> |
| 456 | </refentry> |
| 457 | |
| 458 | <!-- Keep this comment at the end of the file |
| 459 | Local variables: |
| 460 | mode: sgml |
| 461 | sgml-omittag:t |
| 462 | sgml-shorttag:t |
| 463 | sgml-minimize-attributes:nil |
| 464 | sgml-always-quote-attributes:t |
| 465 | sgml-indent-step:2 |
| 466 | sgml-indent-data:t |
| 467 | sgml-parent-document:nil |
| 468 | sgml-default-dtd-file:nil |
| 469 | sgml-exposed-tags:nil |
| 470 | sgml-local-catalogs:nil |
| 471 | sgml-local-ecat-files:nil |
| 472 | End: |
| 473 | --> |