| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> |
| 2 | <html> |
| 3 | <head> |
| 4 | <link rel="STYLESHEET" href="ref.css" type='text/css' /> |
| 5 | <link rel="SHORTCUT ICON" href="../icons/pyfav.png" type="image/png" /> |
| 6 | <link rel='start' href='../index.html' title='Python Documentation Index' /> |
| 7 | <link rel="first" href="ref.html" title='Python Reference Manual' /> |
| 8 | <link rel='contents' href='contents.html' title="Contents" /> |
| 9 | <link rel='index' href='genindex.html' title='Index' /> |
| 10 | <link rel='last' href='about.html' title='About this document...' /> |
| 11 | <link rel='help' href='about.html' title='About this document...' /> |
| 12 | <link rel="next" href="string-catenation.html" /> |
| 13 | <link rel="prev" href="literals.html" /> |
| 14 | <link rel="parent" href="literals.html" /> |
| 15 | <link rel="next" href="string-catenation.html" /> |
| 16 | <meta name='aesop' content='information' /> |
| 17 | <title>2.4.1 String literals</title> |
| 18 | </head> |
| 19 | <body> |
| 20 | <DIV CLASS="navigation"> |
| 21 | <div id='top-navigation-panel' xml:id='top-navigation-panel'> |
| 22 | <table align="center" width="100%" cellpadding="0" cellspacing="2"> |
| 23 | <tr> |
| 24 | <td class='online-navigation'><a rel="prev" title="2.4 Literals" |
| 25 | href="literals.html"><img src='../icons/previous.png' |
| 26 | border='0' height='32' alt='Previous Page' width='32' /></A></td> |
| 27 | <td class='online-navigation'><a rel="parent" title="2.4 Literals" |
| 28 | href="literals.html"><img src='../icons/up.png' |
| 29 | border='0' height='32' alt='Up One Level' width='32' /></A></td> |
| 30 | <td class='online-navigation'><a rel="next" title="2.4.2 String literal concatenation" |
| 31 | href="string-catenation.html"><img src='../icons/next.png' |
| 32 | border='0' height='32' alt='Next Page' width='32' /></A></td> |
| 33 | <td align="center" width="100%">Python Reference Manual</td> |
| 34 | <td class='online-navigation'><a rel="contents" title="Table of Contents" |
| 35 | href="contents.html"><img src='../icons/contents.png' |
| 36 | border='0' height='32' alt='Contents' width='32' /></A></td> |
| 37 | <td class='online-navigation'><img src='../icons/blank.png' |
| 38 | border='0' height='32' alt='' width='32' /></td> |
| 39 | <td class='online-navigation'><a rel="index" title="Index" |
| 40 | href="genindex.html"><img src='../icons/index.png' |
| 41 | border='0' height='32' alt='Index' width='32' /></A></td> |
| 42 | </tr></table> |
| 43 | <div class='online-navigation'> |
| 44 | <b class="navlabel">Previous:</b> |
| 45 | <a class="sectref" rel="prev" href="literals.html">2.4 Literals</A> |
| 46 | <b class="navlabel">Up:</b> |
| 47 | <a class="sectref" rel="parent" href="literals.html">2.4 Literals</A> |
| 48 | <b class="navlabel">Next:</b> |
| 49 | <a class="sectref" rel="next" href="string-catenation.html">2.4.2 String literal concatenation</A> |
| 50 | </div> |
| 51 | <hr /></div> |
| 52 | </DIV> |
| 53 | <!--End of Navigation Panel--> |
| 54 | |
| 55 | <H2><A NAME="SECTION004410000000000000000"></A><A NAME="strings"></A><a id='l2h-13' xml:id='l2h-13'></a> |
| 56 | <BR> |
| 57 | 2.4.1 String literals |
| 58 | </H2> |
| 59 | |
| 60 | <P> |
| 61 | String literals are described by the following lexical definitions: |
| 62 | |
| 63 | <P> |
| 64 | |
| 65 | <dl><dd class="grammar"> |
| 66 | <div class="productions"> |
| 67 | <table> |
| 68 | <tr> |
| 69 | <td><a id='tok-stringliteral' xml:id='tok-stringliteral'>stringliteral</a></td> |
| 70 | <td>::=</td> |
| 71 | <td>[<a class='grammartoken' href="strings.html#tok-stringprefix">stringprefix</a>](<a class='grammartoken' href="strings.html#tok-shortstring">shortstring</a> | <a class='grammartoken' href="strings.html#tok-longstring">longstring</a>)</td></tr> |
| 72 | <tr> |
| 73 | <td><a id='tok-stringprefix' xml:id='tok-stringprefix'>stringprefix</a></td> |
| 74 | <td>::=</td> |
| 75 | <td>"r" | "u" | "ur" | "R" | "U" | "UR" | "Ur" | "uR"</td></tr> |
| 76 | <tr> |
| 77 | <td><a id='tok-shortstring' xml:id='tok-shortstring'>shortstring</a></td> |
| 78 | <td>::=</td> |
| 79 | <td>"'" <a class='grammartoken' href="strings.html#tok-shortstringitem">shortstringitem</a>* "'" |
| 80 | | '"' <a class='grammartoken' href="strings.html#tok-shortstringitem">shortstringitem</a>* '"'</td></tr> |
| 81 | <tr> |
| 82 | <td><a id='tok-longstring' xml:id='tok-longstring'>longstring</a></td> |
| 83 | <td>::=</td> |
| 84 | <td>"'''" <a class='grammartoken' href="strings.html#tok-longstringitem">longstringitem</a>* "'''"</td></tr> |
| 85 | <tr> |
| 86 | <td></td> |
| 87 | <td></td> |
| 88 | <td><code>| '"""' <a class='grammartoken' href="strings.html#tok-longstringitem">longstringitem</a>* '"""'</code></td></tr> |
| 89 | <tr> |
| 90 | <td><a id='tok-shortstringitem' xml:id='tok-shortstringitem'>shortstringitem</a></td> |
| 91 | <td>::=</td> |
| 92 | <td><a class='grammartoken' href="strings.html#tok-shortstringchar">shortstringchar</a> | <a class='grammartoken' href="strings.html#tok-escapeseq">escapeseq</a></td></tr> |
| 93 | <tr> |
| 94 | <td><a id='tok-longstringitem' xml:id='tok-longstringitem'>longstringitem</a></td> |
| 95 | <td>::=</td> |
| 96 | <td><a class='grammartoken' href="strings.html#tok-longstringchar">longstringchar</a> | <a class='grammartoken' href="strings.html#tok-escapeseq">escapeseq</a></td></tr> |
| 97 | <tr> |
| 98 | <td><a id='tok-shortstringchar' xml:id='tok-shortstringchar'>shortstringchar</a></td> |
| 99 | <td>::=</td> |
| 100 | <td>&lt;any source character except "\" or newline or the quote&gt;</td> |
| 101 | <tr> |
| 102 | <td><a id='tok-longstringchar' xml:id='tok-longstringchar'>longstringchar</a></td> |
| 103 | <td>::=</td> |
| 104 | <td>&lt;any source character except "\"&gt;</td> |
| 105 | <tr> |
| 106 | <td><a id='tok-escapeseq' xml:id='tok-escapeseq'>escapeseq</a></td> |
| 107 | <td>::=</td> |
| 108 | <td>"\" &lt;any ASCII character&gt;</td> |
| 109 | </table> |
| 110 | </div> |
| 111 | <a class="grammar-footer" |
| 112 | href="grammar.txt" type="text/plain" |
| 113 | >Download entire grammar as text.</a> |
| 114 | </dd></dl> |
| 115 | |
| 116 | <P> |
| 117 | One syntactic restriction not indicated by these productions is that |
| 118 | whitespace is not allowed between the <a class='grammartoken' href="strings.html#tok-stringprefix">stringprefix</a> and |
| 119 | the rest of the string literal. The source character set is defined |
| 120 | by the encoding declaration; it is ASCII if no encoding declaration |
| 121 | is given in the source file; see section <A href="encodings.html#encodings">2.1.4</A>. |
| 122 | |
| 123 | <P> |
| 124 | In plain English: String literals can be enclosed in matching single |
| 125 | quotes (<code>'</code>) or double quotes (<code>"</code>). They can also be |
| 126 | enclosed in matching groups of three single or double quotes (these |
| 127 | are generally referred to as <em>triple-quoted strings</em>). The |
| 128 | backslash (<code>\</code>) character is used to escape characters that |
| 129 | otherwise have a special meaning, such as newline, backslash itself, |
| 130 | or the quote character. String literals may optionally be prefixed |
| 131 | with a letter "<tt class="character">r</tt>" or "<tt class="character">R</tt>"; such strings are called |
| 132 | <i class="dfn">raw strings</i><a id='l2h-14' xml:id='l2h-14'></a> and use different rules for interpreting |
| 133 | backslash escape sequences. A prefix of "<tt class="character">u</tt>" or "<tt class="character">U</tt>" |
| 134 | makes the string a Unicode string. Unicode strings use the Unicode character |
| 135 | set as defined by the Unicode Consortium and ISO 10646. Some additional |
| 136 | escape sequences, described below, are available in Unicode strings. |
| 137 | The two prefix characters may be combined; in this case, "<tt class="character">u</tt>" must |
| 138 | appear before "<tt class="character">r</tt>". |
| 139 | |
| 140 | <P> |
| 141 | In triple-quoted strings, |
| 142 | unescaped newlines and quotes are allowed (and are retained), except |
| 143 | that three unescaped quotes in a row terminate the string. (A |
| 144 | "quote" is the character used to open the string, i.e. either |
| 145 | <code>'</code> or <code>"</code>.) |
| 146 | |
| 147 | <P> |
| 148 | Unless an "<tt class="character">r</tt>" or "<tt class="character">R</tt>" prefix is present, escape |
| 149 | sequences in strings are interpreted according to rules similar |
| 150 | to those used by Standard C. The recognized escape sequences are: |
| 151 | |
| 152 | <P> |
| 153 | <div class="center"><table class="realtable"> |
| 154 | <thead> |
| 155 | <tr> |
| 156 | <th class="left" >Escape Sequence</th> |
| 157 | <th class="left" >Meaning</th> |
| 158 | <th class="center">Notes</th> |
| 159 | </tr> |
| 160 | </thead> |
| 161 | <tbody> |
| 162 | <tr><td class="left" valign="baseline"><code>\<var>newline</var></code></td> |
| 163 | <td class="left" >Ignored</td> |
| 164 | <td class="center"></td></tr> |
| 165 | <tr><td class="left" valign="baseline"><code>\\</code></td> |
| 166 | <td class="left" >Backslash (<code>\</code>)</td> |
| 167 | <td class="center"></td></tr> |
| 168 | <tr><td class="left" valign="baseline"><code>\'</code></td> |
| 169 | <td class="left" >Single quote (<code>'</code>)</td> |
| 170 | <td class="center"></td></tr> |
| 171 | <tr><td class="left" valign="baseline"><code>\"</code></td> |
| 172 | <td class="left" >Double quote (<code>"</code>)</td> |
| 173 | <td class="center"></td></tr> |
| 174 | <tr><td class="left" valign="baseline"><code>\a</code></td> |
| 175 | <td class="left" >ASCII Bell (BEL)</td> |
| 176 | <td class="center"></td></tr> |
| 177 | <tr><td class="left" valign="baseline"><code>\b</code></td> |
| 178 | <td class="left" >ASCII Backspace (BS)</td> |
| 179 | <td class="center"></td></tr> |
| 180 | <tr><td class="left" valign="baseline"><code>\f</code></td> |
| 181 | <td class="left" >ASCII Formfeed (FF)</td> |
| 182 | <td class="center"></td></tr> |
| 183 | <tr><td class="left" valign="baseline"><code>\n</code></td> |
| 184 | <td class="left" >ASCII Linefeed (LF)</td> |
| 185 | <td class="center"></td></tr> |
| 186 | <tr><td class="left" valign="baseline"><code>\N{<var>name</var>}</code></td> |
| 187 | <td class="left" >Character named <var>name</var> in the Unicode database (Unicode only)</td> |
| 188 | <td class="center"></td></tr> |
| 189 | <tr><td class="left" valign="baseline"><code>\r</code></td> |
| 190 | <td class="left" >ASCII Carriage Return (CR)</td> |
| 191 | <td class="center"></td></tr> |
| 192 | <tr><td class="left" valign="baseline"><code>\t</code></td> |
| 193 | <td class="left" >ASCII Horizontal Tab (TAB)</td> |
| 194 | <td class="center"></td></tr> |
| 195 | <tr><td class="left" valign="baseline"><code>\u<var>xxxx</var></code></td> |
| 196 | <td class="left" >Character with 16-bit hex value <var>xxxx</var> (Unicode only)</td> |
| 197 | <td class="center">(1)</td></tr> |
| 198 | <tr><td class="left" valign="baseline"><code>\U<var>xxxxxxxx</var></code></td> |
| 199 | <td class="left" >Character with 32-bit hex value <var>xxxxxxxx</var> (Unicode only)</td> |
| 200 | <td class="center">(2)</td></tr> |
| 201 | <tr><td class="left" valign="baseline"><code>\v</code></td> |
| 202 | <td class="left" >ASCII Vertical Tab (VT)</td> |
| 203 | <td class="center"></td></tr> |
| 204 | <tr><td class="left" valign="baseline"><code>\<var>ooo</var></code></td> |
| 205 | <td class="left" >Character with octal value <var>ooo</var></td> |
| 206 | <td class="center">(3,5)</td></tr> |
| 207 | <tr><td class="left" valign="baseline"><code>\x<var>hh</var></code></td> |
| 208 | <td class="left" >Character with hex value <var>hh</var></td> |
| 209 | <td class="center">(4,5)</td></tr></tbody> |
| 210 | </table></div> |
| 211 | |
| 212 | <P> |
| 213 | Notes: |
| 214 | |
| 215 | <P> |
| 216 | <DL COMPACT> |
| 217 | <DT>(1)</DT> |
| 218 | <DD>Individual code units which form parts of a surrogate pair can be |
| 219 | encoded using this escape sequence. |
| 220 | </DD> |
| 221 | <DT>(2)</DT> |
| 222 | <DD>Any Unicode character can be encoded this way, but characters |
| 223 | outside the Basic Multilingual Plane (BMP) will be encoded using a |
| 224 | surrogate pair if Python is compiled to use 16-bit code units (the |
| 225 | default). Individual code units which form parts of a surrogate |
| 226 | pair can be encoded using this escape sequence. |
| 227 | </DD> |
| 228 | <DT>(3)</DT> |
| 229 | <DD>As in Standard C, up to three octal digits are accepted. |
| 230 | </DD> |
| 231 | <DT>(4)</DT> |
| 232 | <DD>Unlike in Standard C, at most two hex digits are accepted. |
| 233 | </DD> |
| 234 | <DT>(5)</DT> |
| 235 | <DD>In a string literal, hexadecimal and octal escapes denote the |
| 236 | byte with the given value; it is not necessary that the byte |
| 237 | encodes a character in the source character set. In a Unicode |
| 238 | literal, these escapes denote a Unicode character with the given |
| 239 | value. |
| 240 | </DD> |
| 241 | </DL> |
| 242 | |
| 243 | <P> |
| 244 | Unlike Standard <a id='l2h-15' xml:id='l2h-15'></a>C, |
| 245 | all unrecognized escape sequences are left in the string unchanged, |
| 246 | i.e., <em>the backslash is left in the string</em>. (This behavior is |
| 247 | useful when debugging: if an escape sequence is mistyped, the |
| 248 | resulting output is more easily recognized as broken.) It is also |
| 249 | important to note that the escape sequences marked as "(Unicode |
| 250 | only)" in the table above fall into the category of unrecognized |
| 251 | escapes for non-Unicode string literals. |
| 252 | |
| 253 | <P> |
| 254 | When an "<tt class="character">r</tt>" or "<tt class="character">R</tt>" prefix is present, a character |
| 255 | following a backslash is included in the string without change, and <em>all |
| 256 | backslashes are left in the string</em>. For example, the string literal |
| 257 | <code>r"\n"</code> consists of two characters: a backslash and a lowercase |
| 258 | "<tt class="character">n</tt>". String quotes can be escaped with a backslash, but the |
| 259 | backslash remains in the string; for example, <code>r"\""</code> is a valid string |
| 260 | literal consisting of two characters: a backslash and a double quote; |
| 261 | <code>r"\"</code> is not a valid string literal (even a raw string cannot |
| 262 | end in an odd number of backslashes). Specifically, <em>a raw |
| 263 | string cannot end in a single backslash</em> (since the backslash would |
| 264 | escape the following quote character). Note also that a single |
| 265 | backslash followed by a newline is interpreted as those two characters |
| 266 | as part of the string, <em>not</em> as a line continuation. |
| 267 | |
| 268 | <P> |
| 269 | When an "<tt class="character">r</tt>" or "<tt class="character">R</tt>" prefix is used in conjunction |
| 270 | with a "<tt class="character">u</tt>" or "<tt class="character">U</tt>" prefix, then the <code>\uXXXX</code> |
| 271 | escape sequence is processed while <em>all other backslashes are |
| 272 | left in the string</em>. For example, the string literal |
| 273 | <code>ur"\u0062\n"</code> consists of three Unicode characters: 'LATIN |
| 274 | SMALL LETTER B', 'REVERSE SOLIDUS', and 'LATIN SMALL LETTER N'. |
| 275 | Backslashes can be escaped with a preceding backslash; however, both |
| 276 | remain in the string. As a result, <code>\uXXXX</code> escape sequences |
| 277 | are only recognized when the <code>u</code> is preceded by an odd number of backslashes. |
| 278 | |
| 279 | <P> |
| 280 | |
| 281 | <DIV CLASS="navigation"> |
| 282 | <div class='online-navigation'> |
| 283 | <p></p><hr /> |
| 284 | <table align="center" width="100%" cellpadding="0" cellspacing="2"> |
| 285 | <tr> |
| 286 | <td class='online-navigation'><a rel="prev" title="2.4 Literals" |
| 287 | href="literals.html"><img src='../icons/previous.png' |
| 288 | border='0' height='32' alt='Previous Page' width='32' /></A></td> |
| 289 | <td class='online-navigation'><a rel="parent" title="2.4 Literals" |
| 290 | href="literals.html"><img src='../icons/up.png' |
| 291 | border='0' height='32' alt='Up One Level' width='32' /></A></td> |
| 292 | <td class='online-navigation'><a rel="next" title="2.4.2 String literal concatenation" |
| 293 | href="string-catenation.html"><img src='../icons/next.png' |
| 294 | border='0' height='32' alt='Next Page' width='32' /></A></td> |
| 295 | <td align="center" width="100%">Python Reference Manual</td> |
| 296 | <td class='online-navigation'><a rel="contents" title="Table of Contents" |
| 297 | href="contents.html"><img src='../icons/contents.png' |
| 298 | border='0' height='32' alt='Contents' width='32' /></A></td> |
| 299 | <td class='online-navigation'><img src='../icons/blank.png' |
| 300 | border='0' height='32' alt='' width='32' /></td> |
| 301 | <td class='online-navigation'><a rel="index" title="Index" |
| 302 | href="genindex.html"><img src='../icons/index.png' |
| 303 | border='0' height='32' alt='Index' width='32' /></A></td> |
| 304 | </tr></table> |
| 305 | <div class='online-navigation'> |
| 306 | <b class="navlabel">Previous:</b> |
| 307 | <a class="sectref" rel="prev" href="literals.html">2.4 Literals</A> |
| 308 | <b class="navlabel">Up:</b> |
| 309 | <a class="sectref" rel="parent" href="literals.html">2.4 Literals</A> |
| 310 | <b class="navlabel">Next:</b> |
| 311 | <a class="sectref" rel="next" href="string-catenation.html">2.4.2 String literal concatenation</A> |
| 312 | </div> |
| 313 | </div> |
| 314 | <hr /> |
| 315 | <span class="release-info">Release 2.4.2, documentation updated on 28 September 2005.</span> |
| 316 | </DIV> |
| 317 | <!--End of Navigation Panel--> |
| 318 | <ADDRESS> |
| 319 | See <i><a href="about.html">About this document...</a></i> for information on suggesting changes. |
| 320 | </ADDRESS> |
| 321 | </BODY> |
| 322 | </HTML> |