Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> |
2 | <html> | |
3 | <head> | |
4 | <link rel="STYLESHEET" href="ref.css" type='text/css' /> | |
5 | <link rel="SHORTCUT ICON" href="../icons/pyfav.png" type="image/png" /> | |
6 | <link rel='start' href='../index.html' title='Python Documentation Index' /> | |
7 | <link rel="first" href="ref.html" title='Python Reference Manual' /> | |
8 | <link rel='contents' href='contents.html' title="Contents" /> | |
9 | <link rel='index' href='genindex.html' title='Index' /> | |
10 | <link rel='last' href='about.html' title='About this document...' /> | |
11 | <link rel='help' href='about.html' title='About this document...' /> | |
12 | <link rel="next" href="string-catenation.html" /> | |
13 | <link rel="prev" href="literals.html" /> | |
14 | <link rel="parent" href="literals.html" /> | |
15 | <link rel="next" href="string-catenation.html" /> | |
16 | <meta name='aesop' content='information' /> | |
17 | <title>2.4.1 String literals</title> | |
18 | </head> | |
19 | <body> | |
20 | <DIV CLASS="navigation"> | |
21 | <div id='top-navigation-panel' xml:id='top-navigation-panel'> | |
22 | <table align="center" width="100%" cellpadding="0" cellspacing="2"> | |
23 | <tr> | |
24 | <td class='online-navigation'><a rel="prev" title="2.4 Literals" | |
25 | href="literals.html"><img src='../icons/previous.png' | |
26 | border='0' height='32' alt='Previous Page' width='32' /></A></td> | |
27 | <td class='online-navigation'><a rel="parent" title="2.4 Literals" | |
28 | href="literals.html"><img src='../icons/up.png' | |
29 | border='0' height='32' alt='Up One Level' width='32' /></A></td> | |
30 | <td class='online-navigation'><a rel="next" title="2.4.2 String literal concatenation" | |
31 | href="string-catenation.html"><img src='../icons/next.png' | |
32 | border='0' height='32' alt='Next Page' width='32' /></A></td> | |
33 | <td align="center" width="100%">Python Reference Manual</td> | |
34 | <td class='online-navigation'><a rel="contents" title="Table of Contents" | |
35 | href="contents.html"><img src='../icons/contents.png' | |
36 | border='0' height='32' alt='Contents' width='32' /></A></td> | |
37 | <td class='online-navigation'><img src='../icons/blank.png' | |
38 | border='0' height='32' alt='' width='32' /></td> | |
39 | <td class='online-navigation'><a rel="index" title="Index" | |
40 | href="genindex.html"><img src='../icons/index.png' | |
41 | border='0' height='32' alt='Index' width='32' /></A></td> | |
42 | </tr></table> | |
43 | <div class='online-navigation'> | |
44 | <b class="navlabel">Previous:</b> | |
45 | <a class="sectref" rel="prev" href="literals.html">2.4 Literals</A> | |
46 | <b class="navlabel">Up:</b> | |
47 | <a class="sectref" rel="parent" href="literals.html">2.4 Literals</A> | |
48 | <b class="navlabel">Next:</b> | |
49 | <a class="sectref" rel="next" href="string-catenation.html">2.4.2 String literal concatenation</A> | |
50 | </div> | |
51 | <hr /></div> | |
52 | </DIV> | |
53 | <!--End of Navigation Panel--> | |
54 | ||
55 | <H2><A NAME="SECTION004410000000000000000"></A><A NAME="strings"></A><a id='l2h-13' xml:id='l2h-13'></a> | |
56 | <BR> | |
57 | 2.4.1 String literals | |
58 | </H2> | |
59 | ||
60 | <P> | |
61 | String literals are described by the following lexical definitions: | |
62 | ||
63 | <P> | |
64 | ||
65 | <dl><dd class="grammar"> | |
66 | <div class="productions"> | |
67 | <table> | |
68 | <tr> | |
69 | <td><a id='tok-stringliteral' xml:id='tok-stringliteral'>stringliteral</a></td> | |
70 | <td>::=</td> | |
71 | <td>[<a class='grammartoken' href="strings.html#tok-stringprefix">stringprefix</a>](<a class='grammartoken' href="strings.html#tok-shortstring">shortstring</a> | <a class='grammartoken' href="strings.html#tok-longstring">longstring</a>)</td></tr> | |
72 | <tr> | |
73 | <td><a id='tok-stringprefix' xml:id='tok-stringprefix'>stringprefix</a></td> | |
74 | <td>::=</td> | |
75 | <td>"r" | "u" | "ur" | "R" | "U" | "UR" | "Ur" | "uR"</td></tr> | |
76 | <tr> | |
77 | <td><a id='tok-shortstring' xml:id='tok-shortstring'>shortstring</a></td> | |
78 | <td>::=</td> | |
79 | <td>"'" <a class='grammartoken' href="strings.html#tok-shortstringitem">shortstringitem</a>* "'" | |
80 | | '"' <a class='grammartoken' href="strings.html#tok-shortstringitem">shortstringitem</a>* '"'</td></tr> | |
81 | <tr> | |
82 | <td><a id='tok-longstring' xml:id='tok-longstring'>longstring</a></td> | |
83 | <td>::=</td> | |
84 | <td>"'''" <a class='grammartoken' href="strings.html#tok-longstringitem">longstringitem</a>* "'''"</td></tr> | |
85 | <tr> | |
86 | <td></td> | |
87 | <td></td> | |
88 | <td><code>| '"""' <a class='grammartoken' href="strings.html#tok-longstringitem">longstringitem</a>* '"""'</code></td></tr> | |
89 | <tr> | |
90 | <td><a id='tok-shortstringitem' xml:id='tok-shortstringitem'>shortstringitem</a></td> | |
91 | <td>::=</td> | |
92 | <td><a class='grammartoken' href="strings.html#tok-shortstringchar">shortstringchar</a> | <a class='grammartoken' href="strings.html#tok-escapeseq">escapeseq</a></td></tr> | |
93 | <tr> | |
94 | <td><a id='tok-longstringitem' xml:id='tok-longstringitem'>longstringitem</a></td> | |
95 | <td>::=</td> | |
96 | <td><a class='grammartoken' href="strings.html#tok-longstringchar">longstringchar</a> | <a class='grammartoken' href="strings.html#tok-escapeseq">escapeseq</a></td></tr> | |
97 | <tr> | |
98 | <td><a id='tok-shortstringchar' xml:id='tok-shortstringchar'>shortstringchar</a></td> | |
99 | <td>::=</td> | |
100 | <td>&lt;any source character except "\" or newline or the quote&gt;</td></tr> | |
101 | <tr> | |
102 | <td><a id='tok-longstringchar' xml:id='tok-longstringchar'>longstringchar</a></td> | |
103 | <td>::=</td> | |
104 | <td>&lt;any source character except "\"&gt;</td></tr> | |
105 | <tr> | |
106 | <td><a id='tok-escapeseq' xml:id='tok-escapeseq'>escapeseq</a></td> | |
107 | <td>::=</td> | |
108 | <td>"\" &lt;any ASCII character&gt;</td></tr> | |
109 | </table> | |
110 | </div> | |
111 | <a class="grammar-footer" | |
112 | href="grammar.txt" type="text/plain" | |
113 | >Download entire grammar as text.</a> | |
114 | </dd></dl> | |
115 | ||
116 | <P> | |
117 | One syntactic restriction not indicated by these productions is that | |
118 | whitespace is not allowed between the <a class='grammartoken' href="strings.html#tok-stringprefix">stringprefix</a> and | |
119 | the rest of the string literal. The source character set is defined | |
120 | by the encoding declaration; it is ASCII if no encoding declaration | |
121 | is given in the source file; see section <A href="encodings.html#encodings">2.1.4</A>. | |
122 | ||
123 | <P> | |
124 | In plain English: String literals can be enclosed in matching single | |
125 | quotes (<code>'</code>) or double quotes (<code>"</code>). They can also be | |
126 | enclosed in matching groups of three single or double quotes (these | |
127 | are generally referred to as <em>triple-quoted strings</em>). The | |
128 | backslash (<code>\</code>) character is used to escape characters that | |
129 | otherwise have a special meaning, such as newline, backslash itself, | |
130 | or the quote character. String literals may optionally be prefixed | |
131 | with a letter "<tt class="character">r</tt>" or "<tt class="character">R</tt>"; such strings are called | |
132 | <i class="dfn">raw strings</i><a id='l2h-14' xml:id='l2h-14'></a> and use different rules for interpreting | |
133 | backslash escape sequences. A prefix of "<tt class="character">u</tt>" or "<tt class="character">U</tt>" | |
134 | makes the string a Unicode string. Unicode strings use the Unicode character | |
135 | set as defined by the Unicode Consortium and ISO 10646. Some additional | |
136 | escape sequences, described below, are available in Unicode strings. | |
137 | The two prefix characters may be combined; in this case, "<tt class="character">u</tt>" must | |
138 | appear before "<tt class="character">r</tt>". | |
139 | ||
140 | <P> | |
141 | In triple-quoted strings, | |
142 | unescaped newlines and quotes are allowed (and are retained), except | |
143 | that three unescaped quotes in a row terminate the string. (A | |
144 | "quote" is the character used to open the string, i.e. either | |
145 | <code>'</code> or <code>"</code>.) | |
146 | ||
147 | <P> | |
148 | Unless an "<tt class="character">r</tt>" or "<tt class="character">R</tt>" prefix is present, escape | |
149 | sequences in strings are interpreted according to rules similar | |
150 | to those used by Standard C. The recognized escape sequences are: | |
151 | ||
152 | <P> | |
153 | <div class="center"><table class="realtable"> | |
154 | <thead> | |
155 | <tr> | |
156 | <th class="left" >Escape Sequence</th> | |
157 | <th class="left" >Meaning</th> | |
158 | <th class="center">Notes</th> | |
159 | </tr> | |
160 | </thead> | |
161 | <tbody> | |
162 | <tr><td class="left" valign="baseline"><code>\<var>newline</var></code></td> | |
163 | <td class="left" >Ignored</td> | |
164 | <td class="center"></td></tr> | |
165 | <tr><td class="left" valign="baseline"><code>\\</code></td> | |
166 | <td class="left" >Backslash (<code>\</code>)</td> | |
167 | <td class="center"></td></tr> | |
168 | <tr><td class="left" valign="baseline"><code>\'</code></td> | |
169 | <td class="left" >Single quote (<code>'</code>)</td> | |
170 | <td class="center"></td></tr> | |
171 | <tr><td class="left" valign="baseline"><code>\"</code></td> | |
172 | <td class="left" >Double quote (<code>"</code>)</td> | |
173 | <td class="center"></td></tr> | |
174 | <tr><td class="left" valign="baseline"><code>\a</code></td> | |
175 | <td class="left" >ASCII Bell (BEL)</td> | |
176 | <td class="center"></td></tr> | |
177 | <tr><td class="left" valign="baseline"><code>\b</code></td> | |
178 | <td class="left" >ASCII Backspace (BS)</td> | |
179 | <td class="center"></td></tr> | |
180 | <tr><td class="left" valign="baseline"><code>\f</code></td> | |
181 | <td class="left" >ASCII Formfeed (FF)</td> | |
182 | <td class="center"></td></tr> | |
183 | <tr><td class="left" valign="baseline"><code>\n</code></td> | |
184 | <td class="left" >ASCII Linefeed (LF)</td> | |
185 | <td class="center"></td></tr> | |
186 | <tr><td class="left" valign="baseline"><code>\N{<var>name</var>}</code></td> | |
187 | <td class="left" >Character named <var>name</var> in the Unicode database (Unicode only)</td> | |
188 | <td class="center"></td></tr> | |
189 | <tr><td class="left" valign="baseline"><code>\r</code></td> | |
190 | <td class="left" >ASCII Carriage Return (CR)</td> | |
191 | <td class="center"></td></tr> | |
192 | <tr><td class="left" valign="baseline"><code>\t</code></td> | |
193 | <td class="left" >ASCII Horizontal Tab (TAB)</td> | |
194 | <td class="center"></td></tr> | |
195 | <tr><td class="left" valign="baseline"><code>\u<var>xxxx</var></code></td> | |
196 | <td class="left" >Character with 16-bit hex value <var>xxxx</var> (Unicode only)</td> | |
197 | <td class="center">(1)</td></tr> | |
198 | <tr><td class="left" valign="baseline"><code>\U<var>xxxxxxxx</var></code></td> | |
199 | <td class="left" >Character with 32-bit hex value <var>xxxxxxxx</var> (Unicode only)</td> | |
200 | <td class="center">(2)</td></tr> | |
201 | <tr><td class="left" valign="baseline"><code>\v</code></td> | |
202 | <td class="left" >ASCII Vertical Tab (VT)</td> | |
203 | <td class="center"></td></tr> | |
204 | <tr><td class="left" valign="baseline"><code>\<var>ooo</var></code></td> | |
205 | <td class="left" >Character with octal value <var>ooo</var></td> | |
206 | <td class="center">(3,5)</td></tr> | |
207 | <tr><td class="left" valign="baseline"><code>\x<var>hh</var></code></td> | |
208 | <td class="left" >Character with hex value <var>hh</var></td> | |
209 | <td class="center">(4,5)</td></tr></tbody> | |
210 | </table></div> | |
211 | ||
212 | <P> | |
213 | Notes: | |
214 | ||
215 | <P> | |
216 | <DL COMPACT> | |
217 | <DT>(1)</DT> | |
218 | <DD>Individual code units which form parts of a surrogate pair can be | |
219 | encoded using this escape sequence. | |
220 | </DD> | |
221 | <DT>(2)</DT> | |
222 | <DD>Any Unicode character can be encoded this way, but characters | |
223 | outside the Basic Multilingual Plane (BMP) will be encoded using a | |
224 | surrogate pair if Python is compiled to use 16-bit code units (the | |
225 | default). Individual code units which form parts of a surrogate | |
226 | pair can be encoded using this escape sequence. | |
227 | </DD> | |
228 | <DT>(3)</DT> | |
229 | <DD>As in Standard C, up to three octal digits are accepted. | |
230 | </DD> | |
231 | <DT>(4)</DT> | |
232 | <DD>Unlike in Standard C, at most two hex digits are accepted. | |
233 | </DD> | |
234 | <DT>(5)</DT> | |
235 | <DD>In a string literal, hexadecimal and octal escapes denote the | |
236 | byte with the given value; it is not necessary that the byte | |
237 | encodes a character in the source character set. In a Unicode | |
238 | literal, these escapes denote a Unicode character with the given | |
239 | value. | |
240 | </DD> | |
241 | </DL> | |
242 | ||
243 | <P> | |
244 | Unlike Standard <a id='l2h-15' xml:id='l2h-15'></a>C, | |
245 | all unrecognized escape sequences are left in the string unchanged, | |
246 | i.e., <em>the backslash is left in the string</em>. (This behavior is | |
247 | useful when debugging: if an escape sequence is mistyped, the | |
248 | resulting output is more easily recognized as broken.) It is also | |
249 | important to note that the escape sequences marked as "(Unicode | |
250 | only)" in the table above fall into the category of unrecognized | |
251 | escapes for non-Unicode string literals. | |
252 | ||
253 | <P> | |
254 | When an "<tt class="character">r</tt>" or "<tt class="character">R</tt>" prefix is present, a character | |
255 | following a backslash is included in the string without change, and <em>all | |
256 | backslashes are left in the string</em>. For example, the string literal | |
257 | <code>r"\n"</code> consists of two characters: a backslash and a lowercase | |
258 | "<tt class="character">n</tt>". String quotes can be escaped with a backslash, but the | |
259 | backslash remains in the string; for example, <code>r"\""</code> is a valid string | |
260 | literal consisting of two characters: a backslash and a double quote; | |
261 | <code>r"\"</code> is not a valid string literal (even a raw string cannot | |
262 | end in an odd number of backslashes). Specifically, <em>a raw | |
263 | string cannot end in a single backslash</em> (since the backslash would | |
264 | escape the following quote character). Note also that a single | |
265 | backslash followed by a newline is interpreted as those two characters | |
266 | as part of the string, <em>not</em> as a line continuation. | |
267 | ||
268 | <P> | |
269 | When an "<tt class="character">r</tt>" or "<tt class="character">R</tt>" prefix is used in conjunction | |
270 | with a "<tt class="character">u</tt>" or "<tt class="character">U</tt>" prefix, then the <code>\uXXXX</code> | |
271 | escape sequence is processed while <em>all other backslashes are | |
272 | left in the string</em>. For example, the string literal | |
273 | <code>ur"\u0062\n"</code> consists of three Unicode characters: 'LATIN | |
274 | SMALL LETTER B', 'REVERSE SOLIDUS', and 'LATIN SMALL LETTER N'. | |
275 | Backslashes can be escaped with a preceding backslash; however, both | |
276 | remain in the string. As a result, <code>\uXXXX</code> escape sequences | |
277 | are only recognized when there are an odd number of backslashes. | |
278 | ||
279 | <P> | |
280 | ||
281 | <DIV CLASS="navigation"> | |
282 | <div class='online-navigation'> | |
283 | <p></p><hr /> | |
284 | <table align="center" width="100%" cellpadding="0" cellspacing="2"> | |
285 | <tr> | |
286 | <td class='online-navigation'><a rel="prev" title="2.4 Literals" | |
287 | href="literals.html"><img src='../icons/previous.png' | |
288 | border='0' height='32' alt='Previous Page' width='32' /></A></td> | |
289 | <td class='online-navigation'><a rel="parent" title="2.4 Literals" | |
290 | href="literals.html"><img src='../icons/up.png' | |
291 | border='0' height='32' alt='Up One Level' width='32' /></A></td> | |
292 | <td class='online-navigation'><a rel="next" title="2.4.2 String literal concatenation" | |
293 | href="string-catenation.html"><img src='../icons/next.png' | |
294 | border='0' height='32' alt='Next Page' width='32' /></A></td> | |
295 | <td align="center" width="100%">Python Reference Manual</td> | |
296 | <td class='online-navigation'><a rel="contents" title="Table of Contents" | |
297 | href="contents.html"><img src='../icons/contents.png' | |
298 | border='0' height='32' alt='Contents' width='32' /></A></td> | |
299 | <td class='online-navigation'><img src='../icons/blank.png' | |
300 | border='0' height='32' alt='' width='32' /></td> | |
301 | <td class='online-navigation'><a rel="index" title="Index" | |
302 | href="genindex.html"><img src='../icons/index.png' | |
303 | border='0' height='32' alt='Index' width='32' /></A></td> | |
304 | </tr></table> | |
305 | <div class='online-navigation'> | |
306 | <b class="navlabel">Previous:</b> | |
307 | <a class="sectref" rel="prev" href="literals.html">2.4 Literals</A> | |
308 | <b class="navlabel">Up:</b> | |
309 | <a class="sectref" rel="parent" href="literals.html">2.4 Literals</A> | |
310 | <b class="navlabel">Next:</b> | |
311 | <a class="sectref" rel="next" href="string-catenation.html">2.4.2 String literal concatenation</A> | |
312 | </div> | |
313 | </div> | |
314 | <hr /> | |
315 | <span class="release-info">Release 2.4.2, documentation updated on 28 September 2005.</span> | |
316 | </DIV> | |
317 | <!--End of Navigation Panel--> | |
318 | <ADDRESS> | |
319 | See <i><a href="about.html">About this document...</a></i> for information on suggesting changes. | |
320 | </ADDRESS> | |
321 | </BODY> | |
322 | </HTML> |