Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | """HTTP server base class. |
2 | ||
3 | Note: the class in this module doesn't implement any HTTP request; see | |
4 | SimpleHTTPServer for simple implementations of GET, HEAD and POST | |
5 | (including CGI scripts). It does, however, optionally implement HTTP/1.1 | |
6 | persistent connections, as of version 0.3. | |
7 | ||
8 | Contents: | |
9 | ||
10 | - BaseHTTPRequestHandler: HTTP request handler base class | |
11 | - test: test function | |
12 | ||
13 | XXX To do: | |
14 | ||
15 | - log requests even later (to capture byte count) | |
16 | - log user-agent header and other interesting goodies | |
17 | - send error log to separate file | |
18 | """ | |
19 | ||
20 | ||
21 | # See also: | |
22 | # | |
23 | # HTTP Working Group T. Berners-Lee | |
24 | # INTERNET-DRAFT R. T. Fielding | |
25 | # <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen | |
26 | # Expires September 8, 1995 March 8, 1995 | |
27 | # | |
28 | # URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt | |
29 | # | |
30 | # and | |
31 | # | |
32 | # Network Working Group R. Fielding | |
33 | # Request for Comments: 2616 et al | |
34 | # Obsoletes: 2068 June 1999 | |
35 | # Category: Standards Track | |
36 | # | |
37 | # URL: http://www.faqs.org/rfcs/rfc2616.html | |
38 | ||
39 | # Log files | |
40 | # --------- | |
41 | # | |
42 | # Here's a quote from the NCSA httpd docs about log file format. | |
43 | # | |
44 | # | The logfile format is as follows. Each line consists of: | |
45 | # | | |
46 | # | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb | |
47 | # | | |
48 | # | host: Either the DNS name or the IP number of the remote client | |
49 | # | rfc931: Any information returned by identd for this person, | |
50 | # | - otherwise. | |
51 | # | authuser: If user sent a userid for authentication, the user name, | |
52 | # | - otherwise. | |
53 | # | DD: Day | |
54 | # | Mon: Month (calendar name) | |
55 | # | YYYY: Year | |
56 | # | hh: hour (24-hour format, the machine's timezone) | |
57 | # | mm: minutes | |
58 | # | ss: seconds | |
59 | # | request: The first line of the HTTP request as sent by the client. | |
60 | # | ddd: the status code returned by the server, - if not available. | |
61 | # | bbbb: the total number of bytes sent, | |
62 | # | *not including the HTTP/1.0 header*, - if not available | |
63 | # | | |
64 | # | You can determine the name of the file accessed through request. | |
65 | # | |
66 | # (Actually, the latter is only true if you know the server configuration | |
67 | # at the time the request was made!) | |
68 | ||
69 | __version__ = "0.3" | |
70 | ||
71 | __all__ = ["HTTPServer", "BaseHTTPRequestHandler"] | |
72 | ||
73 | import sys | |
74 | import time | |
75 | import socket # For gethostbyaddr() | |
76 | import mimetools | |
77 | import SocketServer | |
78 | ||
# Default error message.
#
# HTML template used by BaseHTTPRequestHandler.send_error().  It is filled
# in with the %-operator and a mapping that supplies the keys 'code'
# (integer status code), 'message' (already HTML-quoted text) and
# 'explain' (long explanation of the status code).
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""
91 | ||
92 | def _quote_html(html): | |
93 | return html.replace("&", "&").replace("<", "<").replace(">", ">") | |
94 | ||
class HTTPServer(SocketServer.TCPServer):
    """TCP server that records its own host name and port after binding.

    After server_bind() has run, the instance exposes:

    - server_name: result of socket.getfqdn() for the bound host;
    - server_port: the bound port number.
    """

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Override server_bind to store the server name."""
        SocketServer.TCPServer.server_bind(self)
        # getsockname() may return more than two items for some address
        # families, so only the first two (host, port) are unpacked.
        host, port = self.socket.getsockname()[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port
105 | ||
106 | ||
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # Upper bound, in bytes, on the request line read from the client.
    # Longer request lines are rejected with a 414 response instead of
    # being buffered without limit.
    _MAX_REQUESTLINE = 65536

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, an
        error is sent back.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = "HTTP/0.9"  # Default
        self.close_connection = 1
        requestline = self.raw_requestline
        # Accept both CRLF and bare LF as the line terminator.
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            [command, path, version] = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            # Persistent connections need both the client and this
            # handler to speak at least HTTP/1.1.
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                                "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # No version given: HTTP/0.9, for which only GET is accepted.
            [command, path] = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            # Blank request line: fail without sending anything back.
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive
        self.headers = self.MessageClass(self.rfile, 0)

        conntype = self.headers.get('Connection', "").lower()
        if conntype == 'close':
            self.close_connection = 1
        elif conntype == 'keep-alive' and self.protocol_version >= "HTTP/1.1":
            self.close_connection = 0
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        # Read one byte more than the limit so that an over-long line can
        # be told apart from one that is exactly at the limit.
        self.raw_requestline = self.rfile.readline(self._MAX_REQUESTLINE + 1)
        if len(self.raw_requestline) > self._MAX_REQUESTLINE:
            # These attributes are read by send_error() and the logging
            # helpers, and parse_request() has not run to set them.
            self.requestline = ''
            self.request_version = ''
            self.command = ''
            self.send_error(414)
            return
        if not self.raw_requestline:
            # End of input: nothing more to serve on this connection.
            self.close_connection = 1
            return
        if not self.parse_request():  # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        if not hasattr(self, mname):
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method = getattr(self, mname)
        method()

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            short, explain = self.responses[code]
        except KeyError:
            short, explain = '???', '???'
        if message is None:
            message = short
        self.log_error("code %d, message %s", code, message)
        # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
        content = (self.error_message_format %
                   {'code': code, 'message': _quote_html(message), 'explain': explain})
        self.send_response(code, message)
        self.send_header("Content-Type", "text/html")
        self.send_header('Connection', 'close')
        self.end_headers()
        # No body for HEAD requests, 1xx codes, 204 and 304.
        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
            self.wfile.write(content)

    error_message_format = DEFAULT_ERROR_MESSAGE

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        # HTTP/0.9 replies consist of the data only: no status line.
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %d %s\r\n" %
                             (self.protocol_version, code, message))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

        # Keep the connection state in step with what the client is told.
        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(*args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format % args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self):
        """Return the current date and time formatted for a message header."""
        now = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
            self.weekdayname[wd],
            day, self.monthname[month], year,
            hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
            day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the weekday field of a time tuple (Monday is 0).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month field of a time tuple (1..12); slot 0 is unused.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the host part of client_address to
        socket.getfqdn() and returns the result.

        """

        host, port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No response', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this server.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Time-out', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service temporarily overloaded',
              'The server cannot process the request due to a high load'),
        504: ('Gateway timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version not supported', 'Cannot fulfill request.'),
        }
551 | ||
552 | ||
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the first command line
    argument).

    HandlerClass is the request handler class to serve with; its
    protocol_version attribute is set to *protocol* before the server
    is created.  ServerClass is called with the server address and the
    handler class.  The call does not return normally: it ends in
    serve_forever().

    """

    if sys.argv[1:]:
        port = int(sys.argv[1])
    else:
        port = 8000
    # An empty host string is passed through to the server class as-is.
    server_address = ('', port)

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(server_address, HandlerClass)

    sa = httpd.socket.getsockname()
    # Single-argument print(...) produces the same line as the old print
    # statement and also parses under Python 3.
    print("Serving HTTP on %s port %s ..." % (sa[0], sa[1]))
    httpd.serve_forever()


if __name__ == '__main__':
    test()