socketserver例子

#! /usr/bin/env python
# encoding: utf-8

"""
@Author: zengchunyun
@Date: 2017/5/12
"""

import socketserver


class MyTCPHandler(socketserver.BaseRequestHandler):
    """
    Request handler that echoes the client's data back as a small HTML page.

    It is instantiated once per connection to the server, and must
    override the handle() method to implement communication to the
    client.
    """

    def handle(self):
        """Read up to 1024 bytes and answer with them upper-cased in HTML.

        The received bytes are stored on ``self.data``.  The reply is a
        complete HTTP response whose Content-length is the size of the
        encoded body in bytes.
        """
        # Function-scope import keeps this example self-contained.
        import html

        # self.request is the TCP socket connected to the client
        self.data = self.request.recv(1024).strip()
        print("{} wrote:".format(self.client_address[0]))

        # Decode instead of str(bytes): str() on bytes yields the "b'...'"
        # repr, which would otherwise end up in the page shown to the user.
        received = self.data.decode('utf-8', errors='replace')
        shouted = self.data.upper().decode('utf-8', errors='replace')
        print(received)

        # The payload comes from the client, so escape it before embedding
        # it in markup.
        markup = """<div style="color:green">%s</div>""" % html.escape(shouted)
        body = markup.encode('utf-8')

        # Content-length must count bytes on the wire, not characters.
        head = (
            "HTTP/1.1 200 ok with me\r\n"
            "Content-type: text/html\r\n"
            "Connection:close\r\n"
            "Allow: GET\r\n"
            "Set-Cookie:haoren=yes\r\n"
            "Server: cloud\r\n"
            "Content-length: %s\r\n"
            "\r\n"
        ) % len(body)
        self.request.sendall(head.encode('utf-8') + body)

if __name__ == "__main__":
    HOST = "localhost"
    PORT = 8000

    # Create the server, binding to HOST:PORT (localhost on port 8000).
    # Used as a context manager so the server is closed when the block exits.
    with socketserver.TCPServer((HOST, PORT), MyTCPHandler) as server:
        # Handle requests until the program is interrupted (e.g. Ctrl-C).
        server.serve_forever()

这是一个简单的服务器端:只要客户端发送请求,都会经过MyTCPHandler的handle方法处理。该响应目前仅仅是把接收到的数据以大写方式回传给
客户端。通常,我们的Django服务器处理请求时,也是在这个方法内完成的。一个完整的web操作事务处理包括请求和响应的报文信息,
他们之间的关系是:

浏览器首先输入服务器的URL到地址栏,如:http://www.cnblogs.com/zengchunyun
. 其中,http://为第一部分,称为scheme,说明了访问该web站点所使用的协议类型为http协议
. www.cnblogs.com为服务器的地址
. /zengchunyun为web服务器上的一个资源

浏览器首先通过DNS服务器解析出域名对应的IP地址,然后浏览器经过三次握手与服务端建立连接,然后浏览器将请求报文发送给服务器

GET /zengchunyun HTTP/1.1  # 请求报文的第一行必须是告诉服务器你要做什么动作,这里是使用HTTP/1.1协议get服务器上的/zengchunyun这个资源
Host: www.cnblogs.com  # 从第二行开始到第一个空行为止的内容,都称为头部信息,空行之后剩下的都是主体信息

假如上面的例子就是那个服务端,那么收到请求后,会调用我们的handle方法,然后会给浏览器响应报文

HTTP/1.1 200 OK  # 响应报文的起始行必须是告诉浏览器处理的结果是什么,成功了还是失败了,什么原因
Content-type: text/html  # 从第二行开始到第一个空行为止的内容都是头部信息,这一行是告诉浏览器主体信息是text/html类型,浏览器根据该类型会对数据进行不同的处理
Content-length:1024  # 这里告诉浏览器,主体内容的长度是1024长度,这样浏览器就知道收多少数据,才算接收完整这个响应的报文
                # 这里需空一行,方便浏览器处理,到这为止,剩下的内容都是主体内容
<div>这里是一个1024长度的主体信息,.....省略多少字,你猜</div>    # 这部分内容将会在浏览器上可见,上面的信息只会在开发者模式,网络那,对应的请求链接里可见

这就是一个完整的HTTP事务,包括一条浏览器发往服务器的请求命令和一个服务器发回浏览器的响应结果。


我们接下来继续分析Django处理web请求时的过程


from __future__ import unicode_literals

import logging
import socket
import sys
from wsgiref import simple_server

from django.core.exceptions import ImproperlyConfigured
from django.core.wsgi import get_wsgi_application
from django.utils import six
from django.utils.module_loading import import_string
from django.utils.six.moves import socketserver

# Names exported by a star-import of this module.
__all__ = ('WSGIServer', 'WSGIRequestHandler')

# Module-level logger, registered under the 'django.server' name; the server
# and request-handler classes in this module log through it.
logger = logging.getLogger('django.server')


def is_broken_pipe_error():
    """Return True if the exception being handled is a broken-pipe error.

    Inspects ``sys.exc_info()``: the current exception must be a
    ``socket.error`` whose first argument equals 32.  Returns False (instead
    of raising) when no exception is being handled or when the exception
    carries no arguments.
    """
    exc_type, exc_value = sys.exc_info()[:2]
    # Outside an ``except`` block exc_type is None, and issubclass(None, ...)
    # would raise TypeError.
    if exc_type is None or not issubclass(exc_type, socket.error):
        return False
    # A socket.error raised without arguments has an empty ``args`` tuple.
    # 32 is the error number this module treats as "broken pipe".
    return bool(exc_value.args) and exc_value.args[0] == 32


class WSGIServer(simple_server.WSGIServer, object):
    """BaseHTTPServer that implements the Python WSGI protocol"""

    request_queue_size = 10

    def __init__(self, *args, **kwargs):
        """Consume the ``ipv6`` and ``allow_reuse_address`` keyword options.

        Both options are removed from ``kwargs`` before the remaining
        arguments are forwarded to the parent constructor.
        """
        use_ipv6 = kwargs.pop('ipv6', False)
        reuse_address = kwargs.pop('allow_reuse_address', True)
        if use_ipv6:
            self.address_family = socket.AF_INET6
        self.allow_reuse_address = reuse_address
        super(WSGIServer, self).__init__(*args, **kwargs)

    def handle_error(self, request, client_address):
        """Log broken pipes at info level; defer other errors to the parent."""
        if not is_broken_pipe_error():
            super(WSGIServer, self).handle_error(request, client_address)
            return
        logger.info("- Broken pipe from %s\n", client_address)


# The explicit ``object`` base is needed on Python 2.
class ServerHandler(simple_server.ServerHandler, object):
    """wsgiref ServerHandler that stays silent on broken-pipe errors."""

    def handle_error(self):
        """Skip broken-pipe errors; hand everything else to the parent."""
        if is_broken_pipe_error():
            return
        super(ServerHandler, self).handle_error()


class WSGIRequestHandler(simple_server.WSGIRequestHandler, object):
    """Request handler for the development server.

    Compared with the wsgiref parent it logs through the module ``logger``
    with a level chosen from the status code, drops headers whose names
    contain underscores before building the WSGI environ, and runs the
    application through this module's ``ServerHandler``.
    """

    def address_string(self):
        """Return the client's address as given in ``client_address[0]``."""
        # Short-circuit parent method to not call socket.getfqdn
        return self.client_address[0]

    def log_message(self, format, *args):
        """Log ``format % args`` at a level derived from the status code.

        When ``args[1]`` is a three-digit status string, 5xx is logged as
        error, 4xx as warning and anything else as info; the numeric code is
        added to the log record as ``status_code``.  Calls that do not carry
        a status string in ``args[1]`` are logged at info level.
        """
        extra = {
            'request': self.request,
            'server_time': self.log_date_time_string(),
        }

        # Only treat args[1] as a status when it is actually a string.
        # Indexing args[1][0] unconditionally raises IndexError/TypeError
        # for callers that pass fewer than two args, an empty string, or a
        # non-string second argument.
        status = ''
        if len(args) > 1 and isinstance(args[1], six.string_types):
            status = args[1]

        if status.startswith('4'):
            # 0x16 = Handshake, 0x03 = SSL 3.0 or TLS 1.x
            request_line = args[0]
            if (isinstance(request_line, six.string_types) and
                    request_line.startswith(str('\x16\x03'))):
                extra['status_code'] = 500
                logger.error(
                    "You're accessing the development server over HTTPS, but "
                    "it only supports HTTP.\n", extra=extra,
                )
                return

        if status.isdigit() and len(status) == 3:
            status_code = int(status)
            extra['status_code'] = status_code

            if status_code >= 500:
                level = logger.error
            elif status_code >= 400:
                level = logger.warning
            else:
                level = logger.info
        else:
            level = logger.info

        level(format, *args, extra=extra)

    def get_environ(self):
        """Return the parent's WSGI environ after removing ambiguous headers."""
        # Strip all headers with underscores in the name before constructing
        # the WSGI environ. This prevents header-spoofing based on ambiguity
        # between underscores and dashes both normalized to underscores in WSGI
        # env vars. Nginx and Apache 2.4+ both do this as well.
        # Iterate over a snapshot of the names, since entries are deleted
        # inside the loop.
        for name in list(self.headers.keys()):
            if '_' in name:
                del self.headers[name]

        return super(WSGIRequestHandler, self).get_environ()

    def handle(self):
        """Copy of WSGIRequestHandler, but with different ServerHandler"""

        # Read one byte more than the limit so an over-long request line can
        # be detected and answered with 414.
        self.raw_requestline = self.rfile.readline(65537)
        if len(self.raw_requestline) > 65536:
            self.requestline = ''
            self.request_version = ''
            self.command = ''
            self.send_error(414)
            return

        if not self.parse_request():  # An error code has been sent, just exit
            return

        handler = ServerHandler(
            self.rfile, self.wfile, self.get_stderr(), self.get_environ()
        )
        handler.request_handler = self      # backpointer for logging
        handler.run(self.server.get_app())


def run(addr, port, wsgi_handler, ipv6=False, threading=False, server_cls=WSGIServer):
    """Build an HTTP server for ``wsgi_handler`` and serve until interrupted.

    ``addr`` and ``port`` form the listening address and ``ipv6`` is passed
    to the server constructor.  With ``threading`` true, the server class is
    combined with ``socketserver.ThreadingMixIn`` and its ``daemon_threads``
    flag is switched on.
    """
    httpd_cls = server_cls
    if threading:
        # Create a threaded subclass of server_cls on the fly.
        httpd_cls = type(str('WSGIServer'), (socketserver.ThreadingMixIn, server_cls), {})

    httpd = httpd_cls((addr, port), WSGIRequestHandler, ipv6=ipv6)
    if threading:
        httpd.daemon_threads = True

    # Remember this step: it hands the server the WSGI callable (per this
    # walkthrough, the WSGIHandler instance obtained in inner_run).  set_app
    # comes from WSGIServer's parent class; the request handler later reads
    # the same object back through the parent's get_app().
    httpd.set_app(wsgi_handler)
    httpd.serve_forever()

WSGIServer与socketserver的关系

  • WSGIServer父类是simple_server.WSGIServer,而simple_server.WSGIServer的父类是http.server.HTTPServer,
    http.server.HTTPServer父类是socketserver.TCPServer
  • 按照上面分析的例子转换, WSGIServer就是我们上面那个例子的TCPServer,WSGIRequestHandler则是上面例子的MyTCPHandler了,既然关系明了,
    那我们就需要分析WSGIRequestHandler 的handle方法了。
  • WSGIRequestHandler的父类是wsgiref.simple_server.WSGIRequestHandler,而wsgiref.simple_server.WSGIRequestHandler的父类
    http.server.BaseHTTPRequestHandler,http.server.BaseHTTPRequestHandler又继承自socketserver.StreamRequestHandler,
    socketserver.StreamRequestHandler又继承自socketserver.BaseRequestHandler,看到这是不是知道怎么回事了呢?没错,WSGIRequestHandler
    和我们上面例子定义的MyTCPHandler都继承自同一个基类,既然基类都一样,那么我们接下来需要分析的就是它是怎么触发我们的handle方法了。
  • 既然是WSGIServer调用的WSGIRequestHandler方法,那么对应的,我们要找的就是TCPServer怎么调用的BaseRequestHandler
  • TCPServer类的执行过程如下
"""
    - __init__(server_address, RequestHandlerClass, bind_and_activate=True)  # 初始化TCPserver,并传入三个参数,第二个参数就是我们这里定义的WSGIRequestHandler
    self.server_address = server_address
    self.RequestHandlerClass = RequestHandlerClass
    self.server_bind()
    self.server_activate()
    self.serve_forever()
    self._handle_request_noblock()
    self.verify_request(request, client_address)
    self.process_request(request, client_address)
    self.finish_request(request, client_address)
    self.RequestHandlerClass(request, client_address, self)  # 这里才开始调用我们传进来的WSGIRequestHandler,这个self就是WSGIServer实例化的对象,
    self.shutdown_request(request)
"""
class BaseRequestHandler:
    """Base class for request handlers.

    Constructing an instance services the request: the constructor stores
    the request, client address and server, then calls setup(), handle()
    and finish().  finish() runs even when handle() raises.  Subclasses
    override the three hooks; the versions here do nothing.
    """

    def __init__(self, request, client_address, server):
        self.request, self.client_address, self.server = (
            request, client_address, server,
        )
        self.setup()
        try:
            self.handle()
        finally:
            # Always give the handler a chance to clean up.
            self.finish()

    def setup(self):
        """Hook called before handle(); no-op by default."""

    def handle(self):
        """Hook that services the request; no-op by default."""

    def finish(self):
        """Hook called after handle(); no-op by default."""
        
class StreamRequestHandler(BaseRequestHandler):
    """Request handler that exposes the connection as ``rfile`` / ``wfile``."""

    # Passed as the buffering argument when the read file is created.
    rbufsize = -1
    # 0 selects the _SocketWriter wrapper; any other value is passed as the
    # buffering argument of a 'wb' file created with makefile().
    wbufsize = 0

    # Socket timeout applied in setup() when it is not None.
    timeout = None

    # When true, setup() sets TCP_NODELAY on the connection.
    disable_nagle_algorithm = False

    def setup(self):
        """Configure the connection and create the rfile/wfile objects."""
        conn = self.connection = self.request
        if self.timeout is not None:
            conn.settimeout(self.timeout)
        if self.disable_nagle_algorithm:
            conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, True)

        self.rfile = conn.makefile('rb', self.rbufsize)
        if self.wbufsize == 0:
            self.wfile = _SocketWriter(conn)
        else:
            self.wfile = conn.makefile('wb', self.wbufsize)

    def finish(self):
        """Flush pending output, then close both file objects."""
        writer = self.wfile
        if not writer.closed:
            try:
                writer.flush()
            except socket.error:
                # A final socket error may have occurred here, such as
                # the local error ECONNABORTED.
                pass
        writer.close()
        self.rfile.close()

首先,BaseRequestHandler进行了一些初始化操作,接着,调用对象的handle方法,没错,这个handle已经被你重写了,现在总该明白,为什么只有定义
handle方法,才能处理咱们的请求了吧。

既然这个大概过程清楚了,那么我们再来看看WSGIRequestHandler定义的handle方法.

class WSGIRequestHandler(simple_server.WSGIRequestHandler, object):
    # Abridged, annotated listing of the request handler; the logging body
    # is left out here.
    def address_string(self):
        # Use the client's address as given, taken from client_address[0].
        return self.client_address[0]

    def log_message(self, format, *args):
        """Logging body omitted in this abridged listing."""

    def get_environ(self):
        # Remove every header whose name contains an underscore, then let
        # the parent class build the WSGI environ.
        for k, v in self.headers.items():
            if '_' in k:
                del self.headers[k]

        return super(WSGIRequestHandler, self).get_environ()

    def handle(self):
        """Copy of WSGIRequestHandler, but with different ServerHandler"""

        # First read the start line of the request message sent by the
        # browser, i.e. what the browser wants done.
        self.raw_requestline = self.rfile.readline(65537)
        # If that line is longer than 65536 bytes, do not process it and
        # answer with a 414 error instead.
        if len(self.raw_requestline) > 65536:
            self.requestline = ''
            self.request_version = ''
            self.command = ''
            self.send_error(414)
            return

        if not self.parse_request():  # An error code has been sent, just exit
            return

        handler = ServerHandler(
            self.rfile, self.wfile, self.get_stderr(), self.get_environ()
        )
        handler.request_handler = self      # backpointer for logging
        # This calls get_app() on the server.  Per this walkthrough the
        # method is defined on WSGIServer's parent class,
        # wsgiref.simple_server.WSGIServer, and it returns the object that
        # run(addr, port, wsgi_handler, ...) stored earlier with set_app():
        # the WSGIHandler instance.
        handler.run(self.server.get_app())

handle方法首先获取请求报文起始行数据,然后调用parse_request方法,将获取到的起始行数据以iso-8859-1解码为字符串,然后去除起始行末尾的\r\n回车换行符。
然后以空格分割该数据,判断分割后数据长度是否为3个长度还是2个长度,其余长度均非标准请求方式,不处理非标准请求方式,Django目前不支持http/2.0及以上的版本协议请求,
http/0.9只支持get请求,http/1.0默认使用短连接,即请求一次完成就关闭连接,如果需要发送更多数据,则需要再次建立新连接,如果需要使用长连接,
则需要在头部增加非标准的Connection字段。而http/1.1则默认是长连接(keep-alive)。

self.close_connection = False长连接时该值为False,短连接标记为True。

如果解析的起始行长度为2,且非GET方式,则不处理请求。

当前起始行数据没问题后,再解析头部信息

self.headers = http.client.parse_headers(self.rfile, _class=self.MessageClass)

该方法逐行读取头部:先检查每一行的长度是否大于65536,如果大于则抛出LineTooLong异常;再判断已读取的头部行数是否大于100个,大于100则抛出HTTPException异常
(这些异常由调用方处理后向客户端返回错误码);直到读到空行为止,则认为头部读取完了


def parse_headers(fp, _class=HTTPMessage):
    """Read an RFC2822 header block from *fp* and return it parsed.

    Lines are read from the file pointer as bytes until a blank line or end
    of file.  The email Parser wants strings rather than bytes, but wrapping
    the stream in a TextIOWrapper would buffer bytes that must later be read
    as bytes, so exactly the header bytes are collected here and decoded in
    one go for the Parser.

    Raises LineTooLong for a line longer than _MAXLINE and HTTPException
    when more than _MAXHEADERS lines are read.
    """
    end_markers = (b'\r\n', b'\n', b'')
    raw_lines = []
    while True:
        # Ask for one byte more than allowed so an over-long line shows up.
        raw = fp.readline(_MAXLINE + 1)
        if len(raw) > _MAXLINE:
            raise LineTooLong("header line")
        raw_lines.append(raw)
        if len(raw_lines) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
        if raw in end_markers:
            break

    header_text = b''.join(raw_lines).decode('iso-8859-1')
    parser = email.parser.Parser(_class=_class)
    return parser.parsestr(header_text)

self.handle调用self.parse_request --> http.client.parse_headers ---> self.headers,最终得到的是一个类似字典的消息对象(HTTPMessage),
可以像字典一样按头部名称取值
实例化ServerHandler时,传入了四个参数:self.rfile相当于socket的recv方法,只不过是将socket的数据传输方式改为读文件方式来操作了,用来
接收数据。self.wfile相当于socket的send方法,用来发送数据操作。
self.get_stderr()返回的是系统的标准错误输出句柄,
self.get_environ()是先将名称中含有下划线的头部字段删除,然后调用父类的get_environ的方法,将请求头部字段都加上HTTP_前缀,同时增加一些公共的字段。

再调用ServerHandler父类的run方法,把上一次分析的那个WSGIHandler实例化对象传入

    def run(self, application):
        # In this walkthrough, `application` is the WSGIHandler instance.
        """Invoke the application

        Sets up the environ, calls ``application(environ, start_response)``
        and stores the return value on ``self.result``, then finishes the
        response.  Any exception is passed to ``handle_error()``; if that
        also fails, the handler is closed and the exception re-raised.
        """
        # Note to self: don't move the close()!  Asynchronous servers shouldn't
        # call close() from finish_response(), so if you close() anywhere but
        # the double-error branch here, you'll break asynchronous servers by
        # prematurely closing.  Async servers must return from 'run()' without
        # closing if there might still be output to iterate over.
        try:
            self.setup_environ()
            self.result = application(self.environ, self.start_response)
            self.finish_response()
        except:
            try:
                self.handle_error()
            except:
                # If we get an error handling an error, just give up already!
                self.close()
                raise   # ...and let the actual server figure it out.

首先设置环境变量,将系统环境拷贝,然后将传入的环境变量扩展进该环境内同时增加一些WSGI通用环境

    def setup_environ(self):
        """Build ``self.environ`` for a single request.

        Starts from a copy of ``self.os_environ``, lets ``add_cgi_vars()``
        contribute its variables, then fills in the ``wsgi.*`` keys.
        ``wsgi.file_wrapper`` is set only when a wrapper is configured, and
        ``SERVER_SOFTWARE`` is defaulted only for an origin server that has
        a software string.
        """
        environ = self.os_environ.copy()
        self.environ = environ
        self.add_cgi_vars()

        environ['wsgi.input'] = self.get_stdin()
        environ['wsgi.errors'] = self.get_stderr()
        environ['wsgi.version'] = self.wsgi_version
        environ['wsgi.run_once'] = self.wsgi_run_once
        environ['wsgi.url_scheme'] = self.get_scheme()
        environ['wsgi.multithread'] = self.wsgi_multithread
        environ['wsgi.multiprocess'] = self.wsgi_multiprocess

        if self.wsgi_file_wrapper is not None:
            environ['wsgi.file_wrapper'] = self.wsgi_file_wrapper

        if self.origin_server:
            if self.server_software:
                # setdefault: keep a value that is already in the environ.
                environ.setdefault('SERVER_SOFTWARE', self.server_software)

设置完环境后,开始执行我们的handler对象了,WSGIHandler重写了__call__方法

class WSGIHandler(base.BaseHandler):
    """WSGI application object: an instance is called once per request."""

    request_class = WSGIRequest

    def __init__(self, *args, **kwargs):
        """Initialise the base handler, then load the middleware."""
        super(WSGIHandler, self).__init__(*args, **kwargs)
        self.load_middleware()

    def __call__(self, environ, start_response):
        """Handle one request given its WSGI ``environ``.

        Builds a request object from ``environ``, obtains the response via
        ``get_response()``, reports the status line and headers (plus one
        Set-Cookie header per cookie) through ``start_response`` and returns
        the response.  When the response has a ``file_to_stream`` and the
        environ provides ``wsgi.file_wrapper``, the wrapped stream is
        returned instead.
        """
        set_script_prefix(get_script_name(environ))
        signals.request_started.send(sender=self.__class__, environ=environ)

        response = self.get_response(self.request_class(environ))
        response._handler_class = self.__class__

        status_line = '%d %s' % (response.status_code, response.reason_phrase)
        header_pairs = [(str(name), str(value)) for name, value in response.items()]
        header_pairs.extend(
            (str('Set-Cookie'), str(cookie.output(header='')))
            for cookie in response.cookies.values()
        )
        start_response(force_str(status_line), header_pairs)

        has_stream = getattr(response, 'file_to_stream', None) is not None
        if has_stream and environ.get('wsgi.file_wrapper'):
            response = environ['wsgi.file_wrapper'](response.file_to_stream)
        return response