Source code for tortik.page

# -*- coding: utf-8 -*-

import os
import sys
import time
import traceback
import hashlib
import random
from itertools import count
from functools import partial
from copy import copy
import json

try:
    import urlparse  # py2
except ImportError:
    import urllib.parse as urlparse  # py3

import tornado.web
import tornado.httpclient
from tornado.options import options, define
from tornado.escape import to_unicode
import tornado.gen
from jinja2 import Environment, PackageLoader
import six

from tortik.util import decorate_all, make_list, real_ip, make_qs
from tortik.util.dumper import dump
from tortik.util.xml_etree import tostring
from tortik.logger import PageLogger
from tortik.util.async import AsyncGroup
from tortik.util.parse import parse_xml, parse_json


# Options registered with ``tornado.options``; they are read below through
# the ``options`` object.
#
# ``debug_password`` is compared with the last ``debug`` query argument in
# ``RequestHandler.initialize``: ``None`` never enables the full debug page,
# an empty string accepts any ``debug`` argument value.
define('debug_password', default=None, type=str, help='Password for debug')
# ``debug`` selects the "only errors" debug mode when no valid ``debug``
# argument was sent, and is passed as ``auto_reload`` to the jinja2
# environment used for the debug page.
define('debug', default=True, type=bool, help='Debug mode')
# Passed as ``max_clients`` to the shared ``AsyncHTTPClient``.
define('tortik_max_clients', default=200, type=int, help='Max clients (requests) for http_client')
# Multiplies both ``connect_timeout`` and ``request_timeout`` in
# ``RequestHandler.make_request``.
define('tortik_timeout_multiplier', default=1.0, type=float, help='Timeout multiplier (affects all requests)')

# Values stored in ``RequestHandler.debug_type``:
# * _DEBUG_ALL         - the debug page is rendered instead of the normal
#                        output, and also for errors;
# * _DEBUG_ONLY_ERRORS - the debug page is rendered only from ``write_error``;
# * _DEBUG_NONE        - no debug page; errors go to tornado's ``write_error``.
_DEBUG_ALL = "all"
_DEBUG_ONLY_ERRORS = "only_errors"
_DEBUG_NONE = "none"

# Process-wide counter; each generated request id consumes one value.
stats = count()


def _gen_requestid():
    """Return a new request id as a 32-character hexadecimal MD5 digest.

    The digest is computed over the current process id, the next value of
    the module-level ``stats`` counter and a random float, concatenated in
    that order.
    """
    seed = '{}{}{}'.format(os.getpid(), next(stats), random.random())
    digest = hashlib.md5(seed.encode('utf-8'))
    return digest.hexdigest()


# Metaclass applied to ``RequestHandler`` below via ``six.add_metaclass``.
# It is built by ``tortik.util.decorate_all`` from a list of
# ``(decorator, name)`` pairs; according to the ``RequestHandler`` docstring
# the effect is that handler methods are ``@tornado.web.asynchronous`` by
# default.
# NOTE(review): which methods get wrapped and how the string name is used is
# decided inside ``decorate_all`` - confirm there before adding entries.
_decorates = decorate_all([
    (tornado.web.asynchronous, 'asynchronous'),  # should be the last
])


@six.add_metaclass(_decorates)
class RequestHandler(tornado.web.RequestHandler):
    """Base handler for request handle

    Differs from ``tornado.web.RequestHandler`` that all method handlers are
    ``@tornado.web.asynchronous`` by default.

    Handler completion should be done with ``self.complete`` method instead
    of ``self.finish`` for applying postprocessors.
    """

    def initialize(self, *args, **kwargs):
        """Set up per-request state: debug mode, logger, http client, processors.

        The debug mode is ``_DEBUG_ALL`` when a ``debug`` query argument is
        present and ``options.debug_password`` is either an empty string or
        equal to the last ``debug`` argument value; otherwise it is
        ``_DEBUG_ONLY_ERRORS`` when ``options.debug`` is set and
        ``_DEBUG_NONE`` when it is not.
        """
        debug_pass = options.debug_password
        debug_args = self.get_arguments('debug')
        debug_arg_set = (len(debug_args) > 0 and debug_pass is not None and
                         (debug_pass == '' or debug_pass == debug_args[-1]))

        if debug_arg_set:
            self.debug_type = _DEBUG_ALL
        elif options.debug:
            self.debug_type = _DEBUG_ONLY_ERRORS
        else:
            self.debug_type = _DEBUG_NONE

        # The jinja2 environment for the debug page is created once and
        # cached on the class, so all handlers share it.
        if self.debug_type != _DEBUG_NONE and not hasattr(RequestHandler, 'debug_loader'):
            environment = Environment(autoescape=True,
                                      loader=PackageLoader('tortik', 'templates'),
                                      extensions=['jinja2.ext.autoescape'],
                                      auto_reload=options.debug)
            environment.filters['split'] = lambda x, y: x.split(y)
            RequestHandler.debug_loader = environment

        self.error_detected = False

        # Reuse the incoming request id when the client supplied one and
        # generate a fresh one only when the header is missing.
        request_id = self.request.headers.get('X-Request-Id')
        if request_id is None:
            request_id = _gen_requestid()
        self.request_id = request_id

        self.log = PageLogger(self.request, self.request_id, (self.debug_type != _DEBUG_NONE),
                              handler_name=(type(self).__module__ + '.' + type(self).__name__))

        self.responses = {}
        self.http_client = self.initialize_http_client()

        # Copy class-level processor lists so that add_preprocessor() and
        # add_postprocessor() only affect this handler instance.
        self.preprocessors = copy(self.preprocessors) if hasattr(self, 'preprocessors') else []
        self.postprocessors = copy(self.postprocessors) if hasattr(self, 'postprocessors') else []

        self.log.info('Using http client: %s' % repr(self.http_client))

        self._extra_data = {}

    @tornado.gen.coroutine
    def prepare(self):
        """Run all preprocessors and wait until every one of them called back.

        Each preprocessor is wrapped in ``tornado.gen.Task`` with the handler
        as its positional argument; the tasks are yielded together as a list.
        """
        if self.preprocessors:
            start_time = time.time()
            yield [tornado.gen.Task(preprocessor, self) for preprocessor in self.preprocessors]
            self.log.debug("Preprocessors completed in %.2fms", (time.time() - start_time)*1000.)

    @staticmethod
    def get_global_http_client():
        """Return the ``AsyncHTTPClient`` cached on the class, creating it on first use.

        The client is created with ``max_clients=options.tortik_max_clients``.
        """
        if not hasattr(RequestHandler, '_http_client'):
            RequestHandler._http_client = tornado.httpclient.AsyncHTTPClient(
                max_clients=options.tortik_max_clients)
        return RequestHandler._http_client

    def initialize_http_client(self):
        """Return the http client for this handler (the global one by default)."""
        return self.get_global_http_client()

    def add(self, name, data):
        """Store ``data`` under ``name`` in the handler's extra data."""
        self._extra_data[name] = data

    def get_data(self):
        """Return the dict of data collected with :meth:`add`."""
        return self._extra_data

    def compute_etag(self):
        """Return ``None`` instead of computing an ETag."""
        return None

    def on_finish(self):
        """Pass the final response status to the page logger."""
        self.log.complete_logging(self.get_status())

    def write_error(self, status_code, **kwargs):
        """Render the debug page for errors when a debug mode is active.

        With ``_DEBUG_NONE`` the call is delegated to
        ``tornado.web.RequestHandler.write_error``.

        :param int status_code: HTTP status code of the error
        :param kwargs: may contain ``exc_info``, a ``(type, value, traceback)``
            triple that is logged
        :return: ``True`` when the debug page was rendered, otherwise ``None``
        """
        if self.debug_type in [_DEBUG_ALL, _DEBUG_ONLY_ERRORS]:
            if 'exc_info' in kwargs:
                exc_type, exc_value, exc_tb = kwargs['exc_info']
                self.log.error("Uncaught exception %s\n%r", self._request_summary(), self.request,
                               exc_info=(exc_type, exc_value, exc_tb))

            if self._finished:
                return

            self.set_status(status_code)
            self.log.complete_logging(status_code)
            self.finish_with_debug()

            return True
        else:
            super(RequestHandler, self).write_error(status_code, **kwargs)

    def finish_with_debug(self):
        """Finish the request with the rendered ``debug.html`` template.

        The response is sent as ``text/html``; in ``_DEBUG_ALL`` mode the
        status is forced to 200.
        """
        self.set_header('Content-Type', 'text/html; charset=utf-8')
        if self.debug_type == _DEBUG_ALL:
            self.set_status(200)

        self.finish(RequestHandler.debug_loader.get_template('debug.html').render(
            data=self.log.get_debug_info(),
            output_data=self.get_data(),
            size=sys.getsizeof,
            get_params=lambda x: urlparse.parse_qs(x, keep_blank_values=True),
            pretty_json=lambda x: json.dumps(x, sort_keys=True, indent=4, ensure_ascii=False),
            pretty_xml=lambda x: to_unicode(tostring(x.getroot() if hasattr(x, 'getroot') else x,
                                                     pretty_print=True, encoding='UTF-8')),
            to_unicode=to_unicode,
            dumper=dump,
            format_exception=lambda x: "".join(traceback.format_exception(*x))
        ))

    def complete(self, output_data=None):
        """Run the postprocessors in order and finish the request.

        Every postprocessor is called as ``postprocessor(handler, data,
        callback)`` and passes control on by calling
        ``callback(handler, data)``. After the last one (or immediately when
        there are none) the request is finished with the resulting data, or
        with the debug page in ``_DEBUG_ALL`` mode.

        :param output_data: data handed to the first postprocessor / ``finish``
        """
        def finished_cb(handler, data):
            handler.log.complete_logging(handler.get_status())
            if handler.debug_type == _DEBUG_ALL:
                self.finish_with_debug()
                return

            self.finish(data)

        if self.postprocessors:
            last = len(self.postprocessors) - 1

            def add_cb(index):
                # Callback for postprocessor number ``index``: either the
                # final step or a hop to postprocessor ``index + 1``.
                if index == last:
                    return finished_cb
                else:
                    def _cb(handler, data):
                        self.postprocessors[index + 1](handler, data, add_cb(index + 1))
                    return _cb

            self.postprocessors[0](self, output_data, add_cb(0))
        else:
            finished_cb(self, output_data)

    def fetch_requests(self, requests, callback=None, stage='page'):
        """Fetch one or several requests and call ``callback`` when the group is done.

        Responses are stored in ``self.responses`` by request name. A
        response whose ``Content-Type`` contains ``xml`` or equals
        ``application/json`` is parsed; successfully parsed data is stored in
        ``response.data`` and added to the handler data under the same name.

        :param requests: a single item or a list of items; each item is a
            request built by :meth:`make_request` or a ``(name, url)`` /
            ``(name, url, data)`` tuple/list that is turned into a GET request
        :param callback: called without arguments once the stage completes
        :param string stage: stage name used for logging
        """
        self.log.stage_started(stage)
        requests = make_list(requests)

        def _finish_cb():
            self.log.stage_complete(stage)
            if callback is not None:
                callback()

        ag = AsyncGroup(_finish_cb, self.log.debug, name=stage)

        def _on_fetch(response, name):
            content_type = response.headers.get('Content-Type', '').split(';')[0]
            response.data = None
            try:
                if 'xml' in content_type:
                    response.data = parse_xml(response)
                elif content_type == 'application/json':
                    response.data = parse_json(response)
            except Exception:
                # Parsing is best effort, but interpreter-level exceptions
                # (KeyboardInterrupt, SystemExit) must not be swallowed here.
                self.log.warning('Could not parse response with Content-Type header')

            if response.data is not None:
                self.add(name, response.data)

            self.responses[name] = response
            self.log.request_complete(response)

        for req in requests:
            if isinstance(req, (tuple, list)):
                assert len(req) in (2, 3)
                req = self.make_request(name=req[0], method='GET', full_url=req[1],
                                        data=req[2] if len(req) == 3 else '')
            self.log.request_started(req)
            self.http_client.fetch(req, ag.add(partial(_on_fetch, name=req.name)))

    def make_request(self, name, method='GET', full_url=None, url_prefix=None, path='', data='', headers=None,
                     connect_timeout=1, request_timeout=2, follow_redirects=True, **kwargs):
        """
        Helper for easier construction of a ``tornado.httpclient.HTTPRequest`` object.

        Request url could be constructed with two ways:

        * ``full_url`` argument
        * ``url_prefix`` as domain part and ``path`` as path part

        For ``GET``, ``HEAD`` and ``DELETE`` requests ``data`` is merged into
        the url query string; for other methods it becomes the request body.
        The ``X-Forwarded-For`` and ``X-Request-Id`` headers are always set,
        and ``Content-Type`` defaults to
        ``application/x-www-form-urlencoded``. The ``headers`` argument
        itself is not modified.

        Both timeouts are multiplied by ``options.tortik_timeout_multiplier``.

        :param string name: Name of the request (for later accessing response through ``self.responses.get(name)``)
        :param string method: HTTP method, e.g. "GET" or "POST"
        :param string full_url: Full url for the requesting server (ex. ``http://example.com``)
        :param string url_prefix: Request url domain part
        :param string path: Request url path part
        :param data: Query to be passed to the request (could be a dict and would be translated to a query string)
        :type data: `string` or `dict`
        :param headers: Additional HTTP headers to pass on the request
        :type headers: ``tornado.httputil.HTTPHeaders`` or `dict`
        :param float connect_timeout: Timeout for initial connection in seconds
        :param float request_timeout: Timeout for entire request in seconds
        :param bool follow_redirects: Should redirects be followed automatically or return the 3xx response?
        :param kwargs: any other ``tornado.httpclient.HTTPRequest`` arguments
        :return: ``tornado.httpclient.HTTPRequest``
        :raises TypeError: when neither or both of ``full_url`` and
            ``url_prefix`` are given, or ``full_url`` is combined with ``path``
        """
        if (full_url is None) == (url_prefix is None):
            raise TypeError('make_request required path/url_prefix arguments pair or full_url argument')
        if full_url is not None and path != '':
            raise TypeError("Can't combine full_url and path arguments")

        scheme = 'http'
        query = ''
        body = None

        if full_url is not None:
            parsed_full_url = urlparse.urlsplit(full_url)
            scheme = parsed_full_url.scheme
            url_prefix = parsed_full_url.netloc
            path = parsed_full_url.path
            query = parsed_full_url.query

        if method in ['GET', 'HEAD', 'DELETE']:
            parsed_query = urlparse.parse_qs(query)
            parsed_query.update(data if isinstance(data, dict) else urlparse.parse_qs(data))
            query = make_qs(parsed_query)
        else:
            body = make_qs(data) if isinstance(data, dict) else data

        # Work on a copy so that the mapping passed in by the caller is not
        # modified (it may be reused for other requests).
        headers = {} if headers is None else headers.copy()
        headers.update({
            'X-Forwarded-For': real_ip(self.request),
            'X-Request-Id': self.request_id,
            'Content-Type': headers.get('Content-Type', 'application/x-www-form-urlencoded')
        })

        req = tornado.httpclient.HTTPRequest(
            url=urlparse.urlunsplit((scheme, url_prefix, path, query, '')),
            method=method,
            headers=headers,
            body=body,
            connect_timeout=connect_timeout*options.tortik_timeout_multiplier,
            request_timeout=request_timeout*options.tortik_timeout_multiplier,
            follow_redirects=follow_redirects,
            **kwargs
        )
        # The name is used by fetch_requests() as the key in self.responses.
        req.name = name
        return req

    def add_preprocessor(self, preprocessor):
        """Append ``preprocessor`` to this handler's preprocessor list."""
        self.preprocessors.append(preprocessor)

    def add_postprocessor(self, postprocessor):
        """Append ``postprocessor`` to this handler's postprocessor list."""
        self.postprocessors.append(postprocessor)