[tornado] Async handlers, code refactor

Let's start with a synchronous implementation:

import json

from tornado import options
from tornado.ioloop import IOLoop
from tornado.web import RequestHandler, Application
from tornado.httpclient import HTTPClient, HTTPError


class FBHandler(RequestHandler):
    """Check that a Facebook access token belongs to the given user id.

    Reads the ``fb_id`` and ``fb_token`` request arguments and answers with
    the plain-text body ``Ok`` or ``Fail``.
    """

    FB_GRAPH_ME_URL = 'https://graph.facebook.com/me?fields=id&access_token={fb_token}'

    def get(self):
        """Ask the Graph API who owns ``fb_token`` and compare it with ``fb_id``.

        The request is made with the blocking ``HTTPClient``, so this method
        does not return until the Graph API call has completed.
        """
        fb_id = self.get_argument('fb_id')
        fb_token = self.get_argument('fb_token')
        url = self.FB_GRAPH_ME_URL.format(fb_token=fb_token)
        http_client = HTTPClient()
        try:
            response = http_client.fetch(request=url, method='GET')
            if json.loads(response.body).get('id') == fb_id:
                self.write('Ok')
                return
        except HTTPError:
            # A failed Graph API call is reported as a failed validation.
            pass
        finally:
            # Release the resources held by the blocking client on every path.
            http_client.close()
        self.write('Fail')


if __name__ == '__main__':
    # Let tornado read its options from the command line first.
    options.parse_command_line()
    # Single route: the root URL is served by FBHandler; debug mode is on.
    application = Application(handlers=[(r'/', FBHandler)], debug=True)
    application.listen(8000)
    # Blocks here, serving requests until the loop is stopped.
    IOLoop.instance().start()

Make it async:

import json

from tornado import options
from tornado.ioloop import IOLoop
from tornado.web import RequestHandler, Application, asynchronous
from tornado.httpclient import AsyncHTTPClient, HTTPError


class FBHandler(RequestHandler):
    """Callback-style asynchronous check that ``fb_token`` belongs to ``fb_id``.

    Reads the ``fb_id`` and ``fb_token`` request arguments and answers with
    the plain-text body ``Ok`` or ``Fail``.
    """

    FB_GRAPH_ME_URL = 'https://graph.facebook.com/me?fields=id&access_token={fb_token}'

    @asynchronous
    def get(self):
        """Start the Graph API request; the reply is produced by ``on_fetch``."""
        # Kept on the handler because the comparison happens later, in the callback.
        self.fb_id = self.get_argument('fb_id')
        fb_token = self.get_argument('fb_token')
        url = self.FB_GRAPH_ME_URL.format(fb_token=fb_token)
        http_client = AsyncHTTPClient()
        # With the callback interface a failed request does not raise HTTPError
        # here; the failure is handed to the callback as ``response.error``.
        http_client.fetch(request=url, method='GET', callback=self.on_fetch)

    def on_fetch(self, response):
        """Write ``Ok`` or ``Fail`` for the finished fetch and end the request."""
        # Only parse the body when the fetch itself succeeded.
        fetched = response.error is None
        if fetched and json.loads(response.body).get('id') == self.fb_id:
            self.write('Ok')
        else:
            self.write('Fail')
        # ``@asynchronous`` leaves the request open, so it must be finished explicitly.
        self.finish()


...

Add gen sugar:

import json

from tornado import options, gen
from tornado.ioloop import IOLoop
from tornado.web import RequestHandler, Application
from tornado.httpclient import AsyncHTTPClient, HTTPError


class FBHandler(RequestHandler):
    """Coroutine-based check that ``fb_token`` belongs to ``fb_id``.

    Reads the ``fb_id`` and ``fb_token`` request arguments and answers with
    the plain-text body ``Ok`` or ``Fail``.
    """

    FB_GRAPH_ME_URL = 'https://graph.facebook.com/me?fields=id&access_token={fb_token}'

    @gen.coroutine
    def get(self):
        """Fetch the token owner from the Graph API and compare it with ``fb_id``."""
        fb_id = self.get_argument('fb_id')
        fb_token = self.get_argument('fb_token')
        url = self.FB_GRAPH_ME_URL.format(fb_token=fb_token)
        http_client = AsyncHTTPClient()
        try:
            response = yield http_client.fetch(request=url, method='GET')
        except HTTPError:
            # A failed Graph API call is reported as a failed validation.
            self.write('Fail')
            return
        # Exactly one verdict is written per request.
        if json.loads(response.body).get('id') == fb_id:
            self.write('Ok')
        else:
            self.write('Fail')


...

Keep handlers as simple as possible:

import json

from tornado import options, gen
from tornado.ioloop import IOLoop
from tornado.web import RequestHandler, Application
from tornado.httpclient import AsyncHTTPClient, HTTPError


class FBHandler(RequestHandler):
    """Thin handler: the Facebook check itself lives in ``validate``.

    Reads the ``fb_id`` and ``fb_token`` request arguments and answers with
    the plain-text body ``Ok`` or ``Fail``.
    """

    @gen.coroutine
    def get(self):
        """Answer ``Ok`` when validation succeeds and ``Fail`` on any error."""
        try:
            fb_id = self.get_argument('fb_id')
            fb_token = self.get_argument('fb_token')
            yield self.validate(fb_id=fb_id, fb_token=fb_token)
            self.write('Ok')
        except Exception:
            # Deliberately broad: a missing argument, a failed fetch or a
            # mismatching id all produce the same answer.
            self.write('Fail')

    @gen.coroutine
    def validate(self, fb_id, fb_token):
        """Confirm through the Graph API that ``fb_token`` belongs to ``fb_id``.

        Returns ``True`` (via ``gen.Return``) when the ids match.

        Raises:
            ValueError: the id reported for the token differs from ``fb_id``.
            Errors raised by the fetch itself are not caught here.
        """
        FB_GRAPH_ME_URL = 'https://graph.facebook.com/me?fields=id&access_token={fb_token}'

        url = FB_GRAPH_ME_URL.format(fb_token=fb_token)
        http_client = AsyncHTTPClient()
        response = yield http_client.fetch(request=url, method='GET')
        # An explicit raise instead of ``assert``: asserts are stripped under
        # ``python -O``, which would make every token validate.
        if json.loads(response.body).get('id') != fb_id:
            raise ValueError('Facebook id does not match the access token')
        raise gen.Return(True)


...

[Django] Save model instance into json dict

I need to save a dict that contains numbers, text and Django model instances, and I don't know in advance which model instances may be present in the dict.
# Example payload: plain values plus one object taken from the request
# (``request.user``), which is what json cannot serialize.
data = {
    'count': 10,
    'title': 'Example title',
    'user': request.user,
}

or

# Second example payload: here the non-plain value is the current Site object.
data = {
    'content': 'Example content',
    'site': Site.objects.get_current(),
}

If I try to dump either of the dicts above, I get a TypeError:
>>> import json
>>> json.dumps(data)
TypeError: <User: exampleuser> is not JSON serializable

Possible solution:
from django.contrib.contenttypes.models import ContentType
from django.db.models import Model


def encode(data):
    """Return a copy of ``data`` with model instances made JSON-friendly.

    Every top-level value that is a Django ``Model`` instance is replaced by
    a dict ``{'app_label': ..., 'model': ..., 'id': ...}`` built from the
    model's ``ContentType``. All other values are copied unchanged, and
    ``data`` itself is not modified.
    """
    new_data = dict(data)
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is Python 2 only.
    for node, value in data.items():
        if isinstance(value, Model):
            node_type = ContentType.objects.get_for_model(value.__class__)
            new_data[node] = {
                'app_label': node_type.app_label,
                'model': node_type.model,
                # ``pk`` also works for models whose primary key is not named ``id``.
                'id': value.pk,
            }
    return new_data

def decode(data):
    """Return a copy of ``data`` with encoded references turned back into objects.

    A top-level value is treated as a reference when it is a dict that has
    all of the keys ``'app_label'``, ``'model'`` and ``'id'``; it is replaced
    by the object loaded through the matching ``ContentType``. Everything
    else is copied unchanged, and ``data`` itself is not modified.
    """
    new_data = dict(data)
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is Python 2 only.
    for node, value in data.items():
        if not isinstance(value, dict):
            continue
        if 'app_label' in value and 'model' in value and 'id' in value:
            node_type = ContentType.objects.get(
                app_label=value['app_label'],
                model=value['model'])
            # Look up by ``pk`` so models with a custom primary key also work.
            new_data[node] = node_type.get_object_for_this_type(pk=value['id'])
    return new_data

>>> data
{'count': 10, 'user': <User: exampleuser>, 'title': 'Example title'}
>>> json.dumps(data)
TypeError: <User: exampleuser> is not JSON serializable
>>> encoded = json.dumps(encode(data))
>>> encoded
'{"count": 10, "user": {"model": "user", "id": 1, "app_label": "auth"}, "title": "Example title"}'
>>> decode(json.loads(encoded))
{u'count': 10, u'user': <User: exampleuser>, u'title': u'Example title'}

Lightweight xml parser

DOM parsers can consume gigabytes of memory when parsing large amounts of XML data; a SAX parser is far more memory-efficient. This is an example of how to use SAX to transform the necessary data from XML into a Python data object.

# Parser test cases. Each case holds the 'xml' input, the dotted 'keys' to
# extract, the dotted paths listed under 'lists' (presumably the nodes to be
# collected as lists - see the last case) and the expected 'data' result.
TEST_DATASET = [
    {
        # no keys specified
        'xml': """
<xml>
  <data>test</data>
</xml>""",
        'keys': [],
        'lists': [],
        'data': {},
    },
    {
        # nonexistent key
        'xml': """
<xml>
  <data>test</data>
</xml>""",
        'keys': ['xml.data', 'xml.nonexistent'],
        'lists': [],
        'data': {'xml': {'data': 'test'}},
    },
    {
        # attributes
        'xml': """
<xml>
  <data id="100"/>
</xml>""",
        'keys': ['xml.data.id'],
        'lists': [],
        'data': {'xml': {'data': {'id': '100'}}},
    },
    {
        # lists
        'xml': """
<xml>
  <data>
    <item id="1">
        <color>purple</color>
    </item>
    <item id="2">
        <color>cyan</color>
    </item>
  </data>
</xml>""",
        'keys': ['xml.data.item.id', 'xml.data.item.color'],
        'lists': ['xml.data.item'],
        'data': {
            'xml': {
                'data': {
                    'item': [
                        {'color': 'purple', 'id': '1'},
                        {'color': 'cyan', 'id': '2'},
                    ],
                },
            },
        },
    },
]

Gist: https://gist.github.com/nanvel/f944eae1f02d47b6d6a4

[python-social-auth] Disallow user creation

If a user with the returned email exists, associate the social account and authenticate. If not, redirect to the registration page.

1. Edit the authentication pipeline
# Authentication pipeline: dotted paths of the steps, listed in order.
SOCIAL_AUTH_PIPELINE = (
    'social.pipeline.social_auth.social_details',
    'social.pipeline.social_auth.social_uid',
    'social.pipeline.social_auth.auth_allowed',
    'social.pipeline.social_auth.social_user',
    'social.pipeline.user.get_username',
    'social.pipeline.social_auth.associate_by_email',
    # Project step used instead of social.pipeline.user.create_user, so
    # unknown users are sent to registration rather than created.
    'myproject.apps.accounts.utils.custom_create_user',
)

2. The email has to be included in the data returned by the social account
# Request the email so that accounts can be matched by it.
SOCIAL_AUTH_SCOPE = ['email']

3. Redirects configuration
# Both redirects point at the site root.
# NOTE(review): presumably used after a login and after a new association
# respectively - confirm against the python-social-auth settings docs.
SOCIAL_AUTH_LOGIN_REDIRECT_URL = '/'
SOCIAL_AUTH_NEW_ASSOCIATION_REDIRECT_URL = '/'

4. custom_create_user
from django.contrib import messages
from django.shortcuts import redirect


def custom_create_user(strategy, details, response, uid, user=None, *args, **kwargs):
    """
    Pipeline step used in place of social.pipeline.user.create_user.

    Never creates a user. When ``user`` is already set it returns
    ``{'is_new': False}``; otherwise it adds a warning message for the
    request found in ``kwargs['request']`` and redirects to the
    registration form.
    """
    if not user:
        messages.warning(kwargs['request'], 'You have to register first!')
        # NOTE(review): 'registation' looks like a typo of 'registration' -
        # confirm against the project's URL names before changing it.
        return redirect('registation')
    return {'is_new': False}

[Django] Where to store host specific variables

1. settings/local.py

myproject/
- settings/
-- __init__.py
-- default.py
-- local.py

__init__.py:
from .default import *


try:
    from .local import *
except ImportError:
    # Host-specific overrides are optional: report the missing module and
    # carry on with the default settings.
    import logging
    # The stdlib function is ``getLogger``; ``logging.get_logger`` does not exist.
    logger = logging.getLogger(__name__)
    logger.error('settings/local.py was not found!')

local.py shouldn't be tracked by git.
A useful practice is to add a few local.py templates for different hosts:
settings/local.py.development
settings/local.py.staging
settings/local.py.production

Keep in mind: don't store sensitive information in files tracked by git.

2. ~/.bashrc

When bash starts, it reads the .bashrc file and executes the commands it contains.
First it reads the system-wide file (e.g. /etc/bash.bashrc or /etc/bashrc, depending on the distribution), then ~/.bashrc.

My .bashrc on dev laptop looks like:
# Environment settings for virtualenv and pip
export VIRTUALENV_DISTRIBUTE=true
export PIP_REQUIRE_VIRTUALENV=true
export PIP_DOWNLOAD_CACHE=$HOME/.pip/cache
# Example variable that the Django settings below read from the environment
export SOME_VAR=someval
To make SOME_VAR available in django.conf.settings:
# settings.py
from sys import environ

...

SOME_VAR = getattr(environ, 'SOME_VAR', <default value>)

See also:
Django settings http://nanvel.name/weblog/django-settings/