apiserver

#!/usr/bin/env python3
#encoding: utf-8

# Bootstrap step that runs before any third-party import below.
# NOTE(review): presumably re-launches / activates the project's virtualenv so
# the imports that follow resolve — confirm against the autovenv module.
import autovenv
autovenv.run()

import string
from functools import cmp_to_key
from urllib.parse import urlparse, unquote

import couchdb
import requests
from couchdb.http import ResourceNotFound
from flask import Flask, Response, request, url_for
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
from flask_restful import Resource, Api
from icu import Locale, RuleBasedCollator
from werkzeug.exceptions import NotFound, Forbidden

from utils import *
from config import *

# Sent as the `limit` of every list-style `db.find` query issued by the
# endpoints in this file (single-document lookups use a limit of 1 instead)
QUERY_LIMIT = 9999

# App Setup
# ////////////////////////////////////////////////////////////////////////////

# The Flask application, the Flask-RESTful wrapper the resource classes are
# registered on, and a Flask-Limiter instance keyed by the client's remote address
app = Flask(__name__)
api = Api(app)
limiter = Limiter(app=app, key_func=get_remote_address)


# CouchDB Setup
# ////////////////////////////////////////////////////////////////////////////

# Server handle for DB_SERVER and the one database (DB_NAME) that every
# endpoint in this file reads from or writes to through the module-level `db`
couch = couchdb.Server(DB_SERVER)
db = couch[DB_NAME]


class CouchDocumentAccessor:
    '''
    Read-only, always-fresh view of a single document in `db`

    Nothing is cached: each lookup fetches the document from `db` again, so
    edits made to the document show up without restarting the server.
    '''

    def __init__(self, id):
        # Only the document id is stored, never the document itself
        self.id = id

    def _document(self):
        '''
        Fetch the current revision of the wrapped document
        '''
        return db[self.id]

    def __getitem__(self, item):
        # Subscripting never raises for a missing key; it yields get()'s default
        return self.get(item)

    def get(self, item, default=''):
        '''
        Return the value stored under `item`, or `default` when it is absent
        '''
        document = self._document()
        return document.get(item, default)

    def items(self):
        '''
        Return the (key, value) pairs of the wrapped document
        '''
        document = self._document()
        return document.items()


# Accessor for the API configuration document
config = CouchDocumentAccessor('config:api')


# Google administrative mumbo jumbo
# ////////////////////////////////////////////////////////////////////////////

@app.route('/robots.txt')
def robots():
    '''
    Serve a robots.txt whose only rule is an empty Disallow for every user agent
    '''
    directives = ('User-agent: *', 'Disallow:')
    return '\n'.join(directives)

@app.route('/' + GOOGLE_VERIFICATION)
def google_verification():
    '''
    Answer the site-verification probe at the path named by GOOGLE_VERIFICATION
    '''
    template = 'google-site-verification: {}'
    return template.format(GOOGLE_VERIFICATION)


# Drive change notification
# ////////////////////////////////////////////////////////////////////////////

@app.route(API_NOTIFICATION_PATH, methods=['POST'])
def notify():
    '''
    Invoke content loader upon change notification

    The request must carry the shared secret in X-Goog-Channel-Token. What
    happens next depends on X-Goog-Resource-State:

        sync    record the channel id -> resource id pair in the
                'config:notification-channels' document (created if missing)
        change  run the content loader with --change-id taken from the JSON
                body's 'id' (coerced through int)
        load    run the content loader with --id taken from the JSON body's
                'document_id'

    Any other state is accepted and ignored.

    Returns an empty 204 response. Raises Forbidden when the token does not
    match API_NOTIFICATION_TOKEN.
    '''
    # Local import keeps this block self-contained
    from hmac import compare_digest

    def header(name):
        return request.headers.get(name, '')

    # Before doing anything, verify the notification token. The comparison is
    # constant-time so response timing does not reveal how much of a guessed
    # token was correct. Both sides are compared as bytes because
    # compare_digest rejects non-ASCII str; a non-str configured token can
    # never match a header value, so it is refused outright.
    supplied_token = header('X-Goog-Channel-Token')
    token_matches = isinstance(API_NOTIFICATION_TOKEN, str) and compare_digest(
        supplied_token.encode('utf-8'),
        API_NOTIFICATION_TOKEN.encode('utf-8'),
    )
    if not token_matches:
        raise Forbidden

    state = header('X-Goog-Resource-State')
    channel_id = header('X-Goog-Channel-Id')
    resource_id = header('X-Goog-Resource-Id')

    # Save new notification ids so they can be cancelled
    if state == 'sync':
        log(f"sync: Channel-Id: {channel_id} Resource-Id: {resource_id}")
        try:
            channels = db['config:notification-channels']
            channels[channel_id] = resource_id
        except ResourceNotFound:
            # First channel ever seen: start the bookkeeping document
            channels = {channel_id: resource_id}
            channels.update(_id='config:notification-channels', type='config', slug='notification-channels')
        db.save(channels)

    # Attempt to invoke the content loader
    elif state == 'change':
        log(f"changed: Channel-Id: {channel_id} Resource-Id: {resource_id}")
        body = request.get_json()
        if body:
            with script_directory():
                # Round-tripping through int guarantees a purely numeric argument
                change_number = str(int(body['id']))
                venv_run("contentloader", "--change-id", change_number)

    # Allow manual loading invocations from control panel
    elif state == 'load':
        body = request.get_json()
        if body and 'document_id' in body:
            doc_id = body['document_id']
            log(f"update: {doc_id}")
            with script_directory():
                venv_run("contentloader", "--id", doc_id)

    return '', 204


# Output filtering
# ////////////////////////////////////////////////////////////////////////////

# Singular names of the content types listed under 'types-tool' in the config;
# resources of these types get a module-type-effective during output filtering
module_types = [tool_type['one'] for tool_type in config['types-tool']]

# Translation table that deletes ASCII punctuation plus a few typographic and
# Spanish marks, so that ordering is decided by the remaining characters
punctuation_table = {ord(mark): None for mark in string.punctuation + '¿¡‘’“”'}


def cleaned_string(i):
    '''
    Return `i` as a string with punctuation removed; falsy values become ''
    '''
    text = str(i) if i else ''
    return text.translate(punctuation_table)

def filter_output(resources):
    '''
    Filter the output by language, merge x-language keys, and hide keys

    Accepts either one resource dict or a list of resource dicts and returns
    the same shape. Resources are modified in place: the 'translations' key is
    removed, the requested translation (if any) is overlaid, private and
    internal ('_'-prefixed) keys and 'document_title' are dropped, and
    'lang-missing', 'langs-available' and (for module types)
    'module-type-effective' are added.

    Query parameters read from the current request:
        lang     language to overlay; falls back to the configured default
        orderby  key to sort a list by (default 'title')
        reverse  'true' to reverse the sort
        limit    page size; its presence turns pagination on
        page     page number, starting at 1 (default 1)

    When paginating, a [page, total_pages] sentinel is appended to the list.

    Raises NotFound when pagination is requested and the page is out of range.
    A KeyError propagates when a resource has no 'type' (or a module-typed
    resource has no 'module-type'); most callers in this file turn that into
    NotFound.
    '''
    # Allow passing in a single resource to get a single resource back
    single = isinstance(resources, dict)
    if single:
        resources = [resources]

    # Read the config document once rather than once per key; the '' default
    # mirrors what subscripting `config` yields for a missing key
    settings = dict(config.items())
    language_all = settings.get('language-all', '')
    language_default = settings.get('language-default', '')
    language_omit = set(settings.get('language-omit', ''))
    language_ignore_missing = language_omit | {
        # Added during standard processing
        'lang', 'type', 'title', 'slug', 'timestamp',
        'document_id', 'document_link', 'document_title',
        # Added during custom filtering
        'byline', 'email-available',
    }

    # Get one valid language no matter what
    lang = request.args.get('lang')
    if lang not in language_all:
        lang = language_default

    # Identify the keys which should be hidden from the API
    # HTTPS/localhost connections can circumvent w/an X-API-Admin-Token: header
    if request.headers.get('x-api-admin-token') == API_ADMIN_TOKEN:
        private_keys = {}
    else:
        private_keys = {T['one']: T['private'] for T in settings.get('types', '') if 'private' in T}

    output = []
    for resource in resources:
        # Remove the translations for overlaying or discarding
        translations = resource.pop('translations', {})
        translated = translations.get(lang)

        if lang == language_default:
            # Default language has no lang-missing
            language_present = set(resource.keys())

        elif translated:
            # Keep track of the keys which are present in this translation
            language_present = set(translated.keys())

            # Default language content is here overwritten with translated content
            resource.update({k: v for k, v in translated.items() if k not in language_omit})

            # I believe this sets lang for pieces where the translations were created
            # artificially (using suffixed keys) and lang isn't present. If that's the
            # case it probably belongs in the contentloader because this won't be accurate
            resource['lang'] = lang

        else:
            # This is an untranslated piece
            language_present = set()

        # Remove private keys
        if 'type' in resource and resource['type'] in private_keys:
            for key in private_keys[resource['type']]:
                resource.pop(key, None)

        # Remove internal keys (snapshot the names first: the dict is mutated)
        for key in [k for k in resource if k.startswith('_')]:
            resource.pop(key)

        # Remove google doc name (no longer do any clients need it)
        resource.pop('document_title', None)

        # Produce a proper list of all lang-missing keys. This requires taking into account
        # all of the keys generated by the contentloader which may not be present when a
        # translated document is generated only from language-suffixed keys.
        # Internal keys were removed above, so none can appear here.
        lang_missing = list(
            set(resource.keys())
            - {resource['type']}
            - language_present
            - language_ignore_missing
        )
        resource['lang-missing'] = lang_missing

        # Produce an effective module type based on lang-missing
        # The module-type is initially produced by the document title in the contentloader
        # but may be effectively a lesser type when portions are missing from translation.
        if resource['type'] in module_types:
            module_type = module_type_effective = resource['module-type']
            # We have to be sure not to promote a piece to gallery if it's meant to be a
            # snapshot but has a full-write-up in language_default and possibly has
            # 'full-write-up' in lang_missing. Team ed. wants the filename to set the type.
            if module_type == 'full' and 'full-write-up' in lang_missing:
                module_type_effective = 'gallery'
            if 'short-write-up' in lang_missing:
                module_type_effective = 'snapshot'
            if 'snapshot' in lang_missing:
                module_type_effective = 'untranslated'
                # This relates to the comment above about the lang attribute. An underlying
                # assumption here might be that an untranslated piece means the original is
                # in English and that is not necessarily always true.
                resource['lang'] = language_default
            resource['module-type-effective'] = module_type_effective

        # List available languages
        resource['langs-available'] = list(translations.keys() | {language_default})

        output.append(resource)

    # Return a dict or a list of dicts as appropriate
    if single:
        return output[0] if output else {}

    # Allow sorting by query param 'orderby=keyname' (if possible)
    # Sorting is best-effort: any ordinary failure leaves the list as queried.
    # Only Exception is caught so SystemExit/KeyboardInterrupt still propagate.
    try:
        orderby = request.args.get('orderby', 'title')
        compare = RuleBasedCollator.createInstance(Locale(lang)).compare
        collation_key = cmp_to_key(compare)
        reverse = request.args.get('reverse', '').lower() == 'true'
        # The key function runs once per element, so each value is cleaned
        # once instead of on every comparison
        output = sorted(
            output,
            key=lambda item: collation_key(cleaned_string(item.get(orderby))),
            reverse=reverse,
        )
    except Exception:
        pass

    # Allow basic pagination by query params 'limit=N_per_page&page=N'
    # A sentinel value of [current_page, total_pages] appears at the end
    try:
        limit = abs(int(request.args['limit']))
        page = abs(int(request.args.get('page', 1)))
        total = len(output) // limit + bool(len(output) % limit)
    except (KeyError, ValueError, ZeroDivisionError):
        # No limit given, a non-numeric value, or limit=0: return everything
        return output

    if not 0 < page <= total:
        raise NotFound

    start = (page - 1) * limit
    return output[start:start + limit] + [[page, total]]


# API Endpoints
# ////////////////////////////////////////////////////////////////////////////

@app.after_request
def add_cors_headers(response):
    '''
    Attach the CORS headers that let any origin issue GET requests to the API
    '''
    cors_headers = (
        ('Access-Control-Allow-Origin', '*'),
        ('Access-Control-Allow-Headers', 'Content-Type'),
        ('Access-Control-Allow-Methods', 'GET'),
    )
    for header_name, header_value in cors_headers:
        response.headers.add(header_name, header_value)
    return response


# Map each content type's singular name to its plural name, then register a
# listing endpoint, a by-slug endpoint and a deprecated plural alias for each
types = {entry['one']: entry['many'] for entry in config['types']}
for singular_name, plural_name in types.items():
    class Type(Resource):
        '''
        All documents of one content type, or the one matching a slug
        '''
        # Bound at class creation, so every generated class keeps its own type
        T = singular_name

        def get(self, slug=None):
            '''
            Return the filtered list of this type, or the single filtered
            document with the given slug; raise NotFound when nothing matches
            '''
            selector = {'type': self.T}
            try:
                if not slug:
                    listing = list(db.find({'limit': QUERY_LIMIT, 'selector': selector}))
                    return filter_output(listing)
                selector['slug'] = slug
                matches = list(db.find({'limit': 1, 'selector': selector}))
                # An empty match list raises IndexError, handled below
                return filter_output(matches[0])
            except (KeyError, IndexError, ResourceNotFound):
                raise NotFound

    api.add_resource(
        Type,
        f'{API_PATH}/{singular_name}/',
        endpoint=singular_name,
    )
    api.add_resource(
        Type,
        f'{API_PATH}/{singular_name}/<slug>',
        endpoint=f'{singular_name}/slug',
    )
    api.add_resource(
        Type,
        f'{API_PATH}/{plural_name}/',
        endpoint=f'{plural_name} (DEPRECATED, use: {API_PATH}/{singular_name}/)',
    )


# Every config key of the form 'types-<group>' defines a group of content
# types; register a listing endpoint and a by-slug endpoint for each group
groups = {
    key.partition('-')[2]: [member['one'] for member in members]
    for key, members in config.items()
    if key.startswith('types-')
}
for group, types in groups.items():
    class Group(Resource):
        '''
        All documents whose type belongs to one group, or the one matching a slug
        '''
        # Selector matching any of the group's types, fixed at class creation
        T_select_or = {'$or': [{'type': type_name} for type_name in types]}

        def get(self, slug=None):
            '''
            Return the filtered list for this group, or the single filtered
            document with the given slug; raise NotFound when nothing matches
            '''
            try:
                if not slug:
                    listing = list(db.find({'limit': QUERY_LIMIT, 'selector': self.T_select_or}))
                    return filter_output(listing)
                by_slug = {'$and': [self.T_select_or, {'slug': slug}]}
                matches = list(db.find({'limit': 1, 'selector': by_slug}))
                # An empty match list raises IndexError, handled below
                return filter_output(matches[0])
            except (KeyError, IndexError, ResourceNotFound):
                raise NotFound

    api.add_resource(
        Group,
        f'{API_PATH}/{group}/',
        endpoint=group,
    )
    api.add_resource(
        Group,
        f'{API_PATH}/{group}/<slug>',
        endpoint=f'{group}/slug',
    )


# Pragmatic tools + people endpoint for BT2020 website
class Content(Resource):
    '''
    Every document that is one of the 'types-tool' types or a 'person'
    '''
    web_content_types = [*(tool['one'] for tool in config['types-tool']), 'person']
    T_select_or = {'$or': [{'type': type_name} for type_name in web_content_types]}

    def get(self, slug=None):
        '''
        Return the filtered list of web content; `slug` is accepted but unused
        '''
        query = {'limit': QUERY_LIMIT, 'selector': self.T_select_or}
        try:
            documents = list(db.find(query))
            return filter_output(documents)
        except (KeyError, IndexError, ResourceNotFound):
            raise NotFound


api.add_resource(Content, f'{API_PATH}/content/', endpoint='content')


# Just emit all the documents
class All(Resource):
    '''
    Every document returned by the database's _all_docs view
    '''

    def get(self):
        '''
        Return all documents after output filtering; raise NotFound on
        KeyError or ResourceNotFound
        '''
        try:
            rows = db.view('_all_docs', include_docs=True)
            documents = [row['doc'] for row in rows]
            return filter_output(documents)
        except (KeyError, ResourceNotFound):
            raise NotFound


api.add_resource(All, f'{API_PATH}/all/', endpoint='all')


# Expose the config for front-ends
class Config(Resource):
    '''
    The API configuration document, passed through the usual output filtering
    '''

    def get(self):
        '''
        Return a filtered plain-dict copy of the current config document
        '''
        settings = dict(config.items())
        return filter_output(settings)


api.add_resource(Config, f'{API_PATH}/config')


# Expose the endpoints themselves
class Endpoints(Resource):
    '''
    Index of every rule registered on the application's URL map
    '''

    def get(self):
        '''
        Return {endpoint: {'url': ..., 'methods': [...]}} for each rule, with
        every URL argument rendered as a literal <name> placeholder
        '''
        listing = {}
        for rule in app.url_map.iter_rules():
            placeholders = {arg: f'<{arg}>' for arg in rule.arguments}
            # url_for percent-encodes the angle brackets; unquote restores them
            listing[rule.endpoint] = {
                'url': unquote(url_for(rule.endpoint, **placeholders)),
                'methods': list(rule.methods),
            }
        return listing


api.add_resource(Endpoints, f'{API_PATH}/')


# Run when invoked from the command line
# ////////////////////////////////////////////////////////////////////////////

if __name__ == '__main__':
    # The DEBUG and DEVELOP flags each shift the listening port up from 6000
    listen_port = 6000 + DEBUG + DEVELOP
    app.run(debug=DEBUG, port=listen_port)