# -*- coding: utf-8 -*-
"""JSON / JSONP front end for a few NLTK operations.

``handler`` decodes a JSON request body, dispatches on the requested function
name ('sentences', 'tokens' or 'tags') and returns a JSON envelope as a
string, optionally wrapped in a JSONP callback.
"""
import json
import time

try:
    import cjson
except ImportError:  # python-cjson is Python 2 only; stdlib json is used instead
    cjson = None

try:
    import nltk
except ImportError:  # reported by _require_nltk() when a function needs it
    nltk = None

try:
    _STRING_TYPES = (basestring,)  # Python 2: covers both str and unicode
except NameError:
    _STRING_TYPES = (str,)

# time.clock() was removed in Python 3.8; fall back to it only where
# perf_counter() does not exist (Python 2).
_timer = getattr(time, 'perf_counter', None) or time.clock


def _decode_json(text):
    """Parse JSON text with cjson when it is installed, else with stdlib json."""
    if cjson is not None:
        return cjson.decode(text)
    return json.loads(text)


def _native_str(text):
    """Return text as the interpreter's native ``str``.

    Only a Python 2 ``unicode`` object fails the isinstance check; it is
    encoded to UTF-8 bytes so the result can be concatenated with plain
    string literals.
    """
    if not isinstance(text, str):
        return text.encode('UTF-8')
    return text


def _require_nltk():
    """Return the nltk module, raising ImportError if it is not installed."""
    if nltk is None:
        raise ImportError('nltk is required for sentence, token and tag processing')
    return nltk


def handler(function, data, callback):
    """Decode a JSON request and run the named function on it.

    function -- 'sentences' (expects a JSON string), 'tokens' (expects a list
                of sentences) or 'tags' (expects a list of token lists).
    data     -- the JSON-encoded request body.
    callback -- JSONP callback name; empty for a plain JSON response.

    Returns the response text. Invalid JSON, a wrong input type or an unknown
    function name produce a failure envelope instead of raising.
    """
    try:
        data = _decode_json(data)
    except Exception:
        # Any decoder error means the request body is unusable. The 1-tuple
        # keeps the formatting safe even if data is itself a tuple.
        return fail("invalid input json for : %s" % (data,), callback)

    if function == 'sentences':
        if not isinstance(data, _STRING_TYPES):
            return fail('invalid input, string expected', callback)
        result = sentences_function(data)
    elif function == 'tokens':
        if not isinstance(data, list):
            return fail('invalid input, list expected', callback)
        result = tokens_function(data)
    elif function == 'tags':
        if not isinstance(data, list):
            return fail('invalid input, list expected', callback)
        result = tags_function(data)
    else:
        return fail('unknown function: %s' % function, callback)
    return response(result, callback)


def fail(message, callback):
    """Build a failure envelope for message and wrap it like any response."""
    return response(json_fail(message), callback)


def response(result, callback):
    """Return result as is, or wrapped as ``callback(result);`` for JSONP.

    An empty or None callback yields the plain result.
    """
    # NOTE(review): callback is emitted verbatim; if it comes straight from a
    # request parameter it should be validated by the caller -- confirm.
    if callback:
        return callback + '(' + result + ');'
    return result


def json_envelope(data, start_time):
    """Wrap pre-rendered JSON text in a success envelope.

    data       -- JSON text inserted verbatim as the "result" value.
    start_time -- timer reading taken when processing began; the elapsed
                  time since then is reported in the "time" field.
    """
    envelope = '{"success":true, "time":%f, "result":%s}' % (_timer() - start_time, data)
    return _native_str(envelope)


def json_list(lst):
    """Render a list of strings as a JSON array with each item escaped."""
    return '[' + ", ".join(json.dumps(item) for item in lst) + ']'


def json_fail(message):
    """Return a failure envelope carrying message as an escaped JSON string."""
    # '%s' stringifies non-string messages before they are JSON-escaped.
    envelope = '{"success":false, "message":%s}' % json.dumps('%s' % (message,))
    return _native_str(envelope)


def sentences_function(text):
    """Split text into sentences and return them in a success envelope."""
    start_time = _timer()
    sentences = _require_nltk().sent_tokenize(text)
    return json_envelope(json_list(sentences), start_time)


def tokens_function(sentences):
    """Tokenize each sentence; return the list of token lists in an envelope."""
    start_time = _timer()
    tokenizer = _require_nltk().word_tokenize
    tokens_list = [tokenizer(sentence) for sentence in sentences]
    rendered = '[' + ", ".join(json_list(tokens) for tokens in tokens_list) + ']'
    return json_envelope(rendered, start_time)


def tags_function(tokens_list):
    """POS-tag each token list; return the tagged lists in an envelope."""
    start_time = _timer()
    tagger = _require_nltk().pos_tag
    tagged_tokens = [tagger(tokens) for tokens in tokens_list]
    rendered = '[' + ", ".join(render_json(tokens) for tokens in tagged_tokens) + ']'
    return json_envelope(rendered, start_time)


def render_json(result):
    """Render (token, pos) pairs as a JSON array of {"token", "pos"} objects."""
    return '[' + ", ".join(
        '{"token":%s,"pos":%s}' % (json.dumps(token), json.dumps(pos))
        for (token, pos) in result
    ) + ']'