Skip to content

Commit

Permalink
First commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
johnb30 committed Jul 15, 2015
0 parents commit bd6cf40
Show file tree
Hide file tree
Showing 9 changed files with 234 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.DS_Store
*.pyc
*.swp
.ropeproject
13 changes: 13 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Image for the API service started by app.py.
FROM ubuntu:14.04

MAINTAINER John Beieler <[email protected]>

# System packages: git, the Python development headers, and pip.
RUN apt-get update && apt-get install -y git python-dev python-pip

# Copy the build context into the image under /src.
ADD . /src

# Install the pinned Python dependencies listed in requirements.txt.
RUN cd /src; pip install -r requirements.txt

# app.py listens on port 5002.
EXPOSE 5002

CMD ["python", "/src/app.py"]
38 changes: 38 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
hypnos
======

A RESTful API around the [PETRARCH](https://github.com/openeventdata/petrarch)
event data coder. Using `docker-compose`, this setup also integrates the
Stanford [CoreNLP](http://nlp.stanford.edu/software/corenlp.shtml) parser
using Casey Hilland's [docker container](https://github.com/chilland/ccNLP).
This setup allows the user to stream texts into the API, rather than the
batch mode seen in applications such as the [Phoenix pipeline](https://github.com/openeventdata/phoenix_pipeline).

Running
-------

Running the system is as simple as using

`docker-compose up`

or

`docker-compose up -d`

to run in the background.

This assumes that you have `docker-compose` and `docker` installed.

Usage
-----

```
import json
import requests

headers = {'Content-Type': 'application/json'}
data = {'text': "At least 37 people are dead after Islamist radical group Boko "
                "Haram assaulted a town in northeastern Nigeria.",
        'id': 'abc123', 'date': '20010101'}
data = json.dumps(data)
r = requests.get('http://localhost:5002/siesta/extract', data=data,
                 headers=headers)
r.json()
```
78 changes: 78 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import os
import json
import requests
from tornado.ioloop import IOLoop
from tornado.wsgi import WSGIContainer
from tornado.httpserver import HTTPServer
from flask import Flask, jsonify, make_response
from flask.ext.restful import Api, Resource, reqparse

# Flask application object; the error handlers in this module register on it.
app = Flask(__name__)
# Flask-RESTful wrapper used to attach resource classes to URL routes.
api = Api(app)

# Absolute path of the directory that contains this file.
cwd = os.path.abspath(os.path.dirname(__file__))


@app.errorhandler(400)
def bad_request(error):
    """Answer HTTP 400 errors with a JSON body instead of the default page."""
    body = jsonify({'error': 'Bad request'})
    return make_response(body, 400)


@app.errorhandler(404)
def not_found(error):
    """Answer HTTP 404 errors with a JSON body instead of the default page."""
    body = jsonify({'error': 'Not found'})
    return make_response(body, 404)


class ExtractAPI(Resource):
    """Resource that parses a story with CoreNLP and codes it with PETRARCH.

    The request body is JSON with the fields ``text`` (required), ``id`` and
    ``date``. The addresses of the two backing services are read from the
    ``CCNLP_PORT_5000_TCP_ADDR`` and ``PETRARCH_PORT_5001_TCP_ADDR``
    environment variables.
    """

    def __init__(self):
        self.reqparse = reqparse.RequestParser()
        # ``text`` is mandatory: ``get`` calls ``.encode`` on it, so a missing
        # value would otherwise surface as an AttributeError (HTTP 500).
        # Marking it required lets the parser reject the request up front.
        self.reqparse.add_argument('text', type=unicode, location='json',
                                   required=True,
                                   help='No text provided to parse.')
        self.reqparse.add_argument('id', type=unicode, location='json')
        self.reqparse.add_argument('date', type=unicode, location='json')
        super(ExtractAPI, self).__init__()

    def get(self):
        """Run the posted text through the parser and the event coder.

        Returns:
            The decoded JSON body of the PETRARCH service's response.

        Raises:
            KeyError: If either service address variable is not set.
            requests.exceptions.HTTPError: If either service answers with an
                error status.
        """
        args = self.reqparse.parse_args()
        text = args['text'].encode('utf-8')
        storyid = args['id']
        date = args['date']

        headers = {'Content-Type': 'application/json'}

        # Step 1: send the raw text to the CoreNLP service.
        core_data = json.dumps({'text': text})
        ccnlp = os.environ['CCNLP_PORT_5000_TCP_ADDR']
        ccnlp_url = 'http://{}:5000/process'.format(ccnlp)
        r = requests.post(ccnlp_url, data=core_data, headers=headers)
        # Fail here on an error status rather than on a confusing decode or
        # KeyError further down.
        r.raise_for_status()
        out = r.json()

        # Step 2: reshape the parser output into the coder's input format.
        event_dict = process_corenlp(out, date, storyid)

        # Step 3: send the reshaped sentences to the PETRARCH service.
        events_data = json.dumps({'events': event_dict})
        print(events_data)
        petr = os.environ['PETRARCH_PORT_5001_TCP_ADDR']
        petr_url = 'http://{}:5001/petrarch/code'.format(petr)
        events_r = requests.post(petr_url, data=events_data, headers=headers)
        # Do not hand an upstream error body back to the client as a success.
        events_r.raise_for_status()
        event_updated = events_r.json()

        return event_updated


def process_corenlp(output, date, STORYID):
    """Reshape CoreNLP output into the story dictionary sent to the coder.

    Args:
        output: Dict with a ``'sentences'`` list. Each entry must provide
            ``'tokens'`` (a list of strings) and ``'parse'`` (a string).
        date: Story date, stored unchanged under ``meta``.
        STORYID: Story identifier, used as the single top-level key.

    Returns:
        A dict of the form ``{STORYID: {'sents': {i: {'content': ...,
        'parsed': ...}}, 'meta': {'date': date}}}``, where ``i`` is the
        zero-based sentence index.

    Raises:
        KeyError: If ``'sentences'``, ``'tokens'`` or ``'parse'`` is missing.
    """
    sents = {}
    for i, sent in enumerate(output['sentences']):
        sents[i] = {
            'content': ' '.join(sent['tokens']),
            # Upper-case the parse and put a space before every closing
            # parenthesis.
            'parsed': sent['parse'].upper().replace(')', ' )'),
        }

    return {STORYID: {'sents': sents, 'meta': {'date': date}}}

# Serve the extraction resource at /siesta/extract.
api.add_resource(ExtractAPI, '/siesta/extract')

if __name__ == '__main__':
    # Wrap the Flask WSGI app so Tornado's HTTP server can host it.
    wsgi_app = WSGIContainer(app)
    server = HTTPServer(wsgi_app)
    server.listen(5002)
    IOLoop.instance().start()
11 changes: 11 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# API service built from the repository root; published on host port 5002.
siesta:
  build: .
  # app.py reads CCNLP_PORT_5000_TCP_ADDR and PETRARCH_PORT_5001_TCP_ADDR;
  # presumably these variables are injected by the links below.
  links:
    - ccnlp
    - petrarch
  ports:
    - "5002:5002"
# CoreNLP parser service, pulled as a prebuilt image.
ccnlp:
  image: caerusassociates/ccnlp:1.0.0
# PETRARCH coder service, built from the petrarch/ directory.
petrarch:
  build: petrarch/.
15 changes: 15 additions & 0 deletions petrarch/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Image for the PETRARCH coding service started by petrarch_app.py.
FROM ubuntu:14.04

MAINTAINER John Beieler <[email protected]>

# System packages: git, the Python development headers, and pip.
RUN apt-get update && apt-get install -y git python-dev python-pip

# Install PETRARCH directly from its GitHub repository.
RUN pip install git+https://github.com/openeventdata/petrarch.git

# Copy the build context into the image under /src.
ADD . /src

# Install the pinned Python dependencies listed in requirements.txt.
RUN cd /src; pip install -r requirements.txt

# petrarch_app.py listens on port 5001.
EXPOSE 5001

CMD ["python", "/src/petrarch_app.py"]
53 changes: 53 additions & 0 deletions petrarch/petrarch_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import os
from petrarch import petrarch
from tornado.ioloop import IOLoop
from tornado.wsgi import WSGIContainer
from tornado.httpserver import HTTPServer
from flask import Flask, jsonify, make_response
from flask.ext.restful import Api, Resource, reqparse

# Flask application object; the error handlers in this module register on it.
app = Flask(__name__)
# Flask-RESTful wrapper used to attach resource classes to URL routes.
api = Api(app)

# Absolute path of the directory that contains this file.
cwd = os.path.abspath(os.path.dirname(__file__))


@app.errorhandler(400)
def bad_request(error):
    """Answer HTTP 400 errors with a JSON body instead of the default page."""
    body = jsonify({'error': 'Bad request'})
    return make_response(body, 400)


@app.errorhandler(404)
def not_found(error):
    """Answer HTTP 404 errors with a JSON body instead of the default page."""
    body = jsonify({'error': 'Not found'})
    return make_response(body, 404)



class CodeAPI(Resource):
    """Resource that passes a posted ``events`` dict to ``petrarch.do_coding``."""

    def __init__(self):
        parser = reqparse.RequestParser()
        parser.add_argument('events', type=dict)
        self.reqparse = parser
        super(CodeAPI, self).__init__()

    def post(self):
        """Code the ``events`` argument of the request and return the result."""
        events = self.reqparse.parse_args()['events']

        print(events)
        return petrarch.do_coding(events, None)


# Serve the coding resource at /petrarch/code.
api.add_resource(CodeAPI, '/petrarch/code')

if __name__ == '__main__':
    # Load PETRARCH's configuration and dictionaries once, before serving.
    petr_config = petrarch.utilities._get_data('data/config/',
                                               'PETR_config.ini')
    print("reading config")
    petrarch.PETRreader.parse_Config(petr_config)
    print("reading dicts")
    petrarch.read_dictionaries()

    # Wrap the Flask WSGI app so Tornado's HTTP server can host it.
    wsgi_app = WSGIContainer(app)
    server = HTTPServer(wsgi_app)
    server.listen(5001)
    IOLoop.instance().start()
11 changes: 11 additions & 0 deletions petrarch/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
pytest==2.6.3
requests==2.4.3
Flask==0.10.1
Flask-RESTful==0.3.3
Flask-HTTPAuth==2.5.0
itsdangerous==0.24
Jinja2==2.7.3
MarkupSafe==0.23
tornado==4.2
simplejson==3.6.5
Werkzeug==0.10.4
11 changes: 11 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
pytest==2.6.3
requests==2.4.3
Flask==0.10.1
Flask-RESTful==0.3.3
Flask-HTTPAuth==2.5.0
itsdangerous==0.24
Jinja2==2.7.3
MarkupSafe==0.23
tornado==4.2
simplejson==3.6.5
Werkzeug==0.10.4

0 comments on commit bd6cf40

Please sign in to comment.