-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit bd6cf40
Showing
9 changed files
with
234 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.DS_Store | ||
*.pyc | ||
*.swp | ||
.ropeproject |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Image for the API service started from /src/app.py (listens on port 5002).
FROM ubuntu:14.04

MAINTAINER John Beieler <[email protected]>

RUN apt-get update && apt-get install -y git python-dev python-pip

# COPY is enough for plain local files; ADD's URL/archive handling is not used.
COPY . /src

# Chain with && so the build step fails if /src cannot be entered, instead of
# running pip from whatever directory the shell happens to be in.
RUN cd /src && pip install -r requirements.txt

EXPOSE 5002

CMD ["python", "/src/app.py"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
hypnos | ||
====== | ||
|
||
A RESTful API around the [PETRARCH](https://github.com/openeventdata/petrarch) | ||
event data coder. Using `docker-compose`, this setup also integrates the | ||
Stanford [CoreNLP](http://nlp.stanford.edu/software/corenlp.shtml) parser | ||
using Casey Hilland's [docker container](https://github.com/chilland/ccNLP). | ||
This setup allows the user to stream texts into the API, rather than processing | ||
them in the batch mode seen in applications such as the [Phoenix pipeline](https://github.com/openeventdata/phoenix_pipeline). | ||
|
||
Running | ||
------- | ||
|
||
Running the system is as simple as using | ||
|
||
`docker-compose up` | ||
|
||
or | ||
|
||
`docker-compose up -d` | ||
|
||
to run in the background. | ||
|
||
This assumes that you have `docker-compose` and `docker` installed. | ||
|
||
Usage | ||
----- | ||
|
||
``` | ||
headers = {'Content-Type': 'application/json'} | ||
data = {'text': "At least 37 people are dead after Islamist radical group Boko " | ||
"Haram assaulted a town in northeastern Nigeria.", 'id': 'abc123', 'date': | ||
'20010101'} | ||
data = json.dumps(data) | ||
r = requests.get('http://localhost:5002/siesta/extract', data=data, | ||
headers=headers) | ||
r.json() | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import os | ||
import json | ||
import requests | ||
from tornado.ioloop import IOLoop | ||
from tornado.wsgi import WSGIContainer | ||
from tornado.httpserver import HTTPServer | ||
from flask import Flask, jsonify, make_response | ||
from flask.ext.restful import Api, Resource, reqparse | ||
|
||
# Flask application object and the Flask-RESTful Api wrapper built on top of it;
# resources are registered on `api` further down in this file.
app = Flask(__name__) | ||
api = Api(app) | ||
|
||
# Absolute path of the directory that contains this file.
cwd = os.path.abspath(os.path.dirname(__file__)) | ||
|
||
|
||
@app.errorhandler(400)
def bad_request(error):
    """Respond to HTTP 400 errors with the JSON body {'error': 'Bad request'}."""
    payload = jsonify({'error': 'Bad request'})
    return make_response(payload, 400)
|
||
|
||
@app.errorhandler(404)
def not_found(error):
    """Respond to HTTP 404 errors with the JSON body {'error': 'Not found'}."""
    payload = jsonify({'error': 'Not found'})
    return make_response(payload, 404)
|
||
|
||
class ExtractAPI(Resource):
    """REST resource that turns raw text into coded events.

    The request's JSON body may carry ``text``, ``id`` and ``date``. The text
    is posted to the CoreNLP service, the parse is reshaped by
    ``process_corenlp``, and the result is posted to the PETRARCH service.
    The decoded JSON of the PETRARCH response is returned to the caller.
    """

    def __init__(self):
        self.reqparse = reqparse.RequestParser()
        self.reqparse.add_argument('text', type=unicode, location='json')
        self.reqparse.add_argument('id', type=unicode, location='json')
        self.reqparse.add_argument('date', type=unicode, location='json')
        super(ExtractAPI, self).__init__()

    def get(self):
        """Parse and code the submitted text.

        Returns the decoded JSON from the PETRARCH service, or a
        ``({'error': 'Bad request'}, 400)`` pair when no ``text`` was sent.

        Raises ``KeyError`` when a service-address environment variable is
        unset, and ``requests.HTTPError`` when either upstream service
        answers with an error status.
        """
        args = self.reqparse.parse_args()
        text = args['text']
        if text is None:
            # Without text there is nothing to parse; answer with a client
            # error instead of failing on ``None.encode`` below.
            return {'error': 'Bad request'}, 400
        text = text.encode('utf-8')
        storyid = args['id']
        date = args['date']

        headers = {'Content-Type': 'application/json'}

        # NOTE(review): the *_TCP_ADDR variables are presumably injected by
        # the docker-compose links -- confirm against the deployment.
        ccnlp = os.environ['CCNLP_PORT_5000_TCP_ADDR']
        ccnlp_url = 'http://{}:5000/process'.format(ccnlp)
        core_data = json.dumps({'text': text})
        r = requests.post(ccnlp_url, data=core_data, headers=headers)
        # Fail loudly on an upstream error rather than on a confusing
        # KeyError while reading 'sentences' from an error payload.
        r.raise_for_status()
        out = r.json()

        event_dict = process_corenlp(out, date, storyid)

        events_data = json.dumps({'events': event_dict})
        print(events_data)
        petr = os.environ['PETRARCH_PORT_5001_TCP_ADDR']
        petr_url = 'http://{}:5001/petrarch/code'.format(petr)
        events_r = requests.post(petr_url, data=events_data, headers=headers)
        # Do not relay an upstream error body to the client as a 200.
        events_r.raise_for_status()

        return events_r.json()
|
||
|
||
def process_corenlp(output, date, STORYID):
    """Reshape CoreNLP output into a single-story event dictionary.

    Parameters:
        output: mapping with a 'sentences' list; each sentence must provide
            'tokens' (a list of strings) and 'parse' (a string).
        date: value stored unchanged under the story's 'meta' -> 'date'.
        STORYID: key under which the story is stored in the result.

    Returns:
        ``{STORYID: {'sents': {...}, 'meta': {'date': date}}}`` where 'sents'
        maps each sentence's 0-based index to a dict holding 'content' (the
        tokens joined by single spaces) and 'parsed' (the parse string
        upper-cased, with a space inserted before every closing parenthesis).

    Raises:
        KeyError: if 'sentences', 'tokens' or 'parse' is missing.
    """
    sents = {}
    # Use the loop variable directly rather than re-indexing the sentence list.
    for i, sent in enumerate(output['sentences']):
        sents[i] = {
            'content': ' '.join(sent['tokens']),
            'parsed': sent['parse'].upper().replace(')', ' )'),
        }

    return {STORYID: {'sents': sents, 'meta': {'date': date}}}
|
||
# Route /siesta/extract to the extraction resource.
api.add_resource(ExtractAPI, '/siesta/extract')

if __name__ == '__main__':
    # Wrap the Flask WSGI app in a Tornado HTTP server listening on port 5002,
    # then start the IO loop.
    container = WSGIContainer(app)
    http_server = HTTPServer(container)
    http_server.listen(5002)
    IOLoop.instance().start()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
siesta: | ||
build: . | ||
links: | ||
- ccnlp | ||
- petrarch | ||
ports: | ||
- "5002:5002" | ||
ccnlp: | ||
image: caerusassociates/ccnlp:1.0.0 | ||
petrarch: | ||
build: petrarch/. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Image for the PETRARCH coding service started from /src/petrarch_app.py
# (listens on port 5001).
FROM ubuntu:14.04

MAINTAINER John Beieler <[email protected]>

RUN apt-get update && apt-get install -y git python-dev python-pip

RUN pip install git+https://github.com/openeventdata/petrarch.git

# COPY is enough for plain local files; ADD's URL/archive handling is not used.
COPY . /src

# Chain with && so the build step fails if /src cannot be entered, instead of
# running pip from whatever directory the shell happens to be in.
RUN cd /src && pip install -r requirements.txt

EXPOSE 5001

CMD ["python", "/src/petrarch_app.py"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import os | ||
from petrarch import petrarch | ||
from tornado.ioloop import IOLoop | ||
from tornado.wsgi import WSGIContainer | ||
from tornado.httpserver import HTTPServer | ||
from flask import Flask, jsonify, make_response | ||
from flask.ext.restful import Api, Resource, reqparse | ||
|
||
# Flask application object and the Flask-RESTful Api wrapper built on top of it;
# resources are registered on `api` further down in this file.
app = Flask(__name__) | ||
api = Api(app) | ||
|
||
# Absolute path of the directory that contains this file.
cwd = os.path.abspath(os.path.dirname(__file__)) | ||
|
||
|
||
@app.errorhandler(400)
def bad_request(error):
    """Respond to HTTP 400 errors with the JSON body {'error': 'Bad request'}."""
    payload = jsonify({'error': 'Bad request'})
    return make_response(payload, 400)
|
||
|
||
@app.errorhandler(404)
def not_found(error):
    """Respond to HTTP 404 errors with the JSON body {'error': 'Not found'}."""
    payload = jsonify({'error': 'Not found'})
    return make_response(payload, 404)
|
||
|
||
|
||
class CodeAPI(Resource):
    """REST resource that runs PETRARCH coding over a submitted event dict.

    The request must carry an ``events`` dictionary, which is handed to
    ``petrarch.do_coding``; whatever that call returns is sent back.
    """

    def __init__(self):
        self.reqparse = reqparse.RequestParser()
        self.reqparse.add_argument('events', type=dict)
        super(CodeAPI, self).__init__()

    def post(self):
        """Code the submitted events.

        Returns the result of ``petrarch.do_coding``, or a
        ``({'error': 'Bad request'}, 400)`` pair when no ``events`` was sent.
        """
        args = self.reqparse.parse_args()
        event_dict = args['events']
        if event_dict is None:
            # Reject the request instead of handing None to the coder.
            return {'error': 'Bad request'}, 400

        print(event_dict)
        event_dict_updated = petrarch.do_coding(event_dict, None)

        return event_dict_updated
|
||
|
||
# Route /petrarch/code to the coding resource.
api.add_resource(CodeAPI, '/petrarch/code')

if __name__ == '__main__':
    # Load the PETRARCH configuration and dictionaries before serving.
    config = petrarch.utilities._get_data('data/config/', 'PETR_config.ini')
    print("reading config")
    petrarch.PETRreader.parse_Config(config)
    print("reading dicts")
    petrarch.read_dictionaries()

    # Wrap the Flask WSGI app in a Tornado HTTP server listening on port 5001,
    # then start the IO loop.
    container = WSGIContainer(app)
    http_server = HTTPServer(container)
    http_server.listen(5001)
    IOLoop.instance().start()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
pytest==2.6.3 | ||
requests==2.4.3 | ||
Flask==0.10.1 | ||
Flask-RESTful==0.3.3 | ||
Flask-HTTPAuth==2.5.0 | ||
itsdangerous==0.24 | ||
Jinja2==2.7.3 | ||
MarkupSafe==0.23 | ||
tornado==4.2 | ||
simplejson==3.6.5 | ||
Werkzeug==0.10.4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
pytest==2.6.3 | ||
requests==2.4.3 | ||
Flask==0.10.1 | ||
Flask-RESTful==0.3.3 | ||
Flask-HTTPAuth==2.5.0 | ||
itsdangerous==0.24 | ||
Jinja2==2.7.3 | ||
MarkupSafe==0.23 | ||
tornado==4.2 | ||
simplejson==3.6.5 | ||
Werkzeug==0.10.4 |