From bcdc982d7e040af5616b347c89a50404b616f025 Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Mon, 22 Feb 2021 14:55:16 -0800 Subject: [PATCH 01/19] adapted for use with orpheus --- neuralqa/retriever/__init__.py | 1 + .../retriever/awselasticsearchretriever.py | 105 ++++++++++++++++++ neuralqa/retriever/retrieverpool.py | 5 +- neuralqa/server/routehandlers.py | 3 + neuralqa/server/server_app.py | 2 +- requirements.txt | 4 +- 6 files changed, 117 insertions(+), 3 deletions(-) create mode 100644 neuralqa/retriever/awselasticsearchretriever.py diff --git a/neuralqa/retriever/__init__.py b/neuralqa/retriever/__init__.py index fa07e6f..24f4a54 100644 --- a/neuralqa/retriever/__init__.py +++ b/neuralqa/retriever/__init__.py @@ -1,4 +1,5 @@ from .retriever import * from .elasticsearchretriever import * +from .awselasticsearchretriever import * from .solrretriever import * from .retrieverpool import * diff --git a/neuralqa/retriever/awselasticsearchretriever.py b/neuralqa/retriever/awselasticsearchretriever.py new file mode 100644 index 0000000..552b01e --- /dev/null +++ b/neuralqa/retriever/awselasticsearchretriever.py @@ -0,0 +1,105 @@ +import boto3 +from requests_aws4auth import AWS4Auth +import copy +from neuralqa.retriever import Retriever, ElasticSearchRetriever +from neuralqa.utils import parse_field_content +from elasticsearch import Elasticsearch, ConnectionError, NotFoundError, RequestsHttpConnection +import logging + +import traceback + +logger = logging.getLogger(__name__) +region = 'us-east-2' +service = 'es' + + +class AWSElasticSearchRetriever(ElasticSearchRetriever): + def __init__(self, host, index_type="elasticsearch", port=443, **kwargs): + Retriever.__init__(self, index_type) + + self.body_field = "" + self.search_fields = [] + self.return_fields = [] + self.remove_body_field = True + self.host = host + self.port = port + allowed_keys = list(self.__dict__.keys()) + self.__dict__.update((k, v) for k, v in kwargs.items() if k in allowed_keys) + assert self.body_field in self.return_fields + assert any(self.body_field in f for f in self.search_fields) + + credentials = boto3.Session().get_credentials() + awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service, + session_token=credentials.token) + self.es = Elasticsearch( + hosts=[{"host": self.host, "port": self.port}], + http_auth=awsauth, + use_ssl = True, + verify_certs = True, + connection_class = RequestsHttpConnection, + ) + self.isAvailable = self.es.ping() + + rejected_keys = set(kwargs.keys()) - set(allowed_keys) + + if rejected_keys: + raise ValueError( + "Invalid arguments in ElasticSearchRetriever constructor:{}".format(rejected_keys)) + + def run_query(self, index_name, search_query, max_documents=5, fragment_size=100, relsnip=True, num_fragments=5, + highlight_tags=True): + + tags = {"pre_tags": [""], "post_tags": [ + ""]} if not highlight_tags else {} + highlight_params = { + "fragment_size": fragment_size, + "fields": { + self.body_field: tags + }, + "number_of_fragments": num_fragments + } + + search_query = { + "_source": self.return_fields, + "query": { + "multi_match": { + "query": search_query, + "fields": self.search_fields + } + }, + "size": max_documents + } + + status = True + results = {} + + if (relsnip): + # search_query["_source"] = {"includes": [""]} + search_query["highlight"] = highlight_params + # else: + # search_query["_source"] = {"includes": [self.body_field]} + + try: + query_result = self.es.search( + index=index_name, body=search_query) + + # RelSnip: for each document, we concatenate all + # fragments in each document and return as the document. + highlights = [" ".join(hit["highlight"][self.body_field]) + for hit in query_result["hits"]["hits"] if "highlight" in hit] + docs = [parse_field_content(self.body_field, hit["_source"]) + for hit in query_result["hits"]["hits"] if "_source" in hit] + source = copy.deepcopy(query_result) + if self.remove_body_field: + for hit in source["hits"]["hits"]: + if "_source" in hit: + del hit['_source'][self.body_field] + took = query_result["took"] + results = {"took": took, "highlights": highlights, "docs": docs, "source": source} + + except (ConnectionRefusedError, NotFoundError, Exception) as e: + status = False + results["errormsg"] = str(e) + + results["status"] = status + return results diff --git a/neuralqa/retriever/retrieverpool.py b/neuralqa/retriever/retrieverpool.py index 06ba9a3..ac88781 100644 --- a/neuralqa/retriever/retrieverpool.py +++ b/neuralqa/retriever/retrieverpool.py @@ -1,5 +1,5 @@ -from neuralqa.retriever import ElasticSearchRetriever +from neuralqa.retriever import ElasticSearchRetriever, AWSElasticSearchRetriever import logging logger = logging.getLogger(__name__) @@ -17,6 +17,9 @@ def __init__(self, retrievers): if (retriever["type"] == "elasticsearch"): self.retriever_pool[retriever["value"]] = ElasticSearchRetriever( **retriever["connection"]) + if (retriever["type"] == "awselasticsearch"): + self.retriever_pool[retriever["value"]] = AWSElasticSearchRetriever( + **retriever["connection"]) if (retriever["type"] == "solr"): logger.info("We do not yet support Solr retrievers") self.selected_retriever = retrievers["selected"] diff --git a/neuralqa/server/routehandlers.py b/neuralqa/server/routehandlers.py index 755716d..dbed186 100644 --- a/neuralqa/server/routehandlers.py +++ b/neuralqa/server/routehandlers.py @@ -39,6 +39,7 @@ async def get_answers(params: Answer): self.reader_pool.selected_model = params.reader self.retriever_pool.selected_retriever = params.retriever + source = None # print(params.query + " ".join(params.expansionterms)) # answer question based on provided context if (params.retriever == "none" or self.retriever_pool.selected_retriever == None): @@ -69,12 +70,14 @@ async def get_answers(params: Answer): for answer in answers: answer["index"] = i answer_holder.append(answer) + source = query_results['source'] # sort answers by probability answer_holder = sorted( answer_holder, key=lambda k: k['probability'], reverse=True) elapsed_time = time.time() - start_time response = {"answers": answer_holder, + "source": source, "took": elapsed_time} return response diff --git a/neuralqa/server/server_app.py b/neuralqa/server/server_app.py index 38a3422..e925f63 100644 --- a/neuralqa/server/server_app.py +++ b/neuralqa/server/server_app.py @@ -3,7 +3,7 @@ import os -def launch_server(host="127.0.0.1", port=5000, workers=1, reload=False): +def launch_server(host="0.0.0.0", port=5000, workers=1, reload=False): uvicorn.run("neuralqa.server.serve:app", host=host, port=port, workers=workers, log_level="info", reload=reload) diff --git a/requirements.txt b/requirements.txt index dea7850..417f79f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,6 @@ aiofiles fastapi elasticsearch>=7.7.1 pyyaml>=3.13 -spacy \ No newline at end of file +spacy +requests-aws4auth +boto3 \ No newline at end of file From 90abbbe222e6e05d224fe8a05f884521a3d8751b Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Thu, 22 Apr 2021 11:58:14 -0700 Subject: [PATCH 02/19] roche demo week 2 --- neuralqa/config_default.yaml | 37 +++++--------------- neuralqa/retriever/__init__.py | 1 - neuralqa/retriever/elasticsearchretriever.py | 2 +- neuralqa/retriever/retrieverpool.py | 5 +-- neuralqa/server/routehandlers.py | 3 -- neuralqa/server/server_app.py | 2 +- nqa.Dockerfile | 32 +++++++++++++++++ 7 files changed, 44 insertions(+), 38 deletions(-) create mode 100644 nqa.Dockerfile diff --git a/neuralqa/config_default.yaml b/neuralqa/config_default.yaml index 012abd1..77e4a41 100644 --- a/neuralqa/config_default.yaml +++ b/neuralqa/config_default.yaml @@ -61,34 +61,15 @@ retriever: - name: None value: "none" type: "none" - - # - name: Case Law - # value: cases - # type: elasticsearch - # connection: - # host: localhost - # port: 9200 - # username: "" - # password: "" - # body_field: "casebody.data.opinions.text" - # - name: Medical - # value: medical - # host: localhost - # port: 9200 - # username: None - # password: None - # type: elasticsearch - # fields: - # body_field: context - # - name: Supreme Court - # value: supremecourt - # host: localhost - # port: 9200 - # username: None - # password: None - # type: elasticsearch - # fields: - # body_field: casebody + - name: Orpheus + value: orpheus + type: elasticsearch + connection: + host: search-orpheus-pubmed-mfcsldctpmoxvooou3g6dimjbi.us-east-2.es.amazonaws.com + port: 443 + username: "orpheus" + password: "WandaVision2021!" + body_field: "text" readtopn: 0 relsnip: diff --git a/neuralqa/retriever/__init__.py b/neuralqa/retriever/__init__.py index 24f4a54..fa07e6f 100644 --- a/neuralqa/retriever/__init__.py +++ b/neuralqa/retriever/__init__.py @@ -1,5 +1,4 @@ from .retriever import * from .elasticsearchretriever import * -from .awselasticsearchretriever import * from .solrretriever import * from .retrieverpool import * diff --git a/neuralqa/retriever/elasticsearchretriever.py b/neuralqa/retriever/elasticsearchretriever.py index 35fa95f..bf5c28c 100644 --- a/neuralqa/retriever/elasticsearchretriever.py +++ b/neuralqa/retriever/elasticsearchretriever.py @@ -25,7 +25,7 @@ def __init__(self, index_type="elasticsearch", host="localhost", port=9200, user # [{'host': self.host, 'port': self.port, # "username": self.username, "password": self.password}]) self.es = Elasticsearch(hosts=[{"host": self.host, "port": self.port}], - http_auth=(self.username, self.password)) + http_auth=(self.username, self.password), scheme='https') self.isAvailable = self.es.ping() rejected_keys = set(kwargs.keys()) - set(allowed_keys) diff --git a/neuralqa/retriever/retrieverpool.py b/neuralqa/retriever/retrieverpool.py index ac88781..06ba9a3 100644 --- a/neuralqa/retriever/retrieverpool.py +++ b/neuralqa/retriever/retrieverpool.py @@ -1,5 +1,5 @@ -from neuralqa.retriever import ElasticSearchRetriever, AWSElasticSearchRetriever +from neuralqa.retriever import ElasticSearchRetriever import logging logger = logging.getLogger(__name__) @@ -17,9 +17,6 @@ def __init__(self, retrievers): if (retriever["type"] == "elasticsearch"): self.retriever_pool[retriever["value"]] = ElasticSearchRetriever( **retriever["connection"]) - if (retriever["type"] == "awselasticsearch"): - self.retriever_pool[retriever["value"]] = AWSElasticSearchRetriever( - **retriever["connection"]) if (retriever["type"] == "solr"): logger.info("We do not yet support Solr retrievers") self.selected_retriever = retrievers["selected"] diff --git a/neuralqa/server/routehandlers.py b/neuralqa/server/routehandlers.py index dbed186..755716d 100644 --- a/neuralqa/server/routehandlers.py +++ b/neuralqa/server/routehandlers.py @@ -39,7 +39,6 @@ async def get_answers(params: Answer): self.reader_pool.selected_model = params.reader self.retriever_pool.selected_retriever = params.retriever - source = None # print(params.query + " ".join(params.expansionterms)) # answer question based on provided context if (params.retriever == "none" or self.retriever_pool.selected_retriever == None): @@ -70,14 +69,12 @@ async def get_answers(params: Answer): for answer in answers: answer["index"] = i answer_holder.append(answer) - source = query_results['source'] # sort answers by probability answer_holder = sorted( answer_holder, key=lambda k: k['probability'], reverse=True) elapsed_time = time.time() - start_time response = {"answers": answer_holder, - "source": source, "took": elapsed_time} return response diff --git a/neuralqa/server/server_app.py b/neuralqa/server/server_app.py index e925f63..38a3422 100644 --- a/neuralqa/server/server_app.py +++ b/neuralqa/server/server_app.py @@ -3,7 +3,7 @@ import os -def launch_server(host="0.0.0.0", port=5000, workers=1, reload=False): +def launch_server(host="127.0.0.1", port=5000, workers=1, reload=False): uvicorn.run("neuralqa.server.serve:app", host=host, port=port, workers=workers, log_level="info", reload=reload) diff --git a/nqa.Dockerfile b/nqa.Dockerfile new file mode 100644 index 0000000..911371f --- /dev/null +++ b/nqa.Dockerfile @@ -0,0 +1,32 @@ +FROM continuumio/miniconda3 + +RUN conda install -c anaconda python=3.7 +RUN conda install pip +RUN conda install pytorch==1.5.1 torchvision==0.6.1 cpuonly -c pytorch &&\ + conda install -c anaconda tensorflow==2.3.0 &&\ + python -m pip install transformers==3.5.1 &&\ + conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch pyyaml spacy &&\ + python -m pip install numpy==1.18.5 scipy==1.4.1 Keras-Preprocessing==1.1.1 +RUN conda install -c conda-forge boto3 pandas requests scikit-learn scipy flask &&\ + python -m pip install gremlinpython requests_aws4auth + +ADD Dockerfile /root/neuralqa/ +ADD LICENSE /root/neuralqa/ +ADD README.md /root/neuralqa/ +ADD config.yaml /root/neuralqa/ +ADD docker-compose.yml /root/neuralqa/ +ADD docs/ /root/neuralqa/docs +ADD neuralqa/ /root/neuralqa/neuralqa +ADD notes.md /root/neuralqa/ +ADD nqa.Dockerfile /root/neuralqa/ +ADD requirements.txt /root/neuralqa/ +ADD setup.cfg /root/neuralqa/ +ADD setup.py /root/neuralqa/ +ADD tests/ /root/neuralqa/tests +WORKDIR /root/neuralqa +RUN ls && python setup.py install + +COPY neuralqa/config_default.yaml /root/config_default.yaml +ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml + +CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "5000"] From 4924e28555c0518bfcbb462019c8be4a612523fc Mon Sep 17 00:00:00 2001 From: Vishnu Vettrivel Date: Fri, 23 Apr 2021 00:40:40 -0700 Subject: [PATCH 03/19] modifying Dockerfiles --- .gitignore | 5 +++++ Dockerfile | 46 +++++++++++++++++++++++++++++----------------- Dockerfile.orig | 20 ++++++++++++++++++++ nqa.Dockerfile | 32 -------------------------------- 4 files changed, 54 insertions(+), 49 deletions(-) create mode 100644 Dockerfile.orig delete mode 100644 nqa.Dockerfile diff --git a/.gitignore b/.gitignore index 34708e6..dd13c3b 100644 --- a/.gitignore +++ b/.gitignore @@ -135,3 +135,8 @@ dmypy.json # Pyre type checker .pyre/ + +# Elastic Beanstalk Files +.elasticbeanstalk/* +!.elasticbeanstalk/*.cfg.yml +!.elasticbeanstalk/*.global.yml diff --git a/Dockerfile b/Dockerfile index a8ad302..1595f98 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,20 +1,32 @@ -FROM ubuntu:20.04 +FROM continuumio/miniconda3 -COPY . . +RUN conda install -c anaconda python=3.7 +RUN conda install pip +RUN conda install pytorch==1.5.1 torchvision==0.6.1 cpuonly -c pytorch &&\ + conda install -c anaconda tensorflow==2.3.0 &&\ + python -m pip install transformers==3.5.1 &&\ + conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch pyyaml spacy &&\ + python -m pip install numpy==1.18.5 scipy==1.4.1 Keras-Preprocessing==1.1.1 +RUN conda install -c conda-forge boto3 pandas requests scikit-learn scipy flask &&\ + python -m pip install gremlinpython requests_aws4auth -RUN apt-get update && \ - apt-get -y upgrade && \ - apt-get -y install python3 && \ - apt-get -y install python3-pip && \ - pip3 install neuralqa && \ - apt-get -y install wget && \ - wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.8.0-amd64.deb && \ - wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.8.0-amd64.deb.sha512 && \ - shasum -a 512 -c elasticsearch-7.8.0-amd64.deb.sha512 && \ - dpkg -i elasticsearch-7.8.0-amd64.deb && \ - service elasticsearch start && \ - sleep 30 && \ - -EXPOSE 80 +ADD Dockerfile /root/neuralqa/ +ADD LICENSE /root/neuralqa/ +ADD README.md /root/neuralqa/ +#ADD config.yaml /root/neuralqa/ +ADD docker-compose.yml /root/neuralqa/ +ADD docs/ /root/neuralqa/docs +ADD neuralqa/ /root/neuralqa/neuralqa +ADD notes.md /root/neuralqa/ +ADD Dockerfile /root/neuralqa/ +ADD requirements.txt /root/neuralqa/ +ADD setup.cfg /root/neuralqa/ +ADD setup.py /root/neuralqa/ +ADD tests/ /root/neuralqa/tests +WORKDIR /root/neuralqa +RUN ls && python setup.py install -CMD ["neuralqa", "--host", "0.0.0.0", "--port", "80"] \ No newline at end of file +COPY neuralqa/config_default.yaml /root/config_default.yaml +ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml + +CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "5000"] diff --git a/Dockerfile.orig b/Dockerfile.orig new file mode 100644 index 0000000..a8ad302 --- /dev/null +++ b/Dockerfile.orig @@ -0,0 +1,20 @@ +FROM ubuntu:20.04 + +COPY . . + +RUN apt-get update && \ + apt-get -y upgrade && \ + apt-get -y install python3 && \ + apt-get -y install python3-pip && \ + pip3 install neuralqa && \ + apt-get -y install wget && \ + wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.8.0-amd64.deb && \ + wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.8.0-amd64.deb.sha512 && \ + shasum -a 512 -c elasticsearch-7.8.0-amd64.deb.sha512 && \ + dpkg -i elasticsearch-7.8.0-amd64.deb && \ + service elasticsearch start && \ + sleep 30 && \ + +EXPOSE 80 + +CMD ["neuralqa", "--host", "0.0.0.0", "--port", "80"] \ No newline at end of file diff --git a/nqa.Dockerfile b/nqa.Dockerfile deleted file mode 100644 index 911371f..0000000 --- a/nqa.Dockerfile +++ /dev/null @@ -1,32 +0,0 @@ -FROM continuumio/miniconda3 - -RUN conda install -c anaconda python=3.7 -RUN conda install pip -RUN conda install pytorch==1.5.1 torchvision==0.6.1 cpuonly -c pytorch &&\ - conda install -c anaconda tensorflow==2.3.0 &&\ - python -m pip install transformers==3.5.1 &&\ - conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch pyyaml spacy &&\ - python -m pip install numpy==1.18.5 scipy==1.4.1 Keras-Preprocessing==1.1.1 -RUN conda install -c conda-forge boto3 pandas requests scikit-learn scipy flask &&\ - python -m pip install gremlinpython requests_aws4auth - -ADD Dockerfile /root/neuralqa/ -ADD LICENSE /root/neuralqa/ -ADD README.md /root/neuralqa/ -ADD config.yaml /root/neuralqa/ -ADD docker-compose.yml /root/neuralqa/ -ADD docs/ /root/neuralqa/docs -ADD neuralqa/ /root/neuralqa/neuralqa -ADD notes.md /root/neuralqa/ -ADD nqa.Dockerfile /root/neuralqa/ -ADD requirements.txt /root/neuralqa/ -ADD setup.cfg /root/neuralqa/ -ADD setup.py /root/neuralqa/ -ADD tests/ /root/neuralqa/tests -WORKDIR /root/neuralqa -RUN ls && python setup.py install - -COPY neuralqa/config_default.yaml /root/config_default.yaml -ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml - -CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "5000"] From e977b5de3b98b8ffab380c94a25f2f3ad52ed3e6 Mon Sep 17 00:00:00 2001 From: Vishnu Vettrivel Date: Fri, 23 Apr 2021 01:12:32 -0700 Subject: [PATCH 04/19] changing port to 80 --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 1595f98..a92fb38 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,4 +29,6 @@ RUN ls && python setup.py install COPY neuralqa/config_default.yaml /root/config_default.yaml ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml -CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "5000"] +EXPOSE 80 + +CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "80"] From 95330eabab77bd85e284d66b337f956b42e7cc45 Mon Sep 17 00:00:00 2001 From: Vishnu Vettrivel Date: Fri, 23 Apr 2021 02:14:06 -0700 Subject: [PATCH 05/19] added ports section --- docker-compose.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index f6aaa68..fb0771c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,8 @@ version: "3" services: - neuralqa_docker: + neuralqa: build: . + ports: + - "80:80" expose: - 80 From a1f8aacfab3228e9f9ce3b109e9583518742c7aa Mon Sep 17 00:00:00 2001 From: pratacosmin Date: Thu, 23 Sep 2021 19:09:44 +0300 Subject: [PATCH 06/19] change retriever and reader config --- Dockerfile | 5 ++++- neuralqa/config_default.yaml | 15 ++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index a92fb38..a314a84 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,11 +5,14 @@ RUN conda install pip RUN conda install pytorch==1.5.1 torchvision==0.6.1 cpuonly -c pytorch &&\ conda install -c anaconda tensorflow==2.3.0 &&\ python -m pip install transformers==3.5.1 &&\ - conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch pyyaml spacy &&\ + conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch==7.13.1 pyyaml spacy &&\ python -m pip install numpy==1.18.5 scipy==1.4.1 Keras-Preprocessing==1.1.1 RUN conda install -c conda-forge boto3 pandas requests scikit-learn scipy flask &&\ python -m pip install gremlinpython requests_aws4auth +RUN python -m pip install uvicorn[standard] websockets +# RUN python -m pip install websockets + ADD Dockerfile /root/neuralqa/ ADD LICENSE /root/neuralqa/ ADD README.md /root/neuralqa/ diff --git a/neuralqa/config_default.yaml b/neuralqa/config_default.yaml index 77e4a41..1a8a2c6 100644 --- a/neuralqa/config_default.yaml +++ b/neuralqa/config_default.yaml @@ -61,15 +61,15 @@ retriever: - name: None value: "none" type: "none" - - name: Orpheus - value: orpheus + - name: Abstracts + value: pubmed_abstracts type: elasticsearch connection: host: search-orpheus-pubmed-mfcsldctpmoxvooou3g6dimjbi.us-east-2.es.amazonaws.com port: 443 username: "orpheus" password: "WandaVision2021!" - body_field: "text" + body_field: "abstract" readtopn: 0 relsnip: @@ -87,13 +87,10 @@ server: # webserver host and port defaults reader: title: Reader - selected: twmkn9/distilbert-base-uncased-squad2 + selected: ktrapeznikov/biobert_v1.1_pubmed_squad_v2 options: - - name: DistilBERT SQUAD2 - value: twmkn9/distilbert-base-uncased-squad2 - type: distilbert - - name: BERT SQUAD2 - value: deepset/bert-base-cased-squad2 + - name: BioBERT Pubmed SQUAD2 + value: ktrapeznikov/biobert_v1.1_pubmed_squad_v2 type: bert # - name: Medical BERT SQUAD2 # value: /Users/victordibia/Downloads/meddistilbert From 7606074c3156f166b48358c9eb2dcdae916eefd8 Mon Sep 17 00:00:00 2001 From: pratacosmin Date: Wed, 29 Sep 2021 15:22:23 +0300 Subject: [PATCH 07/19] enhance retriever --- neuralqa/config_default.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/neuralqa/config_default.yaml b/neuralqa/config_default.yaml index 1a8a2c6..e76c6ce 100644 --- a/neuralqa/config_default.yaml +++ b/neuralqa/config_default.yaml @@ -70,6 +70,15 @@ retriever: username: "orpheus" password: "WandaVision2021!" body_field: "abstract" + - name: Orpheus + value: orpheus + type: elasticsearch + connection: + host: search-orpheus-pubmed-mfcsldctpmoxvooou3g6dimjbi.us-east-2.es.amazonaws.com + port: 443 + username: "orpheus" + password: "WandaVision2021!" + body_field: "text" readtopn: 0 relsnip: From a033d2f0b18b7b9bde854fcf67fdd8f197f1a192 Mon Sep 17 00:00:00 2001 From: Vishnu Vettrivel Date: Sat, 26 Feb 2022 12:52:45 -0800 Subject: [PATCH 08/19] changing python to 3.6 and tensorflow version --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index a314a84..5e7fe6a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,9 @@ FROM continuumio/miniconda3 -RUN conda install -c anaconda python=3.7 +RUN conda install -c anaconda python=3.6 RUN conda install pip RUN conda install pytorch==1.5.1 torchvision==0.6.1 cpuonly -c pytorch &&\ - conda install -c anaconda tensorflow==2.3.0 &&\ + conda install -c anaconda tensorflow &&\ python -m pip install transformers==3.5.1 &&\ conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch==7.13.1 pyyaml spacy &&\ python -m pip install numpy==1.18.5 scipy==1.4.1 Keras-Preprocessing==1.1.1 From 0fc9f669524554da9e9a8c09f4e7db2b300508f9 Mon Sep 17 00:00:00 2001 From: Vishnu Vettrivel Date: Sat, 26 Feb 2022 20:18:10 -0800 Subject: [PATCH 09/19] changes to move to aws pytorch deep learning container base image --- Dockerfile | 21 ++++++++++----------- Dockerfile.old | 37 +++++++++++++++++++++++++++++++++++++ Dockerfile.orig | 20 -------------------- setup.py | 1 + 4 files changed, 48 insertions(+), 31 deletions(-) create mode 100644 Dockerfile.old delete mode 100644 Dockerfile.orig diff --git a/Dockerfile b/Dockerfile index 5e7fe6a..170eb6f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,16 @@ -FROM continuumio/miniconda3 +FROM 763104351884.dkr.ecr.us-east-2.amazonaws.com/pytorch-inference:1.5.1-gpu-py36-cu101-ubuntu16.04 -RUN conda install -c anaconda python=3.6 -RUN conda install pip -RUN conda install pytorch==1.5.1 torchvision==0.6.1 cpuonly -c pytorch &&\ - conda install -c anaconda tensorflow &&\ - python -m pip install transformers==3.5.1 &&\ - conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch==7.13.1 pyyaml spacy &&\ - python -m pip install numpy==1.18.5 scipy==1.4.1 Keras-Preprocessing==1.1.1 -RUN conda install -c conda-forge boto3 pandas requests scikit-learn scipy flask &&\ - python -m pip install gremlinpython requests_aws4auth +RUN conda install -c anaconda tensorflow +RUN python -m pip install transformers==3.5.1 +RUN conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch==7.13.1 +RUN conda install -c conda-forge flask spacy plac==0.9.6 +RUN python -m pip install numpy==1.19.2 scipy==1.4.1 Keras-Preprocessing==1.1.1 +RUN conda install -c conda-forge boto3 requests pandas scikit-learn +RUN python -m pip install gremlinpython requests_aws4auth RUN python -m pip install uvicorn[standard] websockets -# RUN python -m pip install websockets +RUN python -m pip install thinc[tensorflow,torch] --pre +RUN python -m pip install --upgrade tensorflow ADD Dockerfile /root/neuralqa/ ADD LICENSE /root/neuralqa/ diff --git a/Dockerfile.old b/Dockerfile.old new file mode 100644 index 0000000..5e7fe6a --- /dev/null +++ b/Dockerfile.old @@ -0,0 +1,37 @@ +FROM continuumio/miniconda3 + +RUN conda install -c anaconda python=3.6 +RUN conda install pip +RUN conda install pytorch==1.5.1 torchvision==0.6.1 cpuonly -c pytorch &&\ + conda install -c anaconda tensorflow &&\ + python -m pip install transformers==3.5.1 &&\ + conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch==7.13.1 pyyaml spacy &&\ + python -m pip install numpy==1.18.5 scipy==1.4.1 Keras-Preprocessing==1.1.1 +RUN conda install -c conda-forge boto3 pandas requests scikit-learn scipy flask &&\ + python -m pip install gremlinpython requests_aws4auth + +RUN python -m pip install uvicorn[standard] websockets +# RUN python -m pip install websockets + +ADD Dockerfile /root/neuralqa/ +ADD LICENSE /root/neuralqa/ +ADD README.md /root/neuralqa/ +#ADD config.yaml /root/neuralqa/ +ADD docker-compose.yml /root/neuralqa/ +ADD docs/ /root/neuralqa/docs +ADD neuralqa/ /root/neuralqa/neuralqa +ADD notes.md /root/neuralqa/ +ADD Dockerfile /root/neuralqa/ +ADD requirements.txt /root/neuralqa/ +ADD setup.cfg /root/neuralqa/ +ADD setup.py /root/neuralqa/ +ADD tests/ /root/neuralqa/tests +WORKDIR /root/neuralqa +RUN ls && python setup.py install + +COPY neuralqa/config_default.yaml /root/config_default.yaml +ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml + +EXPOSE 80 + +CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "80"] diff --git a/Dockerfile.orig b/Dockerfile.orig deleted file mode 100644 index a8ad302..0000000 --- a/Dockerfile.orig +++ /dev/null @@ -1,20 +0,0 @@ -FROM ubuntu:20.04 - -COPY . . - -RUN apt-get update && \ - apt-get -y upgrade && \ - apt-get -y install python3 && \ - apt-get -y install python3-pip && \ - pip3 install neuralqa && \ - apt-get -y install wget && \ - wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.8.0-amd64.deb && \ - wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.8.0-amd64.deb.sha512 && \ - shasum -a 512 -c elasticsearch-7.8.0-amd64.deb.sha512 && \ - dpkg -i elasticsearch-7.8.0-amd64.deb && \ - service elasticsearch start && \ - sleep 30 && \ - -EXPOSE 80 - -CMD ["neuralqa", "--host", "0.0.0.0", "--port", "80"] \ No newline at end of file diff --git a/setup.py b/setup.py index 4712fe4..e53c350 100644 --- a/setup.py +++ b/setup.py @@ -36,6 +36,7 @@ def package_files(directory): 'aiofiles', 'uvicorn', 'numpy', + 'plac==0.9.6', 'tensorflow>=2.1.0', 'torch', 'torchvision', From b3fb4b653645b449f8578cf29d5bbb6c1377f85a Mon Sep 17 00:00:00 2001 From: Vishnu Vettrivel Date: Sun, 27 Feb 2022 09:28:53 -0800 Subject: [PATCH 10/19] fixing cuda environment variables --- .dockerignore | 4 ++++ Dockerfile | 3 +++ 2 files changed, 7 insertions(+) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b9bd98b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,4 @@ +# Elastic Beanstalk Files +.elasticbeanstalk/* +.git +.gitignore \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 170eb6f..a76cb31 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,6 +30,9 @@ RUN ls && python setup.py install COPY neuralqa/config_default.yaml /root/config_default.yaml ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib:/usr/local/cuda/lib64 +ENV NVIDIA_VISIBLE_DEVICES all +ENV NVIDIA_DRIVER_CAPABILITIES compute,utility EXPOSE 80 From 783316c612272068aad26f1d47c654d2db60a127 Mon Sep 17 00:00:00 2001 From: Vishnu Vettrivel Date: Sun, 27 Feb 2022 10:16:08 -0800 Subject: [PATCH 11/19] fixing cuda issues --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index a76cb31..54c287d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,8 @@ RUN conda install -c conda-forge boto3 requests pandas scikit-learn RUN python -m pip install gremlinpython requests_aws4auth RUN python -m pip install uvicorn[standard] websockets RUN python -m pip install thinc[tensorflow,torch] --pre -RUN python -m pip install --upgrade tensorflow +RUN conda install -c conda-forge cudatoolkit +RUN python -m pip install tensorflow==2.3.0 ADD Dockerfile /root/neuralqa/ ADD LICENSE /root/neuralqa/ From 55a5266a113195b80251dd40d3a176c1a3556221 Mon Sep 17 00:00:00 2001 From: Vishnu Vettrivel Date: Sun, 27 Feb 2022 10:39:06 -0800 Subject: [PATCH 12/19] fixing LD_LIBRARY_PATH to include cuda/compat folder --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 54c287d..dfd7724 100644 --- a/Dockerfile +++ b/Dockerfile @@ -31,7 +31,7 @@ RUN ls && python setup.py install COPY neuralqa/config_default.yaml /root/config_default.yaml ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib:/usr/local/cuda/lib64 +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/compat:/usr/local/cuda/lib:/usr/local/cuda/lib64 ENV NVIDIA_VISIBLE_DEVICES all ENV NVIDIA_DRIVER_CAPABILITIES compute,utility From a6ffb87aaf6887363ca004bb8b4a086386b3effe Mon Sep 17 00:00:00 2001 From: Vishnu Vettrivel Date: Sun, 27 Feb 2022 10:51:55 -0800 Subject: [PATCH 13/19] adding fix for CUDA_ERROR_NO_DEVICE --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index dfd7724..4186686 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,6 +34,8 @@ ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/compat:/usr/local/cuda/lib:/usr/local/cuda/lib64 ENV NVIDIA_VISIBLE_DEVICES all ENV NVIDIA_DRIVER_CAPABILITIES compute,utility +ENV CUDA_VISIBLE_DEVICES 0,1 + EXPOSE 80 From 99e084bd072797fb266ca6af12f3cc0f2d927f2f Mon Sep 17 00:00:00 2001 From: Vishnu Vettrivel Date: Sun, 27 Feb 2022 11:27:13 -0800 Subject: [PATCH 14/19] adding nvidia drivers --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 4186686..798f59b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,6 +12,7 @@ RUN python -m pip install uvicorn[standard] websockets RUN python -m pip install thinc[tensorflow,torch] --pre RUN conda install -c conda-forge cudatoolkit RUN python -m pip install tensorflow==2.3.0 +RUN apt-get install -y nvidia-headless-495 nvidia-modprobe ADD Dockerfile /root/neuralqa/ ADD LICENSE /root/neuralqa/ From dfb088b507643cb9a6f02b7f70876ee8b8db9ba8 Mon Sep 17 00:00:00 2001 From: Vishnu Vettrivel Date: Sun, 20 Mar 2022 15:22:25 -0700 Subject: [PATCH 15/19] adding run command to include gpus --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index fb0771c..826db13 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,6 +2,7 @@ version: "3" services: neuralqa: build: . + command: --gpus all ports: - "80:80" expose: From 71f83c5dd695752a35ba9ef341cdc9058ed62e72 Mon Sep 17 00:00:00 2001 From: Vishnu Vettrivel Date: Sun, 20 Mar 2022 15:56:59 -0700 Subject: [PATCH 16/19] reverting changes to docker compose file --- docker-compose.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 826db13..fb0771c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,6 @@ version: "3" services: neuralqa: build: . - command: --gpus all ports: - "80:80" expose: From 685a1d9909b1c7c5af55c8d7a43a9466203fbe09 Mon Sep 17 00:00:00 2001 From: Vishnu Vettrivel Date: Sun, 20 Mar 2022 16:22:40 -0700 Subject: [PATCH 17/19] adding GPU access with Compose --- docker-compose.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index fb0771c..9179842 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,6 +2,11 @@ version: "3" services: neuralqa: build: . + deploy: + resources: + reservations: + devices: + - capabilities: [gpu] ports: - "80:80" expose: From 9c80f24fc566be8871f3c2da62efe4f4cbb4543a Mon Sep 17 00:00:00 2001 From: Bogdan-Flavius Budihala Date: Wed, 14 Sep 2022 11:37:48 +0300 Subject: [PATCH 18/19] WAP-170 | Migrated to AWS OpenSearch cluster --- .gitignore | 1 + neuralqa/config_default.yaml | 8 ++------ neuralqa/retriever/__init__.py | 1 + neuralqa/retriever/awselasticsearchretriever.py | 4 ++-- neuralqa/retriever/retrieverpool.py | 4 ++-- neuralqa/server/serve.py | 2 +- requirements.txt | 2 +- tests/retriever/test_retriever.py | 4 ++-- 8 files changed, 12 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index dd13c3b..d16a5a2 100644 --- a/.gitignore +++ b/.gitignore @@ -140,3 +140,4 @@ dmypy.json .elasticbeanstalk/* !.elasticbeanstalk/*.cfg.yml !.elasticbeanstalk/*.global.yml +.idea/ diff --git a/neuralqa/config_default.yaml b/neuralqa/config_default.yaml index e76c6ce..369207a 100644 --- a/neuralqa/config_default.yaml +++ b/neuralqa/config_default.yaml @@ -65,19 +65,15 @@ retriever: value: pubmed_abstracts type: elasticsearch connection: - host: search-orpheus-pubmed-mfcsldctpmoxvooou3g6dimjbi.us-east-2.es.amazonaws.com + host: vpc-neptune-es-opxf6xkhk6ra7sfhybnkvxydtu.us-east-2.es.amazonaws.com port: 443 - username: "orpheus" - password: "WandaVision2021!" body_field: "abstract" - name: Orpheus value: orpheus type: elasticsearch connection: - host: search-orpheus-pubmed-mfcsldctpmoxvooou3g6dimjbi.us-east-2.es.amazonaws.com + host: vpc-neptune-es-opxf6xkhk6ra7sfhybnkvxydtu.us-east-2.es.amazonaws.com port: 443 - username: "orpheus" - password: "WandaVision2021!" body_field: "text" readtopn: 0 diff --git a/neuralqa/retriever/__init__.py b/neuralqa/retriever/__init__.py index fa07e6f..24f4a54 100644 --- a/neuralqa/retriever/__init__.py +++ b/neuralqa/retriever/__init__.py @@ -1,4 +1,5 @@ from .retriever import * from .elasticsearchretriever import * +from .awselasticsearchretriever import * from .solrretriever import * from .retrieverpool import * diff --git a/neuralqa/retriever/awselasticsearchretriever.py b/neuralqa/retriever/awselasticsearchretriever.py index 552b01e..b5e2cad 100644 --- a/neuralqa/retriever/awselasticsearchretriever.py +++ b/neuralqa/retriever/awselasticsearchretriever.py @@ -25,8 +25,8 @@ def __init__(self, host, index_type="elasticsearch", port=443, **kwargs): self.port = port allowed_keys = list(self.__dict__.keys()) self.__dict__.update((k, v) for k, v in kwargs.items() if k in allowed_keys) - assert self.body_field in self.return_fields - assert any(self.body_field in f for f in self.search_fields) + # assert self.body_field in self.return_fields + # assert any(self.body_field in f for f in self.search_fields) credentials = boto3.Session().get_credentials() awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service, diff --git a/neuralqa/retriever/retrieverpool.py b/neuralqa/retriever/retrieverpool.py index 06ba9a3..8985e8f 100644 --- a/neuralqa/retriever/retrieverpool.py +++ b/neuralqa/retriever/retrieverpool.py @@ -1,5 +1,5 @@ -from neuralqa.retriever import ElasticSearchRetriever +from neuralqa.retriever import AWSElasticSearchRetriever import logging logger = logging.getLogger(__name__) @@ -15,7 +15,7 @@ def __init__(self, retrievers): "Duplicate retriever value : {} ".format(retriever["value"])) if (retriever["type"] == "elasticsearch"): - self.retriever_pool[retriever["value"]] = ElasticSearchRetriever( + self.retriever_pool[retriever["value"]] = AWSElasticSearchRetriever( **retriever["connection"]) if (retriever["type"] == "solr"): logger.info("We do not yet support Solr retrievers") diff --git a/neuralqa/server/serve.py b/neuralqa/server/serve.py index 1d57ac3..1627bff 100644 --- a/neuralqa/server/serve.py +++ b/neuralqa/server/serve.py @@ -2,7 +2,7 @@ from neuralqa.reader import BERTReader, ReaderPool from neuralqa.server.routehandlers import Handler -from neuralqa.retriever import ElasticSearchRetriever, RetrieverPool +from neuralqa.retriever import AWSElasticSearchRetriever, RetrieverPool from neuralqa.utils import ConfigParser from neuralqa.expander import ExpanderPool diff --git a/requirements.txt b/requirements.txt index 417f79f..ca4b737 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ transformers>=2.9.1 uvicorn aiofiles fastapi -elasticsearch>=7.7.1 +elasticsearch<=7.13.4 pyyaml>=3.13 spacy requests-aws4auth diff --git a/tests/retriever/test_retriever.py b/tests/retriever/test_retriever.py index 5dc9f15..b854031 100644 --- a/tests/retriever/test_retriever.py +++ b/tests/retriever/test_retriever.py @@ -1,11 +1,11 @@ -from neuralqa.retriever import ElasticSearchRetriever +from neuralqa.retriever import AWSElasticSearchRetriever from neuralqa.utils import ConfigParser def test_elasticserch_retriever(): app_config = ConfigParser("config.yaml") rkwargs = app_config.config["retriever"]["options"][1]["connection"] - retriever = ElasticSearchRetriever(**rkwargs) + retriever = AWSElasticSearchRetriever(**rkwargs) results = retriever.run_query( "cases", "what is the punishment for arson crime") assert results != None From 44f673d807126ee509a71a8d9beb23161e2de260 Mon Sep 17 00:00:00 2001 From: Bogdan-Flavius Budihala Date: Wed, 12 Oct 2022 09:47:04 +0200 Subject: [PATCH 19/19] WAP-170 | Fixed expiring credentials --- Dockerfile | 10 +++--- docker-compose.yml | 7 +--- .../retriever/awselasticsearchretriever.py | 33 +++++++++++-------- neuralqa/server/routehandlers.py | 3 +- neuralqa/server/routemodels.py | 2 +- neuralqa/utils/decorators.py | 14 ++++++++ 6 files changed, 42 insertions(+), 27 deletions(-) create mode 100644 neuralqa/utils/decorators.py diff --git a/Dockerfile b/Dockerfile index 798f59b..2fdbe43 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,7 @@ FROM 763104351884.dkr.ecr.us-east-2.amazonaws.com/pytorch-inference:1.5.1-gpu-py36-cu101-ubuntu16.04 - RUN conda install -c anaconda tensorflow -RUN python -m pip install transformers==3.5.1 +RUN python -m pip install transformers==3.5.1 RUN conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch==7.13.1 RUN conda install -c conda-forge flask spacy plac==0.9.6 RUN python -m pip install numpy==1.19.2 scipy==1.4.1 Keras-Preprocessing==1.1.1 @@ -12,7 +11,10 @@ RUN python -m pip install uvicorn[standard] websockets RUN python -m pip install thinc[tensorflow,torch] --pre RUN conda install -c conda-forge cudatoolkit RUN python -m pip install tensorflow==2.3.0 -RUN apt-get install -y nvidia-headless-495 nvidia-modprobe +#RUN apt-get install -y nvidia-headless-495 nvidia-modprobe +RUN apt-get update -y --allow-unauthenticated +RUN apt-get install -y --allow-unauthenticated nvidia-headless-495 nvidia-modprobe + ADD Dockerfile /root/neuralqa/ ADD LICENSE /root/neuralqa/ @@ -40,4 +42,4 @@ ENV CUDA_VISIBLE_DEVICES 0,1 EXPOSE 80 -CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "80"] +CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "80"] \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 9179842..ae79586 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,12 +2,7 @@ version: "3" services: neuralqa: build: . - deploy: - resources: - reservations: - devices: - - capabilities: [gpu] ports: - "80:80" expose: - - 80 + - 80 \ No newline at end of file diff --git a/neuralqa/retriever/awselasticsearchretriever.py b/neuralqa/retriever/awselasticsearchretriever.py index b5e2cad..75bfd60 100644 --- a/neuralqa/retriever/awselasticsearchretriever.py +++ b/neuralqa/retriever/awselasticsearchretriever.py @@ -1,4 +1,5 @@ import boto3 +from elasticsearch.exceptions import AuthorizationException from requests_aws4auth import AWS4Auth import copy from neuralqa.retriever import Retriever, ElasticSearchRetriever @@ -8,6 +9,8 @@ import traceback +from neuralqa.utils.decorators import retry_on_exception + logger = logging.getLogger(__name__) region = 'us-east-2' service = 'es' @@ -27,25 +30,26 @@ def __init__(self, host, index_type="elasticsearch", port=443, **kwargs): self.__dict__.update((k, v) for k, v in kwargs.items() if k in allowed_keys) # assert self.body_field in self.return_fields # assert any(self.body_field in f for f in self.search_fields) + self.construct_es_instance() + rejected_keys = set(kwargs.keys()) - set(allowed_keys) + if rejected_keys: + raise ValueError( + "Invalid arguments in ElasticSearchRetriever constructor:{}".format(rejected_keys)) + + def construct_es_instance(self): credentials = boto3.Session().get_credentials() - awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service, - session_token=credentials.token) + awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service, session_token=credentials.token) self.es = Elasticsearch( hosts=[{"host": self.host, "port": self.port}], http_auth=awsauth, - use_ssl = True, - verify_certs = True, - connection_class = RequestsHttpConnection, + use_ssl=True, + verify_certs=True, + connection_class=RequestsHttpConnection, ) self.isAvailable = self.es.ping() - rejected_keys = set(kwargs.keys()) - set(allowed_keys) - - if rejected_keys: - raise ValueError( - "Invalid arguments in ElasticSearchRetriever constructor:{}".format(rejected_keys)) - + @retry_on_exception(exception=AuthorizationException) def run_query(self, index_name, search_query, max_documents=5, fragment_size=100, relsnip=True, num_fragments=5, highlight_tags=True): @@ -80,8 +84,7 @@ def run_query(self, index_name, search_query, max_documents=5, fragment_size=100 # search_query["_source"] = {"includes": [self.body_field]} try: - query_result = self.es.search( - index=index_name, body=search_query) + query_result = self.es.search(index=index_name, body=search_query) # RelSnip: for each document, we concatenate all # fragments in each document and return as the document. @@ -96,7 +99,9 @@ def run_query(self, index_name, search_query, max_documents=5, fragment_size=100 del hit['_source'][self.body_field] took = query_result["took"] results = {"took": took, "highlights": highlights, "docs": docs, "source": source} - + except AuthorizationException: + self.construct_es_instance() + raise except (ConnectionRefusedError, NotFoundError, Exception) as e: status = False results["errormsg"] = str(e) diff --git a/neuralqa/server/routehandlers.py b/neuralqa/server/routehandlers.py index 755716d..9b21960 100644 --- a/neuralqa/server/routehandlers.py +++ b/neuralqa/server/routehandlers.py @@ -51,8 +51,7 @@ async def get_answers(params: Answer): else: # add query expansion terms to query if any - retriever_query = params.query + \ - " ".join(params.expansionterms) + retriever_query = params.query + " ".join(params.expansionterms) num_fragments = 5 query_results = self.retriever_pool.retriever.run_query(params.retriever, retriever_query, max_documents=params.max_documents, fragment_size=params.fragment_size, diff --git a/neuralqa/server/routemodels.py b/neuralqa/server/routemodels.py index 8ead518..73f178f 100644 --- a/neuralqa/server/routemodels.py +++ b/neuralqa/server/routemodels.py @@ -23,7 +23,7 @@ class Answer(BaseModel): reader: str = None relsnip: bool = True expander: Optional[str] = None - expansionterms: Optional[list] = None + expansionterms: Optional[list] = [] retriever: Optional[str] = "manual" diff --git a/neuralqa/utils/decorators.py b/neuralqa/utils/decorators.py new file mode 100644 index 0000000..f9ba522 --- /dev/null +++ b/neuralqa/utils/decorators.py @@ -0,0 +1,14 @@ +import functools + + +def retry_on_exception(exception): + def actual_decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except exception: + return func(*args, **kwargs) + return wrapper + + return actual_decorator