From bcdc982d7e040af5616b347c89a50404b616f025 Mon Sep 17 00:00:00 2001
From: Alex Thomas <alexander.n.thomas@gmail.com>
Date: Mon, 22 Feb 2021 14:55:16 -0800
Subject: [PATCH 01/19] adapted for use with orpheus

---
 neuralqa/retriever/__init__.py                |   1 +
 .../retriever/awselasticsearchretriever.py    | 105 ++++++++++++++++++
 neuralqa/retriever/retrieverpool.py           |   5 +-
 neuralqa/server/routehandlers.py              |   3 +
 neuralqa/server/server_app.py                 |   2 +-
 requirements.txt                              |   4 +-
 6 files changed, 117 insertions(+), 3 deletions(-)
 create mode 100644 neuralqa/retriever/awselasticsearchretriever.py

diff --git a/neuralqa/retriever/__init__.py b/neuralqa/retriever/__init__.py
index fa07e6f..24f4a54 100644
--- a/neuralqa/retriever/__init__.py
+++ b/neuralqa/retriever/__init__.py
@@ -1,4 +1,5 @@
 from .retriever import *
 from .elasticsearchretriever import *
+from .awselasticsearchretriever import *
 from .solrretriever import *
 from .retrieverpool import *
diff --git a/neuralqa/retriever/awselasticsearchretriever.py b/neuralqa/retriever/awselasticsearchretriever.py
new file mode 100644
index 0000000..552b01e
--- /dev/null
+++ b/neuralqa/retriever/awselasticsearchretriever.py
@@ -0,0 +1,105 @@
+import boto3
+from requests_aws4auth import AWS4Auth
+import copy
+from neuralqa.retriever import Retriever, ElasticSearchRetriever
+from neuralqa.utils import parse_field_content
+from elasticsearch import Elasticsearch, ConnectionError, NotFoundError, RequestsHttpConnection
+import logging
+
+import traceback
+
+logger = logging.getLogger(__name__)
+region = 'us-east-2'
+service = 'es'
+
+
+class AWSElasticSearchRetriever(ElasticSearchRetriever):
+    """Elasticsearch retriever for AWS-hosted domains, using SigV4-signed requests."""
+
+    def __init__(self, host, index_type="elasticsearch", port=443, **kwargs):
+        """Validate the configuration, then connect to the cluster at host:port.
+
+        Keyword arguments may override any attribute already set at that point
+        (body_field, search_fields, return_fields, remove_body_field, host, port and
+        whatever Retriever.__init__ sets). Raises ValueError for any other keyword or
+        an inconsistent field configuration, RuntimeError without AWS credentials.
+        """
+        Retriever.__init__(self, index_type)
+
+        self.body_field = ""
+        self.search_fields = []
+        self.return_fields = []
+        self.remove_body_field = True
+        self.host = host
+        self.port = port
+        allowed_keys = set(self.__dict__)
+        # Reject unknown options before doing any credential lookup or network I/O.
+        rejected_keys = set(kwargs) - allowed_keys
+        if rejected_keys:
+            raise ValueError(
+                "Invalid arguments in AWSElasticSearchRetriever constructor:{}".format(rejected_keys))
+        self.__dict__.update(kwargs)
+        # Raise instead of assert so the checks survive `python -O`.
+        if self.body_field not in self.return_fields:
+            raise ValueError("body_field must be listed in return_fields")
+        if not any(self.body_field in f for f in self.search_fields):
+            raise ValueError("body_field must appear in at least one of search_fields")
+
+        credentials = boto3.Session().get_credentials()
+        if credentials is None:
+            raise RuntimeError("No AWS credentials found for signing Elasticsearch requests")
+        # Requests are signed (SigV4) for the module-level region and service.
+        awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service,
+                           session_token=credentials.token)
+        self.es = Elasticsearch(
+            hosts=[{"host": self.host, "port": self.port}],
+            http_auth=awsauth,
+            use_ssl=True,
+            verify_certs=True,
+            connection_class=RequestsHttpConnection,
+        )
+        self.isAvailable = self.es.ping()
+
+    def run_query(self, index_name, search_query, max_documents=5, fragment_size=100, relsnip=True, num_fragments=5,
+                  highlight_tags=True):
+        """Run a multi_match search for search_query against index_name.
+
+        Returns a dict that always has "status"; on success it also has "took",
+        "highlights", "docs" and "source", on failure "errormsg". When relsnip is
+        true, highlighting of body_field is requested (fragment_size and
+        num_fragments control it; highlight_tags=False blanks the wrapping tags).
+        """
+        body = {"_source": self.return_fields,
+                "query": {"multi_match": {"query": search_query, "fields": self.search_fields}},
+                "size": max_documents}
+        if relsnip:
+            tags = {} if highlight_tags else {"pre_tags": [""], "post_tags": [""]}
+            body["highlight"] = {
+                "fragment_size": fragment_size,
+                "fields": {self.body_field: tags},
+                "number_of_fragments": num_fragments,
+            }
+
+        try:
+            query_result = self.es.search(index=index_name, body=body)
+            hits = query_result["hits"]["hits"]
+            # RelSnip: all highlight fragments of a hit are joined into one passage.
+            highlights = [" ".join(hit["highlight"][self.body_field])
+                          for hit in hits if "highlight" in hit]
+            docs = [parse_field_content(self.body_field, hit["_source"])
+                    for hit in hits if "_source" in hit]
+            # Copy the raw response so stripping the body text leaves query_result intact.
+            source = copy.deepcopy(query_result)
+            if self.remove_body_field:
+                for hit in source["hits"]["hits"]:
+                    if "_source" in hit:
+                        # pop() rather than del: a hit lacking the key must not fail the query.
+                        hit["_source"].pop(self.body_field, None)
+            results = {"took": query_result["took"], "highlights": highlights,
+                       "docs": docs, "source": source}
+        except Exception as e:  # boundary: any failure is reported through the result dict
+            logger.exception("Elasticsearch query on index %s failed", index_name)
+            return {"errormsg": str(e), "status": False}
+
+        results["status"] = True
+        return results
diff --git a/neuralqa/retriever/retrieverpool.py b/neuralqa/retriever/retrieverpool.py
index 06ba9a3..ac88781 100644
--- a/neuralqa/retriever/retrieverpool.py
+++ b/neuralqa/retriever/retrieverpool.py
@@ -1,5 +1,5 @@
 
-from neuralqa.retriever import ElasticSearchRetriever
+from neuralqa.retriever import ElasticSearchRetriever, AWSElasticSearchRetriever
 import logging
 
 logger = logging.getLogger(__name__)
@@ -17,6 +17,9 @@ def __init__(self, retrievers):
             if (retriever["type"] == "elasticsearch"):
                 self.retriever_pool[retriever["value"]] = ElasticSearchRetriever(
                     **retriever["connection"])
+            if (retriever["type"] == "awselasticsearch"):
+                self.retriever_pool[retriever["value"]] = AWSElasticSearchRetriever(
+                    **retriever["connection"])
             if (retriever["type"] == "solr"):
                 logger.info("We do not yet support Solr retrievers")
         self.selected_retriever = retrievers["selected"]
diff --git a/neuralqa/server/routehandlers.py b/neuralqa/server/routehandlers.py
index 755716d..dbed186 100644
--- a/neuralqa/server/routehandlers.py
+++ b/neuralqa/server/routehandlers.py
@@ -39,6 +39,7 @@ async def get_answers(params: Answer):
             self.reader_pool.selected_model = params.reader
             self.retriever_pool.selected_retriever = params.retriever
 
+            source = None
             # print(params.query + " ".join(params.expansionterms))
             # answer question based on provided context
             if (params.retriever == "none" or self.retriever_pool.selected_retriever == None):
@@ -69,12 +70,14 @@ async def get_answers(params: Answer):
                         for answer in answers:
                             answer["index"] = i
                             answer_holder.append(answer)
+                    source = query_results['source']
 
                 # sort answers by probability
                 answer_holder = sorted(
                     answer_holder, key=lambda k: k['probability'], reverse=True)
             elapsed_time = time.time() - start_time
             response = {"answers": answer_holder,
+                        "source": source,
                         "took": elapsed_time}
             return response
 
diff --git a/neuralqa/server/server_app.py b/neuralqa/server/server_app.py
index 38a3422..e925f63 100644
--- a/neuralqa/server/server_app.py
+++ b/neuralqa/server/server_app.py
@@ -3,7 +3,7 @@
 import os
 
 
-def launch_server(host="127.0.0.1", port=5000, workers=1, reload=False):
+def launch_server(host="0.0.0.0", port=5000, workers=1, reload=False):
     uvicorn.run("neuralqa.server.serve:app", host=host, port=port, workers=workers,
                 log_level="info", reload=reload)
 
diff --git a/requirements.txt b/requirements.txt
index dea7850..417f79f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,6 @@ aiofiles
 fastapi
 elasticsearch>=7.7.1
 pyyaml>=3.13 
-spacy
\ No newline at end of file
+spacy
+requests-aws4auth
+boto3
\ No newline at end of file

From 90abbbe222e6e05d224fe8a05f884521a3d8751b Mon Sep 17 00:00:00 2001
From: Alex Thomas <alexander.n.thomas@gmail.com>
Date: Thu, 22 Apr 2021 11:58:14 -0700
Subject: [PATCH 02/19] roche demo week 2

---
 neuralqa/config_default.yaml                 | 37 +++++---------------
 neuralqa/retriever/__init__.py               |  1 -
 neuralqa/retriever/elasticsearchretriever.py |  2 +-
 neuralqa/retriever/retrieverpool.py          |  5 +--
 neuralqa/server/routehandlers.py             |  3 --
 neuralqa/server/server_app.py                |  2 +-
 nqa.Dockerfile                               | 32 +++++++++++++++++
 7 files changed, 44 insertions(+), 38 deletions(-)
 create mode 100644 nqa.Dockerfile

diff --git a/neuralqa/config_default.yaml b/neuralqa/config_default.yaml
index 012abd1..77e4a41 100644
--- a/neuralqa/config_default.yaml
+++ b/neuralqa/config_default.yaml
@@ -61,34 +61,15 @@ retriever:
     - name: None
       value: "none"
       type: "none"
-
-    # - name: Case Law
-    #   value: cases
-    #   type: elasticsearch
-    #   connection:
-    #     host: localhost
-    #     port: 9200
-    #     username: ""
-    #     password: ""
-    #     body_field: "casebody.data.opinions.text"
-    # - name: Medical
-    #   value: medical
-    #   host: localhost
-    #   port: 9200
-    #   username: None
-    #   password: None
-    #   type: elasticsearch
-    #   fields:
-    #     body_field: context
-    # - name: Supreme Court
-    #   value: supremecourt
-    #   host: localhost
-    #   port: 9200
-    #   username: None
-    #   password: None
-    #   type: elasticsearch
-    #   fields:
-    #     body_field: casebody
+    - name: Orpheus
+      value: orpheus
+      type: elasticsearch
+      connection:
+        host: search-orpheus-pubmed-mfcsldctpmoxvooou3g6dimjbi.us-east-2.es.amazonaws.com
+        port: 443
+        username: "orpheus"
+        password: "WandaVision2021!"
+        body_field: "text"
   readtopn: 0
 
 relsnip:
diff --git a/neuralqa/retriever/__init__.py b/neuralqa/retriever/__init__.py
index 24f4a54..fa07e6f 100644
--- a/neuralqa/retriever/__init__.py
+++ b/neuralqa/retriever/__init__.py
@@ -1,5 +1,4 @@
 from .retriever import *
 from .elasticsearchretriever import *
-from .awselasticsearchretriever import *
 from .solrretriever import *
 from .retrieverpool import *
diff --git a/neuralqa/retriever/elasticsearchretriever.py b/neuralqa/retriever/elasticsearchretriever.py
index 35fa95f..bf5c28c 100644
--- a/neuralqa/retriever/elasticsearchretriever.py
+++ b/neuralqa/retriever/elasticsearchretriever.py
@@ -25,7 +25,7 @@ def __init__(self, index_type="elasticsearch", host="localhost", port=9200, user
         #     [{'host': self.host, 'port': self.port,
         #       "username": self.username, "password": self.password}])
         self.es = Elasticsearch(hosts=[{"host": self.host, "port": self.port}],
-                                http_auth=(self.username, self.password))
+                                http_auth=(self.username, self.password), scheme='https')
         self.isAvailable = self.es.ping()
 
         rejected_keys = set(kwargs.keys()) - set(allowed_keys)
diff --git a/neuralqa/retriever/retrieverpool.py b/neuralqa/retriever/retrieverpool.py
index ac88781..06ba9a3 100644
--- a/neuralqa/retriever/retrieverpool.py
+++ b/neuralqa/retriever/retrieverpool.py
@@ -1,5 +1,5 @@
 
-from neuralqa.retriever import ElasticSearchRetriever, AWSElasticSearchRetriever
+from neuralqa.retriever import ElasticSearchRetriever
 import logging
 
 logger = logging.getLogger(__name__)
@@ -17,9 +17,6 @@ def __init__(self, retrievers):
             if (retriever["type"] == "elasticsearch"):
                 self.retriever_pool[retriever["value"]] = ElasticSearchRetriever(
                     **retriever["connection"])
-            if (retriever["type"] == "awselasticsearch"):
-                self.retriever_pool[retriever["value"]] = AWSElasticSearchRetriever(
-                    **retriever["connection"])
             if (retriever["type"] == "solr"):
                 logger.info("We do not yet support Solr retrievers")
         self.selected_retriever = retrievers["selected"]
diff --git a/neuralqa/server/routehandlers.py b/neuralqa/server/routehandlers.py
index dbed186..755716d 100644
--- a/neuralqa/server/routehandlers.py
+++ b/neuralqa/server/routehandlers.py
@@ -39,7 +39,6 @@ async def get_answers(params: Answer):
             self.reader_pool.selected_model = params.reader
             self.retriever_pool.selected_retriever = params.retriever
 
-            source = None
             # print(params.query + " ".join(params.expansionterms))
             # answer question based on provided context
             if (params.retriever == "none" or self.retriever_pool.selected_retriever == None):
@@ -70,14 +69,12 @@ async def get_answers(params: Answer):
                         for answer in answers:
                             answer["index"] = i
                             answer_holder.append(answer)
-                    source = query_results['source']
 
                 # sort answers by probability
                 answer_holder = sorted(
                     answer_holder, key=lambda k: k['probability'], reverse=True)
             elapsed_time = time.time() - start_time
             response = {"answers": answer_holder,
-                        "source": source,
                         "took": elapsed_time}
             return response
 
diff --git a/neuralqa/server/server_app.py b/neuralqa/server/server_app.py
index e925f63..38a3422 100644
--- a/neuralqa/server/server_app.py
+++ b/neuralqa/server/server_app.py
@@ -3,7 +3,7 @@
 import os
 
 
-def launch_server(host="0.0.0.0", port=5000, workers=1, reload=False):
+def launch_server(host="127.0.0.1", port=5000, workers=1, reload=False):
     uvicorn.run("neuralqa.server.serve:app", host=host, port=port, workers=workers,
                 log_level="info", reload=reload)
 
diff --git a/nqa.Dockerfile b/nqa.Dockerfile
new file mode 100644
index 0000000..911371f
--- /dev/null
+++ b/nqa.Dockerfile
@@ -0,0 +1,32 @@
+FROM continuumio/miniconda3
+
+RUN conda install -c anaconda python=3.7
+RUN conda install pip
+RUN conda install pytorch==1.5.1 torchvision==0.6.1 cpuonly -c pytorch &&\
+    conda install -c anaconda tensorflow==2.3.0 &&\
+    python -m pip install transformers==3.5.1 &&\
+    conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch pyyaml spacy &&\
+    python -m pip install numpy==1.18.5 scipy==1.4.1 Keras-Preprocessing==1.1.1
+RUN conda install -c conda-forge boto3 pandas requests scikit-learn scipy flask &&\
+    python -m pip install gremlinpython requests_aws4auth
+
+ADD Dockerfile /root/neuralqa/
+ADD LICENSE /root/neuralqa/
+ADD README.md /root/neuralqa/
+ADD config.yaml /root/neuralqa/
+ADD docker-compose.yml /root/neuralqa/
+ADD docs/ /root/neuralqa/docs
+ADD neuralqa/ /root/neuralqa/neuralqa
+ADD notes.md /root/neuralqa/
+ADD nqa.Dockerfile /root/neuralqa/
+ADD requirements.txt /root/neuralqa/
+ADD setup.cfg /root/neuralqa/
+ADD setup.py /root/neuralqa/
+ADD tests/ /root/neuralqa/tests
+WORKDIR /root/neuralqa
+RUN ls && python setup.py install
+
+COPY neuralqa/config_default.yaml /root/config_default.yaml
+ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml
+
+CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "5000"]

From 4924e28555c0518bfcbb462019c8be4a612523fc Mon Sep 17 00:00:00 2001
From: Vishnu Vettrivel <vishnu@Vishnus-MacBook-Pro.local>
Date: Fri, 23 Apr 2021 00:40:40 -0700
Subject: [PATCH 03/19] modifying Dockerfiles

---
 .gitignore      |  5 +++++
 Dockerfile      | 46 +++++++++++++++++++++++++++++-----------------
 Dockerfile.orig | 20 ++++++++++++++++++++
 nqa.Dockerfile  | 32 --------------------------------
 4 files changed, 54 insertions(+), 49 deletions(-)
 create mode 100644 Dockerfile.orig
 delete mode 100644 nqa.Dockerfile

diff --git a/.gitignore b/.gitignore
index 34708e6..dd13c3b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -135,3 +135,8 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# Elastic Beanstalk Files
+.elasticbeanstalk/*
+!.elasticbeanstalk/*.cfg.yml
+!.elasticbeanstalk/*.global.yml
diff --git a/Dockerfile b/Dockerfile
index a8ad302..1595f98 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,20 +1,32 @@
-FROM ubuntu:20.04
+FROM continuumio/miniconda3
 
-COPY . . 
+RUN conda install -c anaconda python=3.7
+RUN conda install pip
+RUN conda install pytorch==1.5.1 torchvision==0.6.1 cpuonly -c pytorch &&\
+    conda install -c anaconda tensorflow==2.3.0 &&\
+    python -m pip install transformers==3.5.1 &&\
+    conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch pyyaml spacy &&\
+    python -m pip install numpy==1.18.5 scipy==1.4.1 Keras-Preprocessing==1.1.1
+RUN conda install -c conda-forge boto3 pandas requests scikit-learn scipy flask &&\
+    python -m pip install gremlinpython requests_aws4auth
 
-RUN apt-get update && \
-    apt-get -y upgrade && \
-    apt-get -y install python3 && \
-    apt-get -y install python3-pip && \
-    pip3 install neuralqa && \
-    apt-get -y install wget && \
-    wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.8.0-amd64.deb && \
-    wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.8.0-amd64.deb.sha512 && \
-    shasum -a 512 -c elasticsearch-7.8.0-amd64.deb.sha512 && \
-    dpkg -i elasticsearch-7.8.0-amd64.deb && \
-    service elasticsearch start && \
-    sleep 30 && \
-     
-EXPOSE 80
+ADD Dockerfile /root/neuralqa/
+ADD LICENSE /root/neuralqa/
+ADD README.md /root/neuralqa/
+#ADD config.yaml /root/neuralqa/
+ADD docker-compose.yml /root/neuralqa/
+ADD docs/ /root/neuralqa/docs
+ADD neuralqa/ /root/neuralqa/neuralqa
+ADD notes.md /root/neuralqa/
+ADD Dockerfile /root/neuralqa/
+ADD requirements.txt /root/neuralqa/
+ADD setup.cfg /root/neuralqa/
+ADD setup.py /root/neuralqa/
+ADD tests/ /root/neuralqa/tests
+WORKDIR /root/neuralqa
+RUN ls && python setup.py install
 
-CMD ["neuralqa", "--host", "0.0.0.0", "--port", "80"]
\ No newline at end of file
+COPY neuralqa/config_default.yaml /root/config_default.yaml
+ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml
+
+CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "5000"]
diff --git a/Dockerfile.orig b/Dockerfile.orig
new file mode 100644
index 0000000..a8ad302
--- /dev/null
+++ b/Dockerfile.orig
@@ -0,0 +1,20 @@
+FROM ubuntu:20.04
+
+COPY . . 
+
+RUN apt-get update && \
+    apt-get -y upgrade && \
+    apt-get -y install python3 && \
+    apt-get -y install python3-pip && \
+    pip3 install neuralqa && \
+    apt-get -y install wget && \
+    wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.8.0-amd64.deb && \
+    wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.8.0-amd64.deb.sha512 && \
+    shasum -a 512 -c elasticsearch-7.8.0-amd64.deb.sha512 && \
+    dpkg -i elasticsearch-7.8.0-amd64.deb && \
+    service elasticsearch start && \
+    sleep 30 && \
+     
+EXPOSE 80
+
+CMD ["neuralqa", "--host", "0.0.0.0", "--port", "80"]
\ No newline at end of file
diff --git a/nqa.Dockerfile b/nqa.Dockerfile
deleted file mode 100644
index 911371f..0000000
--- a/nqa.Dockerfile
+++ /dev/null
@@ -1,32 +0,0 @@
-FROM continuumio/miniconda3
-
-RUN conda install -c anaconda python=3.7
-RUN conda install pip
-RUN conda install pytorch==1.5.1 torchvision==0.6.1 cpuonly -c pytorch &&\
-    conda install -c anaconda tensorflow==2.3.0 &&\
-    python -m pip install transformers==3.5.1 &&\
-    conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch pyyaml spacy &&\
-    python -m pip install numpy==1.18.5 scipy==1.4.1 Keras-Preprocessing==1.1.1
-RUN conda install -c conda-forge boto3 pandas requests scikit-learn scipy flask &&\
-    python -m pip install gremlinpython requests_aws4auth
-
-ADD Dockerfile /root/neuralqa/
-ADD LICENSE /root/neuralqa/
-ADD README.md /root/neuralqa/
-ADD config.yaml /root/neuralqa/
-ADD docker-compose.yml /root/neuralqa/
-ADD docs/ /root/neuralqa/docs
-ADD neuralqa/ /root/neuralqa/neuralqa
-ADD notes.md /root/neuralqa/
-ADD nqa.Dockerfile /root/neuralqa/
-ADD requirements.txt /root/neuralqa/
-ADD setup.cfg /root/neuralqa/
-ADD setup.py /root/neuralqa/
-ADD tests/ /root/neuralqa/tests
-WORKDIR /root/neuralqa
-RUN ls && python setup.py install
-
-COPY neuralqa/config_default.yaml /root/config_default.yaml
-ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml
-
-CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "5000"]

From e977b5de3b98b8ffab380c94a25f2f3ad52ed3e6 Mon Sep 17 00:00:00 2001
From: Vishnu Vettrivel <vishnu@Vishnus-MacBook-Pro.local>
Date: Fri, 23 Apr 2021 01:12:32 -0700
Subject: [PATCH 04/19] changing port to 80

---
 Dockerfile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 1595f98..a92fb38 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -29,4 +29,6 @@ RUN ls && python setup.py install
 COPY neuralqa/config_default.yaml /root/config_default.yaml
 ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml
 
-CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "5000"]
+EXPOSE 80
+
+CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "80"]

From 95330eabab77bd85e284d66b337f956b42e7cc45 Mon Sep 17 00:00:00 2001
From: Vishnu Vettrivel <vishnu@Vishnus-MacBook-Pro.local>
Date: Fri, 23 Apr 2021 02:14:06 -0700
Subject: [PATCH 05/19] added ports section

---
 docker-compose.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index f6aaa68..fb0771c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,6 +1,8 @@
 version: "3"
 services:
-  neuralqa_docker:
+  neuralqa:
     build: .
+    ports:
+      - "80:80"
     expose:
       - 80

From a1f8aacfab3228e9f9ce3b109e9583518742c7aa Mon Sep 17 00:00:00 2001
From: pratacosmin <mcosmin1995@gmail.com>
Date: Thu, 23 Sep 2021 19:09:44 +0300
Subject: [PATCH 06/19] change retriever and reader config

---
 Dockerfile                   |  5 ++++-
 neuralqa/config_default.yaml | 15 ++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index a92fb38..a314a84 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,11 +5,14 @@ RUN conda install pip
 RUN conda install pytorch==1.5.1 torchvision==0.6.1 cpuonly -c pytorch &&\
     conda install -c anaconda tensorflow==2.3.0 &&\
     python -m pip install transformers==3.5.1 &&\
-    conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch pyyaml spacy &&\
+    conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch==7.13.1 pyyaml spacy &&\
     python -m pip install numpy==1.18.5 scipy==1.4.1 Keras-Preprocessing==1.1.1
 RUN conda install -c conda-forge boto3 pandas requests scikit-learn scipy flask &&\
     python -m pip install gremlinpython requests_aws4auth
 
+RUN python -m pip install uvicorn[standard] websockets
+# RUN python -m pip install websockets
+
 ADD Dockerfile /root/neuralqa/
 ADD LICENSE /root/neuralqa/
 ADD README.md /root/neuralqa/
diff --git a/neuralqa/config_default.yaml b/neuralqa/config_default.yaml
index 77e4a41..1a8a2c6 100644
--- a/neuralqa/config_default.yaml
+++ b/neuralqa/config_default.yaml
@@ -61,15 +61,15 @@ retriever:
     - name: None
       value: "none"
       type: "none"
-    - name: Orpheus
-      value: orpheus
+    - name: Abstracts
+      value: pubmed_abstracts
       type: elasticsearch
       connection:
         host: search-orpheus-pubmed-mfcsldctpmoxvooou3g6dimjbi.us-east-2.es.amazonaws.com
         port: 443
         username: "orpheus"
         password: "WandaVision2021!"
-        body_field: "text"
+        body_field: "abstract"
   readtopn: 0
 
 relsnip:
@@ -87,13 +87,10 @@ server: # webserver host and port defaults
 
 reader:
   title: Reader
-  selected: twmkn9/distilbert-base-uncased-squad2
+  selected: ktrapeznikov/biobert_v1.1_pubmed_squad_v2
   options:
-    - name: DistilBERT SQUAD2
-      value: twmkn9/distilbert-base-uncased-squad2
-      type: distilbert
-    - name: BERT SQUAD2
-      value: deepset/bert-base-cased-squad2
+    - name: BioBERT Pubmed SQUAD2
+      value: ktrapeznikov/biobert_v1.1_pubmed_squad_v2
       type: bert
     # - name: Medical BERT SQUAD2
     #   value: /Users/victordibia/Downloads/meddistilbert

From 7606074c3156f166b48358c9eb2dcdae916eefd8 Mon Sep 17 00:00:00 2001
From: pratacosmin <mcosmin1995@gmail.com>
Date: Wed, 29 Sep 2021 15:22:23 +0300
Subject: [PATCH 07/19] enhance retriever

---
 neuralqa/config_default.yaml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/neuralqa/config_default.yaml b/neuralqa/config_default.yaml
index 1a8a2c6..e76c6ce 100644
--- a/neuralqa/config_default.yaml
+++ b/neuralqa/config_default.yaml
@@ -70,6 +70,15 @@ retriever:
         username: "orpheus"
         password: "WandaVision2021!"
         body_field: "abstract"
+    - name: Orpheus
+      value: orpheus
+      type: elasticsearch
+      connection:
+        host: search-orpheus-pubmed-mfcsldctpmoxvooou3g6dimjbi.us-east-2.es.amazonaws.com
+        port: 443
+        username: "orpheus"
+        password: "WandaVision2021!"
+        body_field: "text"
   readtopn: 0
 
 relsnip:

From a033d2f0b18b7b9bde854fcf67fdd8f197f1a192 Mon Sep 17 00:00:00 2001
From: Vishnu Vettrivel <vishnu@Vishnus-MacBook-Pro.local>
Date: Sat, 26 Feb 2022 12:52:45 -0800
Subject: [PATCH 08/19] changing python to 3.6 and tensorflow version

---
 Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index a314a84..5e7fe6a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,9 +1,9 @@
 FROM continuumio/miniconda3
 
-RUN conda install -c anaconda python=3.7
+RUN conda install -c anaconda python=3.6
 RUN conda install pip
 RUN conda install pytorch==1.5.1 torchvision==0.6.1 cpuonly -c pytorch &&\
-    conda install -c anaconda tensorflow==2.3.0 &&\
+    conda install -c anaconda tensorflow &&\
     python -m pip install transformers==3.5.1 &&\
     conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch==7.13.1 pyyaml spacy &&\
     python -m pip install numpy==1.18.5 scipy==1.4.1 Keras-Preprocessing==1.1.1

From 0fc9f669524554da9e9a8c09f4e7db2b300508f9 Mon Sep 17 00:00:00 2001
From: Vishnu Vettrivel <vishnu@Vishnus-MacBook-Pro.local>
Date: Sat, 26 Feb 2022 20:18:10 -0800
Subject: [PATCH 09/19] changes to move to aws pytorch deep learning container
 base image

---
 Dockerfile      | 21 ++++++++++-----------
 Dockerfile.old  | 37 +++++++++++++++++++++++++++++++++++++
 Dockerfile.orig | 20 --------------------
 setup.py        |  1 +
 4 files changed, 48 insertions(+), 31 deletions(-)
 create mode 100644 Dockerfile.old
 delete mode 100644 Dockerfile.orig

diff --git a/Dockerfile b/Dockerfile
index 5e7fe6a..170eb6f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,17 +1,16 @@
-FROM continuumio/miniconda3
+FROM 763104351884.dkr.ecr.us-east-2.amazonaws.com/pytorch-inference:1.5.1-gpu-py36-cu101-ubuntu16.04
 
-RUN conda install -c anaconda python=3.6
-RUN conda install pip
-RUN conda install pytorch==1.5.1 torchvision==0.6.1 cpuonly -c pytorch &&\
-    conda install -c anaconda tensorflow &&\
-    python -m pip install transformers==3.5.1 &&\
-    conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch==7.13.1 pyyaml spacy &&\
-    python -m pip install numpy==1.18.5 scipy==1.4.1 Keras-Preprocessing==1.1.1
-RUN conda install -c conda-forge boto3 pandas requests scikit-learn scipy flask &&\
-    python -m pip install gremlinpython requests_aws4auth
 
+RUN conda install -c anaconda tensorflow
+RUN python -m pip install transformers==3.5.1 
+RUN conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch==7.13.1
+RUN conda install -c conda-forge flask spacy plac==0.9.6
+RUN python -m pip install numpy==1.19.2 scipy==1.4.1 Keras-Preprocessing==1.1.1
+RUN conda install -c conda-forge boto3 requests pandas scikit-learn
+RUN python -m pip install gremlinpython requests_aws4auth
 RUN python -m pip install uvicorn[standard] websockets
-# RUN python -m pip install websockets
+RUN python -m pip install thinc[tensorflow,torch] --pre
+RUN python -m pip install --upgrade tensorflow
 
 ADD Dockerfile /root/neuralqa/
 ADD LICENSE /root/neuralqa/
diff --git a/Dockerfile.old b/Dockerfile.old
new file mode 100644
index 0000000..5e7fe6a
--- /dev/null
+++ b/Dockerfile.old
@@ -0,0 +1,37 @@
+FROM continuumio/miniconda3
+
+RUN conda install -c anaconda python=3.6
+RUN conda install pip
+RUN conda install pytorch==1.5.1 torchvision==0.6.1 cpuonly -c pytorch &&\
+    conda install -c anaconda tensorflow &&\
+    python -m pip install transformers==3.5.1 &&\
+    conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch==7.13.1 pyyaml spacy &&\
+    python -m pip install numpy==1.18.5 scipy==1.4.1 Keras-Preprocessing==1.1.1
+RUN conda install -c conda-forge boto3 pandas requests scikit-learn scipy flask &&\
+    python -m pip install gremlinpython requests_aws4auth
+
+RUN python -m pip install uvicorn[standard] websockets
+# RUN python -m pip install websockets
+
+ADD Dockerfile /root/neuralqa/
+ADD LICENSE /root/neuralqa/
+ADD README.md /root/neuralqa/
+#ADD config.yaml /root/neuralqa/
+ADD docker-compose.yml /root/neuralqa/
+ADD docs/ /root/neuralqa/docs
+ADD neuralqa/ /root/neuralqa/neuralqa
+ADD notes.md /root/neuralqa/
+ADD Dockerfile /root/neuralqa/
+ADD requirements.txt /root/neuralqa/
+ADD setup.cfg /root/neuralqa/
+ADD setup.py /root/neuralqa/
+ADD tests/ /root/neuralqa/tests
+WORKDIR /root/neuralqa
+RUN ls && python setup.py install
+
+COPY neuralqa/config_default.yaml /root/config_default.yaml
+ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml
+
+EXPOSE 80
+
+CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "80"]
diff --git a/Dockerfile.orig b/Dockerfile.orig
deleted file mode 100644
index a8ad302..0000000
--- a/Dockerfile.orig
+++ /dev/null
@@ -1,20 +0,0 @@
-FROM ubuntu:20.04
-
-COPY . . 
-
-RUN apt-get update && \
-    apt-get -y upgrade && \
-    apt-get -y install python3 && \
-    apt-get -y install python3-pip && \
-    pip3 install neuralqa && \
-    apt-get -y install wget && \
-    wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.8.0-amd64.deb && \
-    wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.8.0-amd64.deb.sha512 && \
-    shasum -a 512 -c elasticsearch-7.8.0-amd64.deb.sha512 && \
-    dpkg -i elasticsearch-7.8.0-amd64.deb && \
-    service elasticsearch start && \
-    sleep 30 && \
-     
-EXPOSE 80
-
-CMD ["neuralqa", "--host", "0.0.0.0", "--port", "80"]
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 4712fe4..e53c350 100644
--- a/setup.py
+++ b/setup.py
@@ -36,6 +36,7 @@ def package_files(directory):
         'aiofiles',
         'uvicorn',
         'numpy',
+	'plac==0.9.6', 
         'tensorflow>=2.1.0',
         'torch',
         'torchvision',

From b3fb4b653645b449f8578cf29d5bbb6c1377f85a Mon Sep 17 00:00:00 2001
From: Vishnu Vettrivel <vishnu@Vishnus-MacBook-Pro.local>
Date: Sun, 27 Feb 2022 09:28:53 -0800
Subject: [PATCH 10/19] fixing cuda environment variables

---
 .dockerignore | 4 ++++
 Dockerfile    | 3 +++
 2 files changed, 7 insertions(+)
 create mode 100644 .dockerignore

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..b9bd98b
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,4 @@
+# Elastic Beanstalk Files
+.elasticbeanstalk/*
+.git
+.gitignore
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 170eb6f..a76cb31 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -30,6 +30,9 @@ RUN ls && python setup.py install
 
 COPY neuralqa/config_default.yaml /root/config_default.yaml
 ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml
+ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib:/usr/local/cuda/lib64
+ENV NVIDIA_VISIBLE_DEVICES all
+ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
 
 EXPOSE 80
 

From 783316c612272068aad26f1d47c654d2db60a127 Mon Sep 17 00:00:00 2001
From: Vishnu Vettrivel <vishnu@Vishnus-MacBook-Pro.local>
Date: Sun, 27 Feb 2022 10:16:08 -0800
Subject: [PATCH 11/19] fixing cuda issues

---
 Dockerfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index a76cb31..54c287d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,7 +10,8 @@ RUN conda install -c conda-forge boto3 requests pandas scikit-learn
 RUN python -m pip install gremlinpython requests_aws4auth
 RUN python -m pip install uvicorn[standard] websockets
 RUN python -m pip install thinc[tensorflow,torch] --pre
-RUN python -m pip install --upgrade tensorflow
+RUN conda install -c conda-forge cudatoolkit
+RUN python -m pip install tensorflow==2.3.0
 
 ADD Dockerfile /root/neuralqa/
 ADD LICENSE /root/neuralqa/

From 55a5266a113195b80251dd40d3a176c1a3556221 Mon Sep 17 00:00:00 2001
From: Vishnu Vettrivel <vishnu@Vishnus-MacBook-Pro.local>
Date: Sun, 27 Feb 2022 10:39:06 -0800
Subject: [PATCH 12/19] fixing LD_LIBRARY_PATH to include cuda/compat folder

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 54c287d..dfd7724 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -31,7 +31,7 @@ RUN ls && python setup.py install
 
 COPY neuralqa/config_default.yaml /root/config_default.yaml
 ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml
-ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib:/usr/local/cuda/lib64
+ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/compat:/usr/local/cuda/lib:/usr/local/cuda/lib64
 ENV NVIDIA_VISIBLE_DEVICES all
 ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
 

From a6ffb87aaf6887363ca004bb8b4a086386b3effe Mon Sep 17 00:00:00 2001
From: Vishnu Vettrivel <vishnu@Vishnus-MacBook-Pro.local>
Date: Sun, 27 Feb 2022 10:51:55 -0800
Subject: [PATCH 13/19] adding fix for CUDA_ERROR_NO_DEVICE

---
 Dockerfile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index dfd7724..4186686 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -34,6 +34,8 @@ ENV NEURALQA_CONFIG_PATH=/root/config_default.yaml
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/compat:/usr/local/cuda/lib:/usr/local/cuda/lib64
 ENV NVIDIA_VISIBLE_DEVICES all
 ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
+ENV CUDA_VISIBLE_DEVICES 0,1
+
 
 EXPOSE 80
 

From 99e084bd072797fb266ca6af12f3cc0f2d927f2f Mon Sep 17 00:00:00 2001
From: Vishnu Vettrivel <vishnu@Vishnus-MacBook-Pro.local>
Date: Sun, 27 Feb 2022 11:27:13 -0800
Subject: [PATCH 14/19] adding nvidia drivers

---
 Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Dockerfile b/Dockerfile
index 4186686..798f59b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,6 +12,7 @@ RUN python -m pip install uvicorn[standard] websockets
 RUN python -m pip install thinc[tensorflow,torch] --pre
 RUN conda install -c conda-forge cudatoolkit
 RUN python -m pip install tensorflow==2.3.0
+RUN apt-get install -y nvidia-headless-495 nvidia-modprobe
 
 ADD Dockerfile /root/neuralqa/
 ADD LICENSE /root/neuralqa/

From dfb088b507643cb9a6f02b7f70876ee8b8db9ba8 Mon Sep 17 00:00:00 2001
From: Vishnu Vettrivel <vishnu@Vishnus-MacBook-Pro.local>
Date: Sun, 20 Mar 2022 15:22:25 -0700
Subject: [PATCH 15/19] adding run command to include gpus

---
 docker-compose.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker-compose.yml b/docker-compose.yml
index fb0771c..826db13 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,6 +2,7 @@ version: "3"
 services:
   neuralqa:
     build: .
+    command: --gpus all
     ports:
       - "80:80"
     expose:

From 71f83c5dd695752a35ba9ef341cdc9058ed62e72 Mon Sep 17 00:00:00 2001
From: Vishnu Vettrivel <vishnu@Vishnus-MacBook-Pro.local>
Date: Sun, 20 Mar 2022 15:56:59 -0700
Subject: [PATCH 16/19] reverting changes to docker compose file

---
 docker-compose.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 826db13..fb0771c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,7 +2,6 @@ version: "3"
 services:
   neuralqa:
     build: .
-    command: --gpus all
     ports:
       - "80:80"
     expose:

From 685a1d9909b1c7c5af55c8d7a43a9466203fbe09 Mon Sep 17 00:00:00 2001
From: Vishnu Vettrivel <vishnu@Vishnus-MacBook-Pro.local>
Date: Sun, 20 Mar 2022 16:22:40 -0700
Subject: [PATCH 17/19] adding GPU access with Compose

---
 docker-compose.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docker-compose.yml b/docker-compose.yml
index fb0771c..9179842 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,6 +2,11 @@ version: "3"
 services:
   neuralqa:
     build: .
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - capabilities: [gpu]
     ports:
       - "80:80"
     expose:

From 9c80f24fc566be8871f3c2da62efe4f4cbb4543a Mon Sep 17 00:00:00 2001
From: Bogdan-Flavius Budihala <b.bogdanfl935@gmail.com>
Date: Wed, 14 Sep 2022 11:37:48 +0300
Subject: [PATCH 18/19] WAP-170 | Migrated to AWS OpenSearch cluster

---
 .gitignore                                      | 1 +
 neuralqa/config_default.yaml                    | 8 ++------
 neuralqa/retriever/__init__.py                  | 1 +
 neuralqa/retriever/awselasticsearchretriever.py | 4 ++--
 neuralqa/retriever/retrieverpool.py             | 4 ++--
 neuralqa/server/serve.py                        | 2 +-
 requirements.txt                                | 2 +-
 tests/retriever/test_retriever.py               | 4 ++--
 8 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/.gitignore b/.gitignore
index dd13c3b..d16a5a2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -140,3 +140,4 @@ dmypy.json
 .elasticbeanstalk/*
 !.elasticbeanstalk/*.cfg.yml
 !.elasticbeanstalk/*.global.yml
+.idea/
diff --git a/neuralqa/config_default.yaml b/neuralqa/config_default.yaml
index e76c6ce..369207a 100644
--- a/neuralqa/config_default.yaml
+++ b/neuralqa/config_default.yaml
@@ -65,19 +65,15 @@ retriever:
       value: pubmed_abstracts
       type: elasticsearch
       connection:
-        host: search-orpheus-pubmed-mfcsldctpmoxvooou3g6dimjbi.us-east-2.es.amazonaws.com
+        host: vpc-neptune-es-opxf6xkhk6ra7sfhybnkvxydtu.us-east-2.es.amazonaws.com
         port: 443
-        username: "orpheus"
-        password: "WandaVision2021!"
         body_field: "abstract"
     - name: Orpheus
       value: orpheus
       type: elasticsearch
       connection:
-        host: search-orpheus-pubmed-mfcsldctpmoxvooou3g6dimjbi.us-east-2.es.amazonaws.com
+        host: vpc-neptune-es-opxf6xkhk6ra7sfhybnkvxydtu.us-east-2.es.amazonaws.com
         port: 443
-        username: "orpheus"
-        password: "WandaVision2021!"
         body_field: "text"
   readtopn: 0
 
diff --git a/neuralqa/retriever/__init__.py b/neuralqa/retriever/__init__.py
index fa07e6f..24f4a54 100644
--- a/neuralqa/retriever/__init__.py
+++ b/neuralqa/retriever/__init__.py
@@ -1,4 +1,5 @@
 from .retriever import *
 from .elasticsearchretriever import *
+from .awselasticsearchretriever import *
 from .solrretriever import *
 from .retrieverpool import *
diff --git a/neuralqa/retriever/awselasticsearchretriever.py b/neuralqa/retriever/awselasticsearchretriever.py
index 552b01e..b5e2cad 100644
--- a/neuralqa/retriever/awselasticsearchretriever.py
+++ b/neuralqa/retriever/awselasticsearchretriever.py
@@ -25,8 +25,8 @@ def __init__(self, host, index_type="elasticsearch", port=443, **kwargs):
         self.port = port
         allowed_keys = list(self.__dict__.keys())
         self.__dict__.update((k, v) for k, v in kwargs.items() if k in allowed_keys)
-        assert self.body_field in self.return_fields
-        assert any(self.body_field in f for f in self.search_fields)
+        # assert self.body_field in self.return_fields
+        # assert any(self.body_field in f for f in self.search_fields)
 
         credentials = boto3.Session().get_credentials()
         awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service,
diff --git a/neuralqa/retriever/retrieverpool.py b/neuralqa/retriever/retrieverpool.py
index 06ba9a3..8985e8f 100644
--- a/neuralqa/retriever/retrieverpool.py
+++ b/neuralqa/retriever/retrieverpool.py
@@ -1,5 +1,5 @@
 
-from neuralqa.retriever import ElasticSearchRetriever
+from neuralqa.retriever import AWSElasticSearchRetriever
 import logging
 
 logger = logging.getLogger(__name__)
@@ -15,7 +15,7 @@ def __init__(self, retrievers):
                     "Duplicate retriever value : {} ".format(retriever["value"]))
 
             if (retriever["type"] == "elasticsearch"):
-                self.retriever_pool[retriever["value"]] = ElasticSearchRetriever(
+                self.retriever_pool[retriever["value"]] = AWSElasticSearchRetriever(
                     **retriever["connection"])
             if (retriever["type"] == "solr"):
                 logger.info("We do not yet support Solr retrievers")
diff --git a/neuralqa/server/serve.py b/neuralqa/server/serve.py
index 1d57ac3..1627bff 100644
--- a/neuralqa/server/serve.py
+++ b/neuralqa/server/serve.py
@@ -2,7 +2,7 @@
 
 from neuralqa.reader import BERTReader, ReaderPool
 from neuralqa.server.routehandlers import Handler
-from neuralqa.retriever import ElasticSearchRetriever, RetrieverPool
+from neuralqa.retriever import AWSElasticSearchRetriever, RetrieverPool
 from neuralqa.utils import ConfigParser
 from neuralqa.expander import ExpanderPool
 
diff --git a/requirements.txt b/requirements.txt
index 417f79f..ca4b737 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ transformers>=2.9.1
 uvicorn
 aiofiles
 fastapi
-elasticsearch>=7.7.1
+elasticsearch<=7.13.4
 pyyaml>=3.13 
 spacy
 requests-aws4auth
diff --git a/tests/retriever/test_retriever.py b/tests/retriever/test_retriever.py
index 5dc9f15..b854031 100644
--- a/tests/retriever/test_retriever.py
+++ b/tests/retriever/test_retriever.py
@@ -1,11 +1,11 @@
-from neuralqa.retriever import ElasticSearchRetriever
+from neuralqa.retriever import AWSElasticSearchRetriever
 from neuralqa.utils import ConfigParser
 
 
 def test_elasticserch_retriever():
     app_config = ConfigParser("config.yaml")
     rkwargs = app_config.config["retriever"]["options"][1]["connection"]
-    retriever = ElasticSearchRetriever(**rkwargs)
+    retriever = AWSElasticSearchRetriever(**rkwargs)
     results = retriever.run_query(
         "cases", "what is the punishment for arson crime")
     assert results != None

From 44f673d807126ee509a71a8d9beb23161e2de260 Mon Sep 17 00:00:00 2001
From: Bogdan-Flavius Budihala <b.bogdanfl935@gmail.com>
Date: Wed, 12 Oct 2022 09:47:04 +0200
Subject: [PATCH 19/19] WAP-170 | Fixed expiring credentials

---
 Dockerfile                                    | 10 +++---
 docker-compose.yml                            |  7 +---
 .../retriever/awselasticsearchretriever.py    | 33 +++++++++++--------
 neuralqa/server/routehandlers.py              |  3 +-
 neuralqa/server/routemodels.py                |  2 +-
 neuralqa/utils/decorators.py                  | 14 ++++++++
 6 files changed, 42 insertions(+), 27 deletions(-)
 create mode 100644 neuralqa/utils/decorators.py

diff --git a/Dockerfile b/Dockerfile
index 798f59b..2fdbe43 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,7 @@
 FROM 763104351884.dkr.ecr.us-east-2.amazonaws.com/pytorch-inference:1.5.1-gpu-py36-cu101-ubuntu16.04
 
-
 RUN conda install -c anaconda tensorflow
-RUN python -m pip install transformers==3.5.1 
+RUN python -m pip install transformers==3.5.1
 RUN conda install -c conda-forge uvicorn aiofiles fastapi elasticsearch==7.13.1
 RUN conda install -c conda-forge flask spacy plac==0.9.6
 RUN python -m pip install numpy==1.19.2 scipy==1.4.1 Keras-Preprocessing==1.1.1
@@ -12,7 +11,10 @@ RUN python -m pip install uvicorn[standard] websockets
 RUN python -m pip install thinc[tensorflow,torch] --pre
 RUN conda install -c conda-forge cudatoolkit
 RUN python -m pip install tensorflow==2.3.0
-RUN apt-get install -y nvidia-headless-495 nvidia-modprobe
+#RUN apt-get install -y nvidia-headless-495 nvidia-modprobe
+RUN apt-get update -y --allow-unauthenticated
+RUN apt-get install -y --allow-unauthenticated nvidia-headless-495 nvidia-modprobe
+
 
 ADD Dockerfile /root/neuralqa/
 ADD LICENSE /root/neuralqa/
@@ -40,4 +42,4 @@ ENV CUDA_VISIBLE_DEVICES 0,1
 
 EXPOSE 80
 
-CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "80"]
+CMD ["neuralqa", "ui", "--host", "0.0.0.0", "--port", "80"]
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 9179842..ae79586 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,12 +2,7 @@ version: "3"
 services:
   neuralqa:
     build: .
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - capabilities: [gpu]
     ports:
       - "80:80"
     expose:
-      - 80
+      - 80
\ No newline at end of file
diff --git a/neuralqa/retriever/awselasticsearchretriever.py b/neuralqa/retriever/awselasticsearchretriever.py
index b5e2cad..75bfd60 100644
--- a/neuralqa/retriever/awselasticsearchretriever.py
+++ b/neuralqa/retriever/awselasticsearchretriever.py
@@ -1,4 +1,5 @@
 import boto3
+from elasticsearch.exceptions import AuthorizationException
 from requests_aws4auth import AWS4Auth
 import copy
 from neuralqa.retriever import Retriever, ElasticSearchRetriever
@@ -8,6 +9,8 @@
 
 import traceback
 
+from neuralqa.utils.decorators import retry_on_exception
+
 logger = logging.getLogger(__name__)
 region = 'us-east-2'
 service = 'es'
@@ -27,25 +30,26 @@ def __init__(self, host, index_type="elasticsearch", port=443, **kwargs):
         self.__dict__.update((k, v) for k, v in kwargs.items() if k in allowed_keys)
         # assert self.body_field in self.return_fields
         # assert any(self.body_field in f for f in self.search_fields)
+        self.construct_es_instance()
+        rejected_keys = set(kwargs.keys()) - set(allowed_keys)
 
+        if rejected_keys:
+            raise ValueError(
+                "Invalid arguments in ElasticSearchRetriever constructor:{}".format(rejected_keys))
+
+    def construct_es_instance(self):
         credentials = boto3.Session().get_credentials()
-        awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service,
-                           session_token=credentials.token)
+        awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service, session_token=credentials.token)
         self.es = Elasticsearch(
             hosts=[{"host": self.host, "port": self.port}],
             http_auth=awsauth,
-            use_ssl = True,
-            verify_certs = True,
-            connection_class = RequestsHttpConnection,
+            use_ssl=True,
+            verify_certs=True,
+            connection_class=RequestsHttpConnection,
         )
         self.isAvailable = self.es.ping()
 
-        rejected_keys = set(kwargs.keys()) - set(allowed_keys)
-
-        if rejected_keys:
-            raise ValueError(
-                "Invalid arguments in ElasticSearchRetriever constructor:{}".format(rejected_keys))
-
+    @retry_on_exception(exception=AuthorizationException)
     def run_query(self, index_name, search_query, max_documents=5, fragment_size=100, relsnip=True, num_fragments=5,
                   highlight_tags=True):
 
@@ -80,8 +84,7 @@ def run_query(self, index_name, search_query, max_documents=5, fragment_size=100
         #     search_query["_source"] = {"includes": [self.body_field]}
 
         try:
-            query_result = self.es.search(
-                index=index_name, body=search_query)
+            query_result = self.es.search(index=index_name, body=search_query)
 
             # RelSnip: for each document, we concatenate all
             # fragments in each document and return as the document.
@@ -96,7 +99,9 @@ def run_query(self, index_name, search_query, max_documents=5, fragment_size=100
                         del hit['_source'][self.body_field]
             took = query_result["took"]
             results = {"took": took,  "highlights": highlights, "docs": docs, "source": source}
-
+        except AuthorizationException:
+            self.construct_es_instance()
+            raise
         except (ConnectionRefusedError, NotFoundError, Exception) as e:
             status = False
             results["errormsg"] = str(e)
diff --git a/neuralqa/server/routehandlers.py b/neuralqa/server/routehandlers.py
index 755716d..9b21960 100644
--- a/neuralqa/server/routehandlers.py
+++ b/neuralqa/server/routehandlers.py
@@ -51,8 +51,7 @@ async def get_answers(params: Answer):
 
             else:
                 # add query expansion terms to query if any
-                retriever_query = params.query + \
-                    " ".join(params.expansionterms)
+                retriever_query = params.query + " ".join(params.expansionterms)
                 num_fragments = 5
                 query_results = self.retriever_pool.retriever.run_query(params.retriever, retriever_query,
                                                                         max_documents=params.max_documents, fragment_size=params.fragment_size,
diff --git a/neuralqa/server/routemodels.py b/neuralqa/server/routemodels.py
index 8ead518..73f178f 100644
--- a/neuralqa/server/routemodels.py
+++ b/neuralqa/server/routemodels.py
@@ -23,7 +23,7 @@ class Answer(BaseModel):
     reader: str = None
     relsnip: bool = True
     expander: Optional[str] = None
-    expansionterms: Optional[list] = None
+    expansionterms: Optional[list] = []
     retriever: Optional[str] = "manual"
 
 
diff --git a/neuralqa/utils/decorators.py b/neuralqa/utils/decorators.py
new file mode 100644
index 0000000..f9ba522
--- /dev/null
+++ b/neuralqa/utils/decorators.py
@@ -0,0 +1,14 @@
+import functools
+
+
+def retry_on_exception(exception):  # decorator factory: `exception` is what the `except` clause below catches
+    def actual_decorator(func):
+        @functools.wraps(func)  # keep the wrapped callable's metadata (name, docstring) on the wrapper
+        def wrapper(*args, **kwargs):
+            try:
+                return func(*args, **kwargs)
+            except exception:
+                return func(*args, **kwargs)  # exactly one retry with the same arguments; a second failure propagates
+        return wrapper
+
+    return actual_decorator