From ae3a054f4e77d9a09bdb85811ee22aa770967e0b Mon Sep 17 00:00:00 2001
From: Ralph Tang <r33tang@uwaterloo.ca>
Date: Thu, 23 Apr 2020 17:06:42 -0400
Subject: [PATCH] Change evaluation methodology (#2)

* Change evaluation methodology

- Make MRR, P@1, and R@3 default
- Add natural language queries and keyword queries
- Add 9 more QD pairs

* Physical sciences topics

* Add temperature questions

* Add more examples

* Implement QA transformer reranker

* Fix dataset typo

* Implement random baseline

* Implement more reranker model types

- Implement question answering reranker
- Implement sequence classification reranker

* Add final dataset examples

* Remove missing IDs from dataset

* Improve cosine similarity matrix provider name

* Add dataset statistics calculation

* Add version to dataset

* Add random MRR calculation

* Fix off-by-one in random MRR computation

* Prepare for release

- Add setuptools script
- Fix circular imports

* Add more detailed README

* Change license to Apache

* Fix classifier name

* Clarify README

Co-authored-by: Nikhil Gupta <niks.gupta09@gmail.com>
Co-authored-by: Edwin Zhang <edwinzhang64@gmail.com>
---
 LICENSE                                     | 201 +++++
 README.md                                   |  33 +-
 data/kaggle-lit-review-0.1.json             | 862 ++++++++++++++++++++
 data/kaggle-lit-review.json                 | 459 -----------
 pygaggle/data/kaggle.py                     |  47 +-
 pygaggle/data/relevance.py                  |  34 +-
 pygaggle/model/encode.py                    |   2 +-
 pygaggle/model/evaluate.py                  |  77 +-
 pygaggle/model/tokenize.py                  |   2 +-
 pygaggle/rerank/__init__.py                 |   4 -
 pygaggle/rerank/bm25.py                     |   4 +-
 pygaggle/rerank/random.py                   |  19 +
 pygaggle/rerank/similarity.py               |   6 +-
 pygaggle/rerank/transformer.py              |  83 +-
 pygaggle/run/evaluate_kaggle_highlighter.py |  87 +-
 scripts/evaluate-highlighters.sh            |  16 +-
 setup.py                                    |  39 +
 17 files changed, 1432 insertions(+), 543 deletions(-)
 create mode 100644 LICENSE
 create mode 100644 data/kaggle-lit-review-0.1.json
 delete mode 100644 data/kaggle-lit-review.json
 create mode 100644 pygaggle/rerank/random.py
 create mode 100644 setup.py

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..6fffd10b
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+                               Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2020 Jimmy Lin
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
\ No newline at end of file
diff --git a/README.md b/README.md
index a46be9c7..6f425524 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,39 @@
 # PyGaggle
 
-A gaggle of CORD-19 rerankers.
+A gaggle of rerankers for [CovidQA](https://github.com/castorini/pygaggle/blob/master/data/) and CORD-19. 
 
 ## Installation
 
-1. `conda env create -f environment.yml && conda activate pygaggle`
+1. For pip, do `pip install pygaggle`. If you prefer Anaconda, use `conda env create -f environment.yml && conda activate pygaggle`.
 
 2. Install [PyTorch 1.4+](http://pytorch.org/).
 
-3. Download the index: `sh scripts/update-index.sh`
+3. Download the index: `sh scripts/update-index.sh`.
 
-4. Make sure you have an installation of Java 8+: `javac --version` 
+4. Make sure you have an installation of Java 11+: `javac --version`.
 
+5. Install [Anserini](https://github.com/castorini/anserini).
 
-## Evaluating Highlighters
 
-Run `sh scripts/evaluate-highlighters.sh`.
\ No newline at end of file
+## Running rerankers on CovidQA
+
+By default, the script uses `data/lucene-index-covid-paragraph` for the index path.
+If this is undesirable, set the environment variable `CORD19_INDEX_PATH` to the path of the index.
+
+
+### Unsupervised Methods
+
+**BM25**: `python -um pygaggle.run.evaluate_kaggle_highlighter --method bm25`
+
+**BERT**: `python -um pygaggle.run.evaluate_kaggle_highlighter --method transformer --model-name bert-base-cased`
+
+**SciBERT**: `python -um pygaggle.run.evaluate_kaggle_highlighter --method transformer --model-name allenai/scibert_scivocab_cased`
+
+**BioBERT**: `python -um pygaggle.run.evaluate_kaggle_highlighter --method transformer --model-name biobert`
+
+
+### Supervised Methods
+
+**T5 (MARCO)**: `python -um pygaggle.run.evaluate_kaggle_highlighter --method t5`
+
+Instructions for our other MARCO and SQuAD models are coming soon.
diff --git a/data/kaggle-lit-review-0.1.json b/data/kaggle-lit-review-0.1.json
new file mode 100644
index 00000000..00f20eb3
--- /dev/null
+++ b/data/kaggle-lit-review-0.1.json
@@ -0,0 +1,862 @@
+{
+  "version":"0.1",
+  "categories":[
+    {
+      "name":"Incubation period",
+      "sub_categories":[
+        {
+          "nq_name":"What is the incubation period of the virus?",
+          "kq_name":"Incubation period of the virus",
+          "answers":[
+            {
+              "id":"wuclekt6",
+              "title":"Longitudinal analysis of laboratory findings during the process of recovery for patients with COVID-19",
+              "exact_answer":"4 days (IQR, 2-7)"
+            },
+            {
+              "id":"e3t1f0rt",
+              "title":"Epidemiological Characteristics of COVID-19; a Systemic Review and Meta-Analysis 1",
+              "exact_answer":"5.84 (99% CI: 4.83, 6.85) days"
+            },
+            {
+              "id":"ragcpbl6",
+              "title":"Evolving epidemiology of novel coronavirus diseases 2019 and possible interruption of local transmission outside Hubei Province in China: a descriptive and modeling study",
+              "exact_answer":"mean incubation period of 5.2 days"
+            },
+            {
+              "id":"n0uwy77g",
+              "title":"Clinical characteristics and durations of hospitalized patients with COVID-19 in Beijing: a retrospective cohort study",
+              "exact_answer":"4 (3-7) days"
+            },
+            {
+              "id":"x23ej29m",
+              "title":"Clinical features and obstetric and neonatal outcomes of pregnant patients with COVID-19 in Wuhan, China: a retrospective, single-centre, descriptive study",
+              "exact_answer":"5 days (range 2-9 days)"
+            },
+            {
+              "id":"56zhxd6e",
+              "title":"Epidemiological parameters of coronavirus disease 2019: a pooled analysis of publicly reported individual data of 1155 cases from seven countries Summary Background",
+              "exact_answer":"7.44 days"
+            },
+            {
+              "id":"zph6r4il",
+              "title":"Epidemiological, clinical and virological characteristics of 74 cases of coronavirus-infected disease 2019 (COVID-19) with gastrointestinal symptoms",
+              "exact_answer":"4 days (IQR 3-7 days)"
+            },
+            {
+              "id":"djq0lvr2",
+              "title":"Is a 14-day quarantine period optimal for effectively controlling coronavirus disease 2019 (COVID-19)?",
+              "exact_answer":"The median incubation period of both male and female adults was similar (7-day) but significantly shorter than that (9-day) of child cases"
+            },
+            {
+              "id":"n0vmb946",
+              "title":"The difference in the incubation period of 2019 novel coronavirus (SARS-CoV-2) infection between travelers to Hubei and non-travelers: The need of a longer quarantine period",
+              "exact_answer":"1.8 and 7.2 days"
+            },
+            {
+              "id":"awgyxn3t",
+              "title":"Clinical Characteristics of 34 Children with Coronavirus Disease-2019 in the West of China: a Multiple-center Case Series",
+              "exact_answer":"10.50 (7.75 -25.25) days"
+            },
+            {
+              "id":"0hnh4n9e",
+              "title":"Investigation of three clusters of COVID-19 in Singapore: implications for surveillance and response measures",
+              "exact_answer":"4 days (IQR 3-6)"
+            },
+            {
+              "id":"it4ka7v0",
+              "title":"Estimation of incubation period distribution of COVID-19 using disease onset forward time: a novel cross-sectional and forward follow-up study",
+              "exact_answer":"median of incubation period is 8·13 days (95% confidence interval [CI]: 7·37-8·91), the mean is 8·62 days (95% CI: 8·02-9·28)"
+            },
+            {
+              "id":"glq0lckz",
+              "title":"Clinical Characteristics of SARS-CoV-2 Pneumonia Compared to Controls in Chinese Han Population",
+              "exact_answer":"4 days (IQR, 2 to 7)"
+            },
+            {
+              "id":"8anqfkmo",
+              "title":"The Incubation Period of Coronavirus Disease 2019 (COVID-19) From Publicly Reported Confirmed Cases: Estimation and Application",
+              "exact_answer":"5.1 days (CI, 4.5 to 5.8 days)"
+            },
+            {
+              "id":"v3gww4iv",
+              "title":"Transmission of corona virus disease 2019 during the incubation period may lead to a quarantine loophole",
+              "exact_answer":"4.9 days (95% confidence interval [CI], 4.4 to 5.4) days"
+            },
+            {
+              "id":"66ulqu11",
+              "title":"Transmission interval estimates suggest pre-symptomatic spread of COVID-19",
+              "exact_answer":"7.1 (6.13, 8.25) days for Singapore and 9 (7.92, 10.2) days for Tianjin"
+            },
+            {
+              "id":"ti9b1etu",
+              "title":"Transmission and clinical characteristics of coronavirus disease 2019 in 104 outside-Wuhan patients, China",
+              "exact_answer":"6 days, ranged from 1 to 32 days"
+            },
+            {
+              "id":"k3f7ohzg",
+              "title":"Characteristics of COVID-19 infection in Beijing",
+              "exact_answer":"6.7 days"
+            },
+            {
+              "id":"jxtch47t",
+              "title":"Epidemiologic and Clinical Characteristics of 91 Hospitalized Patients with COVID-19 in Zhejiang, China: A retrospective, multi-centre case series",
+              "exact_answer":"6 (IQR, 3-8) days"
+            },
+            {
+              "id":"dbzrd23n",
+              "title":"Title: A descriptive study of the impact of diseases control and prevention on the epidemics 1 dynamics and clinical features of SARS-CoV-2 outbreak in Shanghai, lessons learned for",
+              "exact_answer":"6.4 days (95% 175 CI 5.3 to 7.6)"
+            },
+            {
+              "id":"j3avpu1y",
+              "title":"A familial cluster of pneumonia associated with the 2019 novel coronavirus indicating person-to-person transmission: a study of a family cluster",
+              "exact_answer":"3-6 days"
+            },
+            {
+              "id":"1mxjklgx",
+              "title":"Epidemiological characteristics of 1212 COVID-19 patients in Henan, China. medRxiv",
+              "exact_answer":"average, mode and median incubation periods are 7.4, 4 and 7 days"
+            },
+            {
+              "id":"ykofrn9i",
+              "title":"Incubation Period and Other Epidemiological Characteristics of 2019 Novel Coronavirus Infections with Right Truncation: A Statistical Analysis of Publicly Available Case Data",
+              "exact_answer":"5.6 days (95% CI: 4.4, 7.4)"
+            },
+            {
+              "id":"u8goc7io",
+              "title":"Title: The incubation period of 2019-nCoV infections among travellers from Wuhan, China",
+              "exact_answer":"6.4 (5.6 -7.7, 95% CI) days"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the length of viral shedding after illness onset?",
+          "kq_name":"Length of viral shedding after illness onset",
+          "answers":[
+            {
+              "id":"r5a46n9a",
+              "title":"Viral Kinetics and Antibody Responses in Patients with COVID-19",
+              "exact_answer":"12 days (range, 3-38 ) in nasopharyngeal swabs, 19 days (range, 5-37) in sputum and 18 days (range, 7-26) in stools"
+            },
+            {
+              "id":"k36rymkv",
+              "title":"Clinical course and risk factors for mortality of adult inpatients with COVID-19 in Wuhan, China: a retrospective cohort study",
+              "exact_answer":"20·0 days (IQR 17·0-24·0)"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the incubation period across different age groups?",
+          "kq_name":"Incubation period across different age groups",
+          "answers":[
+            {
+              "id":"giabjjnz",
+              "title":"Children are unlikely to have been the primary source of household SARS-CoV-2 infections",
+              "exact_answer":"7.74 d ± 3.22"
+            },
+            {
+              "id":"djq0lvr2",
+              "title":"Is a 14-day quarantine period optimal for effectively controlling coronavirus disease 2019 (COVID-19)?",
+              "exact_answer":"median incubation period of both male and female adults was similar (7-day) but significantly shorter than that (9-day) of child cases"
+            },
+            {
+              "id":"awgyxn3t",
+              "title":"Clinical Characteristics of 34 Children with Coronavirus Disease-2019 in the West of China: a Multiple-center Case Series",
+              "exact_answer":"10.50 (7.75 -25.25) days"
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name":"Asymptomatic shedding",
+      "sub_categories":[
+        {
+          "nq_name":"What is the proportion of patients who were asymptomatic?",
+          "kq_name":"Proportion of patients who were asymptomatic",
+          "answers":[
+            {
+              "id":"bmsmegbs",
+              "title":"A considerable proportion of individuals with asymptomatic SARS-CoV-2 infection in Tibetan population",
+              "exact_answer":"21.7%"
+            },
+            {
+              "id":"7w1bhaz6",
+              "title":"High incidence of asymptomatic SARS-CoV-2 infection, Chongqing, China",
+              "exact_answer":"19%"
+            },
+            {
+              "id":"xsqgrd5l",
+              "title":"High transmissibility of COVID-19 near symptom onset",
+              "exact_answer":"there were 32 laboratory-confirmed COVID-19 patients, including five household/family clusters and four asymptomatic patients"
+            },
+            {
+              "id":"56zhxd6e",
+              "title":"Epidemiological parameters of coronavirus disease 2019: a pooled analysis of publicly reported individual data of 1155 cases from seven countries",
+              "exact_answer":"49 (14.89%) were asymptomatic"
+            },
+            {
+              "id":"atnz63pk",
+              "title":"Estimating the Asymptomatic Proportion of 2019 Novel Coronavirus onboard the Princess Cruises Ship, 2020",
+              "exact_answer":"17.9%"
+            },
+            {
+              "id":"ofoqk100",
+              "title":"Clinical Characteristics of 24 Asymptomatic Infections with COVID-19 Screened among Close Contacts in Nanjing, China",
+              "exact_answer":"The remaining 7 (29.2%) cases showed normal CT image and had no symptoms during hospitalization."
+            },
+            {
+              "id":"k3f7ohzg",
+              "title":"Characteristics of COVID-19 infection in Beijing",
+              "exact_answer":"13 (5.0%) asymptomatic cases"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the proportion of pediatric patients who were asymptomatic?",
+          "kq_name":"Proportion of pediatric patients who were asymptomatic",
+          "answers":[
+            {
+              "id":"xsgxd5sy",
+              "title":"Epidemiological and Clinical Characteristics of Children with Coronavirus Disease 2019",
+              "exact_answer":"20 (27.03%) cases"
+            },
+            {
+              "id":"dmrtsxik",
+              "title":"Articles Clinical and epidemiological features of 36 children with coronavirus disease 2019 (COVID-19) in Zhejiang, China: an observational cohort study",
+              "exact_answer":"ten (28%) patients"
+            },
+            {
+              "id":"7w1bhaz6",
+              "title":"High incidence of asymptomatic SARS-CoV-2 infection, Chongqing, China",
+              "exact_answer":"28.6%"
+            },
+            {
+              "id":"jvhrp51s",
+              "title":"Title: The clinical and epidemiological features and hints of 82 confirmed COVID-19 pediatric cases aged 0-16 in Wuhan, China",
+              "exact_answer":"8 no symptoms"
+            },
+            {
+              "id":"j58f1lwa",
+              "title":"Preliminary epidemiological analysis on children and adolescents with novel coronavirus disease 2019 outside Hubei Province, China: an observational study utilizing crowdsourced data",
+              "exact_answer":"2 (8.0%) cases"
+            },
+            {
+              "id":"mar8zt2t",
+              "title":"The different clinical characteristics of corona virus disease cases between children and their families in China -the character of children with COVID-19",
+              "exact_answer":"six (66.7%) children"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the asymptomatic transmission during incubation?",
+          "kq_name":"Asymptomatic transmission during incubation",
+          "answers":[
+            {
+              "id":"eflwztji",
+              "title":"Temporal dynamics in viral shedding and transmissibility of COVID-19",
+              "exact_answer":"44% of transmission prior to symptom onset"
+            },
+            {
+              "id":"v3gww4iv",
+              "title":"Transmission of corona virus disease 2019 during the incubation period may lead to a quarantine loophole",
+              "exact_answer":"(73.0%) were infected before the symptom onset of the first-generation cases"
+            },
+            {
+              "id":"56zhxd6e",
+              "title":"Epidemiological parameters of coronavirus disease 2019: a pooled analysis of publicly reported individual data of 1155 cases from seven countries Summary Background",
+              "exact_answer":"In 102 (43.78%) infector-infectee pairs, transmission occurred before infectors' symptom onsets"
+            },
+            {
+              "id":"v3gww4iv",
+              "title":"Transmission of corona virus disease 2019 during the incubation period may lead to a quarantine loophole",
+              "exact_answer":"(73.0%) were infected before the symptom onset of the first-generation cases"
+            },
+            {
+              "id":"st5vs6gq",
+              "title":"Title: The serial interval of COVID-19 from publicly reported confirmed cases Running Head: The serial interval of COVID-19",
+              "exact_answer":"12.6% of reports indicating pre-symptomatic transmission"
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name":"Persistence of sources",
+      "sub_categories":[
+        {
+          "nq_name":"What is the length of viral shedding in stool?",
+          "kq_name":"Length of viral shedding in stool",
+          "answers":[
+            {
+              "id":"k86ljbxu",
+              "title":"Do children need a longer time to shed SARS- CoV-2 in stool than adults?",
+              "exact_answer":"100% positive on the third week after onset and 30% positive 29 days later"
+            },
+            {
+              "id":"ouca1bol",
+              "title":"Evaluation of SARS-CoV-2 RNA shedding in clinical specimens and clinical characteristics of 10 patients with COVID-19 in Macau",
+              "exact_answer":"detected in feces till 14 days after the onset of symptoms"
+            },
+            {
+              "id":"r5a46n9a",
+              "title":"Viral Kinetics and Antibody Responses in Patients with COVID-19",
+              "exact_answer":"18 days (range, 7-26)"
+            },
+            {
+              "id":"1fyag5x3",
+              "title":"Virus shedding patterns in nasopharyngeal and fecal specimens of COVID-19 patients 2 3",
+              "exact_answer":"22.0 days (IQR 15.5 to 23.5)"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the length of viral shedding from the nasopharynx?",
+          "kq_name":"Length of viral shedding from nasopharynx",
+          "answers":[
+            {
+              "id":"1fyag5x3",
+              "title":"Virus shedding patterns in nasopharyngeal and fecal specimens of COVID-19 patients 2 3",
+              "exact_answer":"10.0 days (IQR 8.0 to 17.0)"
+            },
+            {
+              "id":"r5a46n9a",
+              "title":"Viral Kinetics and Antibody Responses in Patients with COVID-19",
+              "exact_answer":"12 days (range, 3-38 )"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the length of viral shedding in urine?",
+          "kq_name":"Length of viral shedding in urine",
+          "answers":[
+            {
+              "id":"1fyag5x3",
+              "title":"Virus shedding patterns in nasopharyngeal and fecal specimens of COVID-19 patients 2 3",
+              "exact_answer":"all patient data were and urine samples were all negative, except for urine samples from two 53 severe cases at the latest available detection point (16 or 21 d.a.o)"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the length of viral shedding in blood?",
+          "kq_name":"Length of viral shedding in blood",
+          "answers":[
+            {
+              "id":"ac4aesoa",
+              "title":"Comparisons of nucleic acid conversion time of SARS-CoV-2 of different samples in ICU and non-ICU patients Conversion time of SARS-CoV-2 RT-PCR in ICU and non-ICU patients Letter to the Editor Comparisons of nucleic acid conversion time of SARS-CoV-2 of different samples in ICU and non-ICU patients",
+              "exact_answer":"10.17 ± 6.134 and 14.63 ± 5.878 days in non-ICU and ICU patients respectively"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the prevalence of viral shedding in blood?",
+          "kq_name":"Prevalence of viral shedding in blood",
+          "answers":[
+            {
+              "id":"r5a46n9a",
+              "title":"Viral Kinetics and Antibody Responses in Patients with COVID-19",
+              "exact_answer":"12 plasmas (5.7%) from 9 patients (14.3%) were positive"
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name":"Diagnostics",
+      "sub_categories":[
+        {
+          "nq_name":"What is the sensitivity and specificity of COVID-19 tests?",
+          "kq_name":"Sensitivity and specificity of COVID-19 tests",
+          "answers":[
+            {
+              "id":"9p7hqk1u",
+              "title":"Journal Pre-proof COVID-19 pneumonia: a review of typical CT findings and differential diagnosis COVID-19 pneumonia: a review of typical CT findings and differential diagnosis",
+              "exact_answer":"A large series based on 1014 patients reported a 97% sensitivity of chest CT for the diagnosis of COVID-19, while the mean time interval between initial negative and positive RT-PCR was approximately 5 days"
+            },
+            {
+              "id":"aa7slcnc",
+              "title":"Highly accurate and sensitive diagnostic detection of SARS-CoV-2 by digital PCR",
+              "exact_answer":"The overall sensitivity, specificity and diagnostic accuracy of RT-dPCR were 90%, 100% and 93 %, respectively"
+            },
+            {
+              "id":"na8odvj7",
+              "title":"Serological detection of 2019-nCoV respond to the epidemic: A useful complement to nucleic acid testing",
+              "exact_answer":"The areas under the ROC curves of IgM and IgG were 0.988 and 1.000, respectively."
+            },
+            {
+              "id":"py38bel4",
+              "title":"Clinical significance of IgM and IgG test for diagnosis of highly suspected COVID-19 infection",
+              "exact_answer":"The positive detection rate of combination of IgM and IgG for patients with COVID-19 negative and positive nucleic acid test was 72.73% and 87.50%."
+            },
+            {
+              "id":"5skk3nj4",
+              "title":"Imaging manifestations and diagnostic value of chest CT of coronavirus disease 2019 (COVID-19) in the Xiaogan area",
+              "exact_answer":"the overall accuracy rate of CT examination in the present study was 97.3%"
+            },
+            {
+              "id":"cv3qgno3",
+              "title":"Rapid Molecular Detection of SARS-CoV-2 (COVID-19) Virus RNA Using Colorimetric LAMP",
+              "exact_answer":"colorimetric LAMP assay showed 100% agreement with the RT-qPCR results across a range of C q values"
+            },
+            {
+              "id":"chln5r8w",
+              "title":"Diagnosis of the Coronavirus disease (COVID-19): rRT-PCR or CT?",
+              "exact_answer":"sensitivity was therefore 97.2%"
+            },
+            {
+              "id":"s7uqawbd",
+              "title":"Rapid colorimetric detection of COVID-19 coronavirus using a reverse tran- scriptional loop-mediated isothermal amplification (RT-LAMP) diagnostic plat- form: iLACO",
+              "exact_answer":"iLACO is very sensitive, and as low as 10 copies of ORF1ab gene were detected successfully"
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name":"Seasonality",
+      "sub_categories":[
+        {
+          "nq_name":"How does temperature affect the transmission of COVID-19?",
+          "kq_name":"Effects of temperature on the transmission of COVID-19",
+          "answers":[
+            {
+              "id":"r141na6j",
+              "title":"Eco-epidemiological assessment of the COVID-19 epidemic in China",
+              "exact_answer":"Adjusted incidence rate ratios suggested brighter, warmer and drier conditions were associated with lower incidence"
+            },
+            {
+              "id":"mdyojac2",
+              "title":"Climate effect on COVID-19 spread rate: an online surveillance tool",
+              "exact_answer":"a plausible negative correlation between warmer climate and COVID-19 spread rate"
+            },
+            {
+              "id":"vc2eheb6",
+              "title":"Causal empirical estimates suggest COVID-19 transmission rates are highly seasonal",
+              "exact_answer":"a 1 • C increase in local temperature reduces new COVID-19 cases per 1 million people by 13%, with a 95% confidence interval of [-21%, -4%]"
+            },
+            {
+              "id":"fcaeoyxd",
+              "title":"Climate affects global patterns of COVID-19 early outbreak dynamics",
+              "exact_answer":"Temperature and humidity strongly impact the variation of the growth rate of Covid-19 cases across the globe"
+            },
+            {
+              "id":"drzphrqj",
+              "title":"Preliminary evidence that higher temperatures are associated with lower incidence of COVID-19, for cases reported globally up to 29th February 2020",
+              "exact_answer":"higher average temperature was strongly associated with lower COVID-19 incidence for temperatures of 1°C and higher"
+            }
+          ]
+        },
+        {
+          "nq_name":"How does humidity affect the transmission of COVID-19?",
+          "kq_name":"Effects of humidity on the transmission of COVID-19",
+          "answers":[
+            {
+              "id":"6anr4xdw",
+              "title":"Effects of temperature variation and humidity on the death of COVID-19 in Wuhan, China",
+              "exact_answer":"both per 1 unit increase of temperature and absolute humidity were related to the decreased COVID-19 death counts"
+            },
+            {
+              "id":"qz2joxys",
+              "title":"Role of temperature and humidity in the modulation of the doubling time of COVID-19 cases",
+              "exact_answer":"doubling time correlates positively with temperature and inversely with humidity, suggesting that a decrease in the rate of progression of COVID-19 with the arrival of spring and summer in the north hemisphere"
+            },
+            {
+              "id":"fcaeoyxd",
+              "title":"Title: Climate Affects Global Patterns Of Covid-19 Early Outbreak Dynamics",
+              "exact_answer":"humidity strongly impact the variation of the growth rate of Covid-19 cases across the globe"
+            }
+          ]
+        },
+        {
+          "nq_name":"How does seasonality affect the transmission of COVID-19?",
+          "kq_name":"Effects of seasonality on the transmission of COVID-19",
+          "answers":[
+            {
+              "id":"f4hj35dr",
+              "title":"Projecting the transmission dynamics of SARS-CoV-2 through the post-pandemic period",
+              "exact_answer":"Winter/spring establishments favored longer-lasting outbreaks with shorter peaks (Fig 3A) , while autumn/winter establishments led to more acute outbreaks (Fig 3B) "
+            },
+            {
+              "id":"zxx7tikz",
+              "title":"Susceptible supply limits the role of climate in the COVID-19 pandemic",
+              "exact_answer":"summer temperatures will not substantially limit pandemic growth"
+            },
+            {
+              "id":"f3qeoyvf",
+              "title":"Title: Modeling the Corona Virus Outbreak in IRAN",
+              "exact_answer":"available data is too small to identify seasonal patterns and make predictable variation in value"
+            },
+            {
+              "id":"okvu49y3",
+              "title":"Application of the ARIMA model on the COVID- 2019 epidemic dataset",
+              "exact_answer":"prevalence and incidence of COVID-2019 are not influenced by the seasonality"
+            },
+            {
+              "id":"3p2dl8yf",
+              "title":"Potential impact of seasonal forcing on a SARS-CoV-2 pandemic",
+              "exact_answer":"seasonal variation might slow down a pandemic"
+            },
+            {
+              "id":"x4qdiln9",
+              "title":"A spatial model of CoVID-19 transmission in England and Wales: early spread and peak timing",
+              "exact_answer":"Seasonal changes in transmission rate could shift the timing of the peak into winter months"
+            },
+            {
+              "id":"f4hj35dr",
+              "title":"Projecting the transmission dynamics of SARS-CoV-2 through the post-pandemic period",
+              "exact_answer":"HCoV-OC43 and HCoV-HKU1 cause annual wintertime outbreaks of respiratory illness in temperate regions (9, 10), suggesting that wintertime climate and host behaviors may facilitate transmission, as is true for influenza"
+            },
+            {
+              "id":"nzat41wu",
+              "title":"Social distancing strategies for curbing the COVID-19 epidemic",
+              "exact_answer":"If SARS-Cov-2 transmission is similarly subject to seasonal forcing, summer outbreaks would naturally have lower peaks than winter outbreaks"
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name":"Physical science",
+      "sub_categories":[
+        {
+          "nq_name":"What purity of ethanol inactivates COVID-19?",
+          "kq_name":"Purity of ethanol to inactivate COVID-19",
+          "answers":[
+            {
+              "id":"c1n994j6",
+              "title":"Efficient inactivation of SARS-CoV-2 by WHO-recommended hand rub formulations and alcohols",
+              "exact_answer":"WHO 21 formulation I, based on 85 % ethanol, efficiently inactivated the virus with reduction 22 factors"
+            },
+            {
+              "id":"c1n994j6",
+              "title":"Efficient inactivation of SARS-CoV-2 by WHO-recommended hand rub formulations and alcohols",
+              "exact_answer":"Both alcohols, ethanol (Fig. 9 2A) and 2-propanol ( Fig. 2B) were able to reduce viral titers in 30 s exposure"
+            },
+            {
+              "id":"c1n994j6",
+              "title":"Efficient inactivation of SARS-CoV-2 by WHO-recommended hand rub formulations and alcohols",
+              "exact_answer":"minimal concentration of 30 % ethanol or 2-propanol is sufficient for viral 12 inactivation"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the temperature used for inactivating COVID-19?",
+          "kq_name":"Temperature used for inactivating COVID-19",
+          "answers":[
+            {
+              "id":"dm1wkpnv",
+              "title":"Can N95 respirators be reused after disinfection? And for how many times?",
+              "exact_answer":"heating (dry or in the presence humidity) <100 °C can preserve the filtration characteristics of a pristine N95 respirator"
+            },
+            {
+              "id":"d48u5w0h",
+              "title":"Rapid evidence summary on SARS-CoV-2 survivorship and disinfection, and a reusable PPE protocol using a double-hit process",
+              "exact_answer":"60°C for 90 minutes"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the UVGI intensity used for inactivating COVID-19?",
+          "kq_name":"UVGI intensity used for inactivating COVID-19",
+          "answers":[
+            {
+              "id":"dm1wkpnv",
+              "title":"Can N95 respirators be reused after disinfection? And for how many times?",
+              "exact_answer":"(254 nm, 17 mW/cm 2 )"
+            },
+            {
+              "id":"d48u5w0h",
+              "title":"Rapid evidence summary on SARS-CoV-2 survivorship and disinfection, and a reusable PPE protocol using a double-hit process",
+              "exact_answer":"at least 1,000 mJ/cm 2"
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name":"Metaresearch",
+      "sub_categories":[
+        {
+          "kq_name":"Sample size used in COVID-19 studies",
+          "nq_name":"How large is the sample size used in COVID-19 studies?",
+          "answers":[
+            {
+              "id":"wuclekt6",
+              "title":"Longitudinal analysis of laboratory findings during the process of recovery for patients with COVID-19",
+              "exact_answer":"59 cases"
+            },
+            {
+              "id":"ragcpbl6",
+              "title":"Evolving epidemiology of novel coronavirus diseases 2019 and possible interruption of local transmission outside Hubei Province in China: a descriptive and modeling study",
+              "exact_answer":"49 cases"
+            },
+            {
+              "id":"56zhxd6e",
+              "title":"Epidemiological parameters of coronavirus disease 2019: a pooled analysis of publicly reported individual data of 1155 cases from seven countries Summary Background",
+              "exact_answer":"587 cases"
+            },
+            {
+              "id":"djq0lvr2",
+              "title":"Is a 14-day quarantine period optimal for effectively controlling coronavirus disease 2019 (COVID-19)?",
+              "exact_answer":"2015 cases"
+            },
+            {
+              "id":"0hnh4n9e",
+              "title":"Investigation of three clusters of COVID-19 in Singapore: implications for surveillance and response measures",
+              "exact_answer":"36 individuals"
+            },
+            {
+              "id":"it4ka7v0",
+              "title":"Estimation of incubation period distribution of COVID-19 using disease onset forward time: a novel cross-sectional and forward follow-up study",
+              "exact_answer":"sample size of 1211"
+            },
+            {
+              "id":"tne83uu0",
+              "title":"Epidemiologic Characteristics of COVID-19 in Guizhou, China Affiliations: Guizhou Center for Disease Control and Prevention Abstract",
+              "exact_answer":"162 cases"
+            },
+            {
+              "id":"glq0lckz",
+              "title":"Clinical Characteristics of SARS-CoV-2 Pneumonia Compared to Controls in Chinese Han Population",
+              "exact_answer":"69 hospitalized patients"
+            },
+            {
+              "id":"ti9b1etu",
+              "title":"Transmission and clinical characteristics of coronavirus disease 2019 in 104 outside-Wuhan patients, China",
+              "exact_answer":"104 cases"
+            },
+            {
+              "id":"1mxjklgx",
+              "title":"Epidemiological characteristics of 1212 COVID-19 patients in Henan, China. medRxiv",
+              "exact_answer":"1212 patients"
+            },
+            {
+              "id":"u8goc7io",
+              "title":"Title: The incubation period of 2019-nCoV infections among travellers from Wuhan, China",
+              "exact_answer":"88 confirmed cases"
+            },
+            {
+              "id":"r5a46n9a",
+              "title":"Viral Kinetics and Antibody Responses in Patients with COVID-19",
+              "exact_answer":"67 patients"
+            },
+            {
+              "id":"giabjjnz",
+              "title":"Children are unlikely to have been the primary source of household SARS-CoV-2 infections",
+              "exact_answer":"40 articles"
+            },
+            {
+              "id":"bmsmegbs",
+              "title":"A considerable proportion of individuals with asymptomatic SARS- CoV-2 infection in Tibetan population",
+              "exact_answer":"83 cases"
+            },
+            {
+              "id":"56zhxd6e",
+              "title":"Epidemiological parameters of coronavirus disease 2019: a pooled analysis of publicly reported individual data of 1155 cases from seven countries Summary Background",
+              "exact_answer":"329 cases"
+            },
+            {
+              "id":"atnz63pk",
+              "title":"Estimating the Asymptomatic Proportion of 2019 Novel Coronavirus onboard the Princess Cruises Ship, 2020",
+              "exact_answer":"634 cases,"
+            },
+            {
+              "id":"k3f7ohzg",
+              "title":"Characteristics of COVID-19 infection in Beijing",
+              "exact_answer":"262 patients"
+            },
+            {
+              "id":"dmrtsxik",
+              "title":"Articles Clinical and epidemiological features of 36 children with coronavirus disease 2019 (COVID-19) in Zhejiang, China: an observational cohort study",
+              "exact_answer":"36 children"
+            },
+            {
+              "id":"jvhrp51s",
+              "title":"Title: The clinical and epidemiological features and hints of 82 confirmed COVID-19 pediatric cases aged 0-16 in Wuhan, China",
+              "exact_answer":"82 confirmed cases"
+            },
+            {
+              "id":"ouca1bol",
+              "title":"Evaluation of SARS-CoV-2 RNA shedding in clinical specimens and clinical characteristics of 10 patients with COVID-19 in Macau",
+              "exact_answer":"10 cases"
+            },
+            {
+              "id":"ac4aesoa",
+              "title":"Comparisons of nucleic acid conversion time of SARS-CoV-2 of different samples in ICU and non-ICU patients Conversion time of SARS-CoV-2 RT-PCR in ICU and non-ICU patients Letter to the Editor Comparisons of nucleic acid conversion time of SARS-CoV-2 of different samples in ICU and non-ICU patients",
+              "exact_answer":"thirty-two patients"
+            },
+            {
+              "id":"4aps0kvp",
+              "title":"Molecular and serological investigation of 2019-nCoV infected patients: implication of multiple shedding routes",
+              "exact_answer":"16 people"
+            },
+            {
+              "id":"3q2bj7cr",
+              "title":"Prolonged viral shedding in feces of pediatric patients with coronavirus disease 2019",
+              "exact_answer":"three paediatric cases"
+            },
+            {
+              "id":"nrdiqees",
+              "title":"History of coronary heart disease increases the mortality rate of COVID-19 patients: a nested case-control study",
+              "exact_answer":"275 publicly reported confirmed cases"
+            },
+            {
+              "id":"as6tbfrh",
+              "title":"Title: Clinical characteristics associated with COVID-19 severity in California",
+              "exact_answer":"54 patients"
+            },
+            {
+              "id":"5ciaonf0",
+              "title":"Title: Epidemiological and clinical features of 291 cases with coronavirus disease 2019 in areas adjacent to Hubei, China: a double-center observational study",
+              "exact_answer":"291 patients"
+            },
+            {
+              "id":"niw61l9r",
+              "title":"Neurological Manifestations of Hospitalized Patients with COVID-19 in Wuhan, China: a retrospective case series study",
+              "exact_answer":"214 patients"
+            },
+            {
+              "id":"pd70i3d8",
+              "title":"Neutrophil-to-Lymphocyte Ratio Predicts Severe Illness Patients with 2019 Novel Coronavirus in the Early Stage",
+              "exact_answer":"61 patients"
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name":"Hypertension",
+      "sub_categories":[
+        {
+          "nq_name":"What is the RR for severe infection in COVID-19 patients with hypertension?",
+          "kq_name":"RR for severe infection in COVID-19 patients with hypertension",
+          "answers":[
+            {
+              "id":"4mnmaky6",
+              "title":"The relationship of COVID-19 severity with cardiovascular disease and its traditional risk factors: A systematic review and meta-analysis",
+              "exact_answer":"hypertension (10 studies; 2.74 [2.12-3.54])"
+            },
+            {
+              "id":"yx8b2moc",
+              "title":"Incidence, clinical characteristics and prognostic factor of patients with COVID-19: a systematic review and meta-analysis Running title: Predictors of clinical prognosis of COVID-19",
+              "exact_answer":"hypertension (RR = 4.48; 95% CI"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the HR for severe infection in COVID-19 patients with hypertension?",
+          "kq_name":"HR for severe infection in COVID-19 patients with hypertension",
+          "answers":[
+            {
+              "id":"7s9ot4vq",
+              "title":"Comorbidity and its impact on 1,590 patients with COVID-19 in China: A Nationwide Analysis",
+              "exact_answer":"hypertension (HR 1.58, 95%CI 1.07-2.32)"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the OR for severe infection in COVID-19 patients with hypertension?",
+          "kq_name":"OR for severe infection in COVID-19 patients with hypertension",
+          "answers":[
+            {
+              "id":"v6frcc5r",
+              "title":"Effects of hypertension, diabetes and coronary heart disease on COVID-19 diseases severity: a systematic review and meta-analysis",
+              "exact_answer":"There were significant correlations between COVID-19 severity and hypertension [OR=2.3 [95% CI (1.76, 3.00), P<0.01]"
+            },
+            {
+              "id":"vof63qat",
+              "title":"Systematic review and meta-analysis of predictive symptoms and comorbidities for severe COVID-19 infection",
+              "exact_answer":"hypertension (1.97, 95% CI 1.40 -2.77)"
+            },
+            {
+              "id":"o56j4qio",
+              "title":"Prevalence of comorbidities in the novel Wuhan coronavirus (COVID-19) infection: a systematic review and meta-analysis Prevalence of comorbidities in the Novel Wuhan Coronavirus (COVID-19) infection: a systematic review and meta-analysis",
+              "exact_answer":"the pooled odds ratio of hypertension, respiratory system disease, cardiovascular disease in severe patients were (OR 2.36, 95% CI: 1.46-3.83) ,(OR 2.46, 95% CI: 1.76-3.44) and (OR 3.42, 95% CI: 1.88-6.22)respectively"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the mortality rate for COVID-19 patients with hypertension?",
+          "kq_name":"Mortality rate for COVID-19 patients with hypertension",
+          "answers":[
+            {
+              "id":"r0r1kvkp",
+              "title":"Development and external validation of a prognostic multivariable model on admission for hospitalized patients with COVID-19",
+              "exact_answer":"Patients with hypertension, showed a significantly higher mortality rate (p<0·001)."
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name":"Diabetes",
+      "sub_categories":[
+        {
+          "nq_name":"What is the OR for severe infection in COVID-19 patients with diabetes?",
+          "kq_name":"OR for severe infection in COVID-19 patients with diabetes",
+          "answers":[
+            {
+              "id":"oqo8pa1p",
+              "title":"Risk factors for severe corona virus disease 2019 (COVID-19) patients : a systematic review and meta analysis",
+              "exact_answer":"OR = 3.04 [2.01, 4.60]"
+            },
+            {
+              "id":"v6frcc5r",
+              "title":"Effects of hypertension, diabetes and coronary heart disease on COVID-19 diseases severity: a systematic review and meta-analysis",
+              "exact_answer":"[OR=2.67, 95% CI (1.91, 3.74), P<0.01]"
+            },
+            {
+              "id":"yx8b2moc",
+              "title":"Incidence, clinical characteristics and prognostic factor of patients with COVID-19: a systematic review and meta-analysis Running title: Predictors of clinical prognosis of COVID-19",
+              "exact_answer":"2.49; 95% CI, ; n = 10; I 2 = 44%"
+            },
+            {
+              "id":"o56j4qio",
+              "title":"Journal Pre-proof Prevalence of comorbidities in the novel Wuhan coronavirus (COVID-19) infection: a systematic review and meta-analysis Prevalence of comorbidities in the Novel Wuhan Coronavirus (COVID-19) infection: a systematic review and meta-analysis",
+              "exact_answer":"OR 2.07, 95% CI: 0.89-4.82"
+            },
+            {
+              "id":"o56j4qio",
+              "title":"Journal Pre-proof Prevalence of comorbidities in the novel Wuhan coronavirus (COVID-19) infection: a systematic review and meta-analysis Prevalence of comorbidities in the Novel Wuhan Coronavirus (COVID-19) infection: a systematic review and meta-analysis",
+              "exact_answer":"OR 2.07, 95% CI: 0.89-4.82"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the RR for severe infection in COVID-19 patients with diabetes?",
+          "kq_name":"RR for severe infection in COVID-19 patients with diabetes",
+          "answers":[
+            {
+              "id":"4mnmaky6",
+              "title":"The relationship of COVID-19 severity with cardiovascular disease and its traditional risk factors: A systematic review and meta-analysis",
+              "exact_answer":"relative risk estimate of 2.81 (2.01-3.93)"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the HR for death in COVID-19 patients with diabetes?",
+          "kq_name":"HR for death in COVID-19 patients with diabetes",
+          "answers":[
+            {
+              "id":"nrdiqees",
+              "title":"History of coronary heart disease increases the mortality rate of COVID-19 patients: a nested case-control study",
+              "exact_answer":"HR=1.1, p=0.61"
+            }
+          ]
+        },
+        {
+          "nq_name":"What is the AHR for death in COVID-19 patients with diabetes?",
+          "kq_name":"AHR for death in COVID-19 patients with diabetes",
+          "answers":[
+            {
+              "id":"skknfc6h",
+              "title":"Comorbid Diabetes Mellitus was Associated with Poorer Prognosis in Patients with COVID-19: A Retrospective Cohort Study",
+              "exact_answer":"[aHR]=3.64; 95% confidence interval [CI]: 1.09, 12.21"
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/data/kaggle-lit-review.json b/data/kaggle-lit-review.json
deleted file mode 100644
index a1fafc5c..00000000
--- a/data/kaggle-lit-review.json
+++ /dev/null
@@ -1,459 +0,0 @@
-{
-  "categories":[
-    {
-      "name":"Incubation period",
-      "sub_categories":[
-        {
-          "name":"What is the incubation period of the virus?",
-          "answers":[
-            {
-              "id":"wuclekt6",
-              "title":"Longitudinal analysis of laboratory findings during the process of recovery for patients with COVID-19",
-              "exact_answer":"4 days (IQR, 2-7)"
-            },
-            {
-              "id":"e3t1f0rt",
-              "title":"Epidemiological Characteristics of COVID-19; a Systemic Review and Meta-Analysis 1",
-              "exact_answer":"5.84 (99% CI: 4.83, 6.85) days"
-            },
-            {
-              "id":"ragcpbl6",
-              "title":"Evolving epidemiology of novel coronavirus diseases 2019 and possible interruption of local transmission outside Hubei Province in China: a descriptive and modeling study",
-              "exact_answer":"5·2 days"
-            },
-            {
-              "id":"n0uwy77g",
-              "title":"Clinical characteristics and durations of hospitalized patients with COVID-19 in Beijing: a retrospective cohort study",
-              "exact_answer":"4 (3-7) days"
-            },
-            {
-              "id":"<missing>",
-              "title":"Early Transmission Dynamics in Wuhan, China, of Novel Coronavirus–Infected Pneumonia",
-              "exact_answer":"5.2 days (95% confidence interval [CI], 4.1 to 7.0)"
-            },
-            {
-              "id":"x23ej29m",
-              "title":"Clinical features and obstetric and neonatal outcomes of pregnant patients with COVID-19 in Wuhan, China: a retrospective, single-centre, descriptive study",
-              "exact_answer":"5 days (range 2-9 days)"
-            },
-            {
-              "id":"56zhxd6e",
-              "title":"Epidemiological parameters of coronavirus disease 2019: a pooled analysis of publicly reported individual data of 1155 cases from seven countries Summary Background",
-              "exact_answer":"7.44 days"
-            },
-            {
-              "id":"zph6r4il",
-              "title":"Epidemiological, clinical and virological characteristics of 74 cases of coronavirus-infected disease 2019 (COVID-19) with gastrointestinal symptoms",
-              "exact_answer":"4 days (IQR 3-7 days)"
-            },
-            {
-              "id":"djq0lvr2",
-              "title":"Is a 14-day quarantine period optimal for effectively controlling coronavirus disease 2019 (COVID-19)?",
-              "exact_answer":"The median incubation period of both male and female adults was similar (7-day) but significantly shorter than that (9-day) of child cases"
-            },
-            {
-              "id":"n0vmb946",
-              "title":"The difference in the incubation period of 2019 novel coronavirus (SARS-CoV-2) infection between travelers to Hubei and non-travelers: The need of a longer quarantine period",
-              "exact_answer":"1.8 and 7.2 days"
-            },
-            {
-              "id":"awgyxn3t",
-              "title":"Clinical Characteristics of 34 Children with Coronavirus Disease-2019 in the West of China: a Multiple-center Case Series",
-              "exact_answer":"10.50 (7.75 -25.25) days"
-            },
-            {
-              "id":"0hnh4n9e",
-              "title":"Investigation of three clusters of COVID-19 in Singapore: implications for surveillance and response measures",
-              "exact_answer":"4 days (IQR 3-6)"
-            },
-            {
-              "id":"it4ka7v0",
-              "title":"Estimation of incubation period distribution of COVID-19 using disease onset forward time: a novel cross-sectional and forward follow-up study",
-              "exact_answer":"median of incubation period is 8·13 days (95% confidence interval [CI]: 7·37-8·91), the mean is 8·62 days (95% CI: 8·02-9·28)"
-            },
-            {
-              "id":"glq0lckz",
-              "title":"Clinical Characteristics of SARS-CoV-2 Pneumonia Compared to Controls in Chinese Han Population",
-              "exact_answer":"4 days (IQR, 2 to 7)"
-            },
-            {
-              "id":"8anqfkmo",
-              "title":"The Incubation Period of Coronavirus Disease 2019 (COVID-19) From Publicly Reported Confirmed Cases: Estimation and Application",
-              "exact_answer":"5.1 days (CI, 4.5 to 5.8 days)"
-            },
-            {
-              "id":"v3gww4iv",
-              "title":"Transmission of corona virus disease 2019 during the incubation period may lead to a quarantine loophole",
-              "exact_answer":"4.9 days (95% confidence interval [CI], 4.4 to 5.4) days"
-            },
-            {
-              "id":"66ulqu11",
-              "title":"Transmission interval estimates suggest pre-symptomatic spread of COVID-19",
-              "exact_answer":"7.1 (6.13, 8.25) days for Singapore and 9 (7.92, 10.2) days for Tianjin"
-            },
-            {
-              "id":"ti9b1etu",
-              "title":"Transmission and clinical characteristics of coronavirus disease 2019 in 104 outside-Wuhan patients, China",
-              "exact_answer":"6 days, ranged from 1 to 32 days"
-            },
-            {
-              "id":"k3f7ohzg",
-              "title":"Characteristics of COVID-19 infection in Beijing",
-              "exact_answer":"6.7 days"
-            },
-            {
-              "id":"jxtch47t",
-              "title":"Epidemiologic and Clinical Characteristics of 91 Hospitalized Patients with COVID-19 in Zhejiang, China: A retrospective, multi-centre case series",
-              "exact_answer":"6 (IQR, 3-8) days"
-            },
-            {
-              "id":"dbzrd23n",
-              "title":"Title: A descriptive study of the impact of diseases control and prevention on the epidemics 1 dynamics and clinical features of SARS-CoV-2 outbreak in Shanghai, lessons learned for",
-              "exact_answer":"6.4 days (95% 175 CI 5.3 to 7.6)"
-            },
-            {
-              "id":"j3avpu1y",
-              "title":"A familial cluster of pneumonia associated with the 2019 novel coronavirus indicating person-to-person transmission: a study of a family cluster",
-              "exact_answer":"3-6 days"
-            },
-            {
-              "id":"1mxjklgx",
-              "title":"Epidemiological characteristics of 1212 COVID-19 patients in Henan, China. medRxiv",
-              "exact_answer":"average, mode and median incubation periods are 7.4, 4 and 7 days"
-            },
-            {
-              "id":"ykofrn9i",
-              "title":"Incubation Period and Other Epidemiological Characteristics of 2019 Novel Coronavirus Infections with Right Truncation: A Statistical Analysis of Publicly Available Case Data",
-              "exact_answer":"5.6 days (95% CI: 4.4, 7.4)"
-            },
-            {
-              "id":"u8goc7io",
-              "title":"Title: The incubation period of 2019-nCoV infections among travellers from Wuhan, China",
-              "exact_answer":"6.4 (5.6 -7.7, 95% CI) days"
-            }
-          ]
-        },
-        {
-          "name":"Length of viral shedding after illness onset",
-          "answers":[
-            {
-              "id":"bg0cw5s6",
-              "title":"Factors associated with prolonged viral shedding and impact of Lopinavir/Ritonavir treatment in patients with SARS-CoV-2 infection",
-              "exact_answer":"23 days (IQR, 18-32 days)"
-            },
-            {
-              "id":"r5a46n9a",
-              "title":"Viral Kinetics and Antibody Responses in Patients with COVID-19",
-              "exact_answer":"12 (3-38), 19 (5-37), and 18 (7-26) days in nasopharyngeal swabs, sputum and stools, respectively"
-            },
-            {
-              "id":"k36rymkv",
-              "title":"Clinical course and risk factors for mortality of adult inpatients with COVID-19 in Wuhan, China: a retrospective cohort study",
-              "exact_answer":"20·0 days (IQR 17·0–24·0)"
-            }
-          ]
-        },
-        {
-          "name":"Incubation period across different age groups",
-          "answers":[
-            {
-              "id":"giabjjnz",
-              "title":"Children are unlikely to have been the primary source of household SARS-CoV-2 infections",
-              "exact_answer":"7.74 d ± 3.22"
-            },
-            {
-              "id":"djq0lvr2",
-              "title":"Is a 14-day quarantine period optimal for effectively controlling coronavirus disease 2019 (COVID-19)?",
-              "exact_answer":"median incubation period of both male and female adults was similar (7-day) but significantly shorter than that (9-day) of child cases"
-            },
-            {
-              "id":"awgyxn3t",
-              "title":"Clinical Characteristics of 34 Children with Coronavirus Disease-2019 in the West of China: a Multiple-center Case Series",
-              "exact_answer":"10.50 (7.75 -25.25) days"
-            }
-          ]
-        }
-      ]
-    },
-    {
-      "name":"Asymptomatic shedding",
-      "sub_categories":[
-        {
-          "name":"Proportion of patients who were asymptomatic",
-          "answers":[
-            {
-              "id":"bmsmegbs",
-              "title":"A considerable proportion of individuals with asymptomatic SARS-CoV-2 infection in Tibetan population",
-              "exact_answer":"21.7%"
-            },
-            {
-              "id":"jjgfgqwg",
-              "title":"Modes of contact and risk of transmission in COVID-19 among close contacts",
-              "exact_answer":"6.2%"
-            },
-            {
-              "id":"7w1bhaz6",
-              "title":"High incidence of asymptomatic SARS-CoV-2 infection, Chongqing, China",
-              "exact_answer":"19%"
-            },
-            {
-              "id":"xsqgrd5l",
-              "title":"High transmissibility of COVID-19 near symptom onset",
-              "exact_answer":"there were 32 laboratory-confirmed COVID-19 patients, including five household/family clusters and four asymptomatic patients"
-            },
-            {
-              "id":"6su2x8mk",
-              "title":"Non-severe vs severe symptomatic COVID-19: 104 cases from the outbreak on the cruise ship “Diamond Princess” in Japan",
-              "exact_answer":"76 and 28 patients were classified as non-severe (asymptomatic, mild)"
-            },
-            {
-              "id":"rjm1dqk7",
-              "title":"Epidemiological characteristics of 2019 novel coronavirus family clustering in Zhejiang Province",
-              "exact_answer":"54 asymptomatic infected cases"
-            },
-            {
-              "id":"56zhxd6e",
-              "title":"Epidemiological parameters of coronavirus disease 2019: a pooled analysis of publicly reported individual data of 1155 cases from seven countries",
-              "exact_answer":"49 (14.89%) were asymptomatic"
-            },
-            {
-              "id":"atnz63pk",
-              "title":"Estimating the Asymptomatic Proportion of 2019 Novel Coronavirus onboard the Princess Cruises Ship, 2020",
-              "exact_answer":"17.9%"
-            },
-            {
-              "id":"ofoqk100",
-              "title":"Clinical Characteristics of 24 Asymptomatic Infections with COVID-19 Screened among Close Contacts in Nanjing, China",
-              "exact_answer":"The remaining 7 (29.2%) cases showed normal CT image and had no symptoms during hospitalization."
-            },
-            {
-              "id":"k3f7ohzg",
-              "title":"Characteristics of COVID-19 infection in Beijing",
-              "exact_answer":"13 (5.0%) asymptomatic cases"
-            },
-            {
-              "id":"f3h74j1n",
-              "title":"Estimation of the asymptomatic ratio of novel coronavirus infections (COVID-19)",
-              "exact_answer":"the asymptomatic ratio at 41.6%"
-            }
-          ]
-        },
-        {
-          "name":"Proportion of pediatric patients who were asymptomatic",
-          "answers":[
-            {
-              "id":"xsgxd5sy",
-              "title":"Epidemiological and Clinical Characteristics of Children with Coronavirus Disease 2019",
-              "exact_answer":"20 (27.03%) cases"
-            },
-            {
-              "id":"dmrtsxik",
-              "title":"Articles Clinical and epidemiological features of 36 children with coronavirus disease 2019 (COVID-19) in Zhejiang, China: an observational cohort study",
-              "exact_answer":"ten (28%) patients"
-            },
-            {
-              "id":"7w1bhaz6",
-              "title":"High incidence of asymptomatic SARS-CoV-2 infection, Chongqing, China",
-              "exact_answer":"(28.6%) in children group under 14, next in elder group over 70 (27.3%)"
-            },
-            {
-              "id":"jvhrp51s",
-              "title":"Title: The clinical and epidemiological features and hints of 82 confirmed COVID-19 pediatric cases aged 0-16 in Wuhan, China",
-              "exact_answer":"8 (9.76%)"
-            },
-            {
-              "id":"j58f1lwa",
-              "title":"Preliminary epidemiological analysis on children and adolescents with novel coronavirus disease 2019 outside Hubei Province, China: an observational study utilizing crowdsourced data",
-              "exact_answer":"2 (8.0%) cases"
-            },
-            {
-              "id":"mar8zt2t",
-              "title":"The different clinical characteristics of corona virus disease cases between children and their families in China -the character of children with COVID-19",
-              "exact_answer":"six (66.7%) children"
-            }
-          ]
-        },
-        {
-          "name":"Asymptomatic transmission during incubation",
-          "answers":[
-            {
-              "id":"eflwztji",
-              "title":"Temporal dynamics in viral shedding and transmissibility of COVID-19",
-              "exact_answer":"44% of transmission prior to symptom onset"
-            },
-            {
-              "id":"v3gww4iv",
-              "title":"Transmission of corona virus disease 2019 during the incubation period may lead to a quarantine loophole",
-              "exact_answer":"(73.0%) were infected before the symptom onset of the first-generation cases"
-            },
-            {
-              "id":"56zhxd6e",
-              "title":"Epidemiological parameters of coronavirus disease 2019: a pooled analysis of publicly reported individual data of 1155 cases from seven countries Summary Background",
-              "exact_answer":"In 102 (43.78%) infector-infectee pairs, transmission occurred before infectors' symptom onsets"
-            },
-            {
-              "id":"v3gww4iv",
-              "title":"Transmission of corona virus disease 2019 during the incubation period may lead to a quarantine loophole",
-              "exact_answer":"(73.0%) were infected before the symptom onset of the first-generation cases"
-            },
-            {
-              "id":"st5vs6gq",
-              "title":"Title: The serial interval of COVID-19 from publicly reported confirmed cases Running Head: The serial interval of COVID-19",
-              "exact_answer":"12.6% of reports indicating pre-symptomatic transmission"
-            }
-          ]
-        }
-      ]
-    },
-    {
-      "name":"Persistence of sources",
-      "sub_categories":[
-        {
-          "name":"Length of viral shedding in stool",
-          "answers":[
-            {
-              "id":"k86ljbxu",
-              "title":"Do children need a longer time to shed SARS- CoV-2 in stool than adults?",
-              "exact_answer":"100% positive on the third week after onset and 30% positive 29 days later"
-            },
-            {
-              "id":"ouca1bol",
-              "title":"Evaluation of SARS-CoV-2 RNA shedding in clinical specimens and clinical characteristics of 10 patients with COVID-19 in Macau",
-              "exact_answer":"detected in feces till 14 days after the onset of symptoms"
-            },
-            {
-              "id":"r5a46n9a",
-              "title":"Viral Kinetics and Antibody Responses in Patients with COVID-19",
-              "exact_answer":"18 days (range, 7-26)"
-            },
-            {
-              "id":"1fyag5x3",
-              "title":"Virus shedding patterns in nasopharyngeal and fecal specimens of COVID-19 patients 2 3",
-              "exact_answer":"22.0 days (IQR 15.5 to 23.5)"
-            }
-          ]
-        },
-        {
-          "name":"Length of viral shedding from nasopharynx",
-          "answers":[
-            {
-              "id":"1fyag5x3",
-              "title":"Virus shedding patterns in nasopharyngeal and fecal specimens of COVID-19 patients 2 3",
-              "exact_answer":"10.0 days (IQR 8.0 to 17.0)"
-            },
-            {
-              "id":"r5a46n9a",
-              "title":"Viral Kinetics and Antibody Responses in Patients with COVID-19",
-              "exact_answer":"12 days (range, 3-38 )"
-            }
-          ]
-        },
-        {
-          "name":"Length of viral shedding in urine",
-          "answers":[
-            {
-              "id":"1fyag5x3",
-              "title":"Virus shedding patterns in nasopharyngeal and fecal specimens of COVID-19 patients 2 3",
-              "exact_answer":"all patient data were and urine samples were all negative, except for urine samples from two 53 severe cases at the latest available detection point (16 or 21 d.a.o)"
-            }
-          ]
-        },
-        {
-          "name":"Length of viral shedding in blood",
-          "answers":[
-            {
-              "id":"ac4aesoa",
-              "title":"Comparisons of nucleic acid conversion time of SARS-CoV-2 of different samples in ICU and non-ICU patients Conversion time of SARS-CoV-2 RT-PCR in ICU and non-ICU patients Letter to the Editor Comparisons of nucleic acid conversion time of SARS-CoV-2 of different samples in ICU and non-ICU patients",
-              "exact_answer":"10.17 ± 6.134 and 14.63 ± 5.878 days in non-ICU and ICU patients respectively"
-            }
-          ]
-        },
-        {
-          "name":"Prevalence of viral shedding in blood",
-          "answers":[
-            {
-              "id":"r5a46n9a",
-              "title":"Viral Kinetics and Antibody Responses in Patients with COVID-19",
-              "exact_answer":"12 plasmas (5.7%) from 9 patients (14.3%) were positive"
-            }
-          ]
-        }
-      ]
-    },
-    {
-      "name":"Diagnostics",
-      "sub_categories":[
-        {
-          "name":"Sensitivity and specificity of COVID-19 tests",
-          "answers":[
-            {
-              "id":"9p7hqk1u",
-              "title":"Journal Pre-proof COVID-19 pneumonia: a review of typical CT findings and differential diagnosis COVID-19 pneumonia: a review of typical CT findings and differential diagnosis",
-              "exact_answer":"A large series based on 1014 patients reported a 97% sensitivity of chest CT for the diagnosis of COVID-19, while the mean time interval between initial negative and positive RT-PCR was approximately 5 days"
-            },
-            {
-              "id":"aa7slcnc",
-              "title":"Highly accurate and sensitive diagnostic detection of SARS-CoV-2 by digital PCR",
-              "exact_answer":"The overall sensitivity, specificity and diagnostic accuracy of RT-dPCR were 90%, 100% and 93 %, respectively"
-            },
-            {
-              "id":"na8odvj7",
-              "title":"Serological detection of 2019-nCoV respond to the epidemic: A useful complement to nucleic acid testing",
-              "exact_answer":"The areas under the ROC curves of IgM and IgG were 0.988 and 1.000, respectively."
-            },
-            {
-              "id":"<missing>",
-              "title":"Molecular immune pathogenesis and diagnosis of COVID-19",
-              "exact_answer":"RT-qPCR can only achieve a sensitivity of 50% to 79%, depending on the protocol used the sample type and number of clinical specimens collected"
-            },
-            {
-              "id":"<missing>",
-              "title":"Molecular immune pathogenesis and diagnosis of COVID-19",
-              "exact_answer":"The sensitivity of SARS-CoV N-based IgG ELISA (94.7%) is significantly higher than that of SARS-CoV S-based IgG ELISA (58.9%)"
-            },
-            {
-              "id":"py38bel4",
-              "title":"Clinical significance of IgM and IgG test for diagnosis of highly suspected COVID-19 infection",
-              "exact_answer":"The positive detection rate of combination of IgM and IgG for patients with COVID-19 negative and positive nucleic acid test was 72.73% and 87.50%."
-            },
-            {
-              "id":"5skk3nj4",
-              "title":"Imaging manifestations and diagnostic value of chest CT of coronavirus disease 2019 (COVID-19) in the Xiaogan area",
-              "exact_answer":"the overall accuracy rate of CT examination in the present study was 97.3%"
-            },
-            {
-              "id":"cv3qgno3",
-              "title":"Rapid Molecular Detection of SARS-CoV-2 (COVID-19) Virus RNA Using Colorimetric LAMP",
-              "exact_answer":"colorimetric LAMP assay showed 100% agreement with the RT-qPCR results across a range of C q values"
-            },
-            {
-              "id":"<missing>",
-              "title":"Differential diagnosis of illness in patients under investigation for the novel coronavirus (SARS-CoV-2), Italy, February 2020",
-              "exact_answer":"Broad screening for respiratory pathogens revealed a high rate of influenza virus infections, accounting for 28.5% of all suspected cases of SARS-CoV-2 infection"
-            },
-            {
-              "id":"8gncbgot",
-              "title":"Potential Rapid Diagnostics, Vaccine and Therapeutics for 2019 Novel Coronavirus (2019-nCoV): A Systematic Review",
-              "exact_answer":"E gene and RdRp gene assays produced the best result (5.2 and 3.8 copies per reaction at 95% detection probability, respectively)"
-            },
-            {
-              "id":"chln5r8w",
-              "title":"Diagnosis of the Coronavirus disease (COVID-19): rRT-PCR or CT?",
-              "exact_answer":"Sensitivity of CT examinations was 97.2% at presentation, whereas first round rRT-PCR sensitivity was 84.6%"
-            },
-            {
-              "id":"s7uqawbd",
-              "title":"Rapid colorimetric detection of COVID-19 coronavirus using a reverse tran- scriptional loop-mediated isothermal amplification (RT-LAMP) diagnostic plat- form: iLACO",
-              "exact_answer":"iLACO is very sensitive, and as low as 10 copies of ORF1ab gene were detected successfully"
-            },
-            {
-              "id":"<missing>",
-              "title":"Detection of 2019 novel coronavirus (2019-nCoV) by real-time RT-PCR",
-              "exact_answer":"All assays were highly sensitive, with best results obtained for the E gene and RdRp gene assays (5.2 and 3.8 copies per reaction at 95% detection probability, respectively)"
-            }
-          ]
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/pygaggle/data/kaggle.py b/pygaggle/data/kaggle.py
index 46fbd17d..cbadd1fb 100644
--- a/pygaggle/data/kaggle.py
+++ b/pygaggle/data/kaggle.py
@@ -1,13 +1,15 @@
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 from typing import List
 import json
 import logging
 
 from pydantic import BaseModel
+import scipy.special as sp
+import numpy as np
 
-from .relevance import RelevanceExample, LuceneDocumentLoader
+from .relevance import RelevanceExample, Cord19DocumentLoader
 from pygaggle.model.tokenize import SpacySenticizer
-from pygaggle.rerank import Query, Text
+from pygaggle.rerank.base import Query, Text
 
 
 __all__ = ['MISSING_ID', 'LitReviewCategory', 'LitReviewAnswer', 'LitReviewDataset', 'LitReviewSubcategory']
@@ -23,7 +25,8 @@ class LitReviewAnswer(BaseModel):
 
 
 class LitReviewSubcategory(BaseModel):
-    name: str
+    nq_name: str
+    kq_name: str
     answers: List[LitReviewAnswer]
 
 
@@ -33,6 +36,7 @@ class LitReviewCategory(BaseModel):
 
 
 class LitReviewDataset(BaseModel):
+    version: str
     categories: List[LitReviewCategory]
 
     @classmethod
@@ -40,30 +44,51 @@ def from_file(cls, filename: str) -> 'LitReviewDataset':
         with open(filename) as f:
             return cls(**json.load(f))
 
-    @property
-    def query_answer_pairs(self):
-        return ((subcat.name, ans) for cat in self.categories
+    def query_answer_pairs(self, split: str = 'nq'):
+        return ((subcat.nq_name if split == 'nq' else subcat.kq_name, ans) for cat in self.categories
                 for subcat in cat.sub_categories
                 for ans in subcat.answers)
 
-    def to_senticized_dataset(self, index_path: str) -> List[RelevanceExample]:
-        loader = LuceneDocumentLoader(index_path)
+    def to_senticized_dataset(self,
+                              index_path: str,
+                              split: str = 'nq') -> List[RelevanceExample]:
+        loader = Cord19DocumentLoader(index_path)
         tokenizer = SpacySenticizer()
         example_map = OrderedDict()
         rel_map = OrderedDict()
-        for query, document in self.query_answer_pairs:
+        for query, document in self.query_answer_pairs(split=split):
             if document.id == MISSING_ID:
                 logging.warning(f'Skipping {document.title} (missing ID)')
                 continue
             key = (query, document.id)
-            example_map.setdefault(key, tokenizer(loader.load_document(document.id)))
+            try:
+                doc = loader.load_document(document.id)
+                example_map.setdefault(key, tokenizer(doc.all_text))
+            except ValueError as e:
+                logging.warning(f'Skipping {document.id} ({e})')
+                continue
             sents = example_map[key]
             rel_map.setdefault(key, [False] * len(sents))
             for idx, s in enumerate(sents):
                 if document.exact_answer in s:
                     rel_map[key][idx] = True
+        mean_stats = defaultdict(list)
         for (_, doc_id), rels in rel_map.items():
+            int_rels = np.array(list(map(int, rels)))
+            p = int_rels.sum()
+            mean_stats['Average spans'].append(p)
+            mean_stats['Random P@1'].append(np.mean(int_rels))
+            n = len(int_rels) - p
+            N = len(int_rels)
+            mean_stats['Random R@3'].append(1 - (n * (n - 1) * (n - 2)) / (N * (N - 1) * (N - 2)))
+            numer = np.array([sp.comb(n, i) / (N - i) for i in range(0, n + 1)]) * p
+            denom = np.array([sp.comb(N, i) for i in range(0, n + 1)])
+            rr = 1 / np.arange(1, n + 2)
+            rmrr = np.sum(numer * rr / denom)
+            mean_stats['Random MRR'].append(rmrr)
             if not any(rels):
                 logging.warning(f'{doc_id} has no relevant answers')
+        for k, v in mean_stats.items():
+            logging.info(f'{k}: {np.mean(v)}')
         return [RelevanceExample(Query(query), list(map(lambda s: Text(s, dict(docid=docid)), sents)), rels)
                 for ((query, docid), sents), (_, rels) in zip(example_map.items(), rel_map.items())]
diff --git a/pygaggle/data/relevance.py b/pygaggle/data/relevance.py
index b53f5982..a0e65620 100644
--- a/pygaggle/data/relevance.py
+++ b/pygaggle/data/relevance.py
@@ -1,16 +1,15 @@
 from dataclasses import dataclass
 from functools import lru_cache
-from itertools import chain
 from typing import List
 import json
 import re
 
 from pyserini.search import pysearch
 
-from pygaggle.rerank import Query, Text
+from pygaggle.rerank.base import Query, Text
 
 
-__all__ = ['RelevanceExample', 'LuceneDocumentLoader']
+__all__ = ['RelevanceExample', 'Cord19DocumentLoader']
 
 
 @dataclass
@@ -20,15 +19,34 @@ class RelevanceExample:
     labels: List[bool]
 
 
-class LuceneDocumentLoader:
+@dataclass
+class Cord19Document:
+    abstract: str
+    body_text: str
+    ref_entries: str
+
+    @property
+    def all_text(self):
+        return '\n'.join((self.abstract, self.body_text, self.ref_entries))
+
+
+class Cord19DocumentLoader:
     double_space_pattern = re.compile(r'\s\s+')
 
     def __init__(self, index_path: str):
         self.searcher = pysearch.SimpleSearcher(index_path)
 
     @lru_cache(maxsize=1024)
-    def load_document(self, id: str) -> str:
-        article = json.loads(self.searcher.doc(id).lucene_document().get('raw'))
+    def load_document(self, id: str) -> Cord19Document:
+        def unfold(entries):
+            return '\n'.join(x['text'] for x in entries)
+        try:
+            article = json.loads(self.searcher.doc(id).lucene_document().get('raw'))
+        except json.decoder.JSONDecodeError:
+            raise ValueError('article not found')
+        except AttributeError:
+            raise ValueError('document unretrievable')
         ref_entries = article['ref_entries'].values()
-        text = '\n'.join(x['text'] for x in chain(article['abstract'], article['body_text'], ref_entries))
-        return text
+        return Cord19Document(unfold(article['abstract']),
+                              unfold(article['body_text']),
+                              unfold(ref_entries))
diff --git a/pygaggle/model/encode.py b/pygaggle/model/encode.py
index ddfc9be4..9b19d3e8 100644
--- a/pygaggle/model/encode.py
+++ b/pygaggle/model/encode.py
@@ -6,7 +6,7 @@
 import torch.nn as nn
 
 from .tokenize import BatchTokenizer
-from pygaggle.rerank import TextType
+from pygaggle.rerank.base import TextType
 
 
 __all__ = ['LongBatchEncoder', 'EncoderOutputBatch', 'SingleEncoderOutput', 'SpecialTokensCleaner']
diff --git a/pygaggle/model/evaluate.py b/pygaggle/model/evaluate.py
index e0a39906..3c355bbb 100644
--- a/pygaggle/model/evaluate.py
+++ b/pygaggle/model/evaluate.py
@@ -6,8 +6,8 @@
 from tqdm import tqdm
 import numpy as np
 
-from pygaggle.data import RelevanceExample
-from pygaggle.rerank import Reranker
+from pygaggle.data.kaggle import RelevanceExample
+from pygaggle.rerank.base import Reranker
 
 
 __all__ = ['RerankerEvaluator', 'metric_names']
@@ -34,6 +34,11 @@ def value(self):
         return np.mean(self.scores)
 
 
+class TruncatingMixin:
+    def truncated_rels(self, scores: List[float]) -> np.ndarray:
+        return np.array(scores)
+
+
 def register_metric(name):
     def wrap_fn(metric_cls):
         METRIC_MAP[name] = metric_cls
@@ -46,43 +51,69 @@ def metric_names():
     return list(METRIC_MAP.keys())
 
 
-def truncated_rels(scores: List[float], top_k: int) -> np.ndarray:
-    rel_idxs = sorted(list(enumerate(scores)), key=lambda x: x[1], reverse=True)[:top_k]
-    rel_idxs = [x[0] for x in rel_idxs]
-    score_rels = np.zeros(len(scores), dtype=int)
-    score_rels[rel_idxs] = 1
-    return score_rels
+class TopkMixin(TruncatingMixin):  # selects the top_k highest-scoring entries
+    top_k: int = None  # None selects every entry
+
+    def truncated_rels(self, scores: List[float]) -> np.ndarray:
+        rel_idxs = sorted(list(enumerate(scores)), key=lambda x: x[1], reverse=True)[:self.top_k]
+        scores = np.zeros(len(scores))  # 0/1 mask, so a selected score of exactly 0 is not lost
+        scores[[x[0] for x in rel_idxs]] = 1
+        return scores
+
 
+class DynamicThresholdingMixin(TruncatingMixin):
+    threshold: float = 0.5
 
-@register_metric('recall')
-class RecallAccumulator(MeanAccumulator):
-    top_k = None
+    def truncated_rels(self, scores: List[float]) -> np.ndarray:  # zeroes entries below threshold * max
+        scores = np.array(scores)  # copy, so the caller's list is not modified
+        scores[scores < self.threshold * np.max(scores)] = 0  # NOTE(review): if max < 0, all are zeroed
+        return scores
 
+
+class RecallAccumulator(TruncatingMixin, MeanAccumulator):  # per-example recall of the selected entries
     def accumulate(self, scores: List[float], gold: RelevanceExample):
-        score_rels = truncated_rels(scores, self.top_k)
+        score_rels = self.truncated_rels(scores)  # selection strategy comes from the mixin in use
+        score_rels[score_rels != 0] = 1  # binarize: any nonzero entry counts as selected
         gold_rels = np.array(gold.labels, dtype=int)
         score = recall_score(gold_rels, score_rels, zero_division=1)
         self.scores.append(score)
 
 
-@register_metric('precision')
-class PrecisionAccumulator(MeanAccumulator):
-    top_k = None
-
+class PrecisionAccumulator(TruncatingMixin, MeanAccumulator):  # per-example precision of selected entries
     def accumulate(self, scores: List[float], gold: RelevanceExample):
-        score_rels = truncated_rels(scores, self.top_k)
+        score_rels = self.truncated_rels(scores)  # selection strategy comes from the mixin in use
+        score_rels[score_rels != 0] = 1  # binarize: any nonzero entry counts as selected
+        score_rels = score_rels.astype(int)  # integer dtype is needed for the bitwise & below
         gold_rels = np.array(gold.labels, dtype=int)
-        self.scores.append((score_rels & gold_rels).sum() / score_rels.sum())
+        sum_score = score_rels.sum()  # number of selected entries
+        if sum_score > 0:  # examples with nothing selected are skipped rather than scored as 0
+            self.scores.append((score_rels & gold_rels).sum() / sum_score)
 
 
-@register_metric('recall@1')
-class RecallAt1Metric(RecallAccumulator):
+@register_metric('precision@1')
+class PrecisionAt1Metric(TopkMixin, PrecisionAccumulator):  # precision over the single top-scoring entry
     top_k = 1
 
 
-@register_metric('precision@1')
-class PrecisionAt1Metric(PrecisionAccumulator):
-    top_k = 1
+@register_metric('recall@3')
+class RecallAt3Metric(TopkMixin, RecallAccumulator):  # recall over the three top-scoring entries
+    top_k = 3
+
+
+@register_metric('mrr')
+class MrrMetric(MeanAccumulator):  # reciprocal rank of the best-ranked entry with a truthy gold label
+    def accumulate(self, scores: List[float], gold: RelevanceExample):  # ranks by descending score
+        scores = sorted(list(enumerate(scores)), key=lambda x: x[1], reverse=True)
+        rr = next((1 / (rank_idx + 1) for rank_idx, (idx, _) in enumerate(scores) if gold.labels[idx]), 0)
+        self.scores.append(rr)  # rr is 0 when no entry has a truthy label
+
+
+class ThresholdedRecallMetric(DynamicThresholdingMixin, RecallAccumulator):  # no @register_metric name
+    threshold = 0.5  # fraction of the maximum score used as the cutoff
+
+
+class ThresholdedPrecisionMetric(DynamicThresholdingMixin, PrecisionAccumulator):  # no @register_metric name
+    threshold = 0.5  # fraction of the maximum score used as the cutoff
 
 
 class RerankerEvaluator:
diff --git a/pygaggle/model/tokenize.py b/pygaggle/model/tokenize.py
index 4a23dca3..b6c2015d 100644
--- a/pygaggle/model/tokenize.py
+++ b/pygaggle/model/tokenize.py
@@ -6,7 +6,7 @@
 from transformers import PreTrainedTokenizer
 import torch
 
-from pygaggle.rerank import Query, Text, TextType
+from pygaggle.rerank.base import Query, Text, TextType
 
 
 __all__ = ['BatchTokenizer',
diff --git a/pygaggle/rerank/__init__.py b/pygaggle/rerank/__init__.py
index cb01d169..e69de29b 100644
--- a/pygaggle/rerank/__init__.py
+++ b/pygaggle/rerank/__init__.py
@@ -1,4 +0,0 @@
-from .base import *
-from .similarity import *
-from .bm25 import *
-from .transformer import *
diff --git a/pygaggle/rerank/bm25.py b/pygaggle/rerank/bm25.py
index 8460305d..da0b296f 100644
--- a/pygaggle/rerank/bm25.py
+++ b/pygaggle/rerank/bm25.py
@@ -7,7 +7,7 @@
 from pyserini.index.pyutils import IndexReaderUtils
 import numpy as np
 
-from pygaggle.rerank import Reranker, Query, Text
+from .base import Reranker, Query, Text
 
 
 __all__ = ['Bm25Reranker']
@@ -45,5 +45,7 @@ def rerank(self, query: Query, texts: List[Text]) -> List[Text]:
                 idfs = {w: self.index_utils.compute_bm25_term_weight(text.raw['docid'], w) for w in tf}
             score = sum(idfs[w] * tf[w] * (self.k1 + 1) /
                         (tf[w] + self.k1 * (1 - self.b + self.b * (d_len / mean_len))) for w in tf)
+            if np.isnan(score):
+                score = 0
             text.score = score
         return texts
diff --git a/pygaggle/rerank/random.py b/pygaggle/rerank/random.py
new file mode 100644
index 00000000..aca01742
--- /dev/null
+++ b/pygaggle/rerank/random.py
@@ -0,0 +1,19 @@
+from copy import deepcopy
+from typing import List
+import random
+
+from .base import Query, Text, Reranker
+
+
+__all__ = ['RandomReranker']
+
+
+class RandomReranker(Reranker):  # baseline that scores every text with a seeded pseudo-random number
+    def __init__(self, seed: int = 0):
+        self.rand = random.Random(seed)  # private generator, separate from the module-level one
+
+    def rerank(self, query: Query, texts: List[Text]) -> List[Text]:
+        texts = deepcopy(texts)  # score copies; the caller's Text objects are left unmodified
+        for text in texts:
+            text.score = self.rand.random()  # the query is ignored
+        return texts
diff --git a/pygaggle/rerank/similarity.py b/pygaggle/rerank/similarity.py
index 729232f0..abd969be 100644
--- a/pygaggle/rerank/similarity.py
+++ b/pygaggle/rerank/similarity.py
@@ -2,10 +2,10 @@
 
 import torch
 
-from pygaggle.model import SingleEncoderOutput
+from pygaggle.model.encode import SingleEncoderOutput
 
 
-__all__ = ['SimilarityMatrixProvider', 'InnerProductMatrixProvider']
+__all__ = ['SimilarityMatrixProvider', 'CosineSimilarityMatrixProvider']
 
 
 class SimilarityMatrixProvider:
@@ -16,7 +16,7 @@ def compute_matrix(self,
         pass
 
 
-class InnerProductMatrixProvider(SimilarityMatrixProvider):
+class CosineSimilarityMatrixProvider(SimilarityMatrixProvider):
     @torch.no_grad()
     def compute_matrix(self, encoded_query: SingleEncoderOutput, encoded_document: SingleEncoderOutput) -> torch.Tensor:
         query_repr = encoded_query.encoder_output
diff --git a/pygaggle/rerank/transformer.py b/pygaggle/rerank/transformer.py
index 0314ed19..419c2c41 100644
--- a/pygaggle/rerank/transformer.py
+++ b/pygaggle/rerank/transformer.py
@@ -1,15 +1,19 @@
 from copy import deepcopy
 from typing import List
 
-from transformers import T5ForConditionalGeneration, PreTrainedModel
+from transformers import T5ForConditionalGeneration, PreTrainedModel, PreTrainedTokenizer, BertForQuestionAnswering
 import torch
 
+from .base import Reranker, Query, Text
+from .similarity import SimilarityMatrixProvider
 from pygaggle.model import greedy_decode, QueryDocumentBatchTokenizer, BatchTokenizer,\
     QueryDocumentBatch, LongBatchEncoder, SpecialTokensCleaner
-from pygaggle.rerank import Reranker, Query, Text, SimilarityMatrixProvider
 
 
-__all__ = ['T5Reranker', 'TransformerReranker']
+__all__ = ['T5Reranker',
+           'UnsupervisedTransformerReranker',
+           'SequenceClassificationTransformerReranker',
+           'QuestionAnsweringTransformerReranker']
 
 
 class T5Reranker(Reranker):
@@ -32,14 +36,14 @@ def rerank(self, query: Query, texts: List[Text]) -> List[Text]:
 
             # 6136 and 1176 are the indexes of the tokens false and true in T5.
             batch_scores = batch_scores[:, [6136, 1176]]
-            batch_log_probs = torch.nn.functional.log_softmax(batch_scores, dim=1)
-            batch_log_probs = batch_log_probs[:, 1].tolist()
+            batch_scores = torch.nn.functional.log_softmax(batch_scores, dim=1)
+            batch_log_probs = batch_scores[:, 1].tolist()
             for doc, score in zip(batch.documents, batch_log_probs):
                 doc.score = score
         return texts
 
 
-class TransformerReranker(Reranker):
+class UnsupervisedTransformerReranker(Reranker):
     methods = dict(max=lambda x: x.max().item(),
                    mean=lambda x: x.mean().item(),
                    absmean=lambda x: x.abs().mean().item(),
@@ -50,7 +54,8 @@ def __init__(self,
                  tokenizer: BatchTokenizer,
                  sim_matrix_provider: SimilarityMatrixProvider,
                  method: str = 'max',
-                 clean_special: bool = True):
+                 clean_special: bool = True,
+                 argmax_only: bool = False):
         assert method in self.methods, 'inappropriate scoring method'
         self.model = model
         self.tokenizer = tokenizer
@@ -60,16 +65,78 @@ def __init__(self,
         self.clean_special = clean_special
         self.cleaner = SpecialTokensCleaner(tokenizer.tokenizer)
         self.device = next(self.model.parameters(), None).device
+        self.argmax_only = argmax_only
 
     @torch.no_grad()
     def rerank(self, query: Query, texts: List[Text]) -> List[Text]:
         encoded_query = self.encoder.encode_single(query)
         encoded_documents = self.encoder.encode(texts)
         texts = deepcopy(texts)
+        max_score = None  # running maximum over all texts; read only when argmax_only is set
         for enc_doc, text in zip(encoded_documents, texts):
             if self.clean_special:
                 enc_doc = self.cleaner.clean(enc_doc)
             matrix = self.sim_matrix_provider.compute_matrix(encoded_query, enc_doc)
-            score = self.methods[self.method](matrix)
+            score = self.methods[self.method](matrix) if matrix.size(1) > 0 else -10000
             text.score = score
+            max_score = score if max_score is None else max(max_score, score)
+        if self.argmax_only:  # keep only the best score; push all others far below it
+            for text in texts:
+                if text.score != max_score:  # texts tied with the maximum are left untouched
+                    text.score = max_score - 10000
+        return texts
+
+
+class SequenceClassificationTransformerReranker(Reranker):  # scores each text with a sequence classifier
+    def __init__(self,
+                 model: PreTrainedModel,
+                 tokenizer: PreTrainedTokenizer):
+        self.tokenizer = tokenizer
+        self.model = model
+        self.device = next(model.parameters()).device  # inputs are moved to the model's device
+
+    @torch.no_grad()
+    def rerank(self, query: Query, texts: List[Text]) -> List[Text]:
+        texts = deepcopy(texts)  # score copies; the caller's Text objects are left unmodified
+        for text in texts:  # one forward pass per (query, text) pair; no batching
+            ret = self.tokenizer.encode_plus(query.text,
+                                             text.text,
+                                             max_length=512,
+                                             return_token_type_ids=True,
+                                             return_tensors='pt')
+            input_ids = ret['input_ids'].to(self.device)
+            tt_ids = ret['token_type_ids'].to(self.device)
+            output, = self.model(input_ids, token_type_ids=tt_ids)  # expects a one-element output tuple
+            if output.size(1) > 1:  # several logits: log-probability of the last class
+                text.score = torch.nn.functional.log_softmax(output, 1)[0, -1].item()
+            else:  # single logit: use the raw value
+                text.score = output.item()
+        return texts
+
+
+class QuestionAnsweringTransformerReranker(Reranker):  # scores each text by its best QA span logit
+    def __init__(self, model: PreTrainedModel, tokenizer: PreTrainedTokenizer):
+        self.tokenizer = tokenizer
+        self.model = model
+        self.device = next(model.parameters()).device  # inputs are moved to the model's device
+
+    @torch.no_grad()
+    def rerank(self, query: Query, texts: List[Text]) -> List[Text]:
+        texts = deepcopy(texts)  # score copies; the caller's Text objects are left unmodified
+        for text in texts:  # one forward pass per (query, text) pair; no batching
+            ret = self.tokenizer.encode_plus(query.text,
+                                             text.text,
+                                             max_length=512,
+                                             return_tensors='pt',
+                                             return_token_type_ids=True)
+            input_ids = ret['input_ids'].to(self.device)
+            tt_ids = ret['token_type_ids'].to(self.device)
+            start_scores, end_scores = self.model(input_ids, token_type_ids=tt_ids)
+            start_scores = start_scores[0]  # drop the batch dimension (batch of one)
+            end_scores = end_scores[0]
+            start_scores[(1 - tt_ids[0]).bool()] = -5000  # mask type-id-0 tokens (presumably the query)
+            end_scores[(1 - tt_ids[0]).bool()] = -5000
+            smax_val, smax_idx = start_scores.max(0)  # the argmax indices are not used
+            emax_val, emax_idx = end_scores.max(0)
+            text.score = max(smax_val.item(), emax_val.item())  # larger of best start / best end logit
         return texts
diff --git a/pygaggle/run/evaluate_kaggle_highlighter.py b/pygaggle/run/evaluate_kaggle_highlighter.py
index 52d6505d..9810cd79 100644
--- a/pygaggle/run/evaluate_kaggle_highlighter.py
+++ b/pygaggle/run/evaluate_kaggle_highlighter.py
@@ -3,18 +3,24 @@
 import logging
 
 from pydantic import BaseModel, validator
-from transformers import AutoModel, AutoTokenizer
+from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification, BertForQuestionAnswering, \
+    BertForSequenceClassification
 import torch
 
 from .args import ArgumentParserBuilder, opt
-from pygaggle.rerank import TransformerReranker, InnerProductMatrixProvider, Reranker, T5Reranker, Bm25Reranker
+from pygaggle.rerank.base import Reranker
+from pygaggle.rerank.bm25 import Bm25Reranker
+from pygaggle.rerank.transformer import UnsupervisedTransformerReranker, T5Reranker, \
+    SequenceClassificationTransformerReranker, QuestionAnsweringTransformerReranker
+from pygaggle.rerank.random import RandomReranker
+from pygaggle.rerank.similarity import CosineSimilarityMatrixProvider
 from pygaggle.model import SimpleBatchTokenizer, CachedT5ModelLoader, T5BatchTokenizer, RerankerEvaluator, metric_names
 from pygaggle.data import LitReviewDataset
 from pygaggle.settings import Settings
 
 
 SETTINGS = Settings()
-METHOD_CHOICES = ('transformer', 'bm25', 't5')
+METHOD_CHOICES = ('transformer', 'bm25', 't5', 'seq_class_transformer', 'qa_transformer', 'random')
 
 
 class KaggleEvaluationOptions(BaseModel):
@@ -22,8 +28,11 @@ class KaggleEvaluationOptions(BaseModel):
     method: str
     batch_size: int
     device: str
+    split: str
+    do_lower_case: bool
     metrics: List[str]
     model_name: Optional[str]
+    tokenizer_name: Optional[str]
 
     @validator('dataset')
     def dataset_exists(cls, v: Path):
@@ -41,6 +50,12 @@ def model_name_sane(cls, v: Optional[str], values, **kwargs):
             return 'monologg/biobert_v1.1_pubmed'
         return v
 
+    @validator('tokenizer_name')
+    def tokenizer_sane(cls, v: Optional[str], values, **kwargs):  # default the tokenizer to the model name
+        if v is None:
+            return values.get('model_name')  # the key is absent if model_name itself failed validation
+        return v
+
 
 def construct_t5(options: KaggleEvaluationOptions) -> Reranker:
     loader = CachedT5ModelLoader(SETTINGS.t5_model_dir,
@@ -50,17 +65,56 @@ def construct_t5(options: KaggleEvaluationOptions) -> Reranker:
                                  SETTINGS.flush_cache)
     device = torch.device(options.device)
     model = loader.load().to(device).eval()
-    tokenizer = AutoTokenizer.from_pretrained(options.model_name)
+    tokenizer = AutoTokenizer.from_pretrained(options.model_name, do_lower_case=options.do_lower_case)
     tokenizer = T5BatchTokenizer(tokenizer, options.batch_size)
     return T5Reranker(model, tokenizer)
 
 
 def construct_transformer(options: KaggleEvaluationOptions) -> Reranker:
     device = torch.device(options.device)
-    model = AutoModel.from_pretrained(options.model_name).to(device).eval()
-    tokenizer = SimpleBatchTokenizer(AutoTokenizer.from_pretrained(options.model_name), options.batch_size)
-    provider = InnerProductMatrixProvider()
-    return TransformerReranker(model, tokenizer, provider)
+    try:
+        model = AutoModel.from_pretrained(options.model_name).to(device).eval()
+    except OSError:  # retry, asking the loader for a TensorFlow checkpoint instead
+        model = AutoModel.from_pretrained(options.model_name, from_tf=True).to(device).eval()
+    tokenizer = SimpleBatchTokenizer(AutoTokenizer.from_pretrained(options.tokenizer_name,
+                                                                   do_lower_case=options.do_lower_case),
+                                     options.batch_size)
+    provider = CosineSimilarityMatrixProvider()  # query/document similarity used for scoring
+    return UnsupervisedTransformerReranker(model, tokenizer, provider)
+
+
+def construct_seq_class_transformer(options: KaggleEvaluationOptions) -> Reranker:
+    try:
+        model = AutoModelForSequenceClassification.from_pretrained(options.model_name)
+    except OSError:  # retry, asking the loader for a TensorFlow checkpoint instead
+        try:
+            model = AutoModelForSequenceClassification.from_pretrained(options.model_name, from_tf=True)
+        except AttributeError:
+            # Hotfix for BioBERT MS MARCO: set class-level weight/bias placeholders, then reload. TODO: refactor.
+            BertForSequenceClassification.bias = torch.nn.Parameter(torch.zeros(2))
+            BertForSequenceClassification.weight = torch.nn.Parameter(torch.zeros(2, 768))
+            model = BertForSequenceClassification.from_pretrained(options.model_name, from_tf=True)
+            model.classifier.weight = BertForSequenceClassification.weight  # move them onto the classifier
+            model.classifier.bias = BertForSequenceClassification.bias
+    device = torch.device(options.device)
+    model = model.to(device).eval()
+    tokenizer = AutoTokenizer.from_pretrained(options.tokenizer_name, do_lower_case=options.do_lower_case)
+    return SequenceClassificationTransformerReranker(model, tokenizer)
+
+
+def construct_qa_transformer(options: KaggleEvaluationOptions) -> Reranker:
+    # We load a sequence classification model first -- again, as a workaround. TODO: refactor.
+    try:
+        model = AutoModelForSequenceClassification.from_pretrained(options.model_name)
+    except OSError:  # retry, asking the loader for a TensorFlow checkpoint instead
+        model = AutoModelForSequenceClassification.from_pretrained(options.model_name, from_tf=True)
+    fixed_model = BertForQuestionAnswering(model.config)  # fresh QA model built from the loaded config
+    fixed_model.qa_outputs = model.classifier  # reuse the loaded classification head as the QA output layer
+    fixed_model.bert = model.bert  # reuse the loaded encoder
+    device = torch.device(options.device)
+    model = fixed_model.to(device).eval()
+    tokenizer = AutoTokenizer.from_pretrained(options.tokenizer_name, do_lower_case=options.do_lower_case)
+    return QuestionAnsweringTransformerReranker(model, tokenizer)
 
 
 def construct_bm25(_: KaggleEvaluationOptions) -> Reranker:
@@ -69,25 +123,32 @@ def construct_bm25(_: KaggleEvaluationOptions) -> Reranker:
 
 def main():
     apb = ArgumentParserBuilder()
-    apb.add_opts(opt('--dataset', type=Path, default='data/kaggle-lit-review.json'),
+    apb.add_opts(opt('--dataset', type=Path, default='data/kaggle-lit-review-0.1.json'),
                  opt('--method', required=True, type=str, choices=METHOD_CHOICES),
                  opt('--model-name', type=str),
+                 opt('--split', type=str, default='nq', choices=('nq', 'kq')),  # dataset split to evaluate
                  opt('--batch-size', '-bsz', type=int, default=96),
                  opt('--device', type=str, default='cuda:0'),
+                 opt('--tokenizer-name', type=str),
+                 opt('--do-lower-case', action='store_true'),  # forwarded to the tokenizer loaders
                  opt('--metrics', type=str, nargs='+', default=metric_names(), choices=metric_names()))
     args = apb.parser.parse_args()
-
     options = KaggleEvaluationOptions(**vars(args))
     ds = LitReviewDataset.from_file(str(options.dataset))
-    examples = ds.to_senticized_dataset(SETTINGS.cord19_index_path)
-    construct_map = dict(transformer=construct_transformer, bm25=construct_bm25, t5=construct_t5)
+    examples = ds.to_senticized_dataset(SETTINGS.cord19_index_path, split=options.split)
+    construct_map = dict(transformer=construct_transformer,  # --method value -> reranker factory
+                         bm25=construct_bm25,
+                         t5=construct_t5,
+                         seq_class_transformer=construct_seq_class_transformer,
+                         qa_transformer=construct_qa_transformer,
+                         random=lambda _: RandomReranker())  # options are ignored; default seed is used
     reranker = construct_map[options.method](options)
     evaluator = RerankerEvaluator(reranker, options.metrics)
     width = max(map(len, args.metrics)) + 1
     stdout = []
     for metric in evaluator.evaluate(examples):
         logging.info(f'{metric.name:<{width}}{metric.value:.5}')
-        stdout.append(f'{metric.name.title()}\t{metric.value}')
+        stdout.append(f'{metric.name}\t{metric.value}')  # tab-separated: registered metric name, value
     print('\n'.join(stdout))
 
 
diff --git a/scripts/evaluate-highlighters.sh b/scripts/evaluate-highlighters.sh
index c7599d1b..e7344374 100644
--- a/scripts/evaluate-highlighters.sh
+++ b/scripts/evaluate-highlighters.sh
@@ -1,7 +1,13 @@
 mkdir -p results
-python -um pygaggle.run.evaluate_kaggle_highlighter --method bm25 > results/bm25.log
-python -um pygaggle.run.evaluate_kaggle_highlighter --method t5 > results/t5.log
-python -um pygaggle.run.evaluate_kaggle_highlighter --method transformer --model-name biobert > results/biobert.log
-python -um pygaggle.run.evaluate_kaggle_highlighter --method transformer --model-name allenai/scibert_scivocab_cased > results/scibert.log
-python -um pygaggle.run.evaluate_kaggle_highlighter --method transformer --model-name bert-base-cased > results/bert.log
+for split in kq nq; do  # run every reranker once per split; one log file per (reranker, split)
+  python -um pygaggle.run.evaluate_kaggle_highlighter --split $split --method random > results/random-$split.log;
+  python -um pygaggle.run.evaluate_kaggle_highlighter --split $split --method bm25 > results/bm25-$split.log;
+  python -um pygaggle.run.evaluate_kaggle_highlighter --split $split --method t5 > results/t5-$split.log;
+  python -um pygaggle.run.evaluate_kaggle_highlighter --split $split --method transformer --model-name bert-base-cased > results/bbc-unsup-$split.log;
+  python -um pygaggle.run.evaluate_kaggle_highlighter --split $split --method transformer --model-name biobert > results/biobert-unsup-$split.log;
+  python -um pygaggle.run.evaluate_kaggle_highlighter --split $split --method transformer --model-name allenai/scibert_scivocab_cased > results/scibert-unsup-$split.log;
+  python -um pygaggle.run.evaluate_kaggle_highlighter --split $split --method seq_class_transformer --model-name ~/models/biobert-msmarco > results/biobert-marco-$split.log;
+  python -um pygaggle.run.evaluate_kaggle_highlighter --split $split --method seq_class_transformer --model-name ~/models/bbu-marco --do-lower-case > results/bert-marco-$split.log;
+  python -um pygaggle.run.evaluate_kaggle_highlighter --split $split --method qa_transformer --model-name ~/models/biobert-squad1 > results/biobert-squadv1-$split.log;
+done  # the ~/models/* checkpoints above are local paths and must exist beforehand
 for name in results/*; do echo $name; cat $name; echo; done
diff --git a/setup.py b/setup.py
new file mode 100644
index 00000000..5bfd1ab2
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,39 @@
+import setuptools
+
+
+with open('README.md', encoding='utf-8') as fh:  # explicit encoding: do not depend on the locale default
+    long_description = fh.read()
+
+reqs = [
+    'coloredlogs==14.0',
+    'numpy==1.18.2',
+    'pydantic==1.5',
+    'pyserini==0.9.0.0',
+    'scikit-learn>=0.22',
+    'scipy>=1.4',
+    'spacy==2.2.4',
+    'tensorboard>=2.1.0',
+    'tensorflow>=2.2.0rc1',
+    'tokenizers==0.5.2',
+    'tqdm==4.45.0',
+    'transformers==2.7.0'
+]
+
+setuptools.setup(
+    name='pygaggle',
+    version='0.0.1',
+    author='PyGaggle Gaggle',
+    author_email='r33tang@uwaterloo.ca',
+    description='A gaggle of rerankers for CovidQA and CORD-19',
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    url='https://github.com/castorini/pygaggle',
+    install_requires=reqs,
+    packages=setuptools.find_packages(),
+    classifiers=[
+        'Programming Language :: Python :: 3',
+        'License :: OSI Approved :: Apache Software License',
+        'Operating System :: OS Independent',
+    ],
+    python_requires='>=3.7',
+)
\ No newline at end of file