diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml
new file mode 100644
index 0000000..c0c21b1
--- /dev/null
+++ b/.github/workflows/publish-to-pypi.yml
@@ -0,0 +1,180 @@
+name: Publish to PyPI
+
+on:
+ workflow_dispatch:
+ inputs:
+ version_increment:
+ description: 'Version increment type'
+ required: true
+ default: 'patch'
+ type: choice
+ options:
+ - patch
+ - minor
+ - major
+ - custom
+ custom_version:
+ description: 'Custom version (only if "custom" is selected above)'
+ required: false
+ type: string
+ create_release:
+ description: 'Create GitHub release'
+ required: true
+ default: true
+ type: boolean
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install build twine
+ pip install -r requirements.txt
+
+ - name: Build package
+ run: python -m build
+
+ - name: Check package
+ run: twine check dist/*
+
+ publish:
+ needs: test
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.11'
+
+ - name: Install build dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install build twine
+
+ - name: Calculate new version
+ id: version
+ run: |
+ # Get current version from setup.py
+ CURRENT_VERSION=$(python -c "
+ import re
+ with open('setup.py', 'r') as f:
+ content = f.read()
+ match = re.search(r\"version='([^']+)'\", content)
+ print(match.group(1) if match else '1.0.7')
+ ")
+
+ echo "Current version: $CURRENT_VERSION"
+
+ if [ "${{ github.event.inputs.version_increment }}" = "custom" ]; then
+ NEW_VERSION="${{ github.event.inputs.custom_version }}"
+ else
+ # Parse current version
+ IFS='.' read -ra VERSION_PARTS <<< "$CURRENT_VERSION"
+ MAJOR=${VERSION_PARTS[0]}
+ MINOR=${VERSION_PARTS[1]}
+ PATCH=${VERSION_PARTS[2]}
+
+ case "${{ github.event.inputs.version_increment }}" in
+ major)
+ MAJOR=$((MAJOR + 1))
+ MINOR=0
+ PATCH=0
+ ;;
+ minor)
+ MINOR=$((MINOR + 1))
+ PATCH=0
+ ;;
+ patch)
+ PATCH=$((PATCH + 1))
+ ;;
+ esac
+
+ NEW_VERSION="${MAJOR}.${MINOR}.${PATCH}"
+ fi
+
+ echo "New version: $NEW_VERSION"
+ echo "NEW_VERSION=$NEW_VERSION" >> $GITHUB_OUTPUT
+ echo "NEW_VERSION=$NEW_VERSION" >> $GITHUB_ENV
+
+ - name: Update version in setup.py
+ run: |
+ sed -i "s/version='[^']*'/version='${{ steps.version.outputs.NEW_VERSION }}'/" setup.py
+ echo "Updated setup.py with version ${{ steps.version.outputs.NEW_VERSION }}"
+
+ # Show the change
+ grep "version=" setup.py
+
+ - name: Build package
+ run: python -m build
+
+ - name: Publish to PyPI
+ env:
+ TWINE_USERNAME: __token__
+ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+ run: twine upload dist/*
+
+ - name: Commit version bump
+ run: |
+ git config --local user.email "action@github.com"
+ git config --local user.name "GitHub Action"
+ git add setup.py
+ git commit -m "Bump version to ${{ steps.version.outputs.NEW_VERSION }}"
+ git push
+
+ - name: Create GitHub Release
+ if: ${{ github.event.inputs.create_release == 'true' }}
+ uses: actions/create-release@v1
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ with:
+ tag_name: v${{ steps.version.outputs.NEW_VERSION }}
+ release_name: BookNLP Plus v${{ steps.version.outputs.NEW_VERSION }}
+ body: |
+ 🚀 **BookNLP Plus v${{ steps.version.outputs.NEW_VERSION }}**
+
+ **Install with:**
+ ```bash
+ pip install booknlp-plus==${{ steps.version.outputs.NEW_VERSION }}
+ ```
+
+ **What's New:**
+ - Manual release triggered by @${{ github.actor }}
+ - Compatible with Python 3.9-3.12
+ - Enhanced fork with JSON patch support and sentence transformers
+
+ **Links:**
+ - 📦 [PyPI Package](https://pypi.org/project/booknlp-plus/${{ steps.version.outputs.NEW_VERSION }}/)
+ - 📖 [Original BookNLP](https://github.com/dbamman/book-nlp)
+ - 📋 [Full Changelog](https://github.com/DrewThomasson/booknlp/compare/v${{ steps.version.outputs.NEW_VERSION }}...json-patch-1)
+
+ Built from commit: ${{ github.sha }}
+ draft: false
+ prerelease: false
+
+ notify:
+ needs: publish
+ runs-on: ubuntu-latest
+ if: success()
+
+ steps:
+ - name: Success notification
+ run: |
+ echo "🎉 Successfully published booknlp-plus to PyPI!"
+ echo "📦 Install with: pip install booknlp-plus"
+ echo "🔗 View on PyPI: https://pypi.org/project/booknlp-plus/"
\ No newline at end of file
diff --git a/README.md b/README.md
index e66b269..fef23e0 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# BookNLP
+# BookNLP-plus
A natural language processing pipeline for analyzing works of fiction, including entity detection, quotation attribution, and character relationship analysis.
@@ -45,6 +45,14 @@ source venv/bin/activate # On Windows, use: venv\Scripts\activate
./run_booknlp.py input_file.txt --output-dir output/directory
```
+## To run Booknlp viewer
+
+```bash
+pip install streamlit pandas
+streamlit run booknlp_viewer.py
+```
+
+
### Command Line Arguments
- `input_file`: The text file to process (required)
@@ -92,6 +100,17 @@ The pipeline generates several output files in the specified output directory:
- Character relationships
- Interactive features
+8. `{book_id}.characters_simple.json`: A character sheet for all characters for Multi-speaker in Ebook2Audiobook
+ - inferred_age_category = {"child", "teen", "adult", "elder"}
+ - inferred_gender = {"male", "female", "unknown"}
+ - voice = null # default
+ - language = "eng" # default
+ - normalized_name = {CamelCase} of canonical_name
+
+9. `{book_id}.book.txt`: A multi-speaker book script to use with `{book_id}.characters_simple.json`
+ - [CharacterName] "Quote" [/]
+
+
## Example
```bash
@@ -99,3 +118,219 @@ The pipeline generates several output files in the specified output directory:
./run_booknlp.py emma.txt --output-dir output/emma --model big
```
+
+# BookNLP
+
+BookNLP is a natural language processing pipeline that scales to books and other long documents (in English), including:
+
+* Part-of-speech tagging
+* Dependency parsing
+* Entity recognition
+* Character name clustering (e.g., "Tom", "Tom Sawyer", "Mr. Sawyer", "Thomas Sawyer" -> TOM_SAWYER) and coreference resolution
+* Quotation speaker identification
+* Supersense tagging (e.g., "animal", "artifact", "body", "cognition", etc.)
+* Event tagging
+* Referential gender inference (TOM_SAWYER -> he/him/his)
+
+BookNLP ships with two models, both with identical architectures but different underlying BERT sizes. The larger and more accurate `big` model is fit for GPUs and multi-core computers; the faster `small` model is more appropriate for personal computers. See the table below for a comparison of the difference, both in terms of overall speed and in accuracy for the tasks that BookNLP performs.
+
+
+| |Small|Big|
+|---|---|---|
+Entity tagging (F1)|88.2|90.0|
+Supersense tagging (F1)|73.2|76.2|
+Event tagging (F1)|70.6|74.1|
+Coreference resolution (Avg. F1)|76.4|79.0|
+Speaker attribution (B3)|86.4|89.9|
+CPU time, 2019 MacBook Pro (mins.)*|3.6|15.4|
+CPU time, 10-core server (mins.)*|2.4|5.2|
+GPU time, Titan RTX (mins.)*|2.1|2.2|
+
+*timings measure speed to run BookNLP on a sample book of *The Secret Garden* (99K tokens). To explore running BookNLP in Google Colab on a GPU, see [this notebook](https://colab.research.google.com/drive/1c9nlqGRbJ-FUP2QJe49h21hB4kUXdU_k?usp=sharing).
+
+## Installation
+
+* Create anaconda environment, if desired. First [download and install anaconda](https://www.anaconda.com/download/); then create and activate fresh environment.
+
+```sh
+conda create --name booknlp python=3.9
+conda activate booknlp
+```
+
+* If using a GPU, install pytorch for your system and CUDA version by following installation instructions on [https://pytorch.org](https://pytorch.org).
+
+
+* Install booknlp and download Spacy model.
+
+```sh
+pip install booknlp-plus
+python -m spacy download en_core_web_sm
+```
+
+## Usage
+
+```python
+from booknlp.booknlp import BookNLP
+
+model_params={
+ "pipeline":"entity,quote,supersense,event,coref",
+ "model":"big"
+ }
+
+booknlp=BookNLP("en", model_params)
+
+# Input file to process
+input_file="input_dir/bartleby_the_scrivener.txt"
+
+# Output directory to store resulting files in
+output_directory="output_dir/bartleby/"
+
+# File within this directory will be named ${book_id}.entities, ${book_id}.tokens, etc.
+book_id="bartleby"
+
+booknlp.process(input_file, output_directory, book_id)
+```
+
+This runs the full BookNLP pipeline; you are able to run only some elements of the pipeline (to cut down on computational time) by specifying them in that parameter (e.g., to only run entity tagging and event tagging, change `model_params` above to include `"pipeline":"entity,event"`).
+
+This process creates the directory `output_dir/bartleby` and generates the following files:
+
+* `bartleby/bartleby.tokens` -- This encodes core word-level information. Each row corresponds to one token and includes the following information:
+ * paragraph ID
+ * sentence ID
+ * token ID within sentence
+ * token ID within document
+ * word
+ * lemma
+ * byte onset within original document
+ * byte offset within original document
+ * POS tag
+ * dependency relation
+ * token ID within document of syntactic head
+ * event
+
+* `bartleby/bartleby.entities` -- This represents the typed entities within the document (e.g., people and places), along with their coreference.
+ * coreference ID (unique entity ID)
+ * start token ID within document
+ * end token ID within document
+ * NOM (nominal), PROP (proper), or PRON (pronoun)
+ * PER (person), LOC (location), FAC (facility), GPE (geo-political entity), VEH (vehicle), ORG (organization)
+ * text of entity
+* `bartleby/bartleby.supersense` -- This stores information from supersense tagging.
+ * start token ID within document
+ * end token ID within document
+ * supersense category (verb.cognition, verb.communication, noun.artifact, etc.)
+* `bartleby/bartleby.quotes` -- This stores information about the quotations in the document, along with the speaker. In a sentence like "'Yes', she said", where she -> ELIZABETH\_BENNETT, "she" is the attributed mention of the quotation 'Yes', and is coreferent with the unique entity ELIZABETH\_BENNETT.
+ * start token ID within document of quotation
+ * end token ID within document of quotation
+ * start token ID within document of attributed mention
+ * end token ID within document of attributed mention
+ * attributed mention text
+ * coreference ID (unique entity ID) of attributed mention
+ * quotation text
+* `bartleby/bartleby.book`
+
+JSON file providing information about all characters mentioned more than 1 time in the book, including their proper/common/pronominal references, referential gender, actions for the which they are the agent and patient, objects they possess, and modifiers.
+
+* `bartleby/bartleby.book.html`
+
+HTML file containing a.) the full text of the book along with annotations for entities, coreference, and speaker attribution and b.) a list of the named characters and major entity categories (FAC, GPE, LOC, etc.).
+
+
+# Annotations
+
+## Entity annotations
+
+The entity annotation layer covers six of the ACE 2005 categories in text:
+
+* People (PER): *Tom Sawyer*, *her daughter*
+* Facilities (FAC): *the house*, *the kitchen*
+* Geo-political entities (GPE): *London*, *the village*
+* Locations (LOC): *the forest*, *the river*
+* Vehicles (VEH): *the ship*, *the car*
+* Organizations (ORG): *the army*, *the Church*
+
+The targets of annotation here include both named entities (e.g., Tom Sawyer), common entities (the boy) and pronouns (he). These entities can be nested, as in the following:
+
+
+
+
+For more, see: David Bamman, Sejal Popat and Sheng Shen, "[An Annotated Dataset of Literary Entities](http://people.ischool.berkeley.edu/~dbamman/pubs/pdf/naacl2019_literary_entities.pdf)," NAACL 2019.
+
+The entity tagging model within BookNLP is trained on an annotated dataset of 968K tokens, including the public domain materials in [LitBank](https://github.com/dbamman/litbank) and a new dataset of ~500 contemporary books, including bestsellers, Pulitzer Prize winners, works by Black authors, global Anglophone books, and genre fiction (article forthcoming).
+
+## Event annotations
+
+The event layer identifies events with asserted *realis* (depicted as actually taking place, with specific participants at a specific time) -- as opposed to events with other epistemic modalities (hypotheticals, future events, extradiegetic summaries by the narrator).
+
+|Text|Events|Source|
+|---|---|---|
+|My fatherβs eyes had **closed** upon the light of this world six months, when mine **opened** on it.|{closed, opened}|Dickens, David Copperfield|
+|Call me Ishmael.|{}|Melville, Moby Dick|
+|His sister was a tall, strong girl, and she **walked** rapidly and resolutely, as if she knew exactly where she was going and what she was going to do next.|{walked}|Cather, O Pioneers|
+
+For more, see: Matt Sims, Jong Ho Park and David Bamman, "[Literary Event Detection](http://people.ischool.berkeley.edu/~dbamman/pubs/pdf/acl2019_literary_events.pdf)," ACL 2019.
+
+The event tagging model is trained on event annotations within [LitBank](https://github.com/dbamman/litbank). The `small` model above makes use of a distillation process, by training on the predictions made by the `big` model for a collection of contemporary texts.
+
+## Supersense tagging
+
+[Supersense tagging](https://aclanthology.org/W06-1670.pdf) provides coarse semantic information for a sentence by tagging spans with 41 lexical semantic categories drawn from WordNet, spanning both nouns (including *plant*, *animal*, *food*, *feeling*, and *artifact*) and verbs (including *cognition*, *communication*, *motion*, etc.)
+
+|Example|Source|
+|---|---|
+|The [station wagons]artifact [arrived]motion at [noon]time, a long shining [line]group that [coursed]motion through the [west campus]location.|Delillo, *White Noise*|
+
+
+The BookNLP tagger is trained on [SemCor](https://web.eecs.umich.edu/~mihalcea/downloads.html#semcor).
+
+
+
+
+## Character name clustering and coreference
+
+The coreference layer covers the six ACE entity categories outlined above (people, facilities, locations, geo-political entities, organizations and vehicles) and is trained on [LitBank](https://github.com/dbamman/litbank) and [PreCo](https://preschool-lab.github.io/PreCo/).
+
+Example|Source|
+---|---|
+One may as well begin with [Helen]x's letters to [[her]x sister]y|Forster, *Howard's End*
+
+Accurate coreference at the scale of a book-length document is still an open research problem, and attempting full coreference -- where any named entity (Elizabeth), common entity (her sister, his daughter) and pronoun (she) can corefer -- tends to erroneously conflate multiple distinct entities into one. By default, BookNLP addresses this by first carrying out character name clustering (grouping "Tom", "Tom Sawyer" and "Mr. Sawyer" into a single entity), and then allowing pronouns to corefer with either named entities (Tom) or common entities (the boy), but disallowing common entities from co-referring to named entities. To turn off this mode and carry out full coreference, add `pronominalCorefOnly=False` to the `model_params` parameters dictionary above (but be sure to inspect the output!).
+
+For more on the coreference criteria used in this work, see David Bamman, Olivia Lewke and Anya Mansoor (2020), "[An Annotated Dataset of Coreference in English Literature](https://arxiv.org/abs/1912.01140)", LREC.
+
+## Referential gender inference
+
+BookNLP infers the *referential gender* of characters by associating them with the pronouns (he/him/his, she/her, they/them, xe/xem/xyr/xir, etc.) used to refer to them in the context of the story. This method encodes several assumptions:
+
+* BookNLP describes the referential gender of characters, and not their gender identity. Characters are described by the pronouns used to refer to them (e.g., he/him, she/her) rather than labels like "M/F".
+
+* Prior information on the alignment of names with referential gender (e.g., from government records or larger background datasets) can be used to provide some information to inform this process if desired (e.g., "Tom" is often associated with he/him in pre-1923 English texts). Name information, however, should not be uniquely determinative, but rather should be sensitive to the context in which it is used (e.g., "Tom" in the book "Tom and Some Other Girls", where Tom is aligned with she/her). By default, BookNLP uses prior information on the alignment of proper names and honorifics with pronouns drawn from ~15K works from Project Gutenberg; this prior information can be ignored by setting `referential_gender_hyperparameterFile:None` in the model_params file. Alternative priors can be used by passing the pathname to a prior file (in the same format as `english/data/gutenberg_prop_gender_terms.txt`) to this parameter.
+
+* Users should be free to define the referential gender categories used here. The default set of categories is {he, him, his},
+{she, her}, {they, them, their}, {xe, xem, xyr, xir}, and {ze, zem, zir, hir}. To specify a different set of categories, update the `model_params` setting to define them:
+ `referential_gender_cats: [ ["he", "him", "his"], ["she", "her"], ["they", "them", "their"], ["xe", "xem", "xyr", "xir"], ["ze", "zem", "zir", "hir"] ]`
+
+## Speaker attribution
+
+The speaker attribution model identifies all instances of direct speech in the text and attributes it to its speaker.
+
+
+|Quote|Speaker|Source|
+|---|---|---|
+— Come up , Kinch ! Come up , you fearful jesuit !|Buck\_Mulligan-0|Joyce, *Ulysses*|
+“ Oh dear ! Oh dear ! I shall be late ! ”|The\_White\_Rabbit-4|Carroll, *Alice in Wonderland*|
+“ Do n't put your feet up there , Huckleberry ; ”|Miss\_Watson-26|Twain, *Huckleberry Finn*|
+
+This model is trained on speaker attribution data in [LitBank](https://github.com/dbamman/litbank).
+For more on the quotation annotations, see [this paper](https://arxiv.org/pdf/2004.13980.pdf).
+
+## Part-of-speech tagging and dependency parsing
+
+BookNLP uses [Spacy](https://spacy.io) for part-of-speech tagging and dependency parsing.
+
+# Acknowledgments
+
+
 |  |
+BookNLP is supported by the National Endowment for the Humanities (HAA-271654-20) and the National Science Foundation (IIS-1942591).
+ |
diff --git a/booknlp/english/bert_qa.py b/booknlp/english/bert_qa.py
index 1c240c1..778a505 100644
--- a/booknlp/english/bert_qa.py
+++ b/booknlp/english/bert_qa.py
@@ -1,5 +1,6 @@
import torch
import re
+import os
from booknlp.english.speaker_attribution import BERTSpeakerID
import numpy as np
import sys
@@ -11,10 +12,10 @@ class QuotationAttribution:
def __init__(self, modelFile):
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
- base_model=re.sub("google_bert", "google/bert", modelFile.split("/")[-1])
- base_model=re.sub(".model", "", base_model)
+ base_model=re.sub("google_bert", "google/bert", os.path.basename(modelFile))
+ base_model=re.sub(r"\.model$", "", base_model)
self.model = BERTSpeakerID(base_model=base_model)
state_dict = torch.load(modelFile, map_location=device)
diff --git a/booknlp/english/english_booknlp.py b/booknlp/english/english_booknlp.py
index fa42175..e334c5e 100644
--- a/booknlp/english/english_booknlp.py
+++ b/booknlp/english/english_booknlp.py
@@ -18,6 +18,7 @@
import urllib.request
import pkg_resources
import torch
+import datetime
class EnglishBookNLP:
@@ -326,7 +327,476 @@ def get_head_in_range(start, end, tokens):
data["characters"].append(chardata)
return data
-
+
+
+ def normalize_character_name(self, name):
+ """
+ Normalize character name to camelCase format without spaces or special characters
+ """
+ import re
+
+ # Remove special characters except spaces and apostrophes
+ name = re.sub(r"[^\w\s']", "", name)
+
+ # Handle possessives (e.g., "Tom's" -> "Toms")
+ name = re.sub(r"'s\b", "s", name)
+ name = re.sub(r"'", "", name)
+
+ # Split on whitespace and capitalize each word
+ words = name.split()
+ if not words:
+ return "UnknownCharacter"
+
+ # First word capitalized, rest capitalized (camelCase)
+ normalized = words[0].capitalize()
+ for word in words[1:]:
+ normalized += word.capitalize()
+
+ # Ensure it starts with a letter
+ if not normalized or not normalized[0].isalpha():
+ normalized = "character" + normalized.capitalize()
+
+ return normalized
+
+ def infer_age_category_with_scores(self, character_data):
+ """
+ Use semantic similarity to infer age category with confidence scores for all categories
+ """
+ try:
+ from sentence_transformers import SentenceTransformer
+ if not hasattr(self, '_age_model'):
+ self._age_model = SentenceTransformer('all-MiniLM-L6-v2')
+ model = self._age_model
+ except ImportError:
+ print("Warning: sentence-transformers not installed. Age inference unavailable.")
+ return {"category": "unknown", "scores": {"child": 0.0, "teen": 0.0, "adult": 0.0, "elder": 0.0}}
+ except Exception as e:
+ print(f"Warning: Could not load sentence transformer model: {e}")
+ return {"category": "unknown", "scores": {"child": 0.0, "teen": 0.0, "adult": 0.0, "elder": 0.0}}
+
+ age_prototypes = {
+ 'child': [
+ "young child", "little kid", "small child", "baby", "toddler",
+ "young boy", "little girl", "infant", "youngster"
+ ],
+ 'teen': [
+ "teenager", "adolescent", "young person", "teenage boy",
+ "teenage girl", "youth", "high school student"
+ ],
+ 'adult': [
+ "adult man", "adult woman", "grown person", "mature person",
+ "middle-aged man", "middle-aged woman", "working adult"
+ ],
+ 'elder': [
+ "elderly person", "old man", "old woman", "senior citizen",
+ "aged person", "grandfather", "grandmother", "elderly gentleman"
+ ]
+ }
+
+ # Get descriptors
+ descriptors = []
+ descriptors.extend([mod['w'] for mod in character_data.get('mod', [])])
+ descriptors.extend([mention['n'] for mention in character_data.get('mentions', {}).get('common', [])])
+
+ if not descriptors:
+ return {"category": "unknown", "scores": {"child": 0.0, "teen": 0.0, "adult": 0.0, "elder": 0.0}}
+
+ character_description = " ".join(descriptors)
+
+ # Calculate similarities for ALL categories
+ category_scores = {}
+ best_category = "unknown"
+ best_score = 0.0
+
+ try:
+ for category, prototypes in age_prototypes.items():
+ prototype_embeddings = model.encode(prototypes)
+ char_embedding = model.encode([character_description])
+
+ similarities = model.similarity(char_embedding, prototype_embeddings)
+ max_similarity = float(similarities.max())
+
+ category_scores[category] = round(max_similarity, 3)
+
+ if max_similarity > best_score and max_similarity > 0.2:
+ best_score = max_similarity
+ best_category = category
+
+ except Exception as e:
+ print(f"Warning: Error during age inference: {e}")
+ return {"category": "unknown", "scores": {"child": 0.0, "teen": 0.0, "adult": 0.0, "elder": 0.0}}
+
+ return {"category": best_category, "scores": category_scores}
+
+ def generate_character_json(self, entities, assignments, genders, chardata, outFolder, idd):
+ """
+ Generate a JSON file with character information including TTS settings and age inference with scores
+ """
+
+ def map_gender_to_standard(gender_data):
+ """
+ Map gender inference results to 'male', 'female', or 'unknown' based on highest score
+ """
+ if gender_data is None:
+ return "unknown"
+
+ # Get the inference scores
+ inference_scores = gender_data.get("inference", {})
+
+ # Map pronoun groups to standard genders
+ gender_mapping = {
+ "he/him/his": "male",
+ "she/her": "female",
+ # Ignore other categories like "they/them/their", "xe/xem/xyr", etc.
+ }
+
+ # Find the highest scoring valid gender
+ max_score = 0.0
+ best_gender = "unknown"
+
+ for pronoun_group, score in inference_scores.items():
+ if pronoun_group in gender_mapping and score > max_score:
+ max_score = score
+ best_gender = gender_mapping[pronoun_group]
+
+ # Only return a gender if the confidence is reasonable (e.g., > 0.1)
+ if max_score > 0.1:
+ return best_gender
+ else:
+ return "unknown"
+
+ # Get canonical names for characters
+ names = {}
+ for idx, (start, end, cat, text) in enumerate(entities):
+ coref = assignments[idx]
+ if coref not in names:
+ names[coref] = Counter()
+ ner_prop = cat.split("_")[0]
+ ner_type = cat.split("_")[1]
+ if ner_prop == "PROP":
+ names[coref][text.lower()] += 10
+ elif ner_prop == "NOM":
+ names[coref][text.lower()] += 1
+ else:
+ names[coref][text.lower()] += 0.001
+
+ # Get canonical name for each character ID
+ char_names = {}
+ for coref, name_counter in names.items():
+ if name_counter:
+ char_names[coref] = name_counter.most_common(1)[0][0]
+ else:
+ char_names[coref] = f"character_{coref}"
+
+ # Build character information
+ characters_info = []
+
+ # Add narrator first
+ narrator_char = {
+ "character_id": "Narrator",
+ "canonical_name": "Narrator",
+ "normalized_name": "Narrator",
+ "inferred_gender": "unknown",
+ "gender_scores": {},
+ "inferred_age_category": "unknown",
+ "age_confidence_scores": {"child": 0.0, "teen": 0.0, "adult": 0.0, "elder": 0.0},
+ "mention_count": 0,
+ "tts_engine": "XTTSv2",
+ "language": "eng",
+ "voice": None
+ }
+ characters_info.append(narrator_char)
+
+ # Add characters from chardata
+ for character in chardata["characters"]:
+ char_id = character["id"]
+ age_result = self.infer_age_category_with_scores(character)
+ canonical_name = char_names.get(char_id, f"character_{char_id}")
+
+ # Map the gender to standard format
+ raw_gender_data = character.get("g", None)
+ standardized_gender = map_gender_to_standard(raw_gender_data)
+
+ # Preserve the original gender scores
+ gender_scores = {}
+ if raw_gender_data and "inference" in raw_gender_data:
+ gender_scores = raw_gender_data["inference"]
+
+ char_info = {
+ "character_id": char_id,
+ "canonical_name": canonical_name,
+ "normalized_name": self.normalize_character_name(canonical_name),
+ "inferred_gender": standardized_gender,
+ "gender_scores": gender_scores,
+ "inferred_age_category": age_result["category"],
+ "age_confidence_scores": age_result["scores"],
+ "mention_count": character["count"],
+ "tts_engine": "XTTSv2",
+ "language": "eng",
+ "voice": None
+ }
+ characters_info.append(char_info)
+
+ # Build the JSON structure
+ result = {
+ "metadata": {
+ "generated_by": "BookNLP",
+ "generated_at": datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S"),
+ "generated_by_user": "DrewThomasson",
+ "document_id": idd,
+ "total_characters": len(characters_info)
+ },
+ "characters": characters_info
+ }
+
+ # Write JSON file
+ with open(join(outFolder, "%s.characters.json" % (idd)), "w", encoding="utf-8") as out:
+ json.dump(result, out, indent=2, ensure_ascii=False)
+
+ return result
+
+
+ def generate_simplified_character_json(self, entities, assignments, genders, chardata, outFolder, idd):
+ """
+ Generate a simplified JSON file with only essential character information for TTS
+ """
+
+ def map_gender_to_standard(gender_data):
+ """
+ Map gender inference results to 'male', 'female', or 'unknown' based on highest score
+ """
+ if gender_data is None:
+ return "unknown"
+
+ # Get the inference scores
+ inference_scores = gender_data.get("inference", {})
+
+ # Map pronoun groups to standard genders
+ gender_mapping = {
+ "he/him/his": "male",
+ "she/her": "female",
+ # Ignore other categories like "they/them/their", "xe/xem/xyr", etc.
+ }
+
+ # Find the highest scoring valid gender
+ max_score = 0.0
+ best_gender = "unknown"
+
+ for pronoun_group, score in inference_scores.items():
+ if pronoun_group in gender_mapping and score > max_score:
+ max_score = score
+ best_gender = gender_mapping[pronoun_group]
+
+ # Only return a gender if the confidence is reasonable (e.g., > 0.1)
+ if max_score > 0.1:
+ return best_gender
+ else:
+ return "unknown"
+
+ # Get canonical names for characters
+ names = {}
+ for idx, (start, end, cat, text) in enumerate(entities):
+ coref = assignments[idx]
+ if coref not in names:
+ names[coref] = Counter()
+ ner_prop = cat.split("_")[0]
+ ner_type = cat.split("_")[1]
+ if ner_prop == "PROP":
+ names[coref][text.lower()] += 10
+ elif ner_prop == "NOM":
+ names[coref][text.lower()] += 1
+ else:
+ names[coref][text.lower()] += 0.001
+
+ # Get canonical name for each character ID
+ char_names = {}
+ for coref, name_counter in names.items():
+ if name_counter:
+ char_names[coref] = name_counter.most_common(1)[0][0]
+ else:
+ char_names[coref] = f"character_{coref}"
+
+ # Build simplified character information
+ characters_info = []
+
+ # Add narrator first
+ narrator_char = {
+ "normalized_name": "Narrator",
+ "inferred_gender": "unknown",
+ "inferred_age_category": "unknown",
+ "tts_engine": "XTTSv2",
+ "language": "eng",
+ "voice": None
+ }
+ characters_info.append(narrator_char)
+
+ # Add characters from chardata
+ for character in chardata["characters"]:
+ char_id = character["id"]
+ age_result = self.infer_age_category_with_scores(character)
+ canonical_name = char_names.get(char_id, f"character_{char_id}")
+
+ # Map the gender to standard format
+ raw_gender_data = character.get("g", None)
+ standardized_gender = map_gender_to_standard(raw_gender_data)
+
+ char_info = {
+ "normalized_name": self.normalize_character_name(canonical_name),
+ "inferred_gender": standardized_gender,
+ "inferred_age_category": age_result["category"],
+ "tts_engine": "XTTSv2",
+ "language": "eng",
+ "voice": None
+ }
+ characters_info.append(char_info)
+
+ # Build the simplified JSON structure
+ result = {
+ "characters": characters_info
+ }
+
+ # Write simplified JSON file
+ with open(join(outFolder, "%s.characters_simple.json" % (idd)), "w", encoding="utf-8") as out:
+ json.dump(result, out, indent=2, ensure_ascii=False)
+
+ return result
+
+
+ def fix_punctuation_spacing(self, text):
+ """
+ Fix spacing around punctuation marks to follow standard English conventions
+ """
+ import re
+
+ # First pass - fix quotes more aggressively
+ # Remove ALL spaces immediately after opening quotes
+ text = re.sub(r'"\s+', '"', text)
+ text = re.sub(r"'\s+", "'", text)
+
+ # Remove ALL spaces immediately before closing quotes
+ text = re.sub(r'\s+"', '"', text)
+ text = re.sub(r"\s+'", "'", text)
+
+ # Remove spaces before common punctuation marks
+ text = re.sub(r'\s+([,.!?;:])', r'\1', text)
+
+ # Remove spaces before closing quotes, parentheses, brackets
+ text = re.sub(r'\s+(["\'\)\]\}])', r'\1', text)
+
+ # Fix contractions - remove spaces around apostrophes in contractions
+ text = re.sub(r'\s+\'\s*(\w+)', r"'\1", text)
+ text = re.sub(r'(\w+)\s+\'\s*(\w+)', r"\1'\2", text)
+
+ # Handle possessives - remove space before 's
+ text = re.sub(r'(\w+)\s+\'\s*s\b', r"\1's", text)
+
+ # Add space after punctuation if missing (but not before closing punctuation)
+ text = re.sub(r'([,.!?;:])([^\s"\'\)\]\}\n])', r'\1 \2', text)
+
+ # Handle opening parentheses, brackets - remove space after
+ text = re.sub(r'([\(\[\{])\s+', r'\1', text)
+
+ # Fix underscores (italics) - remove spaces around them but add space after closing underscore
+ text = re.sub(r'_(\w+)_(\w)', r'_\1_ \2', text)
+
+ # Fix double spaces
+ text = re.sub(r'\s{2,}', ' ', text)
+
+ return text.strip()
+
+ def generate_book_with_character_tags(self, tokens, quotes, attributed_quotations, entities, assignments, genders, chardata, outFolder, idd):
+ """
+ Generate a .book.txt file with character name tags surrounding each sentence
+ """
+
+ # Get canonical names for characters
+ names = {}
+ for idx, (start, end, cat, text) in enumerate(entities):
+ coref = assignments[idx]
+ if coref not in names:
+ names[coref] = Counter()
+ ner_prop = cat.split("_")[0]
+ ner_type = cat.split("_")[1]
+ if ner_prop == "PROP":
+ names[coref][text.lower()] += 10
+ elif ner_prop == "NOM":
+ names[coref][text.lower()] += 1
+ else:
+ names[coref][text.lower()] += 0.001
+
+ # Get canonical name for each character ID and create normalized versions
+ char_names = {}
+ normalized_char_names = {}
+ for coref, name_counter in names.items():
+ if name_counter:
+ canonical_name = name_counter.most_common(1)[0][0].title()
+ char_names[coref] = canonical_name
+ normalized_char_names[coref] = self.normalize_character_name(canonical_name)
+ else:
+ char_names[coref] = f"Character{coref}"
+ normalized_char_names[coref] = f"character{coref}"
+
+ # Add narrator to mappings
+ char_names["Narrator"] = "Narrator"
+ normalized_char_names["Narrator"] = "Narrator"
+
+ # Create mapping of token ranges to quotes and speakers
+ quote_ranges = {}
+ for idx, (start, end) in enumerate(quotes):
+ mention_id = attributed_quotations[idx]
+ if mention_id is not None:
+ speaker_id = assignments[mention_id]
+ else:
+ speaker_id = "Narrator"
+
+ for token_idx in range(start, end + 1):
+ quote_ranges[token_idx] = speaker_id
+
+ # Group tokens by sentence
+ sentences = {}
+ for token in tokens:
+ sent_id = token.sentence_id
+ if sent_id not in sentences:
+ sentences[sent_id] = []
+ sentences[sent_id].append(token)
+
+ # Build the tagged text
+ result_lines = []
+
+ for sent_id in sorted(sentences.keys()):
+ sent_tokens = sentences[sent_id]
+ sent_text = " ".join([token.text for token in sent_tokens])
+
+ # Fix punctuation spacing
+ sent_text = self.fix_punctuation_spacing(sent_text)
+
+ # Determine speaker for this sentence
+ # Check if any tokens in this sentence are part of quotes
+ speaker_ids_in_sentence = set()
+
+ for token in sent_tokens:
+ if token.token_id in quote_ranges:
+ speaker_ids_in_sentence.add(quote_ranges[token.token_id])
+
+ # If exactly one speaker, use that speaker; otherwise default to narrator
+ if len(speaker_ids_in_sentence) == 1:
+ speaker_id = list(speaker_ids_in_sentence)[0]
+ else:
+ speaker_id = "Narrator"
+
+ # Get the normalized name for the speaker (used in tags)
+ speaker_name = normalized_char_names.get(speaker_id, f"character{speaker_id}")
+
+ # Format the sentence with character name tags using normalized names
+ tagged_sentence = f"[{speaker_name}] {sent_text} [/]"
+ result_lines.append(tagged_sentence)
+
+ # Write the tagged text file
+ with open(join(outFolder, "%s.book.txt" % (idd)), "w", encoding="utf-8") as out:
+ out.write("\n".join(result_lines))
+
+ return result_lines
+
def process(self, filename, outFolder, idd):
@@ -336,7 +806,7 @@ def process(self, filename, outFolder, idd):
start_time = time.time()
originalTime=start_time
- with open(filename) as file:
+ with open(filename, encoding='utf-8') as file:
data=file.read()
if len(data) == 0:
@@ -502,6 +972,25 @@ def process(self, filename, outFolder, idd):
else:
names[coref][text.lower()]+=.001
+ # Generate character info JSON
+ print("--- generating character JSON: start ---")
+ char_start_time = time.time()
+ self.generate_character_json(entities, assignments, genders, chardata, outFolder, idd)
+ print("--- character JSON: %.3f seconds ---" % (time.time() - char_start_time))
+
+ # Generate simplified character info JSON
+ print("--- generating simplified character JSON: start ---")
+ simple_char_start_time = time.time()
+ self.generate_simplified_character_json(entities, assignments, genders, chardata, outFolder, idd)
+ print("--- simplified character JSON: %.3f seconds ---" % (time.time() - simple_char_start_time))
+
+ # Generate book with character tags
+ print("--- generating tagged book: start ---")
+ book_start_time = time.time()
+ self.generate_book_with_character_tags(tokens, quotes, attributed_quotations,
+ entities, assignments, genders, chardata,
+ outFolder, idd)
+ print("--- tagged book: %.3f seconds ---" % (time.time() - book_start_time))
with open(join(outFolder, "%s.book.html" % (idd)), "w", encoding="utf-8") as out:
out.write("")
@@ -602,6 +1091,4 @@ def process(self, filename, outFolder, idd):
out.write("")
print("--- TOTAL (excl. startup): %.3f seconds ---, %s words" % (time.time() - originalTime, len(tokens)))
- return time.time() - originalTime
-
-
+ return time.time() - originalTime
\ No newline at end of file
diff --git a/booknlp/english/entity_tagger.py b/booknlp/english/entity_tagger.py
index b5609fc..7e4fa8e 100644
--- a/booknlp/english/entity_tagger.py
+++ b/booknlp/english/entity_tagger.py
@@ -4,6 +4,7 @@
import booknlp.common.layered_reader as layered_reader
import booknlp.common.sequence_layered_reader as sequence_layered_reader
import pkg_resources
+import os
class LitBankEntityTagger:
def __init__(self, model_file, model_tagset):
@@ -13,8 +14,8 @@ def __init__(self, model_file, model_tagset):
supersenseTagset = pkg_resources.resource_filename(__name__, "data/supersense.tagset")
self.supersense_tagset=sequence_layered_reader.read_tagset(supersenseTagset)
- base_model=re.sub("google_bert", "google/bert", model_file.split("/")[-1])
- base_model=re.sub(".model", "", base_model)
+ base_model=re.sub("google_bert", "google/bert", os.path.basename(model_file))
+ base_model=re.sub(r"\.model$", "", base_model)
self.model = Tagger(freeze_bert=False, base_model=base_model, tagset_flat={"EVENT":1, "O":1}, supersense_tagset=self.supersense_tagset, tagset=self.tagset, device=device)
diff --git a/booknlp/english/gender_inference_model_1.py b/booknlp/english/gender_inference_model_1.py
index ce755ef..13fe924 100644
--- a/booknlp/english/gender_inference_model_1.py
+++ b/booknlp/english/gender_inference_model_1.py
@@ -163,7 +163,7 @@ def add_hyperparameters_to_counts(self, refs=None, entities=None, tokens=None):
def read_hyperparams(self, filename):
self.hyperparameters={}
- with open(filename) as file:
+ with open(filename, encoding='utf-8') as file:
header=file.readline().rstrip()
gender_mapping={}
for idx, val in enumerate(header.split("\t")[2:]):
diff --git a/booknlp/english/litbank_coref.py b/booknlp/english/litbank_coref.py
index c8707ea..fb50b11 100644
--- a/booknlp/english/litbank_coref.py
+++ b/booknlp/english/litbank_coref.py
@@ -1,4 +1,5 @@
import torch, sys, re
+import os
from booknlp.english.bert_coref_quote_pronouns import BERTCorefTagger
import numpy as np
@@ -12,8 +13,8 @@ def __init__(self, modelFile, gender_cats, pronominalCorefOnly=True):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- base_model=re.sub("google_bert", "google/bert", modelFile.split("/")[-1])
- base_model=re.sub(".model", "", base_model)
+ base_model=re.sub("google_bert", "google/bert", os.path.basename(modelFile))
+ base_model=re.sub(r"\.model$", "", base_model)
self.model = BERTCorefTagger(gender_cats=gender_cats, freeze_bert=True, base_model=base_model, pronominalCorefOnly=pronominalCorefOnly)
state_dict = torch.load(modelFile, map_location=device)
diff --git a/booknlp/english/litbank_quote.py b/booknlp/english/litbank_quote.py
index f8418e0..f2246bc 100644
--- a/booknlp/english/litbank_quote.py
+++ b/booknlp/english/litbank_quote.py
@@ -2,75 +2,85 @@
from collections import Counter
class QuoteTagger:
-
-
- def tag(self, toks):
-
- predictions=[]
- currentQuote=[]
- curStartTok=None
- lastPar=None
-
- quote_symbols=Counter()
-
- for tok in toks:
- if tok.text == "“" or tok.text == "”" or tok.text == "\"" or tok.text == "„":
- quote_symbols["DOUBLE_QUOTE"]+=1
- elif tok.text == "‘" or tok.text == "’" or tok.text == "'":
- quote_symbols["SINGLE_QUOTE"]+=1
- elif tok.text == "—":
- quote_symbols["DASH"]+=1
-
-
- quote_symbol="DOUBLE_QUOTE"
- if len(quote_symbols) > 0:
- quote_symbol=quote_symbols.most_common()[0][0]
-
- for tok in toks:
-
- w=tok.text
-
- for w_idx, w_char in enumerate(w):
- if w_char== "“" or w_char == "”" or w_char == "\"":
- w="DOUBLE_QUOTE"
- elif w_char == "‘" or w_char == "’" or w_char == "'":
- if w_idx == 0:
- suff=w[w_idx+1:]
- if suff != "s" and suff != "d" and suff != "ll" and suff != "ve":
- w="SINGLE_QUOTE"
-
- # start over at each new paragraph
- if tok.paragraph_id != lastPar and lastPar is not None:
-
- if len(currentQuote) > 0:
- predictions.append((curStartTok, tok.token_id-1))
- curStartTok=None
- currentQuote=[]
-
- if w == quote_symbol:
-
- if curStartTok is not None:
-
- if len(currentQuote) > 0:
- predictions.append((curStartTok, tok.token_id))
- currentQuote.append(tok.text)
-
- curStartTok=None
- currentQuote=[]
- else:
- curStartTok=tok.token_id
-
-
- if curStartTok is not None:
- currentQuote.append(tok.text)
-
- lastPar=tok.paragraph_id
-
- for start, end in predictions:
- for i in range(start, end+1):
- toks[i].inQuote=True
-
- return predictions
-
-
+ def tag(self, toks):
+
+ predictions = []
+ currentQuote = []
+ curStartTok = None
+ lastPar = None
+
+ quote_symbols = Counter()
+
+ # Count all possible quote types including French guillemets
+ for tok in toks:
+ if tok.text in ["“", "”", "\""]:
+ quote_symbols["DOUBLE_QUOTE"] += 1
+ elif tok.text in ["‘", "’", "'"]:
+ quote_symbols["SINGLE_QUOTE"] += 1
+ elif tok.text in ["«", "»"]:
+ quote_symbols["GUILLEMET"] += 1
+ elif tok.text == "—":
+ quote_symbols["DASH"] += 1
+
+ quote_symbol = "DOUBLE_QUOTE"
+ if len(quote_symbols) > 0:
+ quote_symbol = quote_symbols.most_common()[0][0]
+
+ # Helper function to check if a token matches the chosen quote symbol
+ def is_quote_symbol(token_text, symbol):
+ if symbol == "DOUBLE_QUOTE":
+ return token_text in ["“", "”", "\""]
+ elif symbol == "SINGLE_QUOTE":
+ return token_text in ["‘", "’", "'"]
+ elif symbol == "GUILLEMET":
+ return token_text in ["Β«", "Β»"]
+ elif symbol == "DASH":
+ return token_text == "—"
+ return False
+
+ for tok in toks:
+ w = tok.text
+
+ # Normalize quote symbol for this token
+ for w_idx, w_char in enumerate(w):
+ if w_char in ["“", "”", "\""]:
+ w = "DOUBLE_QUOTE"
+ elif w_char in ["‘", "’", "'"]:
+ if w_idx == 0:
+ suff = w[w_idx+1:]
+ if suff not in ["s", "d", "ll", "ve"]:
+ w = "SINGLE_QUOTE"
+ elif w_char in ["«", "»"]:
+ w = "GUILLEMET"
+ elif w_char == "—":
+ w = "DASH"
+
+ # start over at each new paragraph
+ if tok.paragraph_id != lastPar and lastPar is not None:
+ if len(currentQuote) > 0:
+ predictions.append((curStartTok, tok.token_id-1))
+ curStartTok = None
+ currentQuote = []
+
+ # Detect start or end of quote
+ if w == quote_symbol:
+ if curStartTok is not None:
+ if len(currentQuote) > 0:
+ predictions.append((curStartTok, tok.token_id))
+ currentQuote.append(tok.text)
+ curStartTok = None
+ currentQuote = []
+ else:
+ curStartTok = tok.token_id
+
+ if curStartTok is not None:
+ currentQuote.append(tok.text)
+
+ lastPar = tok.paragraph_id
+
+ for start, end in predictions:
+ for i in range(start, end+1):
+ toks[i].inQuote = True
+
+ return predictions
\ No newline at end of file
diff --git a/booknlp/english/speaker_attribution.py b/booknlp/english/speaker_attribution.py
index 8021ff2..428fc42 100644
--- a/booknlp/english/speaker_attribution.py
+++ b/booknlp/english/speaker_attribution.py
@@ -16,7 +16,7 @@
PINK = '\033[95m'
ENDC = '\033[0m'
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
class BERTSpeakerID(nn.Module):
@@ -26,8 +26,8 @@ def __init__(self, base_model=None):
modelName=base_model
modelName=re.sub("^speaker_", "", modelName)
- modelName=re.sub("-v\d.*$", "", modelName)
- matcher=re.search(".*-(\d+)_H-(\d+)_A-.*", modelName)
+ modelName=re.sub(r"-v\d.*$", "", modelName)
+ matcher=re.search(r".*-(\d+)_H-(\d+)_A-.*", modelName)
bert_dim=0
modelSize=0
self.num_layers=0
@@ -43,10 +43,16 @@ def __init__(self, base_model=None):
self.tokenizer.add_tokens(["[QUOTE]", "[ALTQUOTE]", "[PAR]", "[CAP]"], special_tokens=True)
self.bert = BertModel.from_pretrained(modelName)
self.bert.resize_token_embeddings(len(self.tokenizer))
+ self.bert.to(device)
self.tanh = nn.Tanh()
self.fc = nn.Linear(2*bert_dim, 100)
self.fc2 = nn.Linear(100, 1)
+
+ # Move all components to device
+ self.tanh.to(device)
+ self.fc.to(device)
+ self.fc2.to(device)
def get_wp_position_for_all_tokens(self, words, doLowerCase=True):
@@ -236,9 +242,4 @@ def evaluate(self, dev_x_batches, dev_m_batches, dev_y_batches, dev_o_batches, e
print("Epoch %s, Quote F1: %.3f\tP: %.3f, R: %.3f" % (epoch, F, precision, recall))
print("Epoch %s, accuracy: %.3f" % (epoch, cor/tot))
- return F, cor/tot
-
-
-
-
-
+ return F, cor/tot
\ No newline at end of file
diff --git a/examples/pride.short_json/pride.book b/examples/pride.short_json/pride.book
new file mode 100644
index 0000000..09679b0
--- /dev/null
+++ b/examples/pride.short_json/pride.book
@@ -0,0 +1 @@
+{"characters": [{"agent": [{"w": "said", "i": 83}, {"w": "heard", "i": 94}, {"w": "replied", "i": 106}, {"w": "had", "i": 109}, {"w": "returned", "i": 118}, {"w": "made", "i": 141}, {"w": "want", "i": 149}, {"w": "want", "i": 167}, {"w": "have", "i": 174}, {"w": "have", "i": 174}, {"w": "know", "i": 195}, {"w": "know", "i": 363}, {"w": "talk", "i": 394}, {"w": "visit", "i": 419}, {"w": "see", "i": 430}, {"w": "go", "i": 441}, {"w": "send", "i": 446}, {"w": "send", "i": 446}, {"w": "flatter", "i": 486}, {"w": "go", "i": 559}, {"w": "see", "i": 561}, {"w": "engage", "i": 578}, {"w": "assure", "i": 582}, {"w": "know", "i": 625}, {"w": "go", "i": 635}, {"w": "do", "i": 651}, {"w": "dare", "i": 665}, {"w": "send", "i": 680}, {"w": "send", "i": 680}, {"w": "throw", "i": 706}, {"w": "do", "i": 722}, {"w": "giving", "i": 763}, {"w": "abuse", "i": 816}, {"w": "take", "i": 826}, {"w": "have", "i": 833}, {"w": "mistake", "i": 844}, {"w": "mention", "i": 869}], "patient": [{"w": "considered", "i": 63}, {"w": "told", "i": 132}, {"w": "cried", "i": 158}, {"w": "tell", "i": 169}, {"w": "like", "i": 472}, {"w": "assure", "i": 582}, {"w": "see", "i": 674}], "mod": [{"w": "tiresome", "i": 359}, {"w": "handsome", "i": 463}, {"w": "scrupulous", "i": 660}], "poss": [{"w": "entering", "i": 42}, {"w": "Bennet", "i": 80}, {"w": "Bennet", "i": 80}, {"w": "lady", "i": 85}, {"w": "wife", "i": 160}, {"w": "dear", "i": 191}, {"w": "dear", "i": 302}, {"w": "Bennet", "i": 346}, {"w": "wife", "i": 351}, {"w": "daughters", "i": 590}, {"w": "consent", "i": 692}, {"w": "Lizzy", "i": 714}, {"w": "children", "i": 819}, {"w": "nerves", "i": 857}], "id": 13, "g": {"inference": {"he/him/his": 0.875, "she/her": 0.125, "they/them/their": 0, "xe/xem/xyr/xir": 0, "ze/zem/zir/hir": 0}, "argmax": "he/him/his", "max": 0.875, "total": 8}, "count": 69, "mentions": {"proper": [{"c": 5, "n": "Mr. 
Bennet"}], "common": [{"c": 3, "n": "My dear"}, {"c": 3, "n": "my dear"}, {"c": 1, "n": "a single man in possession of a good fortune"}, {"c": 1, "n": "a wife"}, {"c": 1, "n": "such a man"}, {"c": 1, "n": "his lady"}, {"c": 1, "n": "his wife"}], "pronoun": [{"c": 21, "n": "you"}, {"c": 7, "n": "You"}, {"c": 7, "n": "I"}, {"c": 4, "n": "his"}, {"c": 4, "n": "my"}, {"c": 3, "n": "your"}, {"c": 2, "n": "he"}, {"c": 2, "n": "me"}, {"c": 1, "n": "My"}, {"c": 1, "n": "him"}, {"c": 1, "n": "she"}]}}, {"agent": [{"w": "replied", "i": 349}, {"w": "thinking", "i": 367}, {"w": "have", "i": 492}, {"w": "pretend", "i": 504}, {"w": "pretend", "i": 504}, {"w": "desire", "i": 719}], "patient": [{"w": "flatter", "i": 486}, {"w": "vexing", "i": 829}], "mod": [{"w": "sure", "i": 740}], "poss": [{"w": "Bennet", "i": 346}, {"w": "dear", "i": 483}, {"w": "share", "i": 496}, {"w": "dear", "i": 554}, {"w": "nerves", "i": 839}], "id": 14, "g": null, "count": 14, "mentions": {"proper": [], "common": [{"c": 1, "n": "my dear"}, {"c": 1, "n": "his wife"}], "pronoun": [{"c": 5, "n": "I"}, {"c": 3, "n": "my"}, {"c": 2, "n": "My"}, {"c": 2, "n": "me"}]}}, {"agent": [{"w": "taken", "i": 203}, {"w": "came", "i": 219}, {"w": "agreed", "i": 243}, {"w": "agreed", "i": 243}, {"w": "marrying", "i": 370}, {"w": "fall", "i": 407}, {"w": "comes", "i": 425}], "patient": [{"w": "visit", "i": 419}], "mod": [{"w": "married", "i": 291}, {"w": "single", "i": 293}], "poss": [{"w": "servants", "i": 262}, {"w": "name", "i": 281}, {"w": "design", "i": 380}], "id": 22, "g": {"inference": {"he/him/his": 1.0, "she/her": 0, "they/them/their": 0, "xe/xem/xyr/xir": 0, "ze/zem/zir/hir": 0}, "argmax": "he/him/his", "max": 1.0, "total": 11}, "count": 13, "mentions": {"proper": [], "common": [{"c": 1, "n": "a young man of large fortune"}, {"c": 1, "n": "A single man of large fortune"}], "pronoun": [{"c": 6, "n": "he"}, {"c": 4, "n": "his"}, {"c": 1, "n": "him"}]}}, {"agent": [{"w": "like", "i": 472}, {"w": "comes", "i": 566}, 
{"w": "chooses", "i": 698}], "patient": [{"w": "see", "i": 561}, {"w": "visit", "i": 647}, {"w": "assure", "i": 687}], "mod": [{"w": "glad", "i": 672}], "poss": [{"w": "marrying", "i": 695}], "id": 7, "g": {"inference": {"he/him/his": 0.811, "she/her": 0.113, "they/them/their": 0.077, "xe/xem/xyr/xir": 0.0, "ze/zem/zir/hir": 0.0}, "argmax": "he/him/his", "max": 0.811, "total": 300001.981}, "count": 9, "mentions": {"proper": [{"c": 3, "n": "Mr. Bingley"}, {"c": 1, "n": "Bingley"}], "common": [], "pronoun": [{"c": 2, "n": "he"}, {"c": 2, "n": "him"}, {"c": 1, "n": "his"}]}}, {"agent": [{"w": "go", "i": 441}, {"w": "send", "i": 446}, {"w": "have", "i": 773}], "patient": [{"w": "recommend", "i": 779}, {"w": "abuse", "i": 816}], "mod": [{"w": "silly", "i": 790}, {"w": "ignorant", "i": 792}, {"w": "friends", "i": 863}], "poss": [], "id": 29, "g": {"inference": {"he/him/his": 0, "she/her": 0, "they/them/their": 1.0, "xe/xem/xyr/xir": 0, "ze/zem/zir/hir": 0}, "argmax": "they/them/their", "max": 1.0, "total": 5}, "count": 9, "mentions": {"proper": [], "common": [{"c": 2, "n": "the girls"}, {"c": 1, "n": "your own children"}, {"c": 1, "n": "my old friends"}], "pronoun": [{"c": 2, "n": "They"}, {"c": 2, "n": "them"}, {"c": 1, "n": "they"}]}}, {"agent": [], "patient": [{"w": "affect", "i": 338}, {"w": "send", "i": 446}], "mod": [], "poss": [], "id": 27, "g": {"inference": {"he/him/his": 0, "she/her": 0, "they/them/their": 1.0, "xe/xem/xyr/xir": 0, "ze/zem/zir/hir": 0}, "argmax": "they/them/their", "max": 1.0, "total": 5}, "count": 7, "mentions": {"proper": [], "common": [{"c": 1, "n": "our girls"}], "pronoun": [{"c": 5, "n": "them"}, {"c": 1, "n": "themselves"}]}}, {"agent": [{"w": "replied", "i": 783}, {"w": "have", "i": 851}, {"w": "heard", "i": 867}], "patient": [{"w": "mistake", "i": 844}], "mod": [], "poss": [{"w": "dear", "i": 848}, {"w": "friends", "i": 863}], "id": 21, "g": {"inference": {"he/him/his": 1.0, "she/her": 0, "they/them/their": 0, "xe/xem/xyr/xir": 0, 
"ze/zem/zir/hir": 0}, "argmax": "he/him/his", "max": 1.0, "total": 1}, "count": 6, "mentions": {"proper": [], "common": [], "pronoun": [{"c": 2, "n": "my"}, {"c": 2, "n": "I"}, {"c": 1, "n": "he"}, {"c": 1, "n": "me"}]}}, {"agent": [{"w": "has", "i": 799}], "patient": [{"w": "giving", "i": 763}], "mod": [{"w": "better", "i": 732}, {"w": "handsome", "i": 746}], "poss": [{"w": "sisters", "i": 806}], "id": 10, "g": {"inference": {"he/him/his": 0.061, "she/her": 0.757, "they/them/their": 0.061, "xe/xem/xyr/xir": 0.061, "ze/zem/zir/hir": 0.061}, "argmax": "she/her", "max": 0.757, "total": 1.644}, "count": 5, "mentions": {"proper": [{"c": 2, "n": "Lizzy"}], "common": [], "pronoun": [{"c": 2, "n": "her"}, {"c": 1, "n": "she"}]}}, {"agent": [{"w": "has", "i": 514}], "patient": [], "mod": [], "poss": [{"w": "beauty", "i": 530}], "id": 31, "g": {"inference": {"he/him/his": 0, "she/her": 1.0, "they/them/their": 0, "xe/xem/xyr/xir": 0, "ze/zem/zir/hir": 0}, "argmax": "she/her", "max": 1.0, "total": 2}, "count": 4, "mentions": {"proper": [], "common": [{"c": 2, "n": "a woman"}], "pronoun": [{"c": 1, "n": "she"}, {"c": 1, "n": "her"}]}}, {"agent": [{"w": "visit", "i": 628}], "patient": [{"w": "has", "i": 514}, {"w": "consider", "i": 588}], "mod": [], "poss": [], "id": 32, "g": {"inference": {"he/him/his": 0, "she/her": 0, "they/them/their": 1.0, "xe/xem/xyr/xir": 0, "ze/zem/zir/hir": 0}, "argmax": "they/them/their", "max": 1.0, "total": 2}, "count": 4, "mentions": {"proper": [], "common": [{"c": 1, "n": "five grown - up daughters"}, {"c": 1, "n": "your daughters"}], "pronoun": [{"c": 1, "n": "them"}, {"c": 1, "n": "they"}]}}, {"agent": [{"w": "told", "i": 132}, {"w": "says", "i": 199}], "patient": [], "mod": [], "poss": [], "id": 4, "g": {"inference": {"he/him/his": 0.103, "she/her": 0.831, "they/them/their": 0.066, "xe/xem/xyr/xir": 0.0, "ze/zem/zir/hir": 0.0}, "argmax": "she/her", "max": 0.831, "total": 200001.35}, "count": 3, "mentions": {"proper": [{"c": 2, "n": "Mrs. 
Long"}], "common": [], "pronoun": [{"c": 1, "n": "she"}]}}, {"agent": [], "patient": [], "mod": [], "poss": [{"w": "daughters", "i": 74}], "id": 16, "g": {"inference": {"he/him/his": 0, "she/her": 0, "they/them/their": 1.0, "xe/xem/xyr/xir": 0, "ze/zem/zir/hir": 0}, "argmax": "they/them/their", "max": 1.0, "total": 1}, "count": 2, "mentions": {"proper": [], "common": [{"c": 1, "n": "the surrounding families"}], "pronoun": [{"c": 1, "n": "their"}]}}]}
\ No newline at end of file
diff --git a/examples/pride.short_json/pride.book.html b/examples/pride.short_json/pride.book.html
new file mode 100644
index 0000000..e9a9ab6
--- /dev/null
+++ b/examples/pride.short_json/pride.book.html
@@ -0,0 +1,10 @@
+
+
+ Named characters
+69 Mr. Bennet (5) My dear (3)/my dear (3)/a single man in possession of a good fortune (1)/a wife (1)/such a man (1)/his lady (1)/his wife (1)
+9 Mr. Bingley (3)/Bingley (1)
+5 Lizzy (2)
+3 Mrs. Long (2)
+
+
Major entities (proper, common)
FAC
1 Netherfield Park
2 here
2 the place/the house
1 a neighbourhood
1 the neighbourhood
GPE
1 Netherfield
1 England
LOC
1 the north of England
PER
5 Mr. Bennet
4 Mr. Bingley/Bingley
2 Mrs. Long
2 Lizzy
1 Mr. Morris
1 Michaelmas
1 Sir William
1 Lady Lucas
1 Jane
1 Lydia
11 My dear/my dear/a single man in possession of a good fortune/a wife/such a man/his lady/his wife
4 the girls/your own children/my old friends
2 a young man of large fortune/A single man of large fortune
2 my dear/his wife
2 a woman
2 five grown - up daughters/your daughters
1 the surrounding families
1 some one or other of their daughters
1 their daughters
1 some of his servants
ORG
VEH
Text
+It is a truth universally acknowledged , that [a single man in possession of a good fortune]13-mr. bennet , must be in want of [a wife]13-mr. bennet . However little known the feelings or views of [such a man]13-mr. bennet may be on [his]13-mr. bennet first entering [a neighbourhood]15-a neighbourhood , this truth is so well fixed in the minds of [the surrounding families]16-the surrounding families , that [he]13-mr. bennet is considered the rightful property of [some one or other of [[their]16-the surrounding families daughters]18-their daughters]17-some one or other of their daughters . β [[My]13-mr. bennet dear]13-mr. bennet [Mr. Bennet]13-mr. bennet , β[13-mr. bennet] said [[his]13-mr. bennet lady]13-mr. bennet to [him]13-mr. bennet one day , β have [you]13-mr. bennet heard that [Netherfield Park]0-netherfield park is let at last ? β[13-mr. bennet] [Mr. Bennet]13-mr. bennet replied that [he]13-mr. bennet had not . β But it is , β[13-mr. bennet] returned [she]13-mr. bennet ; β for [Mrs. Long]4-mrs. long has just been [here]0-netherfield park , and [she]4-mrs. long told [me]13-mr. bennet all about it . β[13-mr. bennet] [Mr. Bennet]13-mr. bennet made no answer . β Do [you]13-mr. bennet not want to know who has taken it ? β[13-mr. bennet] cried [[his]13-mr. bennet wife]13-mr. bennet impatiently . β _ [You]13-mr. bennet _ want to tell [me]13-mr. bennet , and [I]13-mr. bennet have no objection to hearing it . β[13-mr. bennet] This was invitation enough . β Why , [[my]13-mr. bennet dear]13-mr. bennet , [you]13-mr. bennet must know , [Mrs. Long]4-mrs. long says that [Netherfield]1-netherfield is taken by [a young man of large fortune]22-a young man of large fortune from [the north of [England]2-england]23-the north of england ; that [he]22-a young man of large fortune came down on Monday in a chaise and four to see [the place]1-netherfield , and was so much delighted with it , that [he]22-a young man of large fortune agreed with [Mr. Morris]5-mr. 
morris immediately ; that [he]22-a young man of large fortune is to take possession before [Michaelmas]6-michaelmas , and [some of [[his]22-a young man of large fortune servants]25-his servants]24-some of his servants are to be in [the house]1-netherfield by the end of next week . β[13-mr. bennet] β What is [his]22-a young man of large fortune name ? β[13-mr. bennet] β [Bingley]7-mr. bingley . β[13-mr. bennet] β Is [he]22-a young man of large fortune married or single ? β[13-mr. bennet] β Oh ! Single , [[my]13-mr. bennet dear]14-my dear , to be sure ! [A single man of large fortune]22-a young man of large fortune ; four or five thousand a year . What a fine thing for [[our]26-our girls]27-our girls ! β[13-mr. bennet] β How so ? How can it affect [them]27-our girls ? β[13-mr. bennet] β [[My]14-my dear dear]13-mr. bennet [Mr. Bennet]13-mr. bennet , β[14-my dear] replied [[his]13-mr. bennet wife]14-my dear , β how can [you]13-mr. bennet be so tiresome ! [You]13-mr. bennet must know that [I]14-my dear am thinking of [his]22-a young man of large fortune marrying one of [them]27-our girls . β[14-my dear] β Is that [his]22-a young man of large fortune design in settling [here]0-netherfield park ? β[13-mr. bennet] β Design ! Nonsense , how can [you]13-mr. bennet talk so ! But it is very likely that [he]22-a young man of large fortune _ may _ fall in love with one of [them]27-our girls , and therefore [you]13-mr. bennet must visit [him]22-a young man of large fortune as soon as [he]22-a young man of large fortune comes . β[14-my dear] β [I]13-mr. bennet see no occasion for that . [You]13-mr. bennet and [the girls]29-the girls may go , or [you]13-mr. bennet may send [them]27-our girls by [themselves]27-our girls , which perhaps will be still better , for as [you]13-mr. bennet are as handsome as any of [them]27-our girls , [Mr. Bingley]7-mr. bingley may like [you]13-mr. bennet the best of [the party]30-the party . β[13-mr. bennet] β [[My]14-my dear dear]13-mr. 
bennet , [you]13-mr. bennet flatter [me]14-my dear . [I]14-my dear certainly _ have _ had [my]14-my dear share of beauty , but [I]14-my dear do not pretend to be anything extraordinary now . When [a woman]31-a woman has [five grown - up daughters]32-five grown - up daughters , [she]31-a woman ought to give over thinking of [her]31-a woman own beauty . β[14-my dear] β In such cases , [a woman]31-a woman has not often much beauty to think of . β[13-mr. bennet] β But , [[my]14-my dear dear]13-mr. bennet , [you]13-mr. bennet must indeed go and see [Mr. Bingley]7-mr. bingley when [he]7-mr. bingley comes into [the neighbourhood]33-the neighbourhood . β[14-my dear] β It is more than [I]13-mr. bennet engage for , [I]13-mr. bennet assure [you]13-mr. bennet . β[13-mr. bennet] β But consider [[your]13-mr. bennet daughters]32-five grown - up daughters . Only think what an establishment it would be for [one of [them]32-five grown - up daughters]34-one of them . [Sir William]8-sir william and [Lady Lucas]9-lady lucas are determined to go , merely on that account , for in general , [you]13-mr. bennet know , [they]32-five grown - up daughters visit [no newcomers]35-no newcomers . Indeed [you]13-mr. bennet must go , for it will be impossible for _ [us]36-us _ to visit [him]7-mr. bingley if [you]13-mr. bennet do not . β[14-my dear] β [You]13-mr. bennet are over - scrupulous , surely . [I]13-mr. bennet dare say [Mr. Bingley]7-mr. bingley will be very glad to see [you]13-mr. bennet ; and [I]13-mr. bennet will send a few lines by [you]13-mr. bennet to assure [him]7-mr. bingley of [my]13-mr. bennet hearty consent to [his]7-mr. bingley marrying whichever [he]7-mr. bingley chooses of [the girls]29-the girls ; though [I]13-mr. bennet must throw in a good word for [[my]13-mr. bennet little Lizzy]38-my little lizzy . β[13-mr. bennet] β [I]14-my dear desire [you]13-mr. bennet will do no such thing . 
[Lizzy]10-lizzy is not a bit better than the others ; and [I]14-my dear am sure [she]10-lizzy is not half so handsome as [Jane]11-jane , nor half so good - humoured as [Lydia]12-lydia . But [you]13-mr. bennet are always giving _ [her]10-lizzy _ the preference . β[14-my dear] β [They]29-the girls have none of [them]29-the girls much to recommend [them]29-the girls , β[21-my] replied [he]21-my ; β [they]29-the girls are all silly and ignorant like [other girls]39-other girls ; but [Lizzy]10-lizzy has something more of quickness than [[her]10-lizzy sisters]40-her sisters . β[21-my] β [Mr. Bennet]13-mr. bennet , how can [you]13-mr. bennet abuse [[your]13-mr. bennet own children]29-the girls in such a way ? [You]13-mr. bennet take delight in vexing [me]14-my dear . [You]13-mr. bennet have no compassion for [my]14-my dear poor nerves . β[14-my dear] β [You]13-mr. bennet mistake [me]21-my , [[my]21-my dear]13-mr. bennet . [I]21-my have a high respect for [your]13-mr. bennet nerves . [They]29-the girls are [[my]21-my old friends]29-the girls . [I]21-my have heard [you]13-mr. bennet mention them with consideration these last twenty years at least . β[21-my]
\ No newline at end of file
diff --git a/examples/pride.short_json/pride.entities b/examples/pride.short_json/pride.entities
new file mode 100644
index 0000000..d7f5a94
--- /dev/null
+++ b/examples/pride.short_json/pride.entities
@@ -0,0 +1,174 @@
+COREF start_token end_token prop cat text
+13 8 16 NOM PER a single man in possession of a good fortune
+13 23 24 NOM PER a wife
+13 34 36 NOM PER such a man
+13 40 40 PRON PER his
+15 43 44 NOM FAC a neighbourhood
+16 56 58 NOM PER the surrounding families
+13 61 61 PRON PER he
+17 68 74 NOM PER some one or other of their daughters
+16 73 73 PRON PER their
+18 73 74 NOM PER their daughters
+13 77 77 PRON PER My
+13 77 78 NOM PER My dear
+13 79 80 PROP PER Mr. Bennet
+13 84 84 PRON PER his
+13 84 85 NOM PER his lady
+13 87 87 PRON PER him
+13 93 93 PRON PER you
+0 96 97 PROP FAC Netherfield Park
+13 104 105 PROP PER Mr. Bennet
+13 108 108 PRON PER he
+13 119 119 PRON PER she
+4 123 124 PROP PER Mrs. Long
+0 128 128 NOM FAC here
+4 131 131 PRON PER she
+13 133 133 PRON PER me
+13 139 140 PROP PER Mr. Bennet
+13 147 147 PRON PER you
+13 159 159 PRON PER his
+13 159 160 NOM PER his wife
+13 165 165 PRON PER You
+13 170 170 PRON PER me
+13 173 173 PRON PER I
+13 190 190 PRON PER my
+13 190 191 NOM PER my dear
+13 193 193 PRON PER you
+4 197 198 PROP PER Mrs. Long
+1 201 201 PROP GPE Netherfield
+22 205 210 NOM PER a young man of large fortune
+23 212 215 NOM LOC the north of England
+2 215 215 PROP GPE England
+22 218 218 PRON PER he
+1 230 231 NOM FAC the place
+22 242 242 PRON PER he
+5 245 246 PROP PER Mr. Morris
+22 250 250 PRON PER he
+6 256 256 PROP PER Michaelmas
+24 259 262 NOM PER some of his servants
+22 261 261 PRON PER his
+25 261 262 NOM PER his servants
+1 267 268 NOM FAC the house
+22 280 280 PRON PER his
+7 285 285 PROP PER Bingley
+22 290 290 PRON PER he
+13 301 301 PRON PER my
+14 301 302 NOM PER my dear
+22 308 313 NOM PER A single man of large fortune
+26 327 327 PRON PER our
+27 327 328 NOM PER our girls
+27 339 339 PRON PER them
+14 343 343 PRON PER My
+13 343 344 NOM PER My dear
+13 345 346 PROP PER Mr. Bennet
+13 350 350 PRON PER his
+14 350 351 NOM PER his wife
+13 356 356 PRON PER you
+13 361 361 PRON PER You
+14 365 365 PRON PER I
+22 369 369 PRON PER his
+27 373 373 PRON PER them
+22 379 379 PRON PER his
+0 383 383 NOM FAC here
+13 393 393 PRON PER you
+22 403 403 PRON PER he
+27 413 413 PRON PER them
+13 417 417 PRON PER you
+22 420 420 PRON PER him
+22 424 424 PRON PER he
+13 429 429 PRON PER I
+13 436 436 PRON PER You
+29 438 439 NOM PER the girls
+13 444 444 PRON PER you
+27 447 447 PRON PER them
+27 449 449 PRON PER themselves
+13 460 460 PRON PER you
+27 467 467 PRON PER them
+7 469 470 PROP PER Mr. Bingley
+13 473 473 PRON PER you
+30 477 478 NOM PER the party
+14 482 482 PRON PER My
+13 482 483 NOM PER My dear
+13 485 485 PRON PER you
+14 487 487 PRON PER me
+14 489 489 PRON PER I
+14 495 495 PRON PER my
+14 501 501 PRON PER I
+31 512 513 NOM PER a woman
+32 515 519 NOM PER five grown - up daughters
+31 521 521 PRON PER she
+31 528 528 PRON PER her
+31 538 539 NOM PER a woman
+14 553 553 PRON PER my
+13 553 554 NOM PER my dear
+13 556 556 PRON PER you
+7 562 563 PROP PER Mr. Bingley
+7 565 565 PRON PER he
+33 568 569 NOM FAC the neighbourhood
+13 577 577 PRON PER I
+13 581 581 PRON PER I
+13 583 583 PRON PER you
+13 589 589 PRON PER your
+32 589 590 NOM PER your daughters
+34 601 603 NOM PER one of them
+32 603 603 PRON PER them
+8 605 606 PROP PER Sir William
+9 608 609 PROP PER Lady Lucas
+13 624 624 PRON PER you
+32 627 627 PRON PER they
+35 629 630 NOM PER no newcomers
+13 633 633 PRON PER you
+36 644 644 PRON PER us
+7 648 648 PRON PER him
+13 650 650 PRON PER you
+13 656 656 PRON PER You
+13 664 664 PRON PER I
+7 667 668 PROP PER Mr. Bingley
+13 675 675 PRON PER you
+13 678 678 PRON PER I
+13 685 685 PRON PER you
+7 688 688 PRON PER him
+13 690 690 PRON PER my
+7 694 694 PRON PER his
+7 697 697 PRON PER he
+29 700 701 NOM PER the girls
+13 704 704 PRON PER I
+13 712 712 PRON PER my
+38 712 714 NOM PER my little Lizzy
+14 718 718 PRON PER I
+13 720 720 PRON PER you
+10 727 727 PROP PER Lizzy
+14 738 738 PRON PER I
+10 741 741 PRON PER she
+11 748 748 PROP PER Jane
+12 757 757 PROP PER Lydia
+13 760 760 PRON PER you
+10 765 765 PRON PER her
+29 772 772 PRON PER They
+29 776 776 PRON PER them
+29 780 780 PRON PER them
+21 784 784 PRON PER he
+29 787 787 PRON PER they
+39 794 795 NOM PER other girls
+10 798 798 PROP PER Lizzy
+10 805 805 PRON PER her
+40 805 806 NOM PER her sisters
+13 810 811 PROP PER Mr. Bennet
+13 815 815 PRON PER you
+13 817 817 PRON PER your
+29 817 819 NOM PER your own children
+13 825 825 PRON PER You
+14 830 830 PRON PER me
+13 832 832 PRON PER You
+14 837 837 PRON PER my
+13 843 843 PRON PER You
+21 845 845 PRON PER me
+21 847 847 PRON PER my
+13 847 848 NOM PER my dear
+21 850 850 PRON PER I
+13 856 856 PRON PER your
+29 859 859 PRON PER They
+21 861 861 PRON PER my
+29 861 863 NOM PER my old friends
+21 865 865 PRON PER I
+13 868 868 PRON PER you
diff --git a/examples/pride.short_json/pride.quotes b/examples/pride.short_json/pride.quotes
new file mode 100644
index 0000000..b816d48
--- /dev/null
+++ b/examples/pride.short_json/pride.quotes
@@ -0,0 +1,29 @@
+quote_start quote_end mention_start mention_end mention_phrase char_id quote
+76 82 84 85 his lady 13 β My dear Mr. Bennet , β
+91 103 84 85 his lady 13 β have you heard that Netherfield Park is let at last ? β
+112 117 119 119 she 13 β But it is , β
+121 138 119 119 she 13 β for Mrs. Long has just been here , and she told me all about it . β
+145 157 159 160 his wife 13 β Do you not want to know who has taken it ? β
+163 181 139 140 Mr. Bennet 13 β _ You _ want to tell me , and I have no objection to hearing it . β
+187 276 139 140 Mr. Bennet 13 β Why , my dear , you must know , Mrs. Long says that Netherfield is taken by a young man of large fortune from the north of England ; that he came down on Monday in a chaise and four to see the place , and was so much delighted with it , that he agreed with Mr. Morris immediately ; that he is to take possession before Michaelmas , and some of his servants are to be in the house by the end of next week . β
+277 283 139 140 Mr. Bennet 13 β What is his name ? β
+284 287 139 140 Mr. Bennet 13 β Bingley . β
+288 295 139 140 Mr. Bennet 13 β Is he married or single ? β
+296 330 139 140 Mr. Bennet 13 β Oh ! Single , my dear , to be sure ! A single man of large fortune ; four or five thousand a year . What a fine thing for our girls ! β
+331 341 139 140 Mr. Bennet 13 β How so ? How can it affect them ? β
+342 348 350 351 his wife 14 β My dear Mr. Bennet , β
+353 375 350 351 his wife 14 β how can you be so tiresome ! You must know that I am thinking of his marrying one of them . β
+376 385 139 140 Mr. Bennet 13 β Is that his design in settling here ? β
+386 427 350 351 his wife 14 β Design ! Nonsense , how can you talk so ! But it is very likely that he _ may _ fall in love with one of them , and therefore you must visit him as soon as he comes . β
+428 480 139 140 Mr. Bennet 13 β I see no occasion for that . You and the girls may go , or you may send them by themselves , which perhaps will be still better , for as you are as handsome as any of them , Mr. Bingley may like you the best of the party . β
+481 532 350 351 his wife 14 β My dear , you flatter me . I certainly _ have _ had my share of beauty , but I do not pretend to be anything extraordinary now . When a woman has five grown - up daughters , she ought to give over thinking of her own beauty . β
+533 549 139 140 Mr. Bennet 13 β In such cases , a woman has not often much beauty to think of . β
+550 571 350 351 his wife 14 β But , my dear , you must indeed go and see Mr. Bingley when he comes into the neighbourhood . β
+572 585 139 140 Mr. Bennet 13 β It is more than I engage for , I assure you . β
+586 654 350 351 his wife 14 β But consider your daughters . Only think what an establishment it would be for one of them . Sir William and Lady Lucas are determined to go , merely on that account , for in general , you know , they visit no newcomers . Indeed you must go , for it will be impossible for _ us _ to visit him if you do not . β
+655 716 139 140 Mr. Bennet 13 β You are over - scrupulous , surely . I dare say Mr. Bingley will be very glad to see you ; and I will send a few lines by you to assure him of my hearty consent to his marrying whichever he chooses of the girls ; though I must throw in a good word for my little Lizzy . β
+717 770 350 351 his wife 14 β I desire you will do no such thing . Lizzy is not a bit better than the others ; and I am sure she is not half so handsome as Jane , nor half so good - humoured as Lydia . But you are always giving _ her _ the preference . β
+771 782 784 784 he 21 β They have none of them much to recommend them , β
+786 808 784 784 he 21 β they are all silly and ignorant like other girls ; but Lizzy has something more of quickness than her sisters . β
+809 841 350 351 his wife 14 β Mr. Bennet , how can you abuse your own children in such a way ? You take delight in vexing me . You have no compassion for my poor nerves . β
+842 880 784 784 he 21 β You mistake me , my dear . I have a high respect for your nerves . They are my old friends . I have heard you mention them with consideration these last twenty years at least . β
diff --git a/examples/pride.short_json/pride.sentences.json b/examples/pride.short_json/pride.sentences.json
new file mode 100644
index 0000000..3df6b00
--- /dev/null
+++ b/examples/pride.short_json/pride.sentences.json
@@ -0,0 +1,582 @@
+{
+ "metadata": {
+ "generated_by": "BookNLP",
+ "generated_at": "2025-07-15 20:25:56",
+ "generated_by_user": "DrewThomasson",
+ "document_id": "pride",
+ "total_sentences": 41,
+ "total_characters": 13
+ },
+ "characters": [
+ {
+ "character_id": "narrator",
+ "canonical_name": "narrator",
+ "inferred_gender": null,
+ "mention_count": 0
+ },
+ {
+ "character_id": 13,
+ "canonical_name": "mr. bennet",
+ "inferred_gender": {
+ "inference": {
+ "he/him/his": 0.875,
+ "she/her": 0.125,
+ "they/them/their": 0,
+ "xe/xem/xyr/xir": 0,
+ "ze/zem/zir/hir": 0
+ },
+ "argmax": "he/him/his",
+ "max": 0.875,
+ "total": 8
+ },
+ "mention_count": 69
+ },
+ {
+ "character_id": 14,
+ "canonical_name": "my dear",
+ "inferred_gender": null,
+ "mention_count": 14
+ },
+ {
+ "character_id": 22,
+ "canonical_name": "a young man of large fortune",
+ "inferred_gender": {
+ "inference": {
+ "he/him/his": 1.0,
+ "she/her": 0,
+ "they/them/their": 0,
+ "xe/xem/xyr/xir": 0,
+ "ze/zem/zir/hir": 0
+ },
+ "argmax": "he/him/his",
+ "max": 1.0,
+ "total": 11
+ },
+ "mention_count": 13
+ },
+ {
+ "character_id": 7,
+ "canonical_name": "mr. bingley",
+ "inferred_gender": {
+ "inference": {
+ "he/him/his": 0.811,
+ "she/her": 0.113,
+ "they/them/their": 0.077,
+ "xe/xem/xyr/xir": 0.0,
+ "ze/zem/zir/hir": 0.0
+ },
+ "argmax": "he/him/his",
+ "max": 0.811,
+ "total": 300001.981
+ },
+ "mention_count": 9
+ },
+ {
+ "character_id": 29,
+ "canonical_name": "the girls",
+ "inferred_gender": {
+ "inference": {
+ "he/him/his": 0,
+ "she/her": 0,
+ "they/them/their": 1.0,
+ "xe/xem/xyr/xir": 0,
+ "ze/zem/zir/hir": 0
+ },
+ "argmax": "they/them/their",
+ "max": 1.0,
+ "total": 5
+ },
+ "mention_count": 9
+ },
+ {
+ "character_id": 27,
+ "canonical_name": "our girls",
+ "inferred_gender": {
+ "inference": {
+ "he/him/his": 0,
+ "she/her": 0,
+ "they/them/their": 1.0,
+ "xe/xem/xyr/xir": 0,
+ "ze/zem/zir/hir": 0
+ },
+ "argmax": "they/them/their",
+ "max": 1.0,
+ "total": 5
+ },
+ "mention_count": 7
+ },
+ {
+ "character_id": 21,
+ "canonical_name": "my",
+ "inferred_gender": {
+ "inference": {
+ "he/him/his": 1.0,
+ "she/her": 0,
+ "they/them/their": 0,
+ "xe/xem/xyr/xir": 0,
+ "ze/zem/zir/hir": 0
+ },
+ "argmax": "he/him/his",
+ "max": 1.0,
+ "total": 1
+ },
+ "mention_count": 6
+ },
+ {
+ "character_id": 10,
+ "canonical_name": "lizzy",
+ "inferred_gender": {
+ "inference": {
+ "he/him/his": 0.061,
+ "she/her": 0.757,
+ "they/them/their": 0.061,
+ "xe/xem/xyr/xir": 0.061,
+ "ze/zem/zir/hir": 0.061
+ },
+ "argmax": "she/her",
+ "max": 0.757,
+ "total": 1.644
+ },
+ "mention_count": 5
+ },
+ {
+ "character_id": 31,
+ "canonical_name": "a woman",
+ "inferred_gender": {
+ "inference": {
+ "he/him/his": 0,
+ "she/her": 1.0,
+ "they/them/their": 0,
+ "xe/xem/xyr/xir": 0,
+ "ze/zem/zir/hir": 0
+ },
+ "argmax": "she/her",
+ "max": 1.0,
+ "total": 2
+ },
+ "mention_count": 4
+ },
+ {
+ "character_id": 32,
+ "canonical_name": "five grown - up daughters",
+ "inferred_gender": {
+ "inference": {
+ "he/him/his": 0,
+ "she/her": 0,
+ "they/them/their": 1.0,
+ "xe/xem/xyr/xir": 0,
+ "ze/zem/zir/hir": 0
+ },
+ "argmax": "they/them/their",
+ "max": 1.0,
+ "total": 2
+ },
+ "mention_count": 4
+ },
+ {
+ "character_id": 4,
+ "canonical_name": "mrs. long",
+ "inferred_gender": {
+ "inference": {
+ "he/him/his": 0.103,
+ "she/her": 0.831,
+ "they/them/their": 0.066,
+ "xe/xem/xyr/xir": 0.0,
+ "ze/zem/zir/hir": 0.0
+ },
+ "argmax": "she/her",
+ "max": 0.831,
+ "total": 200001.35
+ },
+ "mention_count": 3
+ },
+ {
+ "character_id": 16,
+ "canonical_name": "the surrounding families",
+ "inferred_gender": {
+ "inference": {
+ "he/him/his": 0,
+ "she/her": 0,
+ "they/them/their": 1.0,
+ "xe/xem/xyr/xir": 0,
+ "ze/zem/zir/hir": 0
+ },
+ "argmax": "they/them/their",
+ "max": 1.0,
+ "total": 1
+ },
+ "mention_count": 2
+ }
+ ],
+ "sentences": [
+ {
+ "sentence_id": 0,
+ "text": "It is a truth universally acknowledged , that a single man in possession of a good fortune , must be in want of a wife .",
+ "speaker": "narrator",
+ "speaker_id": "narrator",
+ "token_count": 26,
+ "start_token": 0,
+ "end_token": 25
+ },
+ {
+ "sentence_id": 1,
+ "text": "However little known the feelings or views of such a man may be on his first entering a neighbourhood , this truth is so well fixed in the minds of the surrounding families , that he is considered the rightful property of some one or other of their daughters .",
+ "speaker": "narrator",
+ "speaker_id": "narrator",
+ "token_count": 50,
+ "start_token": 26,
+ "end_token": 75
+ },
+ {
+ "sentence_id": 2,
+ "text": "β My dear Mr. Bennet , β said his lady to him one day , β have you heard that Netherfield Park is let at last ? β Mr. Bennet replied that he had not .",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 36,
+ "start_token": 76,
+ "end_token": 111
+ },
+ {
+ "sentence_id": 3,
+ "text": "β But it is , β returned she ; β for Mrs. Long has just been here , and she told me all about it . β",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 27,
+ "start_token": 112,
+ "end_token": 138
+ },
+ {
+ "sentence_id": 4,
+ "text": "Mr. Bennet made no answer .",
+ "speaker": "narrator",
+ "speaker_id": "narrator",
+ "token_count": 6,
+ "start_token": 139,
+ "end_token": 144
+ },
+ {
+ "sentence_id": 5,
+ "text": "β Do you not want to know who has taken it ? β cried his wife impatiently .",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 18,
+ "start_token": 145,
+ "end_token": 162
+ },
+ {
+ "sentence_id": 6,
+ "text": "β _ You _ want to tell me , and I have no objection to hearing it . β",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 19,
+ "start_token": 163,
+ "end_token": 181
+ },
+ {
+ "sentence_id": 7,
+ "text": "This was invitation enough .",
+ "speaker": "narrator",
+ "speaker_id": "narrator",
+ "token_count": 5,
+ "start_token": 182,
+ "end_token": 186
+ },
+ {
+ "sentence_id": 8,
+ "text": "β Why , my dear , you must know , Mrs. Long says that Netherfield is taken by a young man of large fortune from the north of England ; that he came down on Monday in a chaise and four to see the place , and was so much delighted with it , that he agreed with Mr. Morris immediately ; that he is to take possession before Michaelmas , and some of his servants are to be in the house by the end of next week . β",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 90,
+ "start_token": 187,
+ "end_token": 276
+ },
+ {
+ "sentence_id": 9,
+ "text": "β What is his name ? β β Bingley . β",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 11,
+ "start_token": 277,
+ "end_token": 287
+ },
+ {
+ "sentence_id": 10,
+ "text": "β Is he married or single ? β β Oh !",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 11,
+ "start_token": 288,
+ "end_token": 298
+ },
+ {
+ "sentence_id": 11,
+ "text": "Single , my dear , to be sure !",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 9,
+ "start_token": 299,
+ "end_token": 307
+ },
+ {
+ "sentence_id": 12,
+ "text": "A single man of large fortune ; four or five thousand a year .",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 14,
+ "start_token": 308,
+ "end_token": 321
+ },
+ {
+ "sentence_id": 13,
+ "text": "What a fine thing for our girls ! β β How so ?",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 13,
+ "start_token": 322,
+ "end_token": 334
+ },
+ {
+ "sentence_id": 14,
+ "text": "How can it affect them ? β β My dear Mr. Bennet , β replied his wife , β how can you be so tiresome !",
+ "speaker": "narrator",
+ "speaker_id": "narrator",
+ "token_count": 26,
+ "start_token": 335,
+ "end_token": 360
+ },
+ {
+ "sentence_id": 15,
+ "text": "You must know that I am thinking of his marrying one of them . β β Is that his design in settling here ? β β Design !",
+ "speaker": "narrator",
+ "speaker_id": "narrator",
+ "token_count": 28,
+ "start_token": 361,
+ "end_token": 388
+ },
+ {
+ "sentence_id": 16,
+ "text": "Nonsense , how can you talk so !",
+ "speaker": "my dear",
+ "speaker_id": 14,
+ "token_count": 8,
+ "start_token": 389,
+ "end_token": 396
+ },
+ {
+ "sentence_id": 17,
+ "text": "But it is very likely that he _ may _ fall in love with one of them , and therefore you must visit him as soon as he comes . β",
+ "speaker": "my dear",
+ "speaker_id": 14,
+ "token_count": 31,
+ "start_token": 397,
+ "end_token": 427
+ },
+ {
+ "sentence_id": 18,
+ "text": "β I see no occasion for that .",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 8,
+ "start_token": 428,
+ "end_token": 435
+ },
+ {
+ "sentence_id": 19,
+ "text": "You and the girls may go , or you may send them by themselves , which perhaps will be still better , for as you are as handsome as any of them , Mr. Bingley may like you the best of the party . β",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 45,
+ "start_token": 436,
+ "end_token": 480
+ },
+ {
+ "sentence_id": 20,
+ "text": "β My dear , you flatter me .",
+ "speaker": "my dear",
+ "speaker_id": 14,
+ "token_count": 8,
+ "start_token": 481,
+ "end_token": 488
+ },
+ {
+ "sentence_id": 21,
+ "text": "I certainly _ have _ had my share of beauty , but I do not pretend to be anything extraordinary now .",
+ "speaker": "my dear",
+ "speaker_id": 14,
+ "token_count": 22,
+ "start_token": 489,
+ "end_token": 510
+ },
+ {
+ "sentence_id": 22,
+ "text": "When a woman has five grown - up daughters , she ought to give over thinking of her own beauty . β",
+ "speaker": "my dear",
+ "speaker_id": 14,
+ "token_count": 22,
+ "start_token": 511,
+ "end_token": 532
+ },
+ {
+ "sentence_id": 23,
+ "text": "β In such cases , a woman has not often much beauty to think of . β",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 17,
+ "start_token": 533,
+ "end_token": 549
+ },
+ {
+ "sentence_id": 24,
+ "text": "β But , my dear , you must indeed go and see Mr. Bingley when he comes into the neighbourhood . β",
+ "speaker": "my dear",
+ "speaker_id": 14,
+ "token_count": 22,
+ "start_token": 550,
+ "end_token": 571
+ },
+ {
+ "sentence_id": 25,
+ "text": "β It is more than I engage for , I assure you . β",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 14,
+ "start_token": 572,
+ "end_token": 585
+ },
+ {
+ "sentence_id": 26,
+ "text": "β But consider your daughters .",
+ "speaker": "my dear",
+ "speaker_id": 14,
+ "token_count": 6,
+ "start_token": 586,
+ "end_token": 591
+ },
+ {
+ "sentence_id": 27,
+ "text": "Only think what an establishment it would be for one of them .",
+ "speaker": "my dear",
+ "speaker_id": 14,
+ "token_count": 13,
+ "start_token": 592,
+ "end_token": 604
+ },
+ {
+ "sentence_id": 28,
+ "text": "Sir William and Lady Lucas are determined to go , merely on that account , for in general , you know , they visit no newcomers .",
+ "speaker": "my dear",
+ "speaker_id": 14,
+ "token_count": 27,
+ "start_token": 605,
+ "end_token": 631
+ },
+ {
+ "sentence_id": 29,
+ "text": "Indeed you must go , for it will be impossible for _ us _ to visit him if you do not . β",
+ "speaker": "my dear",
+ "speaker_id": 14,
+ "token_count": 23,
+ "start_token": 632,
+ "end_token": 654
+ },
+ {
+ "sentence_id": 30,
+ "text": "β You are over - scrupulous , surely .",
+ "speaker": "mr. bennet",
+ "speaker_id": 13,
+ "token_count": 9,
+ "start_token": 655,
+ "end_token": 663
+ },
+ {
+ "sentence_id": 31,
+ "text": "I dare say Mr. Bingley will be very glad to see you ; and I will send a few lines by you to assure him of my hearty consent to his marrying whichever he chooses of the girls ; though I must throw in a good word for my little Lizzy . β β I desire you will do no such thing .",
+ "speaker": "narrator",
+ "speaker_id": "narrator",
+ "token_count": 63,
+ "start_token": 664,
+ "end_token": 726
+ },
+ {
+ "sentence_id": 32,
+ "text": "Lizzy is not a bit better than the others ; and I am sure she is not half so handsome as Jane , nor half so good - humoured as Lydia .",
+ "speaker": "my dear",
+ "speaker_id": 14,
+ "token_count": 32,
+ "start_token": 727,
+ "end_token": 758
+ },
+ {
+ "sentence_id": 33,
+ "text": "But you are always giving _ her _ the preference . β",
+ "speaker": "my dear",
+ "speaker_id": 14,
+ "token_count": 12,
+ "start_token": 759,
+ "end_token": 770
+ },
+ {
+ "sentence_id": 34,
+ "text": "β They have none of them much to recommend them , β replied he ; β they are all silly and ignorant like other girls ; but Lizzy has something more of quickness than her sisters . β β Mr. Bennet , how can you abuse your own children in such a way ?",
+ "speaker": "narrator",
+ "speaker_id": "narrator",
+ "token_count": 54,
+ "start_token": 771,
+ "end_token": 824
+ },
+ {
+ "sentence_id": 35,
+ "text": "You take delight in vexing me .",
+ "speaker": "my dear",
+ "speaker_id": 14,
+ "token_count": 7,
+ "start_token": 825,
+ "end_token": 831
+ },
+ {
+ "sentence_id": 36,
+ "text": "You have no compassion for my poor nerves . β",
+ "speaker": "my dear",
+ "speaker_id": 14,
+ "token_count": 10,
+ "start_token": 832,
+ "end_token": 841
+ },
+ {
+ "sentence_id": 37,
+ "text": "β You mistake me , my dear .",
+ "speaker": "my",
+ "speaker_id": 21,
+ "token_count": 8,
+ "start_token": 842,
+ "end_token": 849
+ },
+ {
+ "sentence_id": 38,
+ "text": "I have a high respect for your nerves .",
+ "speaker": "my",
+ "speaker_id": 21,
+ "token_count": 9,
+ "start_token": 850,
+ "end_token": 858
+ },
+ {
+ "sentence_id": 39,
+ "text": "They are my old friends .",
+ "speaker": "my",
+ "speaker_id": 21,
+ "token_count": 6,
+ "start_token": 859,
+ "end_token": 864
+ },
+ {
+ "sentence_id": 40,
+ "text": "I have heard you mention them with consideration these last twenty years at least . β",
+ "speaker": "my",
+ "speaker_id": 21,
+ "token_count": 16,
+ "start_token": 865,
+ "end_token": 880
+ }
+ ]
+}
\ No newline at end of file
diff --git a/examples/pride.short_json/pride.supersense b/examples/pride.short_json/pride.supersense
new file mode 100644
index 0000000..a9a24c5
--- /dev/null
+++ b/examples/pride.short_json/pride.supersense
@@ -0,0 +1,208 @@
+start_token end_token supersense_category text
+1 1 verb.stative is
+3 3 noun.cognition truth
+5 5 verb.cognition acknowledged
+10 10 noun.person man
+16 16 noun.possession fortune
+19 19 verb.stative be
+21 21 noun.state want
+24 24 noun.person wife
+30 30 noun.feeling feelings
+32 32 noun.cognition views
+36 36 noun.person man
+38 38 verb.stative be
+42 42 verb.motion entering
+44 44 noun.location neighbourhood
+47 47 noun.cognition truth
+51 51 verb.cognition fixed
+54 54 noun.cognition minds
+58 58 noun.group families
+63 63 verb.cognition considered
+66 66 noun.possession property
+74 74 noun.person daughters
+79 80 noun.person Mr. Bennet
+83 83 verb.communication said
+85 85 noun.person lady
+89 89 noun.time day
+94 94 verb.cognition heard
+96 97 noun.location Netherfield Park
+99 99 verb.social let
+104 105 noun.person Mr. Bennet
+106 106 verb.communication replied
+115 115 verb.stative is
+118 118 verb.communication returned
+123 124 noun.person Mrs. Long
+127 127 verb.stative been
+132 132 verb.communication told
+139 140 noun.person Mr. Bennet
+141 141 verb.cognition made
+143 143 noun.communication answer
+149 149 verb.emotion want
+151 151 verb.cognition know
+154 154 verb.possession taken
+158 158 verb.communication cried
+160 160 noun.person wife
+167 167 verb.emotion want
+169 169 verb.communication tell
+174 174 verb.possession have
+176 176 noun.communication objection
+178 178 verb.perception hearing
+183 183 verb.stative was
+191 191 noun.person dear
+195 195 verb.cognition know
+197 198 noun.person Mrs. Long
+199 199 verb.communication says
+201 201 noun.location Netherfield
+203 203 verb.possession taken
+207 207 noun.person man
+210 210 noun.possession fortune
+213 213 noun.location north
+215 215 noun.location England
+219 219 verb.motion came
+222 222 noun.time Monday
+225 225 noun.artifact chaise
+229 229 verb.perception see
+231 231 noun.location place
+234 234 verb.stative was
+237 237 verb.emotion delighted
+243 243 verb.communication agreed
+245 246 noun.person Mr. Morris
+253 253 verb.possession take
+254 254 noun.act possession
+256 256 noun.person Michaelmas
+262 262 noun.person servants
+265 265 verb.stative be
+268 268 noun.artifact house
+271 271 noun.time end
+274 274 noun.time week
+279 279 verb.stative is
+281 281 noun.communication name
+285 285 noun.person Bingley
+302 302 noun.person dear
+310 310 noun.person man
+313 313 noun.attribute fortune
+320 320 noun.time year
+325 325 noun.artifact thing
+328 328 noun.person girls
+338 338 verb.change affect
+344 346 noun.person dear Mr. Bennet
+349 349 verb.communication replied
+351 351 noun.person wife
+363 363 verb.cognition know
+367 368 verb.cognition thinking of
+370 370 verb.social marrying
+380 380 noun.act design
+382 382 verb.stative settling
+387 387 noun.communication Design
+389 389 noun.communication Nonsense
+394 394 verb.communication talk
+399 399 verb.stative is
+407 409 verb.change fall in love
+419 419 verb.social visit
+425 425 verb.motion comes
+430 430 verb.cognition see
+432 432 noun.event occasion
+439 439 noun.person girls
+441 441 verb.motion go
+446 446 verb.communication send
+454 454 verb.stative be
+461 461 verb.stative are
+469 470 noun.person Mr. Bingley
+472 472 verb.emotion like
+478 478 noun.group party
+483 483 noun.person dear
+486 486 verb.communication flatter
+494 494 verb.possession had
+496 496 noun.possession share
+498 498 noun.attribute beauty
+504 504 verb.communication pretend
+506 506 verb.stative be
+513 513 noun.person woman
+514 514 verb.possession has
+519 519 noun.person daughters
+524 525 verb.possession give over
+526 527 verb.cognition thinking of
+530 530 noun.attribute beauty
+536 536 noun.event cases
+539 539 noun.person woman
+540 540 verb.possession has
+544 544 noun.attribute beauty
+546 547 verb.cognition think of
+554 554 noun.person dear
+559 559 verb.motion go
+561 561 verb.perception see
+562 563 noun.person Mr. Bingley
+566 566 verb.motion comes
+569 569 noun.location neighbourhood
+574 574 verb.stative is
+578 578 verb.consumption engage
+582 582 verb.communication assure
+588 588 verb.cognition consider
+590 590 noun.person daughters
+593 593 verb.cognition think
+596 596 noun.artifact establishment
+599 599 verb.stative be
+605 606 noun.person Sir William
+608 609 noun.person Lady Lucas
+613 613 verb.motion go
+625 625 verb.cognition know
+628 628 verb.social visit
+630 630 noun.person newcomers
+635 635 verb.motion go
+640 640 verb.stative be
+647 647 verb.social visit
+657 657 verb.stative are
+666 666 verb.communication say
+667 668 noun.person Mr. Bingley
+670 670 verb.stative be
+674 674 verb.social see
+680 680 verb.communication send
+683 683 noun.communication lines
+687 687 verb.communication assure
+692 692 noun.communication consent
+695 695 noun.act marrying
+698 698 verb.cognition chooses
+701 701 noun.person girls
+706 707 verb.cognition throw in
+710 710 noun.communication word
+714 714 noun.person Lizzy
+719 719 verb.emotion desire
+722 722 verb.social do
+727 727 noun.person Lizzy
+728 728 verb.stative is
+739 739 verb.stative am
+742 742 verb.stative is
+748 748 noun.person Jane
+757 757 noun.person Lydia
+763 763 verb.possession giving
+768 768 noun.communication preference
+773 773 verb.possession have
+779 779 verb.communication recommend
+783 783 verb.communication replied
+788 788 verb.stative are
+795 795 noun.person girls
+798 798 noun.person Lizzy
+799 799 verb.possession has
+803 803 noun.cognition quickness
+806 806 noun.person sisters
+810 811 noun.person Mr. Bennet
+816 816 verb.social abuse
+819 819 noun.person children
+823 823 noun.attribute way
+826 826 verb.cognition take
+827 827 noun.feeling delight
+829 829 verb.communication vexing
+833 833 verb.possession have
+835 835 noun.feeling compassion
+839 839 noun.state nerves
+844 844 verb.communication mistake
+848 848 noun.person dear
+851 851 verb.possession have
+854 854 noun.cognition respect
+857 857 noun.state nerves
+860 860 verb.stative are
+863 863 noun.person friends
+867 867 verb.perception heard
+869 869 verb.communication mention
+872 872 noun.cognition consideration
+876 876 noun.time years
diff --git a/examples/pride.short_json/pride.tokens b/examples/pride.short_json/pride.tokens
new file mode 100644
index 0000000..c34be2e
--- /dev/null
+++ b/examples/pride.short_json/pride.tokens
@@ -0,0 +1,882 @@
+paragraph_ID sentence_ID token_ID_within_sentence token_ID_within_document word lemma byte_onset byte_offset POS_tag fine_POS_tag dependency_relation syntactic_head_ID event
+0 0 0 0 It it 6 8 PRON PRP nsubj 1 O
+0 0 1 1 is be 9 11 AUX VBZ ROOT 1 O
+0 0 2 2 a a 12 13 DET DT det 3 O
+0 0 3 3 truth truth 14 19 NOUN NN attr 1 O
+0 0 4 4 universally universally 20 31 ADV RB advmod 5 O
+0 0 5 5 acknowledged acknowledge 32 44 VERB VBD acl 3 O
+0 0 6 6 , , 44 45 PUNCT , punct 3 O
+0 0 7 7 that that 46 50 SCONJ IN mark 19 O
+0 0 8 8 a a 51 52 DET DT det 10 O
+0 0 9 9 single single 53 59 ADJ JJ amod 10 O
+0 0 10 10 man man 60 63 NOUN NN nsubj 19 O
+0 0 11 11 in in 64 66 ADP IN prep 10 O
+0 0 12 12 possession possession 73 83 NOUN NN pobj 11 O
+0 0 13 13 of of 84 86 ADP IN prep 12 O
+0 0 14 14 a a 87 88 DET DT det 16 O
+0 0 15 15 good good 89 93 ADJ JJ amod 16 O
+0 0 16 16 fortune fortune 94 101 NOUN NN pobj 13 O
+0 0 17 17 , , 101 102 PUNCT , punct 10 O
+0 0 18 18 must must 103 107 AUX MD aux 19 O
+0 0 19 19 be be 108 110 AUX VB ccomp 1 O
+0 0 20 20 in in 111 113 ADP IN prep 19 O
+0 0 21 21 want want 114 118 NOUN NN pobj 20 O
+0 0 22 22 of of 119 121 ADP IN prep 21 O
+0 0 23 23 a a 122 123 DET DT det 24 O
+0 0 24 24 wife wife 124 128 NOUN NN pobj 22 O
+0 0 25 25 . . 128 129 PUNCT . punct 1 O
+1 1 0 26 However however 137 144 ADV RB advmod 28 O
+1 1 1 27 little little 145 151 ADJ JJ advmod 28 O
+1 1 2 28 known know 152 157 VERB VBD csubj 38 O
+1 1 3 29 the the 158 161 DET DT det 30 O
+1 1 4 30 feelings feeling 162 170 NOUN NNS dobj 28 O
+1 1 5 31 or or 171 173 CCONJ CC cc 30 O
+1 1 6 32 views view 174 179 NOUN NNS conj 30 O
+1 1 7 33 of of 180 182 ADP IN prep 30 O
+1 1 8 34 such such 183 187 DET PDT predet 36 O
+1 1 9 35 a a 188 189 DET DT det 36 O
+1 1 10 36 man man 190 193 NOUN NN pobj 33 O
+1 1 11 37 may may 194 197 AUX MD aux 38 O
+1 1 12 38 be be 198 200 AUX VB ccomp 48 O
+1 1 13 39 on on 207 209 ADP IN prep 38 O
+1 1 14 40 his his 210 213 PRON PRP$ poss 42 O
+1 1 15 41 first first 214 219 ADJ JJ advmod 42 O
+1 1 16 42 entering enter 220 228 VERB VBG pobj 39 O
+1 1 17 43 a a 229 230 DET DT det 44 O
+1 1 18 44 neighbourhood neighbourhood 231 244 NOUN NN dobj 42 O
+1 1 19 45 , , 244 245 PUNCT , punct 48 O
+1 1 20 46 this this 246 250 DET DT det 47 O
+1 1 21 47 truth truth 251 256 NOUN NN nsubj 48 O
+1 1 22 48 is be 257 259 AUX VBZ ROOT 48 O
+1 1 23 49 so so 260 262 ADV RB advmod 50 O
+1 1 24 50 well well 263 267 ADV RB advmod 51 O
+1 1 25 51 fixed fix 274 279 VERB VBN acomp 48 O
+1 1 26 52 in in 280 282 ADP IN prep 51 O
+1 1 27 53 the the 283 286 DET DT det 54 O
+1 1 28 54 minds mind 287 292 NOUN NNS pobj 52 O
+1 1 29 55 of of 293 295 ADP IN prep 54 O
+1 1 30 56 the the 296 299 DET DT det 58 O
+1 1 31 57 surrounding surround 300 311 VERB VBG amod 58 O
+1 1 32 58 families family 312 320 NOUN NNS pobj 55 O
+1 1 33 59 , , 320 321 PUNCT , punct 48 O
+1 1 34 60 that that 322 326 SCONJ IN mark 63 O
+1 1 35 61 he he 327 329 PRON PRP nsubjpass 63 O
+1 1 36 62 is be 330 332 AUX VBZ auxpass 63 O
+1 1 37 63 considered consider 339 349 VERB VBN advcl 48 O
+1 1 38 64 the the 350 353 DET DT det 66 O
+1 1 39 65 rightful rightful 354 362 ADJ JJ amod 66 O
+1 1 40 66 property property 363 371 NOUN NN oprd 63 O
+1 1 41 67 of of 372 374 ADP IN prep 66 O
+1 1 42 68 some some 375 379 DET DT det 69 O
+1 1 43 69 one one 380 383 NUM CD pobj 67 O
+1 1 44 70 or or 384 386 CCONJ CC cc 69 O
+1 1 45 71 other other 387 392 ADJ JJ conj 69 O
+1 1 46 72 of of 393 395 ADP IN prep 69 O
+1 1 47 73 their their 396 401 PRON PRP$ poss 74 O
+1 1 48 74 daughters daughter 408 417 NOUN NNS pobj 72 O
+1 1 49 75 . . 417 418 PUNCT . punct 48 O
+2 2 0 76 β " 426 427 PUNCT `` punct 83 O
+2 2 1 77 My my 427 429 PRON PRP$ poss 80 O
+2 2 2 78 dear dear 430 434 ADJ JJ amod 80 O
+2 2 3 79 Mr. Mr. 435 438 PROPN NNP compound 80 O
+2 2 4 80 Bennet Bennet 439 445 PROPN NNP npadvmod 83 O
+2 2 5 81 , , 445 446 PUNCT , punct 83 O
+2 2 6 82 β " 446 447 PUNCT '' punct 83 O
+2 2 7 83 said say 448 452 VERB VBD parataxis 94 EVENT
+2 2 8 84 his his 453 456 PRON PRP$ poss 85 O
+2 2 9 85 lady lady 457 461 NOUN NN nsubj 83 O
+2 2 10 86 to to 462 464 ADP IN prep 83 O
+2 2 11 87 him he 465 468 PRON PRP pobj 86 O
+2 2 12 88 one one 469 472 NUM CD nummod 89 O
+2 2 13 89 day day 473 476 NOUN NN npadvmod 83 O
+2 2 14 90 , , 476 477 PUNCT , punct 83 O
+2 2 15 91 β " 478 479 PUNCT `` punct 94 O
+2 2 16 92 have have 479 483 AUX VBP aux 94 O
+2 2 17 93 you you 484 487 PRON PRP nsubj 94 O
+2 2 18 94 heard hear 494 499 VERB VBD ccomp 106 O
+2 2 19 95 that that 500 504 SCONJ IN mark 99 O
+2 2 20 96 Netherfield Netherfield 505 516 PROPN NNP compound 97 O
+2 2 21 97 Park Park 517 521 PROPN NNP nsubjpass 99 O
+2 2 22 98 is be 522 524 AUX VBZ auxpass 99 O
+2 2 23 99 let let 525 528 VERB VBN ccomp 94 O
+2 2 24 100 at at 529 531 ADP IN prep 99 O
+2 2 25 101 last last 532 536 ADJ JJ pcomp 100 O
+2 2 26 102 ? ? 536 537 PUNCT . punct 106 O
+2 2 27 103 β " 537 538 PUNCT '' punct 106 O
+3 2 28 104 Mr. Mr. 546 549 PROPN NNP compound 105 O
+3 2 29 105 Bennet Bennet 550 556 PROPN NNP nsubj 106 O
+3 2 30 106 replied reply 557 564 VERB VBD ROOT 106 EVENT
+3 2 31 107 that that 565 569 SCONJ IN mark 109 O
+3 2 32 108 he he 570 572 PRON PRP nsubj 109 O
+3 2 33 109 had have 573 576 VERB VBD ccomp 106 O
+3 2 34 110 not not 577 580 PART RB neg 109 O
+3 2 35 111 . . 580 581 PUNCT . punct 106 O
+4 3 0 112 β " 589 590 PUNCT `` punct 115 O
+4 3 1 113 But but 590 593 CCONJ CC cc 115 O
+4 3 2 114 it it 594 596 PRON PRP nsubj 115 O
+4 3 3 115 is be 597 599 AUX VBZ ccomp 118 O
+4 3 4 116 , , 599 600 PUNCT , punct 118 O
+4 3 5 117 β " 600 601 PUNCT '' punct 118 O
+4 3 6 118 returned return 602 610 VERB VBD ccomp 127 EVENT
+4 3 7 119 she she 611 614 PRON PRP nsubj 118 O
+4 3 8 120 ; ; 614 615 PUNCT : punct 127 O
+4 3 9 121 β " 616 617 PUNCT `` punct 127 O
+4 3 10 122 for for 617 620 SCONJ IN mark 127 O
+4 3 11 123 Mrs. Mrs. 621 625 PROPN NNP compound 124 O
+4 3 12 124 Long Long 626 630 PROPN NNP nsubj 127 O
+4 3 13 125 has have 631 634 AUX VBZ aux 127 O
+4 3 14 126 just just 635 639 ADV RB advmod 127 O
+4 3 15 127 been be 640 644 AUX VBN ROOT 127 O
+4 3 16 128 here here 645 649 ADV RB advmod 127 EVENT
+4 3 17 129 , , 649 650 PUNCT , punct 127 O
+4 3 18 130 and and 651 654 CCONJ CC cc 127 O
+4 3 19 131 she she 661 664 PRON PRP nsubj 132 O
+4 3 20 132 told tell 665 669 VERB VBD conj 127 EVENT
+4 3 21 133 me I 670 672 PRON PRP dobj 132 O
+4 3 22 134 all all 673 676 PRON DT advmod 135 O
+4 3 23 135 about about 677 682 ADP IN prep 132 O
+4 3 24 136 it it 683 685 PRON PRP pobj 135 O
+4 3 25 137 . . 685 686 PUNCT . punct 132 O
+4 3 26 138 β " 686 687 PUNCT '' punct 132 O
+5 4 0 139 Mr. Mr. 695 698 PROPN NNP compound 140 O
+5 4 1 140 Bennet Bennet 699 705 PROPN NNP nsubj 141 O
+5 4 2 141 made make 706 710 VERB VBD ROOT 141 O
+5 4 3 142 no no 711 713 DET DT det 143 O
+5 4 4 143 answer answer 714 720 NOUN NN dobj 141 O
+5 4 5 144 . . 720 721 PUNCT . punct 141 O
+6 5 0 145 β " 729 730 PUNCT `` punct 158 O
+6 5 1 146 Do do 730 732 AUX VBP aux 149 O
+6 5 2 147 you you 733 736 PRON PRP nsubj 149 O
+6 5 3 148 not not 737 740 PART RB neg 149 O
+6 5 4 149 want want 741 745 VERB VB ccomp 158 O
+6 5 5 150 to to 746 748 PART TO aux 151 O
+6 5 6 151 know know 749 753 VERB VB xcomp 149 O
+6 5 7 152 who who 754 757 PRON WP nsubj 154 O
+6 5 8 153 has have 758 761 AUX VBZ aux 154 O
+6 5 9 154 taken take 762 767 VERB VBN ccomp 151 O
+6 5 10 155 it it 768 770 PRON PRP dobj 154 O
+6 5 11 156 ? ? 770 771 PUNCT . punct 158 O
+6 5 12 157 β " 771 772 PUNCT '' punct 158 O
+6 5 13 158 cried cry 773 778 VERB VBD ROOT 158 EVENT
+6 5 14 159 his his 779 782 PRON PRP$ poss 160 O
+6 5 15 160 wife wife 783 787 NOUN NN dobj 158 O
+6 5 16 161 impatiently impatiently 794 805 ADV RB advmod 158 O
+6 5 17 162 . . 805 806 PUNCT . punct 158 O
+7 6 0 163 β " 814 815 PUNCT `` punct 167 O
+7 6 1 164 _ _ 815 816 PUNCT NFP dep 167 O
+7 6 2 165 You you 816 819 PRON PRP nsubj 167 O
+7 6 3 166 _ _ 819 820 PRON DT appos 165 O
+7 6 4 167 want want 821 825 VERB VBP ROOT 167 O
+7 6 5 168 to to 826 828 PART TO aux 169 O
+7 6 6 169 tell tell 829 833 VERB VB xcomp 167 O
+7 6 7 170 me I 834 836 PRON PRP dobj 169 O
+7 6 8 171 , , 836 837 PUNCT , punct 167 O
+7 6 9 172 and and 838 841 CCONJ CC cc 167 O
+7 6 10 173 I I 842 843 PRON PRP nsubj 174 O
+7 6 11 174 have have 844 848 VERB VBP conj 167 O
+7 6 12 175 no no 849 851 DET DT det 176 O
+7 6 13 176 objection objection 852 861 NOUN NN dobj 174 O
+7 6 14 177 to to 862 864 ADP IN prep 176 O
+7 6 15 178 hearing hear 865 872 VERB VBG pcomp 177 O
+7 6 16 179 it it 873 875 PRON PRP dobj 178 O
+7 6 17 180 . . 875 876 PUNCT . punct 174 O
+7 6 18 181 β " 876 877 PUNCT '' punct 174 O
+8 7 0 182 This this 885 889 PRON DT nsubj 183 O
+8 7 1 183 was be 890 893 AUX VBD ROOT 183 O
+8 7 2 184 invitation invitation 894 904 NOUN NN attr 183 O
+8 7 3 185 enough enough 905 911 ADV RB advmod 184 O
+8 7 4 186 . . 911 912 PUNCT . punct 183 O
+9 8 0 187 β " 920 921 PUNCT `` punct 199 O
+9 8 1 188 Why why 921 924 SCONJ WRB advmod 195 O
+9 8 2 189 , , 924 925 PUNCT , punct 195 O
+9 8 3 190 my my 926 928 PRON PRP$ poss 191 O
+9 8 4 191 dear dear 929 933 NOUN NN npadvmod 195 O
+9 8 5 192 , , 933 934 PUNCT , punct 195 O
+9 8 6 193 you you 935 938 PRON PRP nsubj 195 O
+9 8 7 194 must must 939 943 AUX MD aux 195 O
+9 8 8 195 know know 944 948 VERB VB ccomp 199 O
+9 8 9 196 , , 948 949 PUNCT , punct 199 O
+9 8 10 197 Mrs. Mrs. 950 954 PROPN NNP compound 198 O
+9 8 11 198 Long Long 955 959 PROPN NNP nsubj 199 O
+9 8 12 199 says say 960 964 VERB VBZ ROOT 199 EVENT
+9 8 13 200 that that 965 969 SCONJ IN mark 203 O
+9 8 14 201 Netherfield Netherfield 970 981 PROPN NNP nsubjpass 203 O
+9 8 15 202 is be 982 984 AUX VBZ auxpass 203 O
+9 8 16 203 taken take 991 996 VERB VBN ccomp 199 O
+9 8 17 204 by by 997 999 ADP IN agent 203 O
+9 8 18 205 a a 1000 1001 DET DT det 207 O
+9 8 19 206 young young 1002 1007 ADJ JJ amod 207 O
+9 8 20 207 man man 1008 1011 NOUN NN pobj 204 O
+9 8 21 208 of of 1012 1014 ADP IN prep 207 O
+9 8 22 209 large large 1015 1020 ADJ JJ amod 210 O
+9 8 23 210 fortune fortune 1021 1028 NOUN NN pobj 208 O
+9 8 24 211 from from 1029 1033 ADP IN prep 203 O
+9 8 25 212 the the 1034 1037 DET DT det 213 O
+9 8 26 213 north north 1038 1043 NOUN NN pobj 211 O
+9 8 27 214 of of 1044 1046 ADP IN prep 213 O
+9 8 28 215 England England 1047 1054 PROPN NNP pobj 214 O
+9 8 29 216 ; ; 1054 1055 PUNCT : punct 203 O
+9 8 30 217 that that 1062 1066 SCONJ IN mark 219 O
+9 8 31 218 he he 1067 1069 PRON PRP nsubj 219 O
+9 8 32 219 came come 1070 1074 VERB VBD conj 203 EVENT
+9 8 33 220 down down 1075 1079 ADP RP prt 219 O
+9 8 34 221 on on 1080 1082 ADP IN prep 219 O
+9 8 35 222 Monday Monday 1083 1089 PROPN NNP pobj 221 O
+9 8 36 223 in in 1090 1092 ADP IN prep 219 O
+9 8 37 224 a a 1093 1094 DET DT det 225 O
+9 8 38 225 chaise chaise 1095 1101 NOUN NN pobj 223 O
+9 8 39 226 and and 1102 1105 CCONJ CC cc 219 O
+9 8 40 227 four four 1106 1110 NUM CD conj 219 O
+9 8 41 228 to to 1111 1113 PART TO aux 229 O
+9 8 42 229 see see 1114 1117 VERB VB advcl 219 O
+9 8 43 230 the the 1118 1121 DET DT det 231 O
+9 8 44 231 place place 1128 1133 NOUN NN dobj 229 O
+9 8 45 232 , , 1133 1134 PUNCT , punct 219 O
+9 8 46 233 and and 1135 1138 CCONJ CC cc 219 O
+9 8 47 234 was be 1139 1142 AUX VBD conj 219 O
+9 8 48 235 so so 1143 1145 ADV RB advmod 236 O
+9 8 49 236 much much 1146 1150 ADV RB advmod 237 O
+9 8 50 237 delighted delighted 1151 1160 ADJ JJ acomp 234 EVENT
+9 8 51 238 with with 1161 1165 ADP IN prep 237 O
+9 8 52 239 it it 1166 1168 PRON PRP pobj 238 O
+9 8 53 240 , , 1168 1169 PUNCT , punct 234 O
+9 8 54 241 that that 1170 1174 SCONJ IN mark 243 O
+9 8 55 242 he he 1175 1177 PRON PRP nsubj 243 O
+9 8 56 243 agreed agree 1178 1184 VERB VBD conj 219 EVENT
+9 8 57 244 with with 1185 1189 ADP IN prep 243 O
+9 8 58 245 Mr. Mr. 1190 1193 PROPN NNP compound 246 O
+9 8 59 246 Morris Morris 1200 1206 PROPN NNP pobj 244 O
+9 8 60 247 immediately immediately 1207 1218 ADV RB advmod 243 O
+9 8 61 248 ; ; 1218 1219 PUNCT : punct 243 O
+9 8 62 249 that that 1220 1224 SCONJ IN mark 251 O
+9 8 63 250 he he 1225 1227 PRON PRP nsubj 251 O
+9 8 64 251 is be 1228 1230 AUX VBZ conj 243 O
+9 8 65 252 to to 1231 1233 PART TO aux 253 O
+9 8 66 253 take take 1234 1238 VERB VB xcomp 251 O
+9 8 67 254 possession possession 1239 1249 NOUN NN dobj 253 O
+9 8 68 255 before before 1250 1256 ADP IN prep 253 O
+9 8 69 256 Michaelmas Michaelmas 1263 1273 PROPN NNP pobj 255 O
+9 8 70 257 , , 1273 1274 PUNCT , punct 251 O
+9 8 71 258 and and 1275 1278 CCONJ CC cc 251 O
+9 8 72 259 some some 1279 1283 PRON DT nsubj 263 O
+9 8 73 260 of of 1284 1286 ADP IN prep 259 O
+9 8 74 261 his his 1287 1290 PRON PRP$ poss 262 O
+9 8 75 262 servants servant 1291 1299 NOUN NNS pobj 260 O
+9 8 76 263 are be 1300 1303 AUX VBP conj 251 O
+9 8 77 264 to to 1304 1306 PART TO aux 265 O
+9 8 78 265 be be 1307 1309 AUX VB xcomp 263 O
+9 8 79 266 in in 1310 1312 ADP IN prep 265 O
+9 8 80 267 the the 1313 1316 DET DT det 268 O
+9 8 81 268 house house 1317 1322 NOUN NN pobj 266 O
+9 8 82 269 by by 1323 1325 ADP IN prep 265 O
+9 8 83 270 the the 1332 1335 DET DT det 271 O
+9 8 84 271 end end 1336 1339 NOUN NN pobj 269 O
+9 8 85 272 of of 1340 1342 ADP IN prep 271 O
+9 8 86 273 next next 1343 1347 ADJ JJ amod 274 O
+9 8 87 274 week week 1348 1352 NOUN NN pobj 272 O
+9 8 88 275 . . 1352 1353 PUNCT . punct 199 O
+9 8 89 276 β " 1353 1354 PUNCT '' punct 199 O
+10 9 0 277 β " 1362 1363 PUNCT `` punct 279 O
+10 9 1 278 What what 1363 1367 PRON WP attr 279 O
+10 9 2 279 is be 1368 1370 AUX VBZ ROOT 279 O
+10 9 3 280 his his 1371 1374 PRON PRP$ poss 281 O
+10 9 4 281 name name 1375 1379 NOUN NN nsubj 279 O
+10 9 5 282 ? ? 1379 1380 PUNCT . punct 279 O
+10 9 6 283 β " 1380 1381 PUNCT '' punct 279 O
+11 9 7 284 β " 1389 1390 PUNCT `` punct 285 O
+11 9 8 285 Bingley Bingley 1390 1397 PROPN NNP nsubj 279 O
+11 9 9 286 . . 1397 1398 PUNCT . punct 279 O
+11 9 10 287 β " 1398 1399 PUNCT '' punct 279 O
+12 10 0 288 β " 1407 1408 PUNCT `` punct 289 O
+12 10 1 289 Is be 1408 1410 AUX VBZ ROOT 289 O
+12 10 2 290 he he 1411 1413 PRON PRP nsubj 289 O
+12 10 3 291 married married 1414 1421 ADJ JJ acomp 289 O
+12 10 4 292 or or 1422 1424 CCONJ CC cc 291 O
+12 10 5 293 single single 1425 1431 ADJ JJ conj 291 O
+12 10 6 294 ? ? 1431 1432 PUNCT . punct 289 O
+12 10 7 295 β " 1432 1433 PUNCT '' punct 289 O
+13 10 8 296 β " 1441 1442 PUNCT `` punct 289 O
+13 10 9 297 Oh oh 1442 1444 INTJ UH intj 289 O
+13 10 10 298 ! ! 1444 1445 PUNCT . punct 289 O
+13 11 0 299 Single single 1446 1452 ADJ JJ ROOT 299 O
+13 11 1 300 , , 1452 1453 PUNCT , punct 299 O
+13 11 2 301 my my 1454 1456 PRON PRP$ poss 302 O
+13 11 3 302 dear dear 1457 1461 NOUN NN appos 299 O
+13 11 4 303 , , 1461 1462 PUNCT , punct 299 O
+13 11 5 304 to to 1463 1465 PART TO aux 305 O
+13 11 6 305 be be 1466 1468 AUX VB relcl 299 O
+13 11 7 306 sure sure 1469 1473 ADJ JJ acomp 305 O
+13 11 8 307 ! ! 1473 1474 PUNCT . punct 299 O
+13 12 0 308 A a 1475 1476 DET DT det 310 O
+13 12 1 309 single single 1477 1483 ADJ JJ amod 310 O
+13 12 2 310 man man 1484 1487 NOUN NN ROOT 310 O
+13 12 3 311 of of 1488 1490 ADP IN prep 310 O
+13 12 4 312 large large 1491 1496 ADJ JJ amod 313 O
+13 12 5 313 fortune fortune 1497 1504 NOUN NN pobj 311 O
+13 12 6 314 ; ; 1504 1505 PUNCT : punct 310 O
+13 12 7 315 four four 1512 1516 NUM CD appos 310 O
+13 12 8 316 or or 1517 1519 CCONJ CC cc 315 O
+13 12 9 317 five five 1520 1524 NUM CD compound 318 O
+13 12 10 318 thousand thousand 1525 1533 NUM CD conj 315 O
+13 12 11 319 a a 1534 1535 DET DT det 320 O
+13 12 12 320 year year 1536 1540 NOUN NN npadvmod 318 O
+13 12 13 321 . . 1540 1541 PUNCT . punct 310 O
+13 13 0 322 What what 1542 1546 PRON WP det 325 O
+13 13 1 323 a a 1547 1548 DET DT det 325 O
+13 13 2 324 fine fine 1549 1553 ADJ JJ amod 325 O
+13 13 3 325 thing thing 1554 1559 NOUN NN attr 333 O
+13 13 4 326 for for 1560 1563 ADP IN prep 325 O
+13 13 5 327 our our 1564 1567 PRON PRP$ poss 328 O
+13 13 6 328 girls girl 1568 1573 NOUN NNS pobj 326 O
+13 13 7 329 ! ! 1573 1574 PUNCT . punct 325 O
+13 13 8 330 β " 1574 1575 PUNCT '' punct 325 O
+14 13 9 331 β " 1583 1584 PUNCT `` punct 333 O
+14 13 10 332 How how 1584 1587 SCONJ WRB advmod 333 O
+14 13 11 333 so so 1588 1590 ADV RB ROOT 333 O
+14 13 12 334 ? ? 1590 1591 PUNCT . punct 333 O
+14 14 0 335 How how 1592 1595 SCONJ WRB advmod 338 O
+14 14 1 336 can can 1596 1599 AUX MD aux 338 O
+14 14 2 337 it it 1600 1602 PRON PRP nsubj 338 O
+14 14 3 338 affect affect 1603 1609 VERB VB ccomp 349 O
+14 14 4 339 them they 1610 1614 PRON PRP dobj 338 O
+14 14 5 340 ? ? 1614 1615 PUNCT . punct 338 O
+14 14 6 341 β " 1615 1616 PUNCT '' punct 338 O
+15 14 7 342 β " 1624 1625 PUNCT `` punct 338 O
+15 14 8 343 My my 1625 1627 PRON PRP$ poss 346 O
+15 14 9 344 dear dear 1628 1632 ADJ JJ amod 346 O
+15 14 10 345 Mr. Mr. 1633 1636 PROPN NNP compound 346 O
+15 14 11 346 Bennet Bennet 1637 1643 PROPN NNP npadvmod 338 O
+15 14 12 347 , , 1643 1644 PUNCT , punct 349 O
+15 14 13 348 β " 1644 1645 PUNCT '' punct 349 O
+15 14 14 349 replied reply 1646 1653 VERB VBD ROOT 349 EVENT
+15 14 15 350 his his 1654 1657 PRON PRP$ poss 351 O
+15 14 16 351 wife wife 1658 1662 NOUN NN nsubj 349 O
+15 14 17 352 , , 1662 1663 PUNCT , punct 349 O
+15 14 18 353 β " 1664 1665 PUNCT `` punct 349 O
+15 14 19 354 how how 1665 1668 SCONJ WRB advmod 357 O
+15 14 20 355 can can 1669 1672 AUX MD aux 357 O
+15 14 21 356 you you 1673 1676 PRON PRP nsubj 357 O
+15 14 22 357 be be 1677 1679 AUX VB ccomp 349 O
+15 14 23 358 so so 1680 1682 ADV RB advmod 359 O
+15 14 24 359 tiresome tiresome 1689 1697 ADJ JJ acomp 357 O
+15 14 25 360 ! ! 1697 1698 PUNCT . punct 349 O
+15 15 0 361 You you 1699 1702 PRON PRP nsubj 363 O
+15 15 1 362 must must 1703 1707 AUX MD aux 363 O
+15 15 2 363 know know 1708 1712 VERB VB ROOT 363 O
+15 15 3 364 that that 1713 1717 SCONJ IN mark 367 O
+15 15 4 365 I I 1718 1719 PRON PRP nsubj 367 O
+15 15 5 366 am be 1720 1722 AUX VBP aux 367 O
+15 15 6 367 thinking think 1723 1731 VERB VBG ccomp 363 O
+15 15 7 368 of of 1732 1734 ADP IN prep 367 O
+15 15 8 369 his his 1735 1738 PRON PRP$ nsubj 370 O
+15 15 9 370 marrying marry 1739 1747 VERB VBG pcomp 368 O
+15 15 10 371 one one 1748 1751 NUM CD dobj 370 O
+15 15 11 372 of of 1752 1754 ADP IN prep 371 O
+15 15 12 373 them they 1761 1765 PRON PRP pobj 372 O
+15 15 13 374 . . 1765 1766 PUNCT . punct 363 O
+15 15 14 375 β " 1766 1767 PUNCT '' punct 363 O
+16 15 15 376 β " 1775 1776 PUNCT `` punct 377 O
+16 15 16 377 Is be 1776 1778 AUX VBZ ccomp 363 O
+16 15 17 378 that that 1779 1783 PRON DT nsubj 377 O
+16 15 18 379 his his 1784 1787 PRON PRP$ poss 380 O
+16 15 19 380 design design 1788 1794 NOUN NN attr 377 O
+16 15 20 381 in in 1795 1797 ADP IN prep 380 O
+16 15 21 382 settling settle 1798 1806 VERB VBG pcomp 381 O
+16 15 22 383 here here 1807 1811 ADV RB advmod 382 O
+16 15 23 384 ? ? 1811 1812 PUNCT . punct 377 O
+16 15 24 385 β " 1812 1813 PUNCT '' punct 377 O
+17 15 25 386 β " 1821 1822 PUNCT `` punct 387 O
+17 15 26 387 Design Design 1822 1828 PROPN NNP attr 377 O
+17 15 27 388 ! ! 1828 1829 PUNCT . punct 363 O
+17 16 0 389 Nonsense nonsense 1830 1838 NOUN NN npadvmod 394 O
+17 16 1 390 , , 1838 1839 PUNCT , punct 394 O
+17 16 2 391 how how 1840 1843 SCONJ WRB advmod 394 O
+17 16 3 392 can can 1844 1847 AUX MD aux 394 O
+17 16 4 393 you you 1848 1851 PRON PRP nsubj 394 O
+17 16 5 394 talk talk 1852 1856 VERB VB ROOT 394 O
+17 16 6 395 so so 1857 1859 ADV RB advmod 394 O
+17 16 7 396 ! ! 1859 1860 PUNCT . punct 394 O
+17 17 0 397 But but 1861 1864 CCONJ CC cc 399 O
+17 17 1 398 it it 1865 1867 PRON PRP nsubj 399 O
+17 17 2 399 is be 1868 1870 AUX VBZ ROOT 399 O
+17 17 3 400 very very 1871 1875 ADV RB advmod 401 O
+17 17 4 401 likely likely 1876 1882 ADJ JJ acomp 399 O
+17 17 5 402 that that 1889 1893 SCONJ IN mark 407 O
+17 17 6 403 he he 1894 1896 PRON PRP nsubj 407 O
+17 17 7 404 _ _ 1897 1898 PRON PRP punct 403 O
+17 17 8 405 may may 1898 1901 AUX MD aux 407 O
+17 17 9 406 _ _ 1901 1902 PRON DT nsubj 407 O
+17 17 10 407 fall fall 1903 1907 VERB VB ccomp 401 O
+17 17 11 408 in in 1908 1910 ADP IN prep 407 O
+17 17 12 409 love love 1911 1915 NOUN NN pobj 408 O
+17 17 13 410 with with 1916 1920 ADP IN prep 407 O
+17 17 14 411 one one 1921 1924 NUM CD pobj 410 O
+17 17 15 412 of of 1925 1927 ADP IN prep 411 O
+17 17 16 413 them they 1928 1932 PRON PRP pobj 412 O
+17 17 17 414 , , 1932 1933 PUNCT , punct 399 O
+17 17 18 415 and and 1934 1937 CCONJ CC cc 399 O
+17 17 19 416 therefore therefore 1938 1947 ADV RB advmod 419 O
+17 17 20 417 you you 1948 1951 PRON PRP nsubj 419 O
+17 17 21 418 must must 1958 1962 AUX MD aux 419 O
+17 17 22 419 visit visit 1963 1968 VERB VB conj 399 O
+17 17 23 420 him he 1969 1972 PRON PRP dobj 419 O
+17 17 24 421 as as 1973 1975 ADV RB advmod 422 O
+17 17 25 422 soon soon 1976 1980 ADV RB advmod 419 O
+17 17 26 423 as as 1981 1983 SCONJ IN mark 425 O
+17 17 27 424 he he 1984 1986 PRON PRP nsubj 425 O
+17 17 28 425 comes come 1987 1992 VERB VBZ advcl 422 O
+17 17 29 426 . . 1992 1993 PUNCT . punct 419 O
+17 17 30 427 β " 1993 1994 PUNCT '' punct 419 O
+18 18 0 428 β " 2002 2003 PUNCT `` punct 430 O
+18 18 1 429 I I 2003 2004 PRON PRP nsubj 430 O
+18 18 2 430 see see 2005 2008 VERB VBP ROOT 430 O
+18 18 3 431 no no 2009 2011 DET DT det 432 O
+18 18 4 432 occasion occasion 2012 2020 NOUN NN dobj 430 O
+18 18 5 433 for for 2021 2024 ADP IN prep 432 O
+18 18 6 434 that that 2025 2029 PRON DT pobj 433 O
+18 18 7 435 . . 2029 2030 PUNCT . punct 430 O
+18 19 0 436 You you 2031 2034 PRON PRP nsubj 441 O
+18 19 1 437 and and 2035 2038 CCONJ CC cc 436 O
+18 19 2 438 the the 2039 2042 DET DT det 439 O
+18 19 3 439 girls girl 2043 2048 NOUN NNS conj 436 O
+18 19 4 440 may may 2049 2052 AUX MD aux 441 O
+18 19 5 441 go go 2053 2055 VERB VB ccomp 472 O
+18 19 6 442 , , 2055 2056 PUNCT , punct 441 O
+18 19 7 443 or or 2057 2059 CCONJ CC cc 441 O
+18 19 8 444 you you 2060 2063 PRON PRP nsubj 446 O
+18 19 9 445 may may 2064 2067 AUX MD aux 446 O
+18 19 10 446 send send 2074 2078 VERB VB conj 441 O
+18 19 11 447 them they 2079 2083 PRON PRP dobj 446 O
+18 19 12 448 by by 2084 2086 ADP IN prep 446 O
+18 19 13 449 themselves themselves 2087 2097 PRON PRP pobj 448 O
+18 19 14 450 , , 2097 2098 PUNCT , punct 446 O
+18 19 15 451 which which 2099 2104 PRON WDT nsubj 454 O
+18 19 16 452 perhaps perhaps 2105 2112 ADV RB advmod 454 O
+18 19 17 453 will will 2113 2117 AUX MD aux 454 O
+18 19 18 454 be be 2118 2120 AUX VB relcl 447 O
+18 19 19 455 still still 2121 2126 ADV RB advmod 454 O
+18 19 20 456 better well 2127 2133 ADJ JJR acomp 454 O
+18 19 21 457 , , 2133 2134 PUNCT , punct 454 O
+18 19 22 458 for for 2135 2138 ADP IN prep 454 O
+18 19 23 459 as as 2145 2147 SCONJ IN mark 461 O
+18 19 24 460 you you 2148 2151 PRON PRP nsubj 461 O
+18 19 25 461 are be 2152 2155 AUX VBP advcl 454 O
+18 19 26 462 as as 2156 2158 ADV RB advmod 463 O
+18 19 27 463 handsome handsome 2159 2167 ADJ JJ acomp 461 O
+18 19 28 464 as as 2168 2170 ADP IN prep 463 O
+18 19 29 465 any any 2171 2174 PRON DT pobj 464 O
+18 19 30 466 of of 2175 2177 ADP IN prep 465 O
+18 19 31 467 them they 2178 2182 PRON PRP pobj 466 O
+18 19 32 468 , , 2182 2183 PUNCT , punct 472 O
+18 19 33 469 Mr. Mr. 2184 2187 PROPN NNP compound 470 O
+18 19 34 470 Bingley Bingley 2188 2195 PROPN NNP nsubj 472 O
+18 19 35 471 may may 2196 2199 AUX MD aux 472 O
+18 19 36 472 like like 2200 2204 VERB VB ROOT 472 O
+18 19 37 473 you you 2205 2208 PRON PRP dobj 472 O
+18 19 38 474 the the 2215 2218 DET DT det 475 O
+18 19 39 475 best good 2219 2223 ADJ JJS dobj 472 O
+18 19 40 476 of of 2224 2226 ADP IN prep 475 O
+18 19 41 477 the the 2227 2230 DET DT det 478 O
+18 19 42 478 party party 2231 2236 NOUN NN pobj 476 O
+18 19 43 479 . . 2236 2237 PUNCT . punct 472 O
+18 19 44 480 β " 2237 2238 PUNCT '' punct 472 O
+19 20 0 481 β " 2246 2247 PUNCT `` punct 486 O
+19 20 1 482 My my 2247 2249 PRON PRP$ poss 483 O
+19 20 2 483 dear dear 2250 2254 NOUN NN npadvmod 486 O
+19 20 3 484 , , 2254 2255 PUNCT , punct 486 O
+19 20 4 485 you you 2256 2259 PRON PRP nsubj 486 O
+19 20 5 486 flatter flatter 2260 2267 VERB VBD ROOT 486 O
+19 20 6 487 me I 2268 2270 PRON PRP dobj 486 O
+19 20 7 488 . . 2270 2271 PUNCT . punct 486 O
+19 21 0 489 I I 2272 2273 PRON PRP nsubj 492 O
+19 21 1 490 certainly certainly 2274 2283 ADV RB advmod 491 O
+19 21 2 491 _ _ 2284 2285 VERB VBP appos 489 O
+19 21 3 492 have have 2285 2289 VERB VBP ROOT 492 O
+19 21 4 493 _ _ 2289 2290 NOUN NN dobj 492 O
+19 21 5 494 had have 2291 2294 AUX VBD dep 492 O
+19 21 6 495 my my 2295 2297 PRON PRP$ poss 496 O
+19 21 7 496 share share 2298 2303 NOUN NN dobj 494 O
+19 21 8 497 of of 2304 2306 ADP IN prep 496 O
+19 21 9 498 beauty beauty 2313 2319 NOUN NN pobj 497 O
+19 21 10 499 , , 2319 2320 PUNCT , punct 492 O
+19 21 11 500 but but 2321 2324 CCONJ CC cc 492 O
+19 21 12 501 I I 2325 2326 PRON PRP nsubj 504 O
+19 21 13 502 do do 2327 2329 AUX VBP aux 504 O
+19 21 14 503 not not 2330 2333 PART RB neg 504 O
+19 21 15 504 pretend pretend 2334 2341 VERB VB conj 492 O
+19 21 16 505 to to 2342 2344 PART TO aux 506 O
+19 21 17 506 be be 2345 2347 AUX VB xcomp 504 O
+19 21 18 507 anything anything 2348 2356 PRON NN attr 506 O
+19 21 19 508 extraordinary extraordinary 2357 2370 ADJ JJ amod 507 O
+19 21 20 509 now now 2371 2374 ADV RB advmod 506 O
+19 21 21 510 . . 2374 2375 PUNCT . punct 504 O
+19 22 0 511 When when 2382 2386 SCONJ WRB advmod 514 O
+19 22 1 512 a a 2387 2388 DET DT det 513 O
+19 22 2 513 woman woman 2389 2394 NOUN NN nsubj 514 O
+19 22 3 514 has have 2395 2398 VERB VBZ advcl 522 O
+19 22 4 515 five five 2399 2403 NUM CD nummod 519 O
+19 22 5 516 grown grow 2404 2409 VERB VBN amod 519 O
+19 22 6 517 - - 2409 2410 PUNCT HYPH punct 516 O
+19 22 7 518 up up 2410 2412 ADP RP prt 516 O
+19 22 8 519 daughters daughter 2413 2422 NOUN NNS dobj 514 O
+19 22 9 520 , , 2422 2423 PUNCT , punct 522 O
+19 22 10 521 she she 2424 2427 PRON PRP nsubj 522 O
+19 22 11 522 ought ought 2428 2433 AUX MD ROOT 522 O
+19 22 12 523 to to 2434 2436 PART TO aux 524 O
+19 22 13 524 give give 2437 2441 VERB VB xcomp 522 O
+19 22 14 525 over over 2442 2446 ADP RP prt 524 O
+19 22 15 526 thinking thinking 2453 2461 NOUN NN dobj 524 O
+19 22 16 527 of of 2462 2464 ADP IN prep 526 O
+19 22 17 528 her her 2465 2468 PRON PRP$ poss 530 O
+19 22 18 529 own own 2469 2472 ADJ JJ amod 530 O
+19 22 19 530 beauty beauty 2473 2479 NOUN NN pobj 527 O
+19 22 20 531 . . 2479 2480 PUNCT . punct 522 O
+19 22 21 532 β " 2480 2481 PUNCT '' punct 522 O
+20 23 0 533 β " 2489 2490 PUNCT `` punct 544 O
+20 23 1 534 In in 2490 2492 ADP IN prep 544 O
+20 23 2 535 such such 2493 2497 ADJ JJ amod 536 O
+20 23 3 536 cases case 2498 2503 NOUN NNS pobj 534 O
+20 23 4 537 , , 2503 2504 PUNCT , punct 544 O
+20 23 5 538 a a 2505 2506 DET DT det 539 O
+20 23 6 539 woman woman 2507 2512 NOUN NN nsubj 544 O
+20 23 7 540 has have 2513 2516 AUX VBZ aux 544 O
+20 23 8 541 not not 2517 2520 PART RB neg 543 O
+20 23 9 542 often often 2521 2526 ADV RB advmod 543 O
+20 23 10 543 much much 2527 2531 ADJ JJ amod 544 O
+20 23 11 544 beauty beauty 2532 2538 NOUN NN ROOT 544 O
+20 23 12 545 to to 2539 2541 PART TO aux 546 O
+20 23 13 546 think think 2542 2547 VERB VB advcl 544 O
+20 23 14 547 of of 2548 2550 ADP IN prep 546 O
+20 23 15 548 . . 2550 2551 PUNCT . punct 544 O
+20 23 16 549 β " 2551 2552 PUNCT '' punct 544 O
+21 24 0 550 β " 2560 2561 PUNCT `` punct 559 O
+21 24 1 551 But but 2561 2564 CCONJ CC cc 559 O
+21 24 2 552 , , 2564 2565 PUNCT , punct 559 O
+21 24 3 553 my my 2566 2568 PRON PRP$ poss 554 O
+21 24 4 554 dear dear 2569 2573 NOUN NN npadvmod 559 O
+21 24 5 555 , , 2573 2574 PUNCT , punct 559 O
+21 24 6 556 you you 2575 2578 PRON PRP nsubj 559 O
+21 24 7 557 must must 2579 2583 AUX MD aux 559 O
+21 24 8 558 indeed indeed 2584 2590 ADV RB advmod 559 O
+21 24 9 559 go go 2591 2593 VERB VB ROOT 559 O
+21 24 10 560 and and 2594 2597 CCONJ CC cc 559 O
+21 24 11 561 see see 2598 2601 VERB VB conj 559 O
+21 24 12 562 Mr. Mr. 2602 2605 PROPN NNP compound 563 O
+21 24 13 563 Bingley Bingley 2606 2613 PROPN NNP dobj 561 O
+21 24 14 564 when when 2614 2618 SCONJ WRB advmod 566 O
+21 24 15 565 he he 2619 2621 PRON PRP nsubj 566 O
+21 24 16 566 comes come 2628 2633 VERB VBZ advcl 561 O
+21 24 17 567 into into 2634 2638 ADP IN prep 566 O
+21 24 18 568 the the 2639 2642 DET DT det 569 O
+21 24 19 569 neighbourhood neighbourhood 2643 2656 NOUN NN pobj 567 O
+21 24 20 570 . . 2656 2657 PUNCT . punct 559 O
+21 24 21 571 β " 2657 2658 PUNCT '' punct 559 O
+22 25 0 572 β " 2666 2667 PUNCT `` punct 582 O
+22 25 1 573 It it 2667 2669 PRON PRP nsubj 574 O
+22 25 2 574 is be 2670 2672 AUX VBZ ccomp 582 O
+22 25 3 575 more more 2673 2677 ADJ JJR acomp 574 O
+22 25 4 576 than than 2678 2682 SCONJ IN mark 578 O
+22 25 5 577 I I 2683 2684 PRON PRP nsubj 578 O
+22 25 6 578 engage engage 2685 2691 VERB VBP advcl 575 O
+22 25 7 579 for for 2692 2695 ADP IN prep 578 O
+22 25 8 580 , , 2695 2696 PUNCT , punct 582 O
+22 25 9 581 I I 2697 2698 PRON PRP nsubj 582 O
+22 25 10 582 assure assure 2699 2705 VERB VBP ROOT 582 EVENT
+22 25 11 583 you you 2706 2709 PRON PRP dobj 582 O
+22 25 12 584 . . 2709 2710 PUNCT . punct 582 O
+22 25 13 585 β " 2710 2711 PUNCT '' punct 582 O
+23 26 0 586 β " 2719 2720 PUNCT `` punct 588 O
+23 26 1 587 But but 2720 2723 CCONJ CC cc 588 O
+23 26 2 588 consider consider 2724 2732 VERB VB ROOT 588 O
+23 26 3 589 your your 2733 2737 PRON PRP$ poss 590 O
+23 26 4 590 daughters daughter 2738 2747 NOUN NNS dobj 588 O
+23 26 5 591 . . 2747 2748 PUNCT . punct 588 O
+23 27 0 592 Only only 2749 2753 ADV RB advmod 593 O
+23 27 1 593 think think 2754 2759 VERB VB ROOT 593 O
+23 27 2 594 what what 2760 2764 PRON WP det 596 O
+23 27 3 595 an an 2765 2767 DET DT det 596 O
+23 27 4 596 establishment establishment 2768 2781 NOUN NN attr 599 O
+23 27 5 597 it it 2782 2784 PRON PRP nsubj 599 O
+23 27 6 598 would would 2791 2796 AUX MD aux 599 O
+23 27 7 599 be be 2797 2799 AUX VB ccomp 593 O
+23 27 8 600 for for 2800 2803 ADP IN prep 599 O
+23 27 9 601 one one 2804 2807 NUM CD pobj 600 O
+23 27 10 602 of of 2808 2810 ADP IN prep 601 O
+23 27 11 603 them they 2811 2815 PRON PRP pobj 602 O
+23 27 12 604 . . 2815 2816 PUNCT . punct 593 O
+23 28 0 605 Sir Sir 2817 2820 PROPN NNP compound 606 O
+23 28 1 606 William William 2821 2828 PROPN NNP nsubj 610 O
+23 28 2 607 and and 2829 2832 CCONJ CC cc 606 O
+23 28 3 608 Lady Lady 2833 2837 PROPN NNP compound 609 O
+23 28 4 609 Lucas Lucas 2838 2843 PROPN NNP conj 606 O
+23 28 5 610 are be 2844 2847 AUX VBP ROOT 610 O
+23 28 6 611 determined determined 2854 2864 ADJ JJ acomp 610 O
+23 28 7 612 to to 2865 2867 PART TO aux 613 O
+23 28 8 613 go go 2868 2870 VERB VB xcomp 611 O
+23 28 9 614 , , 2870 2871 PUNCT , punct 610 O
+23 28 10 615 merely merely 2872 2878 ADV RB advmod 616 O
+23 28 11 616 on on 2879 2881 ADP IN prep 610 O
+23 28 12 617 that that 2882 2886 DET DT det 618 O
+23 28 13 618 account account 2887 2894 NOUN NN pobj 616 O
+23 28 14 619 , , 2894 2895 PUNCT , punct 610 O
+23 28 15 620 for for 2896 2899 ADP IN prep 610 O
+23 28 16 621 in in 2900 2902 ADP IN prep 620 O
+23 28 17 622 general general 2903 2910 ADJ JJ amod 621 O
+23 28 18 623 , , 2910 2911 PUNCT , punct 625 O
+23 28 19 624 you you 2912 2915 PRON PRP nsubj 625 O
+23 28 20 625 know know 2922 2926 VERB VBP parataxis 628 O
+23 28 21 626 , , 2926 2927 PUNCT , punct 628 O
+23 28 22 627 they they 2928 2932 PRON PRP nsubj 628 O
+23 28 23 628 visit visit 2933 2938 VERB VBP conj 610 O
+23 28 24 629 no no 2939 2941 DET DT det 630 O
+23 28 25 630 newcomers newcomer 2942 2951 NOUN NNS dobj 628 O
+23 28 26 631 . . 2951 2952 PUNCT . punct 628 O
+23 29 0 632 Indeed indeed 2953 2959 ADV RB advmod 635 O
+23 29 1 633 you you 2960 2963 PRON PRP nsubj 635 O
+23 29 2 634 must must 2964 2968 AUX MD aux 635 O
+23 29 3 635 go go 2969 2971 VERB VB ROOT 635 O
+23 29 4 636 , , 2971 2972 PUNCT , punct 635 O
+23 29 5 637 for for 2973 2976 SCONJ IN mark 640 O
+23 29 6 638 it it 2977 2979 PRON PRP nsubj 640 O
+23 29 7 639 will will 2980 2984 AUX MD aux 640 O
+23 29 8 640 be be 2985 2987 AUX VB advcl 635 O
+23 29 9 641 impossible impossible 2994 3004 ADJ JJ acomp 640 O
+23 29 10 642 for for 3005 3008 ADP IN prep 640 O
+23 29 11 643 _ _ 3009 3010 PUNCT NFP pobj 642 O
+23 29 12 644 us we 3010 3012 PRON PRP attr 640 O
+23 29 13 645 _ _ 3012 3013 NOUN NN attr 640 O
+23 29 14 646 to to 3014 3016 PART TO aux 647 O
+23 29 15 647 visit visit 3017 3022 VERB VB xcomp 640 O
+23 29 16 648 him he 3023 3026 PRON PRP dobj 647 O
+23 29 17 649 if if 3027 3029 SCONJ IN mark 651 O
+23 29 18 650 you you 3030 3033 PRON PRP nsubj 651 O
+23 29 19 651 do do 3034 3036 VERB VBP advcl 635 O
+23 29 20 652 not not 3037 3040 PART RB neg 651 O
+23 29 21 653 . . 3040 3041 PUNCT . punct 635 O
+23 29 22 654 ” " 3041 3042 PUNCT '' punct 635 O
+24 30 0 655 “ " 3050 3051 PUNCT `` punct 657 O
+24 30 1 656 You you 3051 3054 PRON PRP nsubj 657 O
+24 30 2 657 are be 3055 3058 AUX VBP ROOT 657 O
+24 30 3 658 over over 3059 3063 ADV RB advmod 660 O
+24 30 4 659 - - 3063 3064 PUNCT HYPH punct 660 O
+24 30 5 660 scrupulous scrupulous 3064 3074 ADJ JJ acomp 657 O
+24 30 6 661 , , 3074 3075 PUNCT , punct 657 O
+24 30 7 662 surely surely 3076 3082 ADV RB advmod 657 O
+24 30 8 663 . . 3082 3083 PUNCT . punct 657 O
+24 31 0 664 I I 3084 3085 PRON PRP nsubj 665 O
+24 31 1 665 dare dare 3086 3090 VERB VBP ROOT 665 O
+24 31 2 666 say say 3091 3094 VERB VB xcomp 665 O
+24 31 3 667 Mr. Mr. 3095 3098 PROPN NNP compound 668 O
+24 31 4 668 Bingley Bingley 3099 3106 PROPN NNP nsubj 670 O
+24 31 5 669 will will 3107 3111 AUX MD aux 670 O
+24 31 6 670 be be 3112 3114 AUX VB ccomp 666 O
+24 31 7 671 very very 3121 3125 ADV RB advmod 672 O
+24 31 8 672 glad glad 3126 3130 ADJ JJ acomp 670 O
+24 31 9 673 to to 3131 3133 PART TO aux 674 O
+24 31 10 674 see see 3134 3137 VERB VB xcomp 672 O
+24 31 11 675 you you 3138 3141 PRON PRP dobj 674 O
+24 31 12 676 ; ; 3141 3142 PUNCT : punct 665 O
+24 31 13 677 and and 3143 3146 CCONJ CC cc 665 O
+24 31 14 678 I I 3147 3148 PRON PRP nsubj 680 O
+24 31 15 679 will will 3149 3153 AUX MD aux 680 O
+24 31 16 680 send send 3154 3158 VERB VB conj 665 O
+24 31 17 681 a a 3159 3160 DET DT quantmod 682 O
+24 31 18 682 few few 3161 3164 ADJ JJ amod 683 O
+24 31 19 683 lines line 3165 3170 NOUN NNS dobj 680 O
+24 31 20 684 by by 3171 3173 ADP IN prep 680 O
+24 31 21 685 you you 3174 3177 PRON PRP pobj 684 O
+24 31 22 686 to to 3178 3180 PART TO aux 687 O
+24 31 23 687 assure assure 3187 3193 VERB VB advcl 680 O
+24 31 24 688 him he 3194 3197 PRON PRP dobj 687 O
+24 31 25 689 of of 3198 3200 ADP IN prep 687 O
+24 31 26 690 my my 3201 3203 PRON PRP$ poss 692 O
+24 31 27 691 hearty hearty 3204 3210 ADJ JJ amod 692 O
+24 31 28 692 consent consent 3211 3218 NOUN NN pobj 689 O
+24 31 29 693 to to 3219 3221 ADP IN prep 692 O
+24 31 30 694 his his 3222 3225 PRON PRP$ poss 695 O
+24 31 31 695 marrying marrying 3226 3234 NOUN NN pobj 693 O
+24 31 32 696 whichever whichever 3235 3244 PRON WDT dobj 698 O
+24 31 33 697 he he 3245 3247 PRON PRP nsubj 698 O
+24 31 34 698 chooses choose 3254 3261 VERB VBZ ccomp 687 O
+24 31 35 699 of of 3262 3264 ADP IN prep 698 O
+24 31 36 700 the the 3265 3268 DET DT det 701 O
+24 31 37 701 girls girl 3269 3274 NOUN NNS pobj 699 O
+24 31 38 702 ; ; 3274 3275 PUNCT : punct 680 O
+24 31 39 703 though though 3276 3282 SCONJ IN mark 706 O
+24 31 40 704 I I 3283 3284 PRON PRP nsubj 706 O
+24 31 41 705 must must 3285 3289 AUX MD aux 706 O
+24 31 42 706 throw throw 3290 3295 VERB VB advcl 680 O
+24 31 43 707 in in 3296 3298 ADP RP prt 706 O
+24 31 44 708 a a 3299 3300 DET DT det 710 O
+24 31 45 709 good good 3301 3305 ADJ JJ amod 710 O
+24 31 46 710 word word 3306 3310 NOUN NN dobj 706 O
+24 31 47 711 for for 3311 3314 ADP IN prep 710 O
+24 31 48 712 my my 3315 3317 PRON PRP$ poss 714 O
+24 31 49 713 little little 3324 3330 ADJ JJ amod 714 O
+24 31 50 714 Lizzy Lizzy 3331 3336 PROPN NNP pobj 711 O
+24 31 51 715 . . 3336 3337 PUNCT . punct 680 O
+24 31 52 716 ” " 3337 3338 PUNCT '' punct 680 O
+25 31 53 717 “ " 3346 3347 PUNCT `` punct 719 O
+25 31 54 718 I I 3347 3348 PRON PRP nsubj 719 O
+25 31 55 719 desire desire 3349 3355 VERB VBP ccomp 665 O
+25 31 56 720 you you 3356 3359 PRON PRP nsubj 722 O
+25 31 57 721 will will 3360 3364 AUX MD aux 722 O
+25 31 58 722 do do 3365 3367 VERB VB ccomp 719 O
+25 31 59 723 no no 3368 3370 DET DT det 725 O
+25 31 60 724 such such 3371 3375 ADJ JJ amod 725 O
+25 31 61 725 thing thing 3376 3381 NOUN NN dobj 722 O
+25 31 62 726 . . 3381 3382 PUNCT . punct 665 O
+25 32 0 727 Lizzy Lizzy 3383 3388 PROPN NNP nsubj 728 O
+25 32 1 728 is be 3389 3391 AUX VBZ ROOT 728 O
+25 32 2 729 not not 3392 3395 PART RB neg 728 O
+25 32 3 730 a a 3396 3397 DET DT det 731 O
+25 32 4 731 bit bit 3398 3401 NOUN NN npadvmod 732 O
+25 32 5 732 better well 3402 3408 ADJ JJR acomp 728 O
+25 32 6 733 than than 3415 3419 ADP IN prep 732 O
+25 32 7 734 the the 3420 3423 DET DT det 735 O
+25 32 8 735 others other 3424 3430 NOUN NNS pobj 733 O
+25 32 9 736 ; ; 3430 3431 PUNCT : punct 728 O
+25 32 10 737 and and 3432 3435 CCONJ CC cc 728 O
+25 32 11 738 I I 3436 3437 PRON PRP nsubj 739 O
+25 32 12 739 am be 3438 3440 AUX VBP conj 728 O
+25 32 13 740 sure sure 3441 3445 ADJ JJ acomp 739 O
+25 32 14 741 she she 3446 3449 PRON PRP nsubj 742 O
+25 32 15 742 is be 3450 3452 AUX VBZ ccomp 740 O
+25 32 16 743 not not 3453 3456 PART RB neg 742 O
+25 32 17 744 half half 3457 3461 ADV RB advmod 746 O
+25 32 18 745 so so 3462 3464 ADV RB advmod 746 O
+25 32 19 746 handsome handsome 3465 3473 ADJ JJ acomp 742 O
+25 32 20 747 as as 3474 3476 ADP IN prep 746 O
+25 32 21 748 Jane Jane 3483 3487 PROPN NNP pobj 747 O
+25 32 22 749 , , 3487 3488 PUNCT , punct 748 O
+25 32 23 750 nor nor 3489 3492 CCONJ CC cc 746 O
+25 32 24 751 half half 3493 3497 NOUN NN nsubjpass 755 O
+25 32 25 752 so so 3498 3500 ADV RB advmod 753 O
+25 32 26 753 good good 3501 3505 ADV RB advmod 755 O
+25 32 27 754 - - 3505 3506 PUNCT HYPH punct 755 O
+25 32 28 755 humoured humour 3506 3514 VERB VBN conj 746 O
+25 32 29 756 as as 3515 3517 ADP IN prep 755 O
+25 32 30 757 Lydia Lydia 3518 3523 PROPN NNP pobj 756 O
+25 32 31 758 . . 3523 3524 PUNCT . punct 739 O
+25 33 0 759 But but 3525 3528 CCONJ CC cc 763 O
+25 33 1 760 you you 3529 3532 PRON PRP nsubj 763 O
+25 33 2 761 are be 3533 3536 AUX VBP aux 763 O
+25 33 3 762 always always 3537 3543 ADV RB advmod 763 O
+25 33 4 763 giving give 3550 3556 VERB VBG ROOT 763 O
+25 33 5 764 _ _ 3557 3558 PRON PRP dative 763 O
+25 33 6 765 her she 3558 3561 PRON PRP dobj 763 O
+25 33 7 766 _ _ 3561 3562 PRON DT dative 763 O
+25 33 8 767 the the 3563 3566 DET DT det 768 O
+25 33 9 768 preference preference 3567 3577 NOUN NN dobj 763 O
+25 33 10 769 . . 3577 3578 PUNCT . punct 763 O
+25 33 11 770 ” " 3578 3579 PUNCT '' punct 763 O
+26 34 0 771 “ " 3587 3588 PUNCT `` punct 773 O
+26 34 1 772 They they 3588 3592 PRON PRP nsubj 773 O
+26 34 2 773 have have 3593 3597 VERB VBP ccomp 783 O
+26 34 3 774 none none 3598 3602 NOUN NN dobj 773 O
+26 34 4 775 of of 3603 3605 ADP IN prep 774 O
+26 34 5 776 them they 3606 3610 PRON PRP pobj 775 O
+26 34 6 777 much much 3611 3615 ADV RB advmod 773 O
+26 34 7 778 to to 3616 3618 PART TO aux 779 O
+26 34 8 779 recommend recommend 3619 3628 VERB VB advcl 773 O
+26 34 9 780 them they 3629 3633 PRON PRP dobj 779 O
+26 34 10 781 , , 3633 3634 PUNCT , punct 783 O
+26 34 11 782 ” " 3634 3635 PUNCT '' punct 783 O
+26 34 12 783 replied reply 3636 3643 VERB VBD ccomp 788 EVENT
+26 34 13 784 he he 3644 3646 PRON PRP nsubj 783 O
+26 34 14 785 ; ; 3646 3647 PUNCT : punct 788 O
+26 34 15 786 “ " 3654 3655 PUNCT `` punct 788 O
+26 34 16 787 they they 3655 3659 PRON PRP nsubj 788 O
+26 34 17 788 are be 3660 3663 AUX VBP ROOT 788 O
+26 34 18 789 all all 3664 3667 ADV RB advmod 788 O
+26 34 19 790 silly silly 3668 3673 ADJ JJ acomp 788 O
+26 34 20 791 and and 3674 3677 CCONJ CC cc 790 O
+26 34 21 792 ignorant ignorant 3678 3686 ADJ JJ conj 790 O
+26 34 22 793 like like 3687 3691 ADP IN prep 792 O
+26 34 23 794 other other 3692 3697 ADJ JJ amod 795 O
+26 34 24 795 girls girl 3698 3703 NOUN NNS pobj 793 O
+26 34 25 796 ; ; 3703 3704 PUNCT : punct 788 O
+26 34 26 797 but but 3705 3708 CCONJ CC cc 788 O
+26 34 27 798 Lizzy Lizzy 3709 3714 PROPN NNP nsubj 799 O
+26 34 28 799 has have 3715 3718 VERB VBZ conj 788 O
+26 34 29 800 something something 3725 3734 PRON NN dobj 799 O
+26 34 30 801 more more 3735 3739 ADJ JJR amod 800 O
+26 34 31 802 of of 3740 3742 ADP IN prep 801 O
+26 34 32 803 quickness quickness 3743 3752 NOUN NN pobj 802 O
+26 34 33 804 than than 3753 3757 ADP IN prep 803 O
+26 34 34 805 her her 3758 3761 PRON PRP$ poss 806 O
+26 34 35 806 sisters sister 3762 3769 NOUN NNS pobj 804 O
+26 34 36 807 . . 3769 3770 PUNCT . punct 799 O
+26 34 37 808 ” " 3770 3771 PUNCT '' punct 799 O
+27 34 38 809 “ " 3779 3780 PUNCT `` punct 816 O
+27 34 39 810 Mr. Mr. 3780 3783 PROPN NNP compound 811 O
+27 34 40 811 Bennet Bennet 3784 3790 PROPN NNP npadvmod 816 O
+27 34 41 812 , , 3790 3791 PUNCT , punct 816 O
+27 34 42 813 how how 3792 3795 SCONJ WRB advmod 816 O
+27 34 43 814 can can 3796 3799 AUX MD aux 816 O
+27 34 44 815 you you 3800 3803 PRON PRP nsubj 816 O
+27 34 45 816 abuse abuse 3804 3809 VERB VB conj 788 O
+27 34 46 817 your your 3810 3814 PRON PRP$ poss 819 O
+27 34 47 818 own own 3815 3818 ADJ JJ amod 819 O
+27 34 48 819 children child 3819 3827 NOUN NNS dobj 816 O
+27 34 49 820 in in 3828 3830 ADP IN prep 816 O
+27 34 50 821 such such 3831 3835 DET PDT predet 823 O
+27 34 51 822 a a 3836 3837 DET DT det 823 O
+27 34 52 823 way way 3838 3841 NOUN NN pobj 820 O
+27 34 53 824 ? ? 3841 3842 PUNCT . punct 816 O
+27 35 0 825 You you 3849 3852 PRON PRP nsubj 826 O
+27 35 1 826 take take 3853 3857 VERB VBP ROOT 826 O
+27 35 2 827 delight delight 3858 3865 NOUN NN dobj 826 O
+27 35 3 828 in in 3866 3868 ADP IN prep 826 O
+27 35 4 829 vexing vex 3869 3875 VERB VBG pcomp 828 O
+27 35 5 830 me I 3876 3878 PRON PRP dobj 829 O
+27 35 6 831 . . 3878 3879 PUNCT . punct 826 O
+27 36 0 832 You you 3880 3883 PRON PRP nsubj 833 O
+27 36 1 833 have have 3884 3888 VERB VBP ROOT 833 O
+27 36 2 834 no no 3889 3891 DET DT det 835 O
+27 36 3 835 compassion compassion 3892 3902 NOUN NN dobj 833 O
+27 36 4 836 for for 3903 3906 ADP IN prep 835 O
+27 36 5 837 my my 3907 3909 PRON PRP$ poss 839 O
+27 36 6 838 poor poor 3910 3914 ADJ JJ amod 839 O
+27 36 7 839 nerves nerve 3921 3927 NOUN NNS pobj 836 O
+27 36 8 840 . . 3927 3928 PUNCT . punct 833 O
+27 36 9 841 ” " 3928 3929 PUNCT '' punct 833 O
+28 37 0 842 “ " 3937 3938 PUNCT `` punct 844 O
+28 37 1 843 You you 3938 3941 PRON PRP nsubj 844 O
+28 37 2 844 mistake mistake 3942 3949 VERB VBP ROOT 844 O
+28 37 3 845 me I 3950 3952 PRON PRP dobj 844 O
+28 37 4 846 , , 3952 3953 PUNCT , punct 844 O
+28 37 5 847 my my 3954 3956 PRON PRP$ poss 848 O
+28 37 6 848 dear dear 3957 3961 NOUN NN npadvmod 844 O
+28 37 7 849 . . 3961 3962 PUNCT . punct 844 O
+28 38 0 850 I I 3963 3964 PRON PRP nsubj 851 O
+28 38 1 851 have have 3965 3969 VERB VBP ROOT 851 O
+28 38 2 852 a a 3970 3971 DET DT det 854 O
+28 38 3 853 high high 3972 3976 ADJ JJ amod 854 O
+28 38 4 854 respect respect 3977 3984 NOUN NN dobj 851 O
+28 38 5 855 for for 3985 3988 ADP IN prep 854 O
+28 38 6 856 your your 3989 3993 PRON PRP$ poss 857 O
+28 38 7 857 nerves nerve 3994 4000 NOUN NNS pobj 855 O
+28 38 8 858 . . 4000 4001 PUNCT . punct 851 O
+28 39 0 859 They they 4008 4012 PRON PRP nsubj 860 O
+28 39 1 860 are be 4013 4016 AUX VBP ROOT 860 O
+28 39 2 861 my my 4017 4019 PRON PRP$ poss 863 O
+28 39 3 862 old old 4020 4023 ADJ JJ amod 863 O
+28 39 4 863 friends friend 4024 4031 NOUN NNS attr 860 O
+28 39 5 864 . . 4031 4032 PUNCT . punct 860 O
+28 40 0 865 I I 4033 4034 PRON PRP nsubj 867 O
+28 40 1 866 have have 4035 4039 AUX VBP aux 867 O
+28 40 2 867 heard hear 4040 4045 VERB VBN ROOT 867 EVENT
+28 40 3 868 you you 4046 4049 PRON PRP nsubj 869 O
+28 40 4 869 mention mention 4050 4057 VERB VB ccomp 867 EVENT
+28 40 5 870 them they 4058 4062 PRON PRP dobj 869 O
+28 40 6 871 with with 4063 4067 ADP IN prep 869 O
+28 40 7 872 consideration consideration 4074 4087 NOUN NN pobj 871 O
+28 40 8 873 these these 4088 4093 DET DT det 876 O
+28 40 9 874 last last 4094 4098 ADJ JJ amod 876 O
+28 40 10 875 twenty twenty 4099 4105 NUM CD nummod 876 O
+28 40 11 876 years year 4106 4111 NOUN NNS npadvmod 869 O
+28 40 12 877 at at 4112 4114 ADV RB advmod 878 O
+28 40 13 878 least least 4115 4120 ADJ JJS advmod 876 O
+28 40 14 879 . . 4120 4121 PUNCT . punct 867 O
+28 40 15 880 ” " 4121 4122 PUNCT '' punct 867 O
diff --git a/requirements.txt b/requirements.txt
index 4bbad89..84f07aa 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,10 @@
torch>=2.0.0
-transformers>=4.30.0
tokenizers>=0.13.0
spacy>=3.5.0
+transformers>=4.30.0
+sentence-transformers
+tf-keras
+
numpy>=1.24.0
tqdm>=4.65.0
filelock>=3.12.0
@@ -12,4 +15,4 @@ packaging>=23.0
pandas>=1.3.0
matplotlib>=3.4.0
networkx>=2.6.0
-jinja2>=3.0.0
+jinja2>=3.0.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 6ff84cb..65a4162 100644
--- a/setup.py
+++ b/setup.py
@@ -1,18 +1,69 @@
from setuptools import setup, find_packages
-setup(name='booknlp',
+setup(name='booknlp-plus',
version='1.0.7',
packages=find_packages(),
py_modules=['booknlp'],
- url="https://github.com/dbamman/book-nlp",
- author="David Bamman",
+ url="https://github.com/DrewThomasson/booknlp/tree/json-patch-1",
+ author="David Bamman (Original), Drew Thomasson (Fork)",
author_email="dbamman@berkeley.edu",
+ description="Enhanced fork of BookNLP with JSON patch support and additional features",
+ long_description="""
+ BookNLP Plus: An Enhanced Fork of BookNLP
+
+ This is an enhanced fork of the original BookNLP by David Bamman, featuring:
+ - JSON patch support
+ - Sentence transformers integration
+ - Updated dependencies for Python 3.9-3.12
+ - Additional improvements and bug fixes
+
+ Original repository: https://github.com/dbamman/book-nlp
+ Original author: David Bamman (UC Berkeley)
+
+ Enhanced fork repository: https://github.com/DrewThomasson/booknlp/tree/json-patch-1
+ Fork maintainer: Drew Thomasson
+ """,
+ long_description_content_type="text/plain",
include_package_data=True,
license="MIT",
- install_requires=['torch>=1.7.1',
- 'tensorflow>=1.15',
- 'spacy>=3',
- 'transformers>=4.11.3'
- ],
-
- )
+ python_requires='>=3.9,<3.13',
+ install_requires=[
+ 'torch>=2.0.0',
+ 'tokenizers>=0.13.0',
+ 'spacy>=3.5.0',
+ 'transformers>=4.30.0',
+ 'sentence-transformers',
+ 'tf-keras',
+ 'numpy>=1.24.0',
+ 'tqdm>=4.65.0',
+ 'filelock>=3.12.0',
+ 'regex>=2023.8.8',
+ 'requests>=2.31.0',
+ 'pyyaml>=6.0.1',
+ 'packaging>=23.0',
+ 'pandas>=1.3.0',
+ 'matplotlib>=3.4.0',
+ 'networkx>=2.6.0',
+ 'jinja2>=3.0.0'
+ ],
+ project_urls={
+ "Original Repository": "https://github.com/dbamman/book-nlp",
+ "Fork Repository": "https://github.com/DrewThomasson/booknlp",
+ "Current Branch": "https://github.com/DrewThomasson/booknlp/tree/json-patch-1",
+ "Bug Reports": "https://github.com/DrewThomasson/booknlp/issues",
+ },
+ classifiers=[
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Developers",
+ "Intended Audience :: Science/Research",
+ "License :: OSI Approved :: MIT License",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
+ "Topic :: Text Processing :: Linguistic",
+ ],
+ keywords="booknlp, nlp, natural language processing, literature, fiction, character analysis, entity recognition, sentence transformers, plus",
+)
\ No newline at end of file