Skip to content

Commit e2d9f5b

Browse files
committed
Fix Gliner slowness
1 parent ffaccc9 commit e2d9f5b

File tree

4 files changed: 9 additions and 33 deletions.

src/trainable_entity_extractor/adapters/extractors/GlinerDateExtractor.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import json
22
from dateparser.search import search_dates
3+
from gliner import GLiNER
34

45

56
class GlinerDateExtractor:
@@ -53,3 +54,7 @@ def extract_dates(self, text: str):
5354
entities = self.remove_overlapping_entities(entities)
5455
date_times = [d[1] for e in entities for d in search_dates(e["text"])]
5556
return date_times
57+
58+
@staticmethod
59+
def get_model():
60+
return GLiNER.from_pretrained("urchade/gliner_multi-v2.1")

src/trainable_entity_extractor/adapters/extractors/pdf_to_text_extractor/methods/GlinerFirstDateMethod.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,5 @@
11
import re
2-
3-
from gliner import GLiNER
4-
2+
from trainable_entity_extractor.adapters.extractors.GlinerDateExtractor import GlinerDateExtractor
53
from trainable_entity_extractor.adapters.extractors.ToTextExtractorMethod import ToTextExtractorMethod
64
from trainable_entity_extractor.domain.ExtractionData import ExtractionData
75
from trainable_entity_extractor.domain.PdfDataSegment import PdfDataSegment
@@ -17,7 +15,7 @@ def train(self, extraction_data: ExtractionData):
1715
self.save_json("languages.json", list(set(languages)))
1816

1917
def predict(self, prediction_samples_data: PredictionSamplesData) -> list[str]:
20-
gliner_model = GLiNER.from_pretrained("urchade/gliner_multi-v2.1")
18+
gliner_model = GlinerDateExtractor.get_model()
2119
predictions_samples = prediction_samples_data.prediction_samples
2220
predictions = [""] * len(predictions_samples)
2321
languages = self.load_json("languages.json")

src/trainable_entity_extractor/adapters/extractors/pdf_to_text_extractor/methods/SpaceFixerGlinerFirstDateMethod.py

Lines changed: 0 additions & 25 deletions
This file was deleted.

src/trainable_entity_extractor/adapters/extractors/text_to_text_extractor/methods/GlinerDateParserMethod.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,3 @@
1-
from gliner import GLiNER
2-
31
from trainable_entity_extractor.domain.ExtractionData import ExtractionData
42
from trainable_entity_extractor.domain.PredictionSamplesData import PredictionSamplesData
53
from trainable_entity_extractor.adapters.extractors.ToTextExtractorMethod import ToTextExtractorMethod
@@ -31,7 +29,7 @@ def get_date(model, tags_texts: list[str]):
3129
return None
3230

3331
def train(self, extraction_data: ExtractionData):
34-
gliner_model = GLiNER.from_pretrained("urchade/gliner_multi-v2.1")
32+
gliner_model = GlinerDateExtractor.get_model()
3533

3634
gliner_date_extractor = GlinerDateExtractor(gliner_model)
3735

@@ -46,7 +44,7 @@ def train(self, extraction_data: ExtractionData):
4644
self.save_json(self.IS_VALID_EXECUTION_FILE_NAME, "true")
4745

4846
def predict(self, prediction_samples_data: PredictionSamplesData) -> list[str]:
49-
gliner_model = GLiNER.from_pretrained("urchade/gliner_multi-v2.1")
47+
gliner_model = GlinerDateExtractor.get_model()
5048

5149
if self.load_json(self.IS_VALID_EXECUTION_FILE_NAME) == "false":
5250
return [""] * len(prediction_samples_data.prediction_samples)

0 commit comments

Comments (0)