streamlit-octostar-utils 0.1.7a5__tar.gz → 0.1.7a7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/PKG-INFO +1 -1
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/pyproject.toml +1 -1
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/nlp/ner.py +144 -98
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/LICENSE +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/README.md +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/__init__.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/api_crafter/celery.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/api_crafter/nifi.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/core/__init__.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/core/dict.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/core/filetypes.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/core/timestamp.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/hello.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/nlp/__init__.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/nlp/language.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/octostar/__init__.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/octostar/client.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/octostar/context.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/octostar/permissions.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/ontology/__init__.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/ontology/expand_entities.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/ontology/inheritance.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/ontology/validation.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/style/__init__.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/style/common.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
- {streamlit_octostar_utils-0.1.7a5 → streamlit_octostar_utils-0.1.7a7}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
@@ -14,6 +14,7 @@ import numpy as np
|
|
14
14
|
import math
|
15
15
|
import nltk
|
16
16
|
from typing import Optional, List
|
17
|
+
from pydantic import BaseModel, ConfigDict, Field
|
17
18
|
|
18
19
|
SPACY_NER_MODELS = {
|
19
20
|
"english": lambda: load_spacy(
|
@@ -24,15 +25,15 @@ SPACY_NER_MODELS = {
|
|
24
25
|
FLAIR_NER_MODELS = {"english": lambda: SequenceTagger.load("flair/ner-english")}
|
25
26
|
REGEX_NER_MODELS = {
|
26
27
|
"IP_ADDRESS": [
|
27
|
-
r"(?:(
|
28
|
-
r"(?:(?<=:=)|(?<=\s)|(?<=\b))(?:[A-Fa-f0-9]{1,4}:){7}[A-Fa-f0-9]{1,4}(?::\d{1,5})?(?:(?=\s)|(?=\b))"
|
28
|
+
r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?::(?:[0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5]))?\b",
|
29
29
|
],
|
30
|
-
"PHONE": r"(?:(
|
31
|
-
"EMAIL": r"
|
30
|
+
"PHONE": r"(?:(?:\+(?:\d{1,3}[ .-]?)?(?:\(\d{1,3}\)[ .-]?)?)(?:\d{2,5}[ .-]?){1,3}|\d{2,5}[ .-]\d{2,5}(?:[ .-]\d{2,5}){0,2})\b",
|
31
|
+
"EMAIL": r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)+\b",
|
32
|
+
"URL": r"\b(?:(?:https?|ftp|sftp|ftps|ssh|file|mailto|git|onion|ipfs|ipns):\/\/|www\.)(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z]{2,}(?::\d+)?(?:\/(?:[-a-z0-9\/_.,~%+:@]|(?:%[0-9a-f]{2}))*)?(?:\?(?:[-a-z0-9\/_.,~%+:@=&]|(?:%[0-9a-f]{2}))*)?(?:#(?:[-a-z0-9\/_.,~%+:@=&]|(?:%[0-9a-f]{2}))*)?|(?:https?:\/\/)?[a-z2-7]{16,56}\.onion(?:\/(?:[-a-z0-9\/_.,~%+:@]|(?:%[0-9a-f]{2}))*)?(?:\?(?:[-a-z0-9\/_.,~%+:@=&]|(?:%[0-9a-f]{2}))*)?(?:#(?:[-a-z0-9\/_.,~%+:@=&]|(?:%[0-9a-f]{2}))*)\b",
|
32
33
|
}
|
33
34
|
|
34
35
|
BASE_TO_ONTONOTES_LABELMAP = {"PER": "PERSON"}
|
35
|
-
BASE_ALLOWED_LABELS = ["PERSON", "ORG", "LOC", "NORP", "GPE", "PRODUCT", "DATE", "
|
36
|
+
BASE_ALLOWED_LABELS = ["PERSON", "ORG", "LOC", "NORP", "GPE", "PRODUCT", "DATE", "PHONE", "IP_ADDRESS", "EMAIL", "URL"]
|
36
37
|
|
37
38
|
|
38
39
|
def _sumy__get_best_sentences(sentences, rating, *args, **kwargs):
|
@@ -75,34 +76,21 @@ def get_nltk_tokenizer(language: str) -> Tokenizer:
|
|
75
76
|
return Tokenizer(language)
|
76
77
|
|
77
78
|
|
78
|
-
class NERObject(
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
79
|
+
class NERObject(BaseModel):
|
80
|
+
name: str
|
81
|
+
label: str
|
82
|
+
score: float = 0.0
|
83
|
+
start: int
|
84
|
+
count: int
|
85
|
+
context: str | None = None
|
86
|
+
comentions: list[str] = Field(default_factory=list)
|
87
|
+
model_config = ConfigDict(extra="allow")
|
87
88
|
|
88
|
-
def to_dict(self):
|
89
|
-
data = {
|
90
|
-
"name": self.name,
|
91
|
-
"label": self.label,
|
92
|
-
"score": self.score,
|
93
|
-
"context": self.context,
|
94
|
-
"count": self.count,
|
95
|
-
"comentions": self.comentions or [],
|
96
|
-
}
|
97
|
-
if self.sources:
|
98
|
-
data["sources"] = self.sources
|
99
|
-
return data
|
100
|
-
|
101
89
|
def __repr__(self):
|
102
90
|
return f"NERObject(label={self.label},name={self.name})"
|
103
91
|
|
104
92
|
|
105
|
-
def postprocess_ner(entities, whitelisted_labels=None, max_entities=None):
|
93
|
+
def postprocess_ner(entities: list[NERObject], whitelisted_labels=None, max_entities=None):
|
106
94
|
if whitelisted_labels is not None:
|
107
95
|
entities = [e for e in entities if e.label in whitelisted_labels]
|
108
96
|
entities = sorted(entities, key=lambda x: x.name)
|
@@ -110,29 +98,48 @@ def postprocess_ner(entities, whitelisted_labels=None, max_entities=None):
|
|
110
98
|
for _, group in itertools.groupby(entities, key=lambda x: x.name):
|
111
99
|
group = list(group)
|
112
100
|
best_entity = max(group, key=lambda x: x.score * x.count)
|
113
|
-
|
114
|
-
best_entity.name,
|
115
|
-
best_entity.label,
|
116
|
-
best_entity.score,
|
117
|
-
best_entity.context,
|
118
|
-
sum(
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
101
|
+
merged_data = {
|
102
|
+
"name": best_entity.name,
|
103
|
+
"label": best_entity.label,
|
104
|
+
"score": best_entity.score,
|
105
|
+
"context": best_entity.context,
|
106
|
+
"count": sum(e.count for e in group),
|
107
|
+
"start": best_entity.start,
|
108
|
+
}
|
109
|
+
all_fields = best_entity.model_fields.keys()
|
110
|
+
for field in all_fields:
|
111
|
+
if field in merged_data:
|
112
|
+
continue
|
113
|
+
values = [getattr(e, field, None) for e in group if getattr(e, field, None) is not None]
|
114
|
+
if not values:
|
115
|
+
continue
|
116
|
+
if isinstance(values[0], list):
|
117
|
+
merged_data[field] = list(set(itertools.chain.from_iterable(values or [])))
|
118
|
+
else:
|
119
|
+
merged_data[field] = getattr(best_entity, field, None)
|
120
|
+
final_entities.append(NERObject(**merged_data))
|
123
121
|
final_entities = sorted(final_entities, key=lambda x: x.score * x.count, reverse=True)
|
124
122
|
if max_entities and len(final_entities) > max_entities:
|
125
123
|
final_entities = final_entities[:max_entities]
|
126
124
|
return final_entities
|
127
125
|
|
128
126
|
|
129
|
-
def compute_ner(
|
127
|
+
def compute_ner(
|
128
|
+
language,
|
129
|
+
sentences,
|
130
|
+
spacy_model,
|
131
|
+
flair_model=None,
|
132
|
+
context_width=150,
|
133
|
+
with_scores=True,
|
134
|
+
with_comentions=True,
|
135
|
+
with_context=True,
|
136
|
+
):
|
130
137
|
sentence_starts = [0] + [len(s[0]) + 1 for s in sentences]
|
131
138
|
del sentence_starts[-1]
|
132
139
|
sentence_starts = list(np.cumsum(sentence_starts))
|
133
140
|
text = "\n".join([s[0] for s in sentences])
|
134
141
|
min_score = 1.0
|
135
|
-
entities = []
|
142
|
+
entities: list[NERObject] = []
|
136
143
|
|
137
144
|
# FLAIR model (if not fast)
|
138
145
|
if flair_model:
|
@@ -140,88 +147,115 @@ def compute_ner(language, sentences, spacy_model, flair_model=None, context_widt
|
|
140
147
|
flair_model.predict(input)
|
141
148
|
output = [e for sentence in input for e in sentence.get_spans("ner")]
|
142
149
|
flair_entities = [
|
143
|
-
(
|
144
|
-
entity.text,
|
145
|
-
BASE_TO_ONTONOTES_LABELMAP.get(
|
150
|
+
NERObject(
|
151
|
+
name=entity.text,
|
152
|
+
label=BASE_TO_ONTONOTES_LABELMAP.get(
|
146
153
|
entity.annotation_layers["ner"][0].value,
|
147
154
|
entity.annotation_layers["ner"][0].value,
|
148
155
|
),
|
149
|
-
entity.score,
|
150
|
-
sentence_starts[input.index(entity[0].sentence)] + entity[0].start_position,
|
156
|
+
score=entity.score,
|
157
|
+
start=sentence_starts[input.index(entity[0].sentence)] + entity[0].start_position,
|
158
|
+
count=1,
|
151
159
|
)
|
152
160
|
for entity in output
|
153
161
|
]
|
154
|
-
min_score = min(min_score
|
162
|
+
min_score = min([min_score] + [e.score for e in flair_entities])
|
155
163
|
entities += flair_entities
|
156
164
|
del flair_entities
|
157
165
|
|
166
|
+
print("Checking REGEXES")
|
158
167
|
# REGEX model
|
159
168
|
for label, regexes in REGEX_NER_MODELS.items():
|
160
169
|
if not isinstance(regexes, list):
|
161
170
|
regexes = [regexes]
|
162
171
|
for regex in regexes:
|
163
|
-
print(regex)
|
164
172
|
regex_entities = [
|
165
|
-
(
|
173
|
+
NERObject(
|
174
|
+
name=match.group(),
|
175
|
+
label=label,
|
176
|
+
score=min_score - 0.5,
|
177
|
+
count=1,
|
178
|
+
start=match.start(),
|
179
|
+
)
|
180
|
+
for match in re.finditer(regex, text)
|
166
181
|
]
|
167
|
-
print(regex_entities)
|
168
182
|
entities += regex_entities
|
169
|
-
|
183
|
+
min_score = min([min_score] + [e.score for e in regex_entities])
|
184
|
+
|
170
185
|
# SPACY model
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
186
|
+
print("CHECKING SPACY")
|
187
|
+
chunks = []
|
188
|
+
chunk_start_offsets = []
|
189
|
+
current_chunk = []
|
190
|
+
current_length = 0
|
191
|
+
offset = 0
|
192
|
+
for sentence, _ in sentences:
|
193
|
+
sentence_len = len(sentence) + 1
|
194
|
+
if sentence_len > spacy_model.max_length:
|
195
|
+
truncated = sentence[: spacy_model.max_length - 1]
|
196
|
+
chunks.append(truncated)
|
197
|
+
chunk_start_offsets.append(offset)
|
198
|
+
offset += sentence_len
|
199
|
+
continue
|
200
|
+
if current_length + sentence_len > spacy_model.max_length:
|
201
|
+
chunks.append("\n".join(current_chunk))
|
202
|
+
chunk_start_offsets.append(offset - current_length)
|
203
|
+
current_chunk = []
|
204
|
+
current_length = 0
|
205
|
+
current_chunk.append(sentence)
|
206
|
+
current_length += sentence_len
|
207
|
+
offset += sentence_len
|
208
|
+
if current_chunk:
|
209
|
+
chunks.append("\n".join(current_chunk))
|
210
|
+
chunk_start_offsets.append(offset - current_length)
|
211
|
+
for i, chunk in enumerate(chunks):
|
212
|
+
doc = spacy_model(chunk)
|
213
|
+
chunk_offset = chunk_start_offsets[i]
|
214
|
+
for entity in doc.ents:
|
215
|
+
entities.append(
|
216
|
+
NERObject(
|
217
|
+
name=entity.text,
|
218
|
+
label=BASE_TO_ONTONOTES_LABELMAP.get(entity.label_, entity.label_),
|
219
|
+
score=min_score - 0.5,
|
220
|
+
start=chunk_offset + entity.start_char,
|
221
|
+
count=1,
|
222
|
+
)
|
223
|
+
)
|
182
224
|
|
183
225
|
# Reformatting for consistency
|
184
|
-
if entities:
|
185
|
-
|
186
|
-
|
226
|
+
if not entities:
|
227
|
+
return []
|
228
|
+
if with_scores:
|
229
|
+
min_entity_score = min([e.score for e in entities])
|
230
|
+
max_entity_score = max([e.score for e in entities])
|
187
231
|
entity_score_range = 1 if min_entity_score == max_entity_score else (max_entity_score - min_entity_score)
|
188
|
-
|
189
|
-
|
232
|
+
for e in entities:
|
233
|
+
e.score = (e.score - min_entity_score) / entity_score_range
|
234
|
+
scores = list(np.searchsorted(sentence_starts, [e.start + 1 for e in entities]))
|
190
235
|
scores = [sentences[i - 1][1] for i in scores]
|
191
|
-
scores = [scores[i] +
|
236
|
+
scores = [scores[i] + 10 * entities[i].score for i in range(len(entities))]
|
237
|
+
for i in range(len(entities)):
|
238
|
+
entities[i].score = scores[i]
|
239
|
+
else:
|
192
240
|
for i in range(len(entities)):
|
193
|
-
entities[i] =
|
241
|
+
entities[i].score = 0.0
|
242
|
+
if with_comentions:
|
194
243
|
for i in range(len(entities)):
|
195
244
|
entity = entities[i]
|
196
|
-
count = 1
|
197
245
|
comentions = [
|
198
|
-
entities[j]
|
246
|
+
entities[j].name
|
199
247
|
for j in range(len(entities))
|
200
|
-
if j != i and abs(entities[j]
|
248
|
+
if j != i and abs(entities[j].start - entity.start) < math.ceil(context_width / 2)
|
201
249
|
]
|
202
|
-
entities[i] =
|
203
|
-
|
204
|
-
entity[1],
|
205
|
-
entity[2],
|
206
|
-
entity[3],
|
207
|
-
count,
|
208
|
-
comentions,
|
209
|
-
)
|
250
|
+
entities[i].comentions = comentions
|
251
|
+
if with_context:
|
210
252
|
for i in range(len(entities)):
|
211
253
|
entity = entities[i]
|
212
|
-
if entity
|
213
|
-
left = max(0, entity
|
214
|
-
right = min(len(text), entity
|
254
|
+
if entity.start >= 0 and entity.start < len(text):
|
255
|
+
left = max(0, entity.start - math.floor(context_width / 2))
|
256
|
+
right = min(len(text), entity.start + math.ceil(context_width / 2))
|
215
257
|
context = ("[..]" if left > 0 else "") + text[left:right] + ("[..]" if right < len(text) else "")
|
216
|
-
entities[i] =
|
217
|
-
entity[0],
|
218
|
-
entity[1],
|
219
|
-
entity[2],
|
220
|
-
context,
|
221
|
-
entity[4],
|
222
|
-
entity[5],
|
223
|
-
)
|
224
|
-
entities = [NERObject(*entities[i]) for i in range(len(entities))]
|
258
|
+
entities[i].context = context
|
225
259
|
return entities
|
226
260
|
|
227
261
|
|
@@ -265,24 +299,36 @@ def get_extractive_summary(text, language, max_chars, fast=False, with_scores=Fa
|
|
265
299
|
return summary
|
266
300
|
|
267
301
|
|
268
|
-
def ner_pipe(
|
302
|
+
def ner_pipe(
|
303
|
+
text,
|
304
|
+
language,
|
305
|
+
spacy_model,
|
306
|
+
flair_model=None,
|
307
|
+
fast=False,
|
308
|
+
compression_ratio="auto",
|
309
|
+
with_scores=True,
|
310
|
+
with_comentions=True,
|
311
|
+
with_context=True,
|
312
|
+
):
|
269
313
|
if compression_ratio == "auto":
|
270
314
|
compression_ratio = max(1.0, len(text) / 15000) if fast else 1.0
|
271
315
|
sentences = get_extractive_summary(text, language, int(len(text) / compression_ratio), fast=fast, with_scores=True)
|
272
|
-
ner = compute_ner(language, sentences, spacy_model, flair_model)
|
316
|
+
ner = compute_ner(language, sentences, spacy_model, flair_model, 150, with_scores, with_comentions, with_context)
|
273
317
|
return ner
|
274
318
|
|
275
319
|
|
276
|
-
def get_ner_handler(language, fast=False
|
320
|
+
def get_ner_handler(language, fast=False):
|
277
321
|
try:
|
278
322
|
get_nltk_tokenizer(language) # raises a LookupError if the language is not valid
|
279
323
|
except LookupError:
|
280
324
|
language = "english"
|
281
|
-
spacy_model = SPACY_NER_MODELS.get(language, SPACY_NER_MODELS[
|
282
|
-
flair_model = None if fast else FLAIR_NER_MODELS.get(language, FLAIR_NER_MODELS[
|
283
|
-
return lambda text
|
325
|
+
spacy_model = SPACY_NER_MODELS.get(language, SPACY_NER_MODELS["english"])()
|
326
|
+
flair_model = None if fast else FLAIR_NER_MODELS.get(language, FLAIR_NER_MODELS["english"])()
|
327
|
+
return lambda text, compression_ratio="auto", with_scores=True, with_comentions=True, with_context=True: ner_pipe(
|
328
|
+
text, language, spacy_model, flair_model, fast, compression_ratio, with_scores, with_comentions, with_context
|
329
|
+
)
|
284
330
|
|
285
331
|
|
286
332
|
@st.cache_resource
|
287
333
|
def get_cached_ner_handler(language, fast):
|
288
|
-
return get_ner_handler(language, fast)
|
334
|
+
return get_ner_handler(language, fast)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|