streamlit-octostar-utils 0.2.10__py3-none-any.whl → 2.11a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
streamlit_octostar_utils/api_crafter/fastapi.py

@@ -330,7 +330,7 @@ class DefaultErrorRoute:
         if len(message) > MAX_ERROR_MESSAGE_BYTES:
             message = message[-MAX_ERROR_MESSAGE_BYTES:]
         try:
-            tcbk = "\n".join(traceback.format_exception(exc))
+            tcbk = traceback.format_exception(exc)
             if len(tcbk) > MAX_ERROR_TRACEBACK_BYTES:
                 tcbk = tcbk[-MAX_ERROR_TRACEBACK_BYTES:]
         except:
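Note what this change does: traceback.format_exception(exc) (the one-argument form accepted since Python 3.10) returns a list of strings rather than a single joined string, so the len(tcbk) guard that follows now counts list entries instead of characters, and the slice keeps the last entries rather than the last bytes. A minimal sketch of the difference, with an assumed value for the constant:

    import traceback

    MAX_ERROR_TRACEBACK_BYTES = 4096  # hypothetical value, standing in for the real constant

    try:
        raise ValueError("boom")
    except ValueError as exc:
        as_list = traceback.format_exception(exc)  # list[str], Python 3.10+ call form
        as_text = "\n".join(as_list)               # the 0.2.10 behaviour: one string
        print(len(as_list), len(as_text))          # e.g. 3 vs ~130 -- very different guards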
streamlit_octostar_utils/nlp/ner.py

@@ -1,36 +1,142 @@
-import re
+import itertools
+import math
+from typing import Optional, List, Tuple
+from pydantic import BaseModel, ConfigDict, Field
+from collections import Counter
+
+from presidio_analyzer import AnalyzerEngine, BatchAnalyzerEngine, RecognizerRegistry, AnalysisExplanation, \
+    EntityRecognizer, RecognizerResult
+from presidio_analyzer.nlp_engine import NlpArtifacts, NlpEngineProvider
 import streamlit as st
-from spacy_download import load_spacy
+import nltk
+import pandas as pd
 from flair.data import Sentence
 from flair.models import SequenceTagger
+
 from sumy.parsers.plaintext import PlaintextParser
 from sumy.nlp.tokenizers import Tokenizer
 from sumy.nlp.stemmers import Stemmer
 from sumy.summarizers.lsa import LsaSummarizer
 from sumy.summarizers.luhn import LuhnSummarizer
 from sumy.utils import get_stop_words
-import itertools
-import numpy as np
-import math
-import nltk
-from typing import Optional, List
-from pydantic import BaseModel, ConfigDict, Field

-SPACY_NER_MODELS = {
-    "english": lambda: load_spacy(
-        "en_core_web_sm",
-        disable=["tok2vec", "tagger", "parser", "attribute_ruler", "lemmatizer"],
-    )
-}
-FLAIR_NER_MODELS = {"english": lambda: SequenceTagger.load("flair/ner-english")}
-REGEX_NER_MODELS = {
-    "IP_ADDRESS": [
-        r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?::(?:[0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5]))?\b",
-    ],
-    "PHONE": r"(?:(?:\+(?:\d{1,3}[ .-]?)?(?:\(\d{1,3}\)[ .-]?)?)(?:\d{2,5}[ .-]?){1,3}|\d{2,5}[ .-]\d{2,5}(?:[ .-]\d{2,5}){0,2})\b",
-    "EMAIL": r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)+\b",
-    "URL": r"\b(?:(?:https?|ftp|sftp|ftps|ssh|file|mailto|git|onion|ipfs|ipns):\/\/|www\.)(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z]{2,}(?::\d+)?(?:\/(?:[-a-z0-9\/_.,~%+:@]|(?:%[0-9a-f]{2}))*)?(?:\?(?:[-a-z0-9\/_.,~%+:@=&]|(?:%[0-9a-f]{2}))*)?(?:#(?:[-a-z0-9\/_.,~%+:@=&]|(?:%[0-9a-f]{2}))*)?|(?:https?:\/\/)?[a-z2-7]{16,56}\.onion(?:\/(?:[-a-z0-9\/_.,~%+:@]|(?:%[0-9a-f]{2}))*)?(?:\?(?:[-a-z0-9\/_.,~%+:@=&]|(?:%[0-9a-f]{2}))*)?(?:#(?:[-a-z0-9\/_.,~%+:@=&]|(?:%[0-9a-f]{2}))*)\b",
-}
+
+class FlairRecognizer(EntityRecognizer):
+    ENTITIES = [
+        "LOCATION",
+        "PERSON",
+        "ORGANIZATION",
+    ]
+
+    DEFAULT_EXPLANATION = "Identified as {} by Flair's Named Entity Recognition"
+
+    CHECK_LABEL_GROUPS = [
+        ({"LOCATION"}, {"LOC", "LOCATION"}),
+        ({"PERSON"}, {"PER", "PERSON"}),
+        ({"ORGANIZATION"}, {"ORG"}),
+    ]
+
+    MODEL_LANGUAGES = {
+        "en": "flair/ner-english-large",
+        "es": "flair/ner-spanish-large",
+        "de": "flair/ner-german-large",
+        "nl": "flair/ner-dutch-large",
+        "multi": "flair/ner-multi",
+        "multi-fast": "flair/ner-multi-fast",
+    }
+
+    PRESIDIO_EQUIVALENCES = {
+        "PER": "PERSON",
+        "LOC": "LOCATION",
+        "ORG": "ORGANIZATION",
+    }
+
+    def __init__(
+        self,
+        model: SequenceTagger = None,
+        supported_language: str = "en",
+        supported_entities: Optional[List[str]] = None,
+        check_label_groups: Optional[Tuple[set, set]] = None,
+    ):
+        self.check_label_groups = (
+            check_label_groups if check_label_groups else self.CHECK_LABEL_GROUPS
+        )
+
+        supported_entities = supported_entities if supported_entities else self.ENTITIES
+        self.model = model
+
+        super().__init__(
+            supported_entities=supported_entities,
+            supported_language=supported_language,
+            name="Flair Analytics",
+        )
+
+    def load(self) -> None:
+        pass
+
+    def get_supported_entities(self) -> List[str]:
+        return self.supported_entities
+
+    def analyze(self, text: str, entities: List[str], nlp_artifacts: NlpArtifacts = None) -> List[RecognizerResult]:
+        results = []
+
+        sentences = Sentence(text)
+        self.model.predict(sentences)
+
+        if not entities:
+            entities = self.supported_entities
+
+        for entity in entities:
+            if entity not in self.supported_entities:
+                continue
+
+            for ent in sentences.get_spans("ner"):
+                if not self.__check_label(
+                    entity, ent.labels[0].value, self.check_label_groups
+                ):
+                    continue
+                textual_explanation = self.DEFAULT_EXPLANATION.format(
+                    ent.labels[0].value
+                )
+                explanation = self.build_flair_explanation(
+                    round(ent.score, 2), textual_explanation
+                )
+                flair_result = self._convert_to_recognizer_result(ent, explanation)
+
+                results.append(flair_result)
+
+        return results
+
+    def build_flair_explanation(self, original_score: float, explanation: str) -> AnalysisExplanation:
+        explanation = AnalysisExplanation(
+            recognizer=self.__class__.__name__,
+            original_score=original_score,
+            textual_explanation=explanation,
+        )
+        return explanation
+
+    def _convert_to_recognizer_result(self, entity, explanation) -> RecognizerResult:
+        entity_type = self.PRESIDIO_EQUIVALENCES.get(entity.tag, entity.tag)
+        flair_score = round(entity.score, 2)
+
+        flair_results = RecognizerResult(
+            entity_type=entity_type,
+            start=entity.start_position,
+            end=entity.end_position,
+            score=flair_score,
+            analysis_explanation=explanation,
+        )
+
+        return flair_results
+
+    @staticmethod
+    def __check_label(
+        entity: str, label: str, check_label_groups: Tuple[set, set]
+    ) -> bool:
+        return any(
+            [entity in egrp and label in lgrp for egrp, lgrp in check_label_groups]
+        )
+

 BASE_TO_ONTONOTES_LABELMAP = {"PER": "PERSON"}
 BASE_ALLOWED_LABELS = ["PERSON", "ORG", "LOC", "NORP", "GPE", "PRODUCT", "DATE", "PHONE", "IP_ADDRESS", "EMAIL", "URL"]
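The FlairRecognizer above is a standard Presidio custom recognizer wrapping a Flair SequenceTagger. A hedged sketch of how it can be wired up on its own (the model name comes from MODEL_LANGUAGES; this mirrors what build_presidio_analyzer below does, and assumes Presidio's default spaCy NLP engine is installed, since AnalyzerEngine still needs one for tokenization):

    from flair.models import SequenceTagger
    from presidio_analyzer import AnalyzerEngine, RecognizerRegistry

    tagger = SequenceTagger.load("flair/ner-english-large")  # sizeable download on first use
    registry = RecognizerRegistry()
    registry.add_recognizer(FlairRecognizer(model=tagger, supported_language="en"))

    analyzer = AnalyzerEngine(registry=registry, supported_languages=["en"])
    for r in analyzer.analyze(text="Barack Obama visited Paris.", language="en"):
        print(r.entity_type, r.start, r.end, r.score)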
@@ -69,8 +175,18 @@ def _sumy__luhn_call(summarizer, document):


 def get_nltk_tokenizer(language: str) -> Tokenizer:
-    nltk.data.find("tokenizers/punkt")
-    return Tokenizer(language)
+    if language == "en":
+        nltk_lang = "english"
+    elif language == "it":
+        nltk_lang = "italian"
+    else:
+        nltk_lang = language
+
+    try:
+        nltk.data.find("tokenizers/punkt")
+    except LookupError:
+        nltk.download("punkt")
+    return Tokenizer(nltk_lang)


 class NERObject(BaseModel):
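The rewritten helper maps ISO codes to the language names NLTK and sumy expect and downloads the punkt data on first use instead of failing outright; Tokenizer(nltk_lang) can still raise LookupError for languages punkt does not cover, which get_ner_handler further down relies on to fall back to English. A sketch of the intended call pattern (assuming NLTK can write to its default data directory):

    tok = get_nltk_tokenizer("en")  # "en" -> "english"; fetches "punkt" if missing
    print(tok.to_sentences("First sentence. Second one."))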
@@ -121,137 +237,217 @@ def postprocess_ner(entities: list[NERObject], whitelisted_labels=None, max_enti
     return final_entities


-def compute_ner(
-    language,
-    sentences,
-    spacy_model,
-    flair_model=None,
-    context_width=150,
-    with_scores=True,
-    with_comentions=True,
-    with_context=True,
+def build_presidio_analyzer(language: str, engine_type: str = "spacy", model=None) -> AnalyzerEngine:
+    registry = RecognizerRegistry()
+
+    if engine_type == "flair":
+
+        flair_recognizer = FlairRecognizer(
+            model=model,
+            supported_language=language
+        )
+        registry.add_recognizer(flair_recognizer)
+
+        default_registry = RecognizerRegistry()
+        default_registry.load_predefined_recognizers()
+
+        flair_handled_entities = {"PERSON", "LOCATION", "ORGANIZATION"}
+
+        for recognizer in default_registry.recognizers:
+            recognizer_entities = set(recognizer.supported_entities) if hasattr(recognizer, 'supported_entities') else set()
+
+            if recognizer_entities and recognizer_entities.issubset(flair_handled_entities):
+                continue
+
+            registry.add_recognizer(recognizer)
+
+        return AnalyzerEngine(
+            registry=registry,
+            supported_languages=[language]
+        )
+
+    else:
+        registry.load_predefined_recognizers()
+
+        if model is None:
+            raise ValueError("SpaCy model name must be provided")
+
+        configuration = {
+            "nlp_engine_name": "spacy",
+            "models": [{"lang_code": language, "model_name": model}],
+        }
+
+        provider = NlpEngineProvider(nlp_configuration=configuration)
+        nlp_engine = provider.create_engine()
+
+        return AnalyzerEngine(
+            nlp_engine=nlp_engine,
+            registry=registry,
+            supported_languages=[language],
+        )
+
+
+def analyze_column_sample(column_values: pd.Series, analyzer: AnalyzerEngine, language: str,
+                          entities: Optional[List[str]], score_threshold: float) -> Optional[str]:
+    sample_values = column_values.dropna().head(50)
+
+    if sample_values.empty:
+        return None
+
+    entity_counter = Counter()
+
+    for value in sample_values:
+        text = str(value).strip()
+
+        if not text:
+            continue
+
+        results = analyzer.analyze(
+            text=text,
+            language=language,
+            entities=entities if entities else None
+        )
+
+        for result in results:
+            if result.score >= score_threshold:
+                entity_counter[result.entity_type] += 1
+
+    if not entity_counter:
+        return None
+
+    most_common = entity_counter.most_common(1)[0]
+    total_detections = sum(entity_counter.values())
+
+    if most_common[1] > total_detections * 0.5:
+        return most_common[0]
+
+    return most_common[0] if entity_counter else None
+
+
+def analyze_dataframe_optimized(df: pd.DataFrame, analyzer: AnalyzerEngine, language: str,
+                                entities: Optional[List[str]] = None, score_threshold: float = 0.5) -> List[NERObject]:
+    ner_objects = []
+
+    for column_name in df.columns:
+        entity_type = analyze_column_sample(
+            df[column_name],
+            analyzer,
+            language,
+            entities,
+            score_threshold
+        )
+
+        if entity_type:
+            for idx, value in df[column_name].dropna().items():
+                text = str(value).strip()
+
+                if text:
+                    ner_objects.append(NERObject(
+                        name=text[:100],
+                        label=entity_type,
+                        score=0.9,
+                        start=0,
+                        count=1,
+                        context=text[:100]
+                    ))
+
+    return ner_objects
+
+
+def compute_ner_presidio(
+    text,
+    language,
+    analyzer,
+    entities=None,
+    score_threshold=0.5,
+    context_width=150,
+    with_comentions=True,
+    with_context=True,
+    batch_size=32,
+    n_process=4
 ):
-    sentence_starts = [0] + [len(s[0]) + 1 for s in sentences]
-    del sentence_starts[-1]
-    sentence_starts = list(np.cumsum(sentence_starts))
-    text = "\n".join([s[0] for s in sentences])
-    min_score = 1.0
-    entities: list[NERObject] = []
-
-    # FLAIR model (if not fast)
-    if flair_model:
-        input = [Sentence(sentence[0]) for sentence in sentences]
-        flair_model.predict(input)
-        output = [e for sentence in input for e in sentence.get_spans("ner")]
-        flair_entities = [
-            NERObject(
-                name=entity.text,
-                label=BASE_TO_ONTONOTES_LABELMAP.get(
-                    entity.annotation_layers["ner"][0].value,
-                    entity.annotation_layers["ner"][0].value,
-                ),
-                score=entity.score,
-                start=sentence_starts[input.index(entity[0].sentence)] + entity[0].start_position,
+    if isinstance(text, pd.DataFrame):
+        if len(text) >= 100:
+            return analyze_dataframe_optimized(text, analyzer, language, entities, score_threshold)
+
+        else:
+            texts = []
+
+            for col in text.columns:
+                for idx, value in text[col].dropna().items():
+                    text_value = str(value).strip()
+
+                    if text_value:
+                        texts.append(text_value)
+
+            text = "\n".join(texts)
+
+    elif isinstance(text, list):
+        batch_analyzer = BatchAnalyzerEngine(analyzer_engine=analyzer)
+
+        results_generator = batch_analyzer.analyze_iterator(
+            texts=text,
+            language=language,
+            batch_size=batch_size,
+            n_process=n_process,
+            entities=entities if entities else None,
+        )
+
+        all_results = list(results_generator)
+        ner_objects = []
+
+        for text_item, results in zip(text, all_results):
+            for result in results:
+                if result.score >= score_threshold:
+                    context_start = max(0, result.start - 30)
+                    context_end = min(len(text_item), result.end + 30)
+                    context = text_item[context_start:context_end] if with_context else None
+
+                    ner_objects.append(NERObject(
+                        name=text_item[result.start:result.end],
+                        label=result.entity_type,
+                        score=float(result.score),
+                        start=int(result.start),
+                        count=1,
+                        context=context
+                    ))
+
+        return ner_objects
+
+    results = analyzer.analyze(
+        text=text,
+        language=language,
+        entities=entities if entities else None
+    )
+
+    ner_objects = []
+
+    for result in results:
+        if result.score >= score_threshold:
+            context_start = max(0, result.start - math.floor(context_width / 2))
+            context_end = min(len(text), result.end + math.ceil(context_width / 2))
+            context = text[context_start:context_end] if with_context else None
+
+            ner_objects.append(NERObject(
+                name=text[result.start:result.end],
+                label=result.entity_type,
+                score=float(result.score),
+                start=int(result.start),
                 count=1,
-            )
-            for entity in output
-        ]
-        min_score = min([min_score] + [e.score for e in flair_entities])
-        entities += flair_entities
-        del flair_entities
-
-    # REGEX model
-    for label, regexes in REGEX_NER_MODELS.items():
-        if not isinstance(regexes, list):
-            regexes = [regexes]
-        for regex in regexes:
-            regex_entities = [
-                NERObject(
-                    name=match.group(),
-                    label=label,
-                    score=min_score - 0.5,
-                    count=1,
-                    start=match.start(),
-                )
-                for match in re.finditer(regex, text)
-            ]
-            entities += regex_entities
-            min_score = min([min_score] + [e.score for e in regex_entities])
-
-    # SPACY model
-    chunks = []
-    chunk_start_offsets = []
-    current_chunk = []
-    current_length = 0
-    offset = 0
-    for sentence, _ in sentences:
-        sentence_len = len(sentence) + 1
-        if sentence_len > spacy_model.max_length:
-            truncated = sentence[: spacy_model.max_length - 1]
-            chunks.append(truncated)
-            chunk_start_offsets.append(offset)
-            offset += sentence_len
-            continue
-        if current_length + sentence_len > spacy_model.max_length:
-            chunks.append("\n".join(current_chunk))
-            chunk_start_offsets.append(offset - current_length)
-            current_chunk = []
-            current_length = 0
-        current_chunk.append(sentence)
-        current_length += sentence_len
-        offset += sentence_len
-    if current_chunk:
-        chunks.append("\n".join(current_chunk))
-        chunk_start_offsets.append(offset - current_length)
-    for i, chunk in enumerate(chunks):
-        doc = spacy_model(chunk)
-        chunk_offset = chunk_start_offsets[i]
-        for entity in doc.ents:
-            entities.append(
-                NERObject(
-                    name=entity.text,
-                    label=BASE_TO_ONTONOTES_LABELMAP.get(entity.label_, entity.label_),
-                    score=min_score - 0.5,
-                    start=chunk_offset + entity.start_char,
-                    count=1,
-                )
-            )
-
-    # Reformatting for consistency
-    if not entities:
-        return []
-    if with_scores:
-        min_entity_score = min([e.score for e in entities])
-        max_entity_score = max([e.score for e in entities])
-        entity_score_range = 1 if min_entity_score == max_entity_score else (max_entity_score - min_entity_score)
-        for e in entities:
-            e.score = (e.score - min_entity_score) / entity_score_range
-        scores = list(np.searchsorted(sentence_starts, [e.start + 1 for e in entities]))
-        scores = [sentences[i - 1][1] for i in scores]
-        scores = [scores[i] + 10 * entities[i].score for i in range(len(entities))]
-        for i in range(len(entities)):
-            entities[i].score = scores[i]
-    else:
-        for i in range(len(entities)):
-            entities[i].score = 0.0
+                context=context
+            ))
+
     if with_comentions:
-        for i in range(len(entities)):
-            entity = entities[i]
+        for i in range(len(ner_objects)):
+            entity = ner_objects[i]
             comentions = [
-                entities[j].name
-                for j in range(len(entities))
-                if j != i and abs(entities[j].start - entity.start) < math.ceil(context_width / 2)
+                ner_objects[j].name
+                for j in range(len(ner_objects))
+                if j != i and abs(ner_objects[j].start - entity.start) < math.ceil(context_width / 2)
             ]
-            entities[i].comentions = comentions
-    if with_context:
-        for i in range(len(entities)):
-            entity = entities[i]
-            if entity.start >= 0 and entity.start < len(text):
-                left = max(0, entity.start - math.floor(context_width / 2))
-                right = min(len(text), entity.start + math.ceil(context_width / 2))
-                context = ("[..]" if left > 0 else "") + text[left:right] + ("[..]" if right < len(text) else "")
-                entities[i].context = context
-    return entities
+            ner_objects[i].comentions = comentions
+
+    return ner_objects


 def get_extractive_summary(text, language, max_chars, fast=False, with_scores=False):
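compute_ner_presidio dispatches on the input type: a DataFrame with at least 100 rows goes through the per-column sampler, a smaller DataFrame is flattened into one text, a list of strings is run through Presidio's BatchAnalyzerEngine, and a plain string through a single analyzer.analyze call. A hedged sketch of the entry points, assuming en_core_web_sm is installed:

    import pandas as pd

    analyzer = build_presidio_analyzer("en", engine_type="spacy", model="en_core_web_sm")

    single = compute_ner_presidio("Alice moved to Berlin.", "en", analyzer)   # plain string
    batch = compute_ner_presidio(["Alice moved to Berlin.",
                                  "Bob works at ACME."], "en", analyzer)      # list -> batch path
    frame = compute_ner_presidio(pd.DataFrame({"city": ["Berlin", "Madrid"]}),
                                 "en", analyzer)                              # small frame -> flattened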
@@ -295,35 +491,94 @@ def get_extractive_summary(text, language, max_chars, fast=False, with_scores=Fa


 def ner_pipe(
-    text,
-    language,
-    spacy_model,
-    flair_model=None,
-    fast=False,
-    compression_ratio="auto",
-    with_scores=True,
-    with_comentions=True,
-    with_context=True,
+    text,
+    language,
+    model,
+    engine_type="spacy",
+    fast=False,
+    compression_ratio="auto",
+    with_comentions=True,
+    with_context=True,
+    entities=None,
+    score_threshold=0.5,
+    batch_size=32,
+    n_process=4
 ):
-    if compression_ratio == "auto":
-        compression_ratio = max(1.0, len(text) / 15000) if fast else 1.0
-    sentences = get_extractive_summary(text, language, int(len(text) / compression_ratio), fast=fast, with_scores=True)
-    ner = compute_ner(language, sentences, spacy_model, flair_model, 150, with_scores, with_comentions, with_context)
+    analyzer = build_presidio_analyzer(
+        language=language,
+        engine_type=engine_type,
+        model=model,
+    )
+
+    if isinstance(text, pd.DataFrame):
+        ner = compute_ner_presidio(
+            text,
+            language,
+            analyzer,
+            entities,
+            score_threshold,
+            150,
+            with_comentions,
+            with_context,
+            batch_size,
+            n_process
+        )
+    else:
+        if compression_ratio == "auto":
+            compression_ratio = max(1.0, len(text) / 15000) if fast else 1.0
+
+        if compression_ratio > 1.0:
+            sentences = get_extractive_summary(text, language, int(len(text) / compression_ratio), fast=fast,
+                                               with_scores=True)
+            text = " ".join([s[0] for s in sentences])
+
+        ner = compute_ner_presidio(
+            text,
+            language,
+            analyzer,
+            entities,
+            score_threshold,
+            150,
+            with_comentions,
+            with_context,
+            batch_size,
+            n_process
+        )
+
     return ner


-def get_ner_handler(language, fast=False):
+def get_ner_handler(
+    language,
+    model,
+    engine_type="spacy",
+    fast=False,
+    entities=None,
+    score_threshold=0.5,
+    batch_size=32,
+    n_process=4
+):
     try:
-        get_nltk_tokenizer(language)  # raises a LookupError if the language is not valid
+        get_nltk_tokenizer(language)
     except LookupError:
-        language = "english"
-    spacy_model = SPACY_NER_MODELS.get(language, SPACY_NER_MODELS["english"])()
-    flair_model = None if fast else FLAIR_NER_MODELS.get(language, FLAIR_NER_MODELS["english"])()
-    return lambda text, compression_ratio="auto", with_scores=True, with_comentions=True, with_context=True: ner_pipe(
-        text, language, spacy_model, flair_model, fast, compression_ratio, with_scores, with_comentions, with_context
+        language = "en"
+
+    return lambda text, compression_ratio="auto", with_comentions=True, with_context=True: ner_pipe(
+        text,
+        language,
+        model,
+        engine_type,
+        fast,
+        compression_ratio,
+        with_comentions,
+        with_context,
+        entities,
+        score_threshold,
+        batch_size,
+        n_process
     )


 @st.cache_resource
-def get_cached_ner_handler(language, fast):
-    return get_ner_handler(language, fast)
+def get_cached_ner_handler(language, model):
+    return get_ner_handler(language, model)
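Note the changed signatures: callers that previously used get_ner_handler(language, fast=...) must now pass a model as the second argument, and get_cached_ner_handler(language, model) no longer forwards a fast flag. A usage sketch under the same assumptions as above:

    handler = get_ner_handler("en", "en_core_web_sm")  # spaCy engine by default; pass engine_type="flair" for Flair
    for e in handler("Dr. Smith flew from Rome to Madrid."):
        print(e.name, e.label, e.score, e.context)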
streamlit_octostar_utils-0.2.10.dist-info/METADATA → streamlit_octostar_utils-2.11a1.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: streamlit-octostar-utils
-Version: 0.2.10
+Version: 2.11a1
 Summary:
 License: MIT
 License-File: LICENSE
streamlit_octostar_utils-0.2.10.dist-info/RECORD → streamlit_octostar_utils-2.11a1.dist-info/RECORD

@@ -1,7 +1,7 @@
 streamlit_octostar_utils/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 streamlit_octostar_utils/api_crafter/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 streamlit_octostar_utils/api_crafter/celery.py,sha256=BXOTGN9egdD75qf-PkccLGAoniilB9PZ_NRchFIjWdw,30051
-streamlit_octostar_utils/api_crafter/fastapi.py,sha256=RKQrStPzG1I1pxsPJvGs_DRrnjlMJbVmu9ObMF2LgZ0,14368
+streamlit_octostar_utils/api_crafter/fastapi.py,sha256=2bktT5Mwjs9XixWcOqUKMoLM_cgKl-cqZDUa2Imf4xA,14357
 streamlit_octostar_utils/api_crafter/nifi.py,sha256=yFs1HXpSVfWpOC1aJnNahjPofGzZ8fpuqvChloqM4rQ,45541
 streamlit_octostar_utils/api_crafter/parser/__init__.py,sha256=YeYWF6sdQiCFV_RKNW2t9Vs6KJExE2pbXxWTe_DOayY,107
 streamlit_octostar_utils/api_crafter/parser/combine_fields.py,sha256=ddc44xkajw8MU0peAX_263DL7rPXbTKbHUjpOhRgvyU,8790

@@ -21,7 +21,7 @@ streamlit_octostar_utils/core/threading/key_queue.py,sha256=7CJpj0gvZMQd8eC5wKQi
 streamlit_octostar_utils/core/timestamp.py,sha256=a3s4xfm1nctLzYsHOJxqoWIDTdbNY_yN1OByl8ahLc8,383
 streamlit_octostar_utils/nlp/__init__.py,sha256=BtlYDZK_xaEbc7Ju_7MznXbCVPZcdLn26xwR9qf_UhM,336
 streamlit_octostar_utils/nlp/language.py,sha256=BBBT8wtwWtVrCin5fNLMqGg5WdgHVotFkIvouk2qKh0,561
-streamlit_octostar_utils/nlp/ner.py,sha256=saE7A251JcAr6bFDGzRuSfXeqqRh5xbWRhgWbiKGeDM,13258
+streamlit_octostar_utils/nlp/ner.py,sha256=ZKYVG33uoCupr-WmberQ0856cC1Fu_W5Da2NdeYtlBw,18561
 streamlit_octostar_utils/octostar/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 streamlit_octostar_utils/octostar/client.py,sha256=NUvHe9asd65g4-hJ4CuUvUns-9dNWes1XZRJlO9eAAc,1690
 streamlit_octostar_utils/octostar/context.py,sha256=TpucK48EbeVy4vDqKd9UULEtr1JOY-_4nBs-rXZzESw,212

@@ -36,7 +36,7 @@ streamlit_octostar_utils/threading/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzp
 streamlit_octostar_utils/threading/async_task_manager.py,sha256=q7N6YZwUvIYMzkSHmsJNheNVCv93c03H6Hyg9uH8pvk,4747
 streamlit_octostar_utils/threading/session_callback_manager.py,sha256=LvZVP4g6tvKtYmI13f2j1sX_7hm61Groqp5xJine9_k,3973
 streamlit_octostar_utils/threading/session_state_hot_swapper.py,sha256=6eeCQI6A42hp4DmW2NQw2rbeR-k9N8DhfBKQdN_fbLU,811
-streamlit_octostar_utils-0.2.10.dist-info/METADATA,sha256=YKHSxwF_9RwZOr2uzqwhjZA_Q9LWhsMl-GEvanSL9mE,2330
-streamlit_octostar_utils-0.2.10.dist-info/WHEEL,sha256=M5asmiAlL6HEcOq52Yi5mmk9KmTVjY2RDPtO4p9DMrc,88
-streamlit_octostar_utils-0.2.10.dist-info/licenses/LICENSE,sha256=dkwVPyV03fPHHtERnF6RnvRXcll__tud9gWca1RcgnQ,1073
-streamlit_octostar_utils-0.2.10.dist-info/RECORD,,
+streamlit_octostar_utils-2.11a1.dist-info/METADATA,sha256=HsaC9ySXFVacqX0l_i255QiqYKscq8b_0Edyp960xho,2330
+streamlit_octostar_utils-2.11a1.dist-info/WHEEL,sha256=M5asmiAlL6HEcOq52Yi5mmk9KmTVjY2RDPtO4p9DMrc,88
+streamlit_octostar_utils-2.11a1.dist-info/licenses/LICENSE,sha256=dkwVPyV03fPHHtERnF6RnvRXcll__tud9gWca1RcgnQ,1073
+streamlit_octostar_utils-2.11a1.dist-info/RECORD,,