streamlit-octostar-utils 0.2.9__py3-none-any.whl → 2.11a1__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
@@ -1,36 +1,142 @@
- import re
+ import itertools
+ import math
+ from typing import Optional, List, Tuple
+ from pydantic import BaseModel, ConfigDict, Field
+ from collections import Counter
+
+ from presidio_analyzer import AnalyzerEngine, BatchAnalyzerEngine, RecognizerRegistry, AnalysisExplanation, \
+     EntityRecognizer, RecognizerResult
+ from presidio_analyzer.nlp_engine import NlpArtifacts, NlpEngineProvider
  import streamlit as st
- from spacy_download import load_spacy
+ import nltk
+ import pandas as pd
  from flair.data import Sentence
  from flair.models import SequenceTagger
+
  from sumy.parsers.plaintext import PlaintextParser
  from sumy.nlp.tokenizers import Tokenizer
  from sumy.nlp.stemmers import Stemmer
  from sumy.summarizers.lsa import LsaSummarizer
  from sumy.summarizers.luhn import LuhnSummarizer
  from sumy.utils import get_stop_words
- import itertools
- import numpy as np
- import math
- import nltk
- from typing import Optional, List
- from pydantic import BaseModel, ConfigDict, Field

- SPACY_NER_MODELS = {
-     "english": lambda: load_spacy(
-         "en_core_web_sm",
-         disable=["tok2vec", "tagger", "parser", "attribute_ruler", "lemmatizer"],
-     )
- }
- FLAIR_NER_MODELS = {"english": lambda: SequenceTagger.load("flair/ner-english")}
- REGEX_NER_MODELS = {
-     "IP_ADDRESS": [
-         r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?::(?:[0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5]))?\b",
-     ],
-     "PHONE": r"(?:(?:\+(?:\d{1,3}[ .-]?)?(?:\(\d{1,3}\)[ .-]?)?)(?:\d{2,5}[ .-]?){1,3}|\d{2,5}[ .-]\d{2,5}(?:[ .-]\d{2,5}){0,2})\b",
-     "EMAIL": r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)+\b",
-     "URL": r"\b(?:(?:https?|ftp|sftp|ftps|ssh|file|mailto|git|onion|ipfs|ipns):\/\/|www\.)(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z]{2,}(?::\d+)?(?:\/(?:[-a-z0-9\/_.,~%+:@]|(?:%[0-9a-f]{2}))*)?(?:\?(?:[-a-z0-9\/_.,~%+:@=&]|(?:%[0-9a-f]{2}))*)?(?:#(?:[-a-z0-9\/_.,~%+:@=&]|(?:%[0-9a-f]{2}))*)?|(?:https?:\/\/)?[a-z2-7]{16,56}\.onion(?:\/(?:[-a-z0-9\/_.,~%+:@]|(?:%[0-9a-f]{2}))*)?(?:\?(?:[-a-z0-9\/_.,~%+:@=&]|(?:%[0-9a-f]{2}))*)?(?:#(?:[-a-z0-9\/_.,~%+:@=&]|(?:%[0-9a-f]{2}))*)\b",
- }
+
+ class FlairRecognizer(EntityRecognizer):
+     ENTITIES = [
+         "LOCATION",
+         "PERSON",
+         "ORGANIZATION",
+     ]
+
+     DEFAULT_EXPLANATION = "Identified as {} by Flair's Named Entity Recognition"
+
+     CHECK_LABEL_GROUPS = [
+         ({"LOCATION"}, {"LOC", "LOCATION"}),
+         ({"PERSON"}, {"PER", "PERSON"}),
+         ({"ORGANIZATION"}, {"ORG"}),
+     ]
+
+     MODEL_LANGUAGES = {
+         "en": "flair/ner-english-large",
+         "es": "flair/ner-spanish-large",
+         "de": "flair/ner-german-large",
+         "nl": "flair/ner-dutch-large",
+         "multi": "flair/ner-multi",
+         "multi-fast": "flair/ner-multi-fast",
+     }
+
+     PRESIDIO_EQUIVALENCES = {
+         "PER": "PERSON",
+         "LOC": "LOCATION",
+         "ORG": "ORGANIZATION",
+     }
+
+     def __init__(
+         self,
+         model: SequenceTagger = None,
+         supported_language: str = "en",
+         supported_entities: Optional[List[str]] = None,
+         check_label_groups: Optional[Tuple[set, set]] = None,
+     ):
+         self.check_label_groups = (
+             check_label_groups if check_label_groups else self.CHECK_LABEL_GROUPS
+         )
+
+         supported_entities = supported_entities if supported_entities else self.ENTITIES
+         self.model = model
+
+         super().__init__(
+             supported_entities=supported_entities,
+             supported_language=supported_language,
+             name="Flair Analytics",
+         )
+
+     def load(self) -> None:
+         pass
+
+     def get_supported_entities(self) -> List[str]:
+         return self.supported_entities
+
+     def analyze(self, text: str, entities: List[str], nlp_artifacts: NlpArtifacts = None) -> List[RecognizerResult]:
+         results = []
+
+         sentences = Sentence(text)
+         self.model.predict(sentences)
+
+         if not entities:
+             entities = self.supported_entities
+
+         for entity in entities:
+             if entity not in self.supported_entities:
+                 continue
+
+             for ent in sentences.get_spans("ner"):
+                 if not self.__check_label(
+                     entity, ent.labels[0].value, self.check_label_groups
+                 ):
+                     continue
+                 textual_explanation = self.DEFAULT_EXPLANATION.format(
+                     ent.labels[0].value
+                 )
+                 explanation = self.build_flair_explanation(
+                     round(ent.score, 2), textual_explanation
+                 )
+                 flair_result = self._convert_to_recognizer_result(ent, explanation)
+
+                 results.append(flair_result)
+
+         return results
+
+     def build_flair_explanation(self, original_score: float, explanation: str) -> AnalysisExplanation:
+         explanation = AnalysisExplanation(
+             recognizer=self.__class__.__name__,
+             original_score=original_score,
+             textual_explanation=explanation,
+         )
+         return explanation
+
+     def _convert_to_recognizer_result(self, entity, explanation) -> RecognizerResult:
+         entity_type = self.PRESIDIO_EQUIVALENCES.get(entity.tag, entity.tag)
+         flair_score = round(entity.score, 2)
+
+         flair_results = RecognizerResult(
+             entity_type=entity_type,
+             start=entity.start_position,
+             end=entity.end_position,
+             score=flair_score,
+             analysis_explanation=explanation,
+         )
+
+         return flair_results
+
+     @staticmethod
+     def __check_label(
+         entity: str, label: str, check_label_groups: Tuple[set, set]
+     ) -> bool:
+         return any(
+             [entity in egrp and label in lgrp for egrp, lgrp in check_label_groups]
+         )
+

  BASE_TO_ONTONOTES_LABELMAP = {"PER": "PERSON"}
  BASE_ALLOWED_LABELS = ["PERSON", "ORG", "LOC", "NORP", "GPE", "PRODUCT", "DATE", "PHONE", "IP_ADDRESS", "EMAIL", "URL"]
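
Note (not part of the diff): a minimal, hedged sketch of how the FlairRecognizer added above could be exercised on its own, before the AnalyzerEngine wiring that appears later in this diff. It assumes the 2.11a1 package is installed and that the Flair weights (here "flair/ner-english-large", one of the MODEL_LANGUAGES entries) can be downloaded; the sample text and entity list are illustrative only.

    # Hedged usage sketch of the new FlairRecognizer; not part of the package diff.
    from flair.models import SequenceTagger
    from streamlit_octostar_utils.nlp.ner import FlairRecognizer

    tagger = SequenceTagger.load("flair/ner-english-large")          # weights downloaded on first use
    recognizer = FlairRecognizer(model=tagger, supported_language="en")
    results = recognizer.analyze("Ada Lovelace worked in London.", entities=["PERSON", "LOCATION"])
    for r in results:
        # RecognizerResult fields populated by _convert_to_recognizer_result above
        print(r.entity_type, r.start, r.end, r.score)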
@@ -69,11 +175,18 @@ def _sumy__luhn_call(summarizer, document):


  def get_nltk_tokenizer(language: str) -> Tokenizer:
+     if language == "en":
+         nltk_lang = "english"
+     elif language == "it":
+         nltk_lang = "italian"
+     else:
+         nltk_lang = language
+
      try:
          nltk.data.find("tokenizers/punkt")
      except LookupError:
          nltk.download("punkt")
-     return Tokenizer(language)
+     return Tokenizer(nltk_lang)


  class NERObject(BaseModel):
@@ -124,137 +237,217 @@ def postprocess_ner(entities: list[NERObject], whitelisted_labels=None, max_enti
      return final_entities


- def compute_ner(
-     language,
-     sentences,
-     spacy_model,
-     flair_model=None,
-     context_width=150,
-     with_scores=True,
-     with_comentions=True,
-     with_context=True,
+ def build_presidio_analyzer(language: str, engine_type: str = "spacy", model=None) -> AnalyzerEngine:
+     registry = RecognizerRegistry()
+
+     if engine_type == "flair":
+
+         flair_recognizer = FlairRecognizer(
+             model=model,
+             supported_language=language
+         )
+         registry.add_recognizer(flair_recognizer)
+
+         default_registry = RecognizerRegistry()
+         default_registry.load_predefined_recognizers()
+
+         flair_handled_entities = {"PERSON", "LOCATION", "ORGANIZATION"}
+
+         for recognizer in default_registry.recognizers:
+             recognizer_entities = set(recognizer.supported_entities) if hasattr(recognizer, 'supported_entities') else set()
+
+             if recognizer_entities and recognizer_entities.issubset(flair_handled_entities):
+                 continue
+
+             registry.add_recognizer(recognizer)
+
+         return AnalyzerEngine(
+             registry=registry,
+             supported_languages=[language]
+         )
+
+     else:
+         registry.load_predefined_recognizers()
+
+         if model is None:
+             raise ValueError("SpaCy model name must be provided")
+
+         configuration = {
+             "nlp_engine_name": "spacy",
+             "models": [{"lang_code": language, "model_name": model}],
+         }
+
+         provider = NlpEngineProvider(nlp_configuration=configuration)
+         nlp_engine = provider.create_engine()
+
+         return AnalyzerEngine(
+             nlp_engine=nlp_engine,
+             registry=registry,
+             supported_languages=[language],
+         )
+
+
+ def analyze_column_sample(column_values: pd.Series, analyzer: AnalyzerEngine, language: str,
+                           entities: Optional[List[str]], score_threshold: float) -> Optional[str]:
+     sample_values = column_values.dropna().head(50)
+
+     if sample_values.empty:
+         return None
+
+     entity_counter = Counter()
+
+     for value in sample_values:
+         text = str(value).strip()
+
+         if not text:
+             continue
+
+         results = analyzer.analyze(
+             text=text,
+             language=language,
+             entities=entities if entities else None
+         )
+
+         for result in results:
+             if result.score >= score_threshold:
+                 entity_counter[result.entity_type] += 1
+
+     if not entity_counter:
+         return None
+
+     most_common = entity_counter.most_common(1)[0]
+     total_detections = sum(entity_counter.values())
+
+     if most_common[1] > total_detections * 0.5:
+         return most_common[0]
+
+     return most_common[0] if entity_counter else None
+
+
+ def analyze_dataframe_optimized(df: pd.DataFrame, analyzer: AnalyzerEngine, language: str,
+                                 entities: Optional[List[str]] = None, score_threshold: float = 0.5) -> List[NERObject]:
+     ner_objects = []
+
+     for column_name in df.columns:
+         entity_type = analyze_column_sample(
+             df[column_name],
+             analyzer,
+             language,
+             entities,
+             score_threshold
+         )
+
+         if entity_type:
+             for idx, value in df[column_name].dropna().items():
+                 text = str(value).strip()
+
+                 if text:
+                     ner_objects.append(NERObject(
+                         name=text[:100],
+                         label=entity_type,
+                         score=0.9,
+                         start=0,
+                         count=1,
+                         context=text[:100]
+                     ))
+
+     return ner_objects
+
+
+ def compute_ner_presidio(
+     text,
+     language,
+     analyzer,
+     entities=None,
+     score_threshold=0.5,
+     context_width=150,
+     with_comentions=True,
+     with_context=True,
+     batch_size=32,
+     n_process=4
  ):
-     sentence_starts = [0] + [len(s[0]) + 1 for s in sentences]
-     del sentence_starts[-1]
-     sentence_starts = list(np.cumsum(sentence_starts))
-     text = "\n".join([s[0] for s in sentences])
-     min_score = 1.0
-     entities: list[NERObject] = []
-
-     # FLAIR model (if not fast)
-     if flair_model:
-         input = [Sentence(sentence[0]) for sentence in sentences]
-         flair_model.predict(input)
-         output = [e for sentence in input for e in sentence.get_spans("ner")]
-         flair_entities = [
-             NERObject(
-                 name=entity.text,
-                 label=BASE_TO_ONTONOTES_LABELMAP.get(
-                     entity.annotation_layers["ner"][0].value,
-                     entity.annotation_layers["ner"][0].value,
-                 ),
-                 score=entity.score,
-                 start=sentence_starts[input.index(entity[0].sentence)] + entity[0].start_position,
+     if isinstance(text, pd.DataFrame):
+         if len(text) >= 100:
+             return analyze_dataframe_optimized(text, analyzer, language, entities, score_threshold)
+
+         else:
+             texts = []
+
+             for col in text.columns:
+                 for idx, value in text[col].dropna().items():
+                     text_value = str(value).strip()
+
+                     if text_value:
+                         texts.append(text_value)
+
+             text = "\n".join(texts)
+
+     elif isinstance(text, list):
+         batch_analyzer = BatchAnalyzerEngine(analyzer_engine=analyzer)
+
+         results_generator = batch_analyzer.analyze_iterator(
+             texts=text,
+             language=language,
+             batch_size=batch_size,
+             n_process=n_process,
+             entities=entities if entities else None,
+         )
+
+         all_results = list(results_generator)
+         ner_objects = []
+
+         for text_item, results in zip(text, all_results):
+             for result in results:
+                 if result.score >= score_threshold:
+                     context_start = max(0, result.start - 30)
+                     context_end = min(len(text_item), result.end + 30)
+                     context = text_item[context_start:context_end] if with_context else None
+
+                     ner_objects.append(NERObject(
+                         name=text_item[result.start:result.end],
+                         label=result.entity_type,
+                         score=float(result.score),
+                         start=int(result.start),
+                         count=1,
+                         context=context
+                     ))
+
+         return ner_objects
+
+     results = analyzer.analyze(
+         text=text,
+         language=language,
+         entities=entities if entities else None
+     )
+
+     ner_objects = []
+
+     for result in results:
+         if result.score >= score_threshold:
+             context_start = max(0, result.start - math.floor(context_width / 2))
+             context_end = min(len(text), result.end + math.ceil(context_width / 2))
+             context = text[context_start:context_end] if with_context else None
+
+             ner_objects.append(NERObject(
+                 name=text[result.start:result.end],
+                 label=result.entity_type,
+                 score=float(result.score),
+                 start=int(result.start),
                  count=1,
-             )
-             for entity in output
-         ]
-         min_score = min([min_score] + [e.score for e in flair_entities])
-         entities += flair_entities
-         del flair_entities
-
-     # REGEX model
-     for label, regexes in REGEX_NER_MODELS.items():
-         if not isinstance(regexes, list):
-             regexes = [regexes]
-         for regex in regexes:
-             regex_entities = [
-                 NERObject(
-                     name=match.group(),
-                     label=label,
-                     score=min_score - 0.5,
-                     count=1,
-                     start=match.start(),
-                 )
-                 for match in re.finditer(regex, text)
-             ]
-             entities += regex_entities
-             min_score = min([min_score] + [e.score for e in regex_entities])
-
-     # SPACY model
-     chunks = []
-     chunk_start_offsets = []
-     current_chunk = []
-     current_length = 0
-     offset = 0
-     for sentence, _ in sentences:
-         sentence_len = len(sentence) + 1
-         if sentence_len > spacy_model.max_length:
-             truncated = sentence[: spacy_model.max_length - 1]
-             chunks.append(truncated)
-             chunk_start_offsets.append(offset)
-             offset += sentence_len
-             continue
-         if current_length + sentence_len > spacy_model.max_length:
-             chunks.append("\n".join(current_chunk))
-             chunk_start_offsets.append(offset - current_length)
-             current_chunk = []
-             current_length = 0
-         current_chunk.append(sentence)
-         current_length += sentence_len
-         offset += sentence_len
-     if current_chunk:
-         chunks.append("\n".join(current_chunk))
-         chunk_start_offsets.append(offset - current_length)
-     for i, chunk in enumerate(chunks):
-         doc = spacy_model(chunk)
-         chunk_offset = chunk_start_offsets[i]
-         for entity in doc.ents:
-             entities.append(
-                 NERObject(
-                     name=entity.text,
-                     label=BASE_TO_ONTONOTES_LABELMAP.get(entity.label_, entity.label_),
-                     score=min_score - 0.5,
-                     start=chunk_offset + entity.start_char,
-                     count=1,
-                 )
-             )
-
-     # Reformatting for consistency
-     if not entities:
-         return []
-     if with_scores:
-         min_entity_score = min([e.score for e in entities])
-         max_entity_score = max([e.score for e in entities])
-         entity_score_range = 1 if min_entity_score == max_entity_score else (max_entity_score - min_entity_score)
-         for e in entities:
-             e.score = (e.score - min_entity_score) / entity_score_range
-         scores = list(np.searchsorted(sentence_starts, [e.start + 1 for e in entities]))
-         scores = [sentences[i - 1][1] for i in scores]
-         scores = [scores[i] + 10 * entities[i].score for i in range(len(entities))]
-         for i in range(len(entities)):
-             entities[i].score = scores[i]
-     else:
-         for i in range(len(entities)):
-             entities[i].score = 0.0
+                 context=context
+             ))
+
      if with_comentions:
-         for i in range(len(entities)):
-             entity = entities[i]
+         for i in range(len(ner_objects)):
+             entity = ner_objects[i]
              comentions = [
-                 entities[j].name
-                 for j in range(len(entities))
-                 if j != i and abs(entities[j].start - entity.start) < math.ceil(context_width / 2)
+                 ner_objects[j].name
+                 for j in range(len(ner_objects))
+                 if j != i and abs(ner_objects[j].start - entity.start) < math.ceil(context_width / 2)
              ]
-             entities[i].comentions = comentions
-     if with_context:
-         for i in range(len(entities)):
-             entity = entities[i]
-             if entity.start >= 0 and entity.start < len(text):
-                 left = max(0, entity.start - math.floor(context_width / 2))
-                 right = min(len(text), entity.start + math.ceil(context_width / 2))
-                 context = ("[..]" if left > 0 else "") + text[left:right] + ("[..]" if right < len(text) else "")
-                 entities[i].context = context
-     return entities
+             ner_objects[i].comentions = comentions
+
+     return ner_objects


  def get_extractive_summary(text, language, max_chars, fast=False, with_scores=False):
@@ -298,35 +491,94 @@ def get_extractive_summary(text, language, max_chars, fast=False, with_scores=Fa


  def ner_pipe(
-     text,
-     language,
-     spacy_model,
-     flair_model=None,
-     fast=False,
-     compression_ratio="auto",
-     with_scores=True,
-     with_comentions=True,
-     with_context=True,
+     text,
+     language,
+     model,
+     engine_type="spacy",
+     fast=False,
+     compression_ratio="auto",
+     with_comentions=True,
+     with_context=True,
+     entities=None,
+     score_threshold=0.5,
+     batch_size=32,
+     n_process=4
  ):
-     if compression_ratio == "auto":
-         compression_ratio = max(1.0, len(text) / 15000) if fast else 1.0
-     sentences = get_extractive_summary(text, language, int(len(text) / compression_ratio), fast=fast, with_scores=True)
-     ner = compute_ner(language, sentences, spacy_model, flair_model, 150, with_scores, with_comentions, with_context)
+     analyzer = build_presidio_analyzer(
+         language=language,
+         engine_type=engine_type,
+         model=model,
+     )
+
+     if isinstance(text, pd.DataFrame):
+         ner = compute_ner_presidio(
+             text,
+             language,
+             analyzer,
+             entities,
+             score_threshold,
+             150,
+             with_comentions,
+             with_context,
+             batch_size,
+             n_process
+         )
+     else:
+         if compression_ratio == "auto":
+             compression_ratio = max(1.0, len(text) / 15000) if fast else 1.0
+
+         if compression_ratio > 1.0:
+             sentences = get_extractive_summary(text, language, int(len(text) / compression_ratio), fast=fast,
+                                                with_scores=True)
+             text = " ".join([s[0] for s in sentences])
+
+         ner = compute_ner_presidio(
+             text,
+             language,
+             analyzer,
+             entities,
+             score_threshold,
+             150,
+             with_comentions,
+             with_context,
+             batch_size,
+             n_process
+         )
+
      return ner


- def get_ner_handler(language, fast=False):
+ def get_ner_handler(
+     language,
+     model,
+     engine_type="spacy",
+     fast=False,
+     entities=None,
+     score_threshold=0.5,
+     batch_size=32,
+     n_process=4
+ ):
      try:
-         get_nltk_tokenizer(language) # raises a LookupError if the language is not valid
+         get_nltk_tokenizer(language)
      except LookupError:
-         language = "english"
-     spacy_model = SPACY_NER_MODELS.get(language, SPACY_NER_MODELS["english"])()
-     flair_model = None if fast else FLAIR_NER_MODELS.get(language, FLAIR_NER_MODELS["english"])()
-     return lambda text, compression_ratio="auto", with_scores=True, with_comentions=True, with_context=True: ner_pipe(
-         text, language, spacy_model, flair_model, fast, compression_ratio, with_scores, with_comentions, with_context
+         language = "en"
+
+     return lambda text, compression_ratio="auto", with_comentions=True, with_context=True: ner_pipe(
+         text,
+         language,
+         model,
+         engine_type,
+         fast,
+         compression_ratio,
+         with_comentions,
+         with_context,
+         entities,
+         score_threshold,
+         batch_size,
+         n_process
      )


  @st.cache_resource
- def get_cached_ner_handler(language, fast):
-     return get_ner_handler(language, fast)
+ def get_cached_ner_handler(language, model):
+     return get_ner_handler(language, model)
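
Note (not part of the diff): the hunks above cover the NER module which, judging by the RECORD changes below, lives in streamlit_octostar_utils/nlp/ner.py. A minimal, hedged usage sketch of the new Presidio-backed entry point follows; it assumes the 2.11a1 package with its nlp extras and a locally installed spaCy model, and "en_core_web_sm" is an illustrative model name, not one pinned by the diff.

    # Hedged sketch: calling the new Presidio-based NER pipeline added in 2.11a1.
    from streamlit_octostar_utils.nlp.ner import get_ner_handler

    handler = get_ner_handler("en", "en_core_web_sm", engine_type="spacy", score_threshold=0.5)
    entities = handler("Jane Doe emailed jane@example.com from Berlin.")
    for e in entities:
        # NERObject fields used throughout the new code: name, label, score, start, context
        print(e.label, e.name, round(e.score, 2))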
@@ -1,8 +1,9 @@
- Metadata-Version: 2.3
+ Metadata-Version: 2.4
  Name: streamlit-octostar-utils
- Version: 0.2.9
+ Version: 2.11a1
  Summary:
  License: MIT
+ License-File: LICENSE
  Author: Octostar
  Requires-Python: >=3.9, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*, !=3.8.*
  Classifier: License :: OSI Approved :: MIT License
@@ -12,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3.14
  Provides-Extra: nlp
  Requires-Dist: PyJWT (>=2.5.0,<3.0.0)
  Requires-Dist: celery (>=5.3.0,<6.0.0)
@@ -21,7 +21,7 @@ streamlit_octostar_utils/core/threading/key_queue.py,sha256=7CJpj0gvZMQd8eC5wKQi
  streamlit_octostar_utils/core/timestamp.py,sha256=a3s4xfm1nctLzYsHOJxqoWIDTdbNY_yN1OByl8ahLc8,383
  streamlit_octostar_utils/nlp/__init__.py,sha256=BtlYDZK_xaEbc7Ju_7MznXbCVPZcdLn26xwR9qf_UhM,336
  streamlit_octostar_utils/nlp/language.py,sha256=BBBT8wtwWtVrCin5fNLMqGg5WdgHVotFkIvouk2qKh0,561
- streamlit_octostar_utils/nlp/ner.py,sha256=EAx4WNCH3jO9KQlK47hG3RMjnNpFadGl7CKfCQmSM7c,13326
+ streamlit_octostar_utils/nlp/ner.py,sha256=ZKYVG33uoCupr-WmberQ0856cC1Fu_W5Da2NdeYtlBw,18561
  streamlit_octostar_utils/octostar/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
  streamlit_octostar_utils/octostar/client.py,sha256=NUvHe9asd65g4-hJ4CuUvUns-9dNWes1XZRJlO9eAAc,1690
  streamlit_octostar_utils/octostar/context.py,sha256=TpucK48EbeVy4vDqKd9UULEtr1JOY-_4nBs-rXZzESw,212
@@ -36,7 +36,7 @@ streamlit_octostar_utils/threading/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzp
  streamlit_octostar_utils/threading/async_task_manager.py,sha256=q7N6YZwUvIYMzkSHmsJNheNVCv93c03H6Hyg9uH8pvk,4747
  streamlit_octostar_utils/threading/session_callback_manager.py,sha256=LvZVP4g6tvKtYmI13f2j1sX_7hm61Groqp5xJine9_k,3973
  streamlit_octostar_utils/threading/session_state_hot_swapper.py,sha256=6eeCQI6A42hp4DmW2NQw2rbeR-k9N8DhfBKQdN_fbLU,811
- streamlit_octostar_utils-0.2.9.dist-info/LICENSE,sha256=dkwVPyV03fPHHtERnF6RnvRXcll__tud9gWca1RcgnQ,1073
- streamlit_octostar_utils-0.2.9.dist-info/METADATA,sha256=weEgA5MLHog6prOtjARWZf-ZXoh2g9my67L8gwLOQLI,2256
- streamlit_octostar_utils-0.2.9.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
- streamlit_octostar_utils-0.2.9.dist-info/RECORD,,
+ streamlit_octostar_utils-2.11a1.dist-info/METADATA,sha256=HsaC9ySXFVacqX0l_i255QiqYKscq8b_0Edyp960xho,2330
+ streamlit_octostar_utils-2.11a1.dist-info/WHEEL,sha256=M5asmiAlL6HEcOq52Yi5mmk9KmTVjY2RDPtO4p9DMrc,88
+ streamlit_octostar_utils-2.11a1.dist-info/licenses/LICENSE,sha256=dkwVPyV03fPHHtERnF6RnvRXcll__tud9gWca1RcgnQ,1073
+ streamlit_octostar_utils-2.11a1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 2.1.3
+ Generator: poetry-core 2.2.0
  Root-Is-Purelib: true
  Tag: py3-none-any