streamlit-octostar-utils 2.11a4__py3-none-any.whl → 2.11a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,30 +26,3 @@ def detect_language(text, min_confidence=None):
26
26
  detected_lang = re.sub("[^A-Za-z]", "", detected_lang).lower()
27
27
  detected_lang = languages.to_name(detected_lang).lower()
28
28
  return detected_lang, confidence
29
-
30
- FLAIR_MODELS = {
31
- "en": "flair/ner-english-large",
32
- "es": "flair/ner-spanish-large",
33
- "de": "flair/ner-german-large",
34
- "nl": "flair/ner-dutch-large",
35
- "multi": "flair/ner-multi",
36
- "multi-fast": "flair/ner-multi-fast",
37
- }
38
-
39
- SPACY_MODELS = {
40
- "en": 'en_core_web_sm',
41
- }
42
-
43
- def load_language_model(language, type):
44
- from flair.models import SequenceTagger
45
- from spacy_download import load_spacy
46
-
47
- model = None
48
- match type:
49
- case "spacy":
50
- model_name = SPACY_MODELS.get(language, SPACY_MODELS["en"])
51
- model = load_spacy(model_name)
52
- case "flair":
53
- model_name = FLAIR_MODELS.get(language, "flair/ner-multi")
54
- model = SequenceTagger.load(model_name)
55
- return model
@@ -20,7 +20,7 @@ from sumy.summarizers.lsa import LsaSummarizer
20
20
  from sumy.summarizers.luhn import LuhnSummarizer
21
21
  from sumy.utils import get_stop_words
22
22
 
23
- from .language import alpha2_to_language
23
+ from .language import alpha2_to_language, language_to_alpha2
24
24
 
25
25
  BASE_ALLOWED_LABELS = ["PERSON", "ORG", "LOC", "NORP", "GPE", "PRODUCT", "DATE", "PHONE", "IP_ADDRESS", "EMAIL", "URL",
26
26
  "CRYPTO", "IBAN", "CREDIT_CARD", "US_SSN", "US_DRIVER_LICENSE", "US_PASSPORT", "MEDICAL_LICENSE"]
@@ -67,6 +67,38 @@ BASE_TO_RECOGNIZER_EXPANSIONS = {
67
67
 
68
68
  BASE_TO_ONTONOTES_LABELMAP = {"PER": "PERSON"}
69
69
 
70
+ FLAIR_MODELS = {
71
+ "en": "flair/ner-english-large",
72
+ "es": "flair/ner-spanish-large",
73
+ "de": "flair/ner-german-large",
74
+ "nl": "flair/ner-dutch-large",
75
+ "multi": "flair/ner-multi", # English, German, French, Spanish
76
+ "multi-fast": "flair/ner-multi-fast", # English, German, Dutch, Spanish
77
+ }
78
+
79
+ SPACY_MODELS = {
80
+ "en": "en_core_web_sm",
81
+ "es": "es_core_news_sm",
82
+ "fr": "fr_core_news_sm",
83
+ "de": "de_core_news_sm",
84
+ "it": "it_core_news_sm"
85
+ }
86
+
87
+ def load_language_model(language, type):
88
+ from flair.models import SequenceTagger
89
+
90
+ model = None
91
+
92
+ match type:
93
+ case "spacy":
94
+ model = SPACY_MODELS.get(language_to_alpha2(language), SPACY_MODELS["en"])
95
+
96
+ case "flair":
97
+ model_name = FLAIR_MODELS.get(language, "flair/ner-multi")
98
+ model = SequenceTagger.load(model_name)
99
+
100
+ return model
101
+
70
102
 
71
103
  class FlairRecognizer(EntityRecognizer):
72
104
  ENTITIES = [
@@ -83,15 +115,6 @@ class FlairRecognizer(EntityRecognizer):
83
115
  ({"ORG"}, {"ORG", "ORGANIZATION"}),
84
116
  ]
85
117
 
86
- MODEL_LANGUAGES = {
87
- "en": "flair/ner-english-large",
88
- "es": "flair/ner-spanish-large",
89
- "de": "flair/ner-german-large",
90
- "nl": "flair/ner-dutch-large",
91
- "multi": "flair/ner-multi",
92
- "multi-fast": "flair/ner-multi-fast",
93
- }
94
-
95
118
  PRESIDIO_EQUIVALENCES = {
96
119
  "PER": "PERSON",
97
120
  "LOC": "LOC",
@@ -549,6 +572,7 @@ def ner_pipe(
549
572
  engine_type="spacy",
550
573
  fast=False,
551
574
  compression_ratio="auto",
575
+ with_scores=False,
552
576
  with_comentions=True,
553
577
  with_context=True,
554
578
  entities=None,
@@ -556,6 +580,9 @@ def ner_pipe(
556
580
  batch_size=32,
557
581
  n_process=4
558
582
  ):
583
+ if with_scores:
584
+ raise NotImplementedError("with_scores functionality is not implemented yet")
585
+
559
586
  analyzer = build_presidio_analyzer(
560
587
  language=language,
561
588
  engine_type=engine_type,
@@ -613,13 +640,14 @@ def get_ner_handler(
613
640
  except LookupError:
614
641
  language = "en"
615
642
 
616
- return lambda text, compression_ratio="auto", with_comentions=True, with_context=True: ner_pipe(
643
+ return lambda text, compression_ratio="auto", with_scores=False, with_comentions=True, with_context=True: ner_pipe(
617
644
  text,
618
645
  language,
619
646
  model,
620
647
  engine_type,
621
648
  fast,
622
649
  compression_ratio,
650
+ with_scores,
623
651
  with_comentions,
624
652
  with_context,
625
653
  entities,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: streamlit-octostar-utils
3
- Version: 2.11a4
3
+ Version: 2.11a5
4
4
  Summary:
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -20,8 +20,8 @@ streamlit_octostar_utils/core/threading/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEkt
20
20
  streamlit_octostar_utils/core/threading/key_queue.py,sha256=7CJpj0gvZMQd8eC5wKQi3Ak5SQQ4zQ1OPTs_OP_kD20,2255
21
21
  streamlit_octostar_utils/core/timestamp.py,sha256=a3s4xfm1nctLzYsHOJxqoWIDTdbNY_yN1OByl8ahLc8,383
22
22
  streamlit_octostar_utils/nlp/__init__.py,sha256=BtlYDZK_xaEbc7Ju_7MznXbCVPZcdLn26xwR9qf_UhM,336
23
- streamlit_octostar_utils/nlp/language.py,sha256=2d8Wq8wTuo_ehjZekuoe3bgJD52ieEiZKDUPdKdOxZ0,1699
24
- streamlit_octostar_utils/nlp/ner.py,sha256=BP32wkZUNaKVIyzREEAgluPfwiISmNE4uITg7g1p0PM,20381
23
+ streamlit_octostar_utils/nlp/language.py,sha256=l48rBoLLBpTZz40N2KWNSpAWc8smcWMtiiDXREhmLtE,926
24
+ streamlit_octostar_utils/nlp/ner.py,sha256=LwnGbQHoT2mitroc0WjM2lVjtSUW7OUhqNmLsLMpNYQ,21196
25
25
  streamlit_octostar_utils/octostar/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
26
26
  streamlit_octostar_utils/octostar/client.py,sha256=NUvHe9asd65g4-hJ4CuUvUns-9dNWes1XZRJlO9eAAc,1690
27
27
  streamlit_octostar_utils/octostar/context.py,sha256=TpucK48EbeVy4vDqKd9UULEtr1JOY-_4nBs-rXZzESw,212
@@ -36,7 +36,7 @@ streamlit_octostar_utils/threading/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzp
36
36
  streamlit_octostar_utils/threading/async_task_manager.py,sha256=q7N6YZwUvIYMzkSHmsJNheNVCv93c03H6Hyg9uH8pvk,4747
37
37
  streamlit_octostar_utils/threading/session_callback_manager.py,sha256=LvZVP4g6tvKtYmI13f2j1sX_7hm61Groqp5xJine9_k,3973
38
38
  streamlit_octostar_utils/threading/session_state_hot_swapper.py,sha256=6eeCQI6A42hp4DmW2NQw2rbeR-k9N8DhfBKQdN_fbLU,811
39
- streamlit_octostar_utils-2.11a4.dist-info/METADATA,sha256=kRXpmh9YsBrKHNIPYW0gzzASNEwJr8Yj5SJb6gnr4WU,2330
40
- streamlit_octostar_utils-2.11a4.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
41
- streamlit_octostar_utils-2.11a4.dist-info/licenses/LICENSE,sha256=dkwVPyV03fPHHtERnF6RnvRXcll__tud9gWca1RcgnQ,1073
42
- streamlit_octostar_utils-2.11a4.dist-info/RECORD,,
39
+ streamlit_octostar_utils-2.11a5.dist-info/METADATA,sha256=sa3ksvvDUHpMWd_szqcaFI_x9u7dVwc9Ctj1gcAyujg,2330
40
+ streamlit_octostar_utils-2.11a5.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
41
+ streamlit_octostar_utils-2.11a5.dist-info/licenses/LICENSE,sha256=dkwVPyV03fPHHtERnF6RnvRXcll__tud9gWca1RcgnQ,1073
42
+ streamlit_octostar_utils-2.11a5.dist-info/RECORD,,