streamlit-octostar-utils 0.6.5.dev2__tar.gz → 0.6.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/PKG-INFO +2 -1
  2. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/pyproject.toml +2 -1
  3. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/core/opensearch_conversion.py +4 -1
  4. streamlit_octostar_utils-0.6.7/streamlit_octostar_utils/metadata/__init__.py +13 -0
  5. streamlit_octostar_utils-0.6.7/streamlit_octostar_utils/metadata/parent_meta.py +121 -0
  6. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/LICENSE +0 -0
  7. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/README.md +0 -0
  8. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/__init__.py +0 -0
  9. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
  10. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/celery.py +0 -0
  11. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/contents.py +0 -0
  12. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
  13. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/nifi.py +0 -0
  14. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/parallelism.py +0 -0
  15. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
  16. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
  17. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
  18. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
  19. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
  20. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
  21. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
  22. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
  23. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
  24. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
  25. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/core/__init__.py +0 -0
  26. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/core/dict.py +0 -0
  27. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/core/filetypes.py +0 -0
  28. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
  29. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
  30. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/core/timestamp.py +0 -0
  31. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/nlp/__init__.py +0 -0
  32. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/nlp/custom_recognizers.py +0 -0
  33. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/nlp/language.py +0 -0
  34. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/nlp/ner.py +0 -0
  35. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/octostar/__init__.py +0 -0
  36. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/octostar/client.py +0 -0
  37. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/octostar/context.py +0 -0
  38. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/octostar/permissions.py +0 -0
  39. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/ontology/__init__.py +0 -0
  40. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/ontology/inheritance.py +0 -0
  41. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/ontology/relationships.py +0 -0
  42. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/ontology/validation.py +0 -0
  43. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/style/__init__.py +0 -0
  44. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/style/common.py +0 -0
  45. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/threading/__init__.py +0 -0
  46. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
  47. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
  48. {streamlit_octostar_utils-0.6.5.dev2 → streamlit_octostar_utils-0.6.7}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: streamlit-octostar-utils
3
- Version: 0.6.5.dev2
3
+ Version: 0.6.7
4
4
  Summary:
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -24,6 +24,7 @@ Requires-Dist: iso639-lang (>=2.0.0,<3.0.0) ; extra == "nlp"
24
24
  Requires-Dist: nltk (>=3.8.0,<4.0.0) ; extra == "nlp"
25
25
  Requires-Dist: numpy (>=1.20.0)
26
26
  Requires-Dist: octostar-streamlit (>=0.1.25,<0.2.0)
27
+ Requires-Dist: pillow (>=10.0.0)
27
28
  Requires-Dist: pottery (>=3.0.0,<4.0.0)
28
29
  Requires-Dist: presidio-analyzer (>=2.2.0,<3.0.0) ; extra == "nlp"
29
30
  Requires-Dist: psutil (>=5.9.0)
@@ -5,7 +5,7 @@ include = '\.pyi?$'
5
5
 
6
6
  [tool.poetry]
7
7
  name = "streamlit-octostar-utils"
8
- version = "0.6.5-dev.2"
8
+ version = "0.6.7"
9
9
  description = ""
10
10
  license = "MIT"
11
11
  authors = ["Octostar"]
@@ -43,6 +43,7 @@ pottery = "^3.0.0"
43
43
  slowapi = "^0.1.9"
44
44
  filetype = "^1.2.0"
45
45
  PyJWT = "^2.5.0"
46
+ pillow = ">=10.0.0"
46
47
  #octostar-python-client = "^1.0.0"
47
48
  py3langid = { version = ">=0.2.0,<0.3.0", optional = true }
48
49
  spacy-download = { version = "==1.1.0", optional = true }
@@ -44,6 +44,7 @@ conversion_matrix = {
44
44
  False,
45
45
  ),
46
46
  ("str", "object"): lambda d: (json.loads(d), True),
47
+ ("str", "flat_object"): lambda d: (json.loads(d), True),
47
48
  ("str", "nested"): lambda d: (json.loads(d), True),
48
49
  ("bool", "text"): lambda d: (str(d), False),
49
50
  ("bool", "keyword"): lambda d: (str(d), False),
@@ -102,11 +103,13 @@ conversion_matrix = {
102
103
  ),
103
104
  ("bytes", "binary"): lambda d: (base64.b64encode(d).decode("utf-8"), False),
104
105
  ("bytes", "object"): lambda d: (json.loads(d.decode("utf-8")), True),
106
+ ("bytes", "flat_object"): lambda d: (json.loads(d.decode("utf-8")), True),
105
107
  ("bytes", "nested"): lambda d: (json.loads(d.decode("utf-8")), True),
106
108
  ("dict", "text"): lambda d: (json.dumps(d), False),
107
109
  ("dict", "keyword"): lambda d: (json.dumps(d), False),
108
110
  ("dict", "boolean"): lambda d: (bool(d), False),
109
111
  ("dict", "object"): lambda d: (d, True),
112
+ ("dict", "flat_object"): lambda d: (d, True),
110
113
  ("dict", "nested"): lambda d: (d, True),
111
114
  }
112
115
 
@@ -225,7 +228,7 @@ def handle_data_dict(data, curr_mapping, key):
225
228
  if (
226
229
  curr_mapping
227
230
  and has_opensearch_type(curr_mapping)
228
- and curr_mapping.get("type") not in ["object", "nested"]
231
+ and curr_mapping.get("type") not in ["object", "nested", "flat_object"]
229
232
  ):
230
233
  convert_data_type(data, curr_mapping, key)
231
234
  else:
@@ -0,0 +1,13 @@
1
+ from streamlit_octostar_utils.metadata.parent_meta import (
2
+ EXIF_PASSTHROUGH_FIELDS,
3
+ OS_PARENT_META_PREFIX,
4
+ resolve_with_aliases,
5
+ select_parent_meta_passthrough,
6
+ )
7
+
8
+ __all__ = [
9
+ "EXIF_PASSTHROUGH_FIELDS",
10
+ "OS_PARENT_META_PREFIX",
11
+ "resolve_with_aliases",
12
+ "select_parent_meta_passthrough",
13
+ ]
@@ -0,0 +1,121 @@
1
+ """Parent->child EXIF passthrough protocol (convention + selector + reader).
2
+
3
+ Producer routes that create new derived files (face crops, video keyframes,
4
+ extracted audio, PDF page splits, ...) carry the parent's EXIF date/GPS tags
5
+ through to downstream extract-metadata by embedding
6
+ ``os_parent_meta_<canonical>`` keys into the child file's metadata container.
7
+
8
+ This module owns the **convention**: which canonical EXIF fields propagate
9
+ (:data:`EXIF_PASSTHROUGH_FIELDS`), what prefix wraps them
10
+ (:data:`OS_PARENT_META_PREFIX`), how a parent's exiftool dict is reduced to a
11
+ ready-to-embed payload (:func:`select_parent_meta_passthrough`), and how a
12
+ reader recovers values regardless of which container surfaced them
13
+ (:func:`resolve_with_aliases`).
14
+
15
+ The actual **embedding** into PNG ``tEXt`` chunks, PDF Info dicts, or ffmpeg
16
+ ``-metadata`` flags lives in each producer app -- it requires container-
17
+ specific libraries (PIL, PyMuPDF, ffmpeg) that don't belong in a shared
18
+ utility lib. Each producer imports the convention/selector from here and
19
+ implements its own stamper.
20
+
21
+ Keep producer-side embedders and this reader on the same suffix protocol:
22
+ :func:`resolve_with_aliases` is the only piece that knows what prefixes the
23
+ various containers slap on tag names, and a new container format means
24
+ changes here that both sides must see.
25
+ """
26
+
27
+ from typing import Any, Dict
28
+
29
+
30
+ OS_PARENT_META_PREFIX = "os_parent_meta_"
31
+
32
+ _PDF_CUSTOM_PREFIX = "pdf:docinfo:custom:"
33
+
34
+ EXIF_PASSTHROUGH_FIELDS = (
35
+ "EXIF:DateTimeOriginal",
36
+ "Composite:SubSecDateTimeOriginal",
37
+ "Composite:DateTimeOriginal",
38
+ "EXIF:CreateDate",
39
+ "Composite:SubSecCreateDate",
40
+ "QuickTime:CreateDate",
41
+ "QuickTime:MediaCreateDate",
42
+ "QuickTime:TrackCreateDate",
43
+ "PDF:CreateDate",
44
+ "XMP:CreateDate",
45
+ "XMP:DateCreated",
46
+ "IPTC:DateCreated",
47
+ "Composite:DateTimeCreated",
48
+ "EXIF:DateTime",
49
+ "EXIF:ModifyDate",
50
+ "Composite:SubSecModifyDate",
51
+ "PDF:ModDate",
52
+ "XMP:ModifyDate",
53
+ "Composite:GPSLatitude",
54
+ "EXIF:GPSLatitude",
55
+ "XMP:GPSLatitude",
56
+ "Composite:GPSLatitudeRef",
57
+ "EXIF:GPSLatitudeRef",
58
+ "XMP:GPSLatitudeRef",
59
+ "Composite:GPSLongitude",
60
+ "EXIF:GPSLongitude",
61
+ "XMP:GPSLongitude",
62
+ "Composite:GPSLongitudeRef",
63
+ "EXIF:GPSLongitudeRef",
64
+ "XMP:GPSLongitudeRef",
65
+ "Composite:GPSPosition",
66
+ )
67
+
68
+
69
+ def resolve_with_aliases(metadata: Dict[str, Any], key: str) -> Any:
70
+ """Return ``metadata[key]`` respecting ``os_parent_meta_<key>`` passthrough.
71
+
72
+ Lookup order:
73
+
74
+ 1. ``os_parent_meta_<key>`` -- written directly by producer routes that
75
+ can place a key into the child container under an exact name.
76
+ 2. ``pdf:docinfo:custom:os_parent_meta_<key>`` -- PDF custom property
77
+ container (mirrors the ``os_ContextBefore`` pattern used elsewhere).
78
+ 3. Any key ending with ``:os_parent_meta_<key>`` -- catches the
79
+ format-specific group prefix that exiftool prepends when reading
80
+ container-native tags (``PNG:`` for PNG tEXt, ``ID3:`` for MP3 TXXX,
81
+ ``RIFF:`` for WAV INFO, etc.).
82
+ 4. ``<key>`` -- canonical fallback (the value as it would appear if the
83
+ child carried real EXIF natively).
84
+ """
85
+ aliased = f"{OS_PARENT_META_PREFIX}{key}"
86
+ value = metadata.get(aliased)
87
+ if value is not None:
88
+ return value
89
+ value = metadata.get(f"{_PDF_CUSTOM_PREFIX}{aliased}")
90
+ if value is not None:
91
+ return value
92
+ suffix = f":{aliased}"
93
+ for k, v in metadata.items():
94
+ if v is not None and k.endswith(suffix):
95
+ return v
96
+ return metadata.get(key)
97
+
98
+
99
+ def select_parent_meta_passthrough(metadata: Any) -> Dict[str, str]:
100
+ """Pick EXIF date/GPS tags worth embedding into derived children.
101
+
102
+ Input is a parent's raw exiftool metadata dict (as stored on the parent
103
+ entity's ``extract:metadata`` annotation). Output is keyed by
104
+ ``os_parent_meta_<canonical>`` with stringified values, ready to feed
105
+ into a container-specific embedder owned by the producer app.
106
+
107
+ Uses :func:`resolve_with_aliases` so a parent that itself inherited tags
108
+ from *its* parent (e.g. a keyframe of an inherited-meta video) still
109
+ propagates the original values forward.
110
+ """
111
+ if not isinstance(metadata, dict):
112
+ return {}
113
+ out: Dict[str, str] = {}
114
+ for key in EXIF_PASSTHROUGH_FIELDS:
115
+ value = resolve_with_aliases(metadata, key)
116
+ if value is None:
117
+ continue
118
+ if isinstance(value, (list, tuple)):
119
+ value = ",".join(str(v) for v in value)
120
+ out[f"{OS_PARENT_META_PREFIX}{key}"] = str(value)
121
+ return out