trustgraph-base 2.2.15__tar.gz → 2.2.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164) hide show
  1. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/PKG-INFO +2 -1
  2. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/pyproject.toml +1 -0
  3. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/library.py +3 -2
  4. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/__init__.py +1 -0
  5. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/async_processor.py +5 -5
  6. trustgraph_base-2.2.16/trustgraph/base/chunking_service.py +104 -0
  7. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/consumer.py +52 -47
  8. trustgraph_base-2.2.16/trustgraph/base/librarian_client.py +246 -0
  9. trustgraph_base-2.2.16/trustgraph/base/pubsub.py +121 -0
  10. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/pulsar_backend.py +2 -110
  11. trustgraph_base-2.2.16/trustgraph/base/rabbitmq_backend.py +390 -0
  12. trustgraph_base-2.2.16/trustgraph/base/serialization.py +115 -0
  13. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/subscriber.py +1 -1
  14. trustgraph_base-2.2.16/trustgraph/base_version.py +1 -0
  15. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/clients/base.py +2 -9
  16. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/clients/config_client.py +2 -6
  17. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/library.py +6 -3
  18. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph_base.egg-info/PKG-INFO +2 -1
  19. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph_base.egg-info/SOURCES.txt +3 -0
  20. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph_base.egg-info/requires.txt +1 -0
  21. trustgraph_base-2.2.15/trustgraph/base/chunking_service.py +0 -264
  22. trustgraph_base-2.2.15/trustgraph/base/pubsub.py +0 -72
  23. trustgraph_base-2.2.15/trustgraph/base_version.py +0 -1
  24. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/README.md +0 -0
  25. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/setup.cfg +0 -0
  26. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/__init__.py +0 -0
  27. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/api.py +0 -0
  28. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/async_bulk_client.py +0 -0
  29. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/async_flow.py +0 -0
  30. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/async_metrics.py +0 -0
  31. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/async_socket_client.py +0 -0
  32. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/bulk_client.py +0 -0
  33. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/collection.py +0 -0
  34. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/config.py +0 -0
  35. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/exceptions.py +0 -0
  36. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/explainability.py +0 -0
  37. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/flow.py +0 -0
  38. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/knowledge.py +0 -0
  39. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/metrics.py +0 -0
  40. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/socket_client.py +0 -0
  41. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/api/types.py +0 -0
  42. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/agent_client.py +0 -0
  43. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/agent_service.py +0 -0
  44. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/backend.py +0 -0
  45. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/cassandra_config.py +0 -0
  46. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/collection_config_handler.py +0 -0
  47. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/consumer_spec.py +0 -0
  48. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/document_embeddings_client.py +0 -0
  49. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/document_embeddings_query_service.py +0 -0
  50. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/document_embeddings_store_service.py +0 -0
  51. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/dynamic_tool_service.py +0 -0
  52. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/embeddings_client.py +0 -0
  53. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/embeddings_service.py +0 -0
  54. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/flow.py +0 -0
  55. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/flow_processor.py +0 -0
  56. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/graph_embeddings_client.py +0 -0
  57. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/graph_embeddings_query_service.py +0 -0
  58. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/graph_embeddings_store_service.py +0 -0
  59. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/graph_rag_client.py +0 -0
  60. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/llm_service.py +0 -0
  61. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/logging.py +0 -0
  62. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/metrics.py +0 -0
  63. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/parameter_spec.py +0 -0
  64. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/producer.py +0 -0
  65. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/producer_spec.py +0 -0
  66. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/prompt_client.py +0 -0
  67. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/publisher.py +0 -0
  68. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/request_response_spec.py +0 -0
  69. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/row_embeddings_query_client.py +0 -0
  70. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/spec.py +0 -0
  71. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/structured_query_client.py +0 -0
  72. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/subscriber_spec.py +0 -0
  73. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/text_completion_client.py +0 -0
  74. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/tool_client.py +0 -0
  75. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/tool_service.py +0 -0
  76. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/tool_service_client.py +0 -0
  77. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/triples_client.py +0 -0
  78. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/triples_query_service.py +0 -0
  79. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/base/triples_store_service.py +0 -0
  80. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/clients/__init__.py +0 -0
  81. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/clients/agent_client.py +0 -0
  82. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/clients/document_embeddings_client.py +0 -0
  83. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/clients/document_rag_client.py +0 -0
  84. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/clients/embeddings_client.py +0 -0
  85. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/clients/graph_embeddings_client.py +0 -0
  86. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/clients/graph_rag_client.py +0 -0
  87. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/clients/llm_client.py +0 -0
  88. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/clients/prompt_client.py +0 -0
  89. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/clients/row_embeddings_client.py +0 -0
  90. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/clients/triples_query_client.py +0 -0
  91. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/exceptions.py +0 -0
  92. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/knowledge/__init__.py +0 -0
  93. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/knowledge/defs.py +0 -0
  94. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/knowledge/document.py +0 -0
  95. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/knowledge/identifier.py +0 -0
  96. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/knowledge/organization.py +0 -0
  97. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/knowledge/publication.py +0 -0
  98. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/log_level.py +0 -0
  99. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/__init__.py +0 -0
  100. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/registry.py +0 -0
  101. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/__init__.py +0 -0
  102. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/agent.py +0 -0
  103. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/base.py +0 -0
  104. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/collection.py +0 -0
  105. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/config.py +0 -0
  106. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/diagnosis.py +0 -0
  107. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/document_loading.py +0 -0
  108. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/embeddings.py +0 -0
  109. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/embeddings_query.py +0 -0
  110. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/flow.py +0 -0
  111. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/knowledge.py +0 -0
  112. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/library.py +0 -0
  113. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/metadata.py +0 -0
  114. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/nlp_query.py +0 -0
  115. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/primitives.py +0 -0
  116. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/prompt.py +0 -0
  117. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/retrieval.py +0 -0
  118. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/rows_query.py +0 -0
  119. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/structured_query.py +0 -0
  120. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/text_completion.py +0 -0
  121. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/tool.py +0 -0
  122. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/triples.py +0 -0
  123. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/objects/__init__.py +0 -0
  124. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/objects/field.py +0 -0
  125. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/objects/object.py +0 -0
  126. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/provenance/__init__.py +0 -0
  127. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/provenance/agent.py +0 -0
  128. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/provenance/namespaces.py +0 -0
  129. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/provenance/triples.py +0 -0
  130. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/provenance/uris.py +0 -0
  131. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/provenance/vocabulary.py +0 -0
  132. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/rdf.py +0 -0
  133. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/__init__.py +0 -0
  134. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/core/__init__.py +0 -0
  135. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/core/metadata.py +0 -0
  136. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/core/primitives.py +0 -0
  137. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/core/topic.py +0 -0
  138. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/__init__.py +0 -0
  139. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/document.py +0 -0
  140. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/embeddings.py +0 -0
  141. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/graph.py +0 -0
  142. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/knowledge.py +0 -0
  143. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/nlp.py +0 -0
  144. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/object.py +0 -0
  145. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/rows.py +0 -0
  146. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/structured.py +0 -0
  147. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/__init__.py +0 -0
  148. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/agent.py +0 -0
  149. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/collection.py +0 -0
  150. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/config.py +0 -0
  151. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/diagnosis.py +0 -0
  152. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/flow.py +0 -0
  153. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/llm.py +0 -0
  154. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/lookup.py +0 -0
  155. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/nlp_query.py +0 -0
  156. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/prompt.py +0 -0
  157. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/query.py +0 -0
  158. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/retrieval.py +0 -0
  159. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/rows_query.py +0 -0
  160. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/storage.py +0 -0
  161. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/structured_query.py +0 -0
  162. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph/schema/services/tool_service.py +0 -0
  163. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph_base.egg-info/dependency_links.txt +0 -0
  164. {trustgraph_base-2.2.15 → trustgraph_base-2.2.16}/trustgraph_base.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: trustgraph-base
3
- Version: 2.2.15
3
+ Version: 2.2.16
4
4
  Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
5
5
  Author-email: "trustgraph.ai" <security@trustgraph.ai>
6
6
  Project-URL: Homepage, https://github.com/trustgraph-ai/trustgraph
@@ -12,5 +12,6 @@ Requires-Dist: pulsar-client
12
12
  Requires-Dist: prometheus-client
13
13
  Requires-Dist: requests
14
14
  Requires-Dist: python-logging-loki
15
+ Requires-Dist: pika
15
16
 
16
17
  See https://trustgraph.ai/
@@ -14,6 +14,7 @@ dependencies = [
14
14
  "prometheus-client",
15
15
  "requests",
16
16
  "python-logging-loki",
17
+ "pika",
17
18
  ]
18
19
  classifiers = [
19
20
  "Programming Language :: Python :: 3",
@@ -22,8 +22,9 @@ logger = logging.getLogger(__name__)
22
22
  # Lower threshold provides progress feedback and resumability on slower connections
23
23
  CHUNKED_UPLOAD_THRESHOLD = 2 * 1024 * 1024
24
24
 
25
- # Default chunk size (5MB - S3 multipart minimum)
26
- DEFAULT_CHUNK_SIZE = 5 * 1024 * 1024
25
+ # Default chunk size (3MB - stays under broker message size limits
26
+ # after base64 encoding ~4MB)
27
+ DEFAULT_CHUNK_SIZE = 3 * 1024 * 1024
27
28
 
28
29
 
29
30
  def to_value(x):
@@ -14,6 +14,7 @@ from . producer_spec import ProducerSpec
14
14
  from . subscriber_spec import SubscriberSpec
15
15
  from . request_response_spec import RequestResponseSpec
16
16
  from . llm_service import LlmService, LlmResult, LlmChunk
17
+ from . librarian_client import LibrarianClient
17
18
  from . chunking_service import ChunkingService
18
19
  from . embeddings_service import EmbeddingsService
19
20
  from . embeddings_client import EmbeddingsClientSpec
@@ -68,11 +68,12 @@ class AsyncProcessor:
68
68
  processor = self.id, flow = None, name = "config",
69
69
  )
70
70
 
71
- # Subscribe to config queue
71
+ # Subscribe to config queue — exclusive so every processor
72
+ # gets its own copy of config pushes (broadcast pattern)
72
73
  self.config_sub_task = Consumer(
73
74
 
74
75
  taskgroup = self.taskgroup,
75
- backend = self.pubsub_backend, # Changed from client to backend
76
+ backend = self.pubsub_backend,
76
77
  subscriber = config_subscriber_id,
77
78
  flow = None,
78
79
 
@@ -83,9 +84,8 @@ class AsyncProcessor:
83
84
 
84
85
  metrics = config_consumer_metrics,
85
86
 
86
- # This causes new subscriptions to view the entire history of
87
- # configuration
88
- start_of_messages = True
87
+ start_of_messages = True,
88
+ consumer_type = 'exclusive',
89
89
  )
90
90
 
91
91
  self.running = True
@@ -0,0 +1,104 @@
1
+ """
2
+ Base chunking service that provides parameter specification functionality
3
+ for chunk-size and chunk-overlap parameters, and librarian client for
4
+ fetching large document content.
5
+ """
6
+
7
+ import asyncio
8
+ import base64
9
+ import logging
10
+
11
+ from .flow_processor import FlowProcessor
12
+ from .parameter_spec import ParameterSpec
13
+ from .librarian_client import LibrarianClient
14
+
15
+ # Module logger
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class ChunkingService(FlowProcessor):
    """Base service for chunking processors with parameter specification support.

    Registers the ``chunk-size`` and ``chunk-overlap`` flow parameters and
    owns a ``LibrarianClient`` used to fetch document text that is not
    carried inline in the incoming message.
    """

    def __init__(self, **params):
        """Set up parameter specifications and the librarian client.

        Args:
            **params: Processor parameters, forwarded unchanged to the
                parent constructor.  ``id`` (default ``"chunker"``) is also
                used as the librarian client's identifier.
        """

        # Avoid shadowing the ``id`` builtin with a local of the same name.
        processor_id = params.get("id", "chunker")

        # Call parent constructor
        super().__init__(**params)

        # Register parameter specifications for chunk-size and chunk-overlap
        for name in ("chunk-size", "chunk-overlap"):
            self.register_specification(ParameterSpec(name=name))

        # Librarian client
        self.librarian = LibrarianClient(
            id=processor_id,
            backend=self.pubsub,
            taskgroup=self.taskgroup,
        )

        logger.debug("ChunkingService initialized with parameter specifications")

    async def start(self):
        """Start the parent processor, then the librarian client."""
        await super().start()
        await self.librarian.start()

    async def get_document_text(self, doc):
        """
        Get text content from a TextDocument, fetching from librarian if needed.

        Args:
            doc: TextDocument with either inline text or document_id

        Returns:
            str: The document text content
        """
        if doc.document_id and not doc.text:
            logger.info(f"Fetching document {doc.document_id} from librarian...")
            text = await self.librarian.fetch_document_text(
                document_id=doc.document_id,
                user=doc.metadata.user,
            )
            logger.info(f"Fetched {len(text)} characters from librarian")
            return text

        inline = doc.text
        # Inline text that is already a str needs no decoding; calling
        # .decode() on it would raise AttributeError.
        if isinstance(inline, str):
            return inline
        return inline.decode("utf-8")

    @staticmethod
    def _int_parameter(flow, name, default):
        """Return flow parameter ``name`` as an int, or ``default``.

        ``default`` is returned when the parameter is absent (``None``) or
        when reading/converting it fails; a failure is logged as a warning.
        """
        try:
            raw = flow.parameters.get(name)
            if raw is not None:
                return int(raw)
        except Exception as e:
            # Deliberately best-effort: a bad parameter must not stop
            # chunking, so fall back to the default.
            logger.warning(f"Could not parse {name} parameter: {e}")
        return default

    async def chunk_document(self, msg, consumer, flow, default_chunk_size, default_chunk_overlap):
        """
        Extract chunk parameters from flow and return effective values

        Args:
            msg: The message being processed (not used here)
            consumer: The consumer instance (not used here)
            flow: The flow object containing parameters
            default_chunk_size: Default chunk size if not configured
            default_chunk_overlap: Default chunk overlap if not configured

        Returns:
            tuple: (chunk_size, chunk_overlap) effective values
        """
        chunk_size = self._int_parameter(
            flow, "chunk-size", default_chunk_size,
        )
        chunk_overlap = self._int_parameter(
            flow, "chunk-overlap", default_chunk_overlap,
        )
        return chunk_size, chunk_overlap
@@ -32,6 +32,7 @@ class Consumer:
32
32
  rate_limit_retry_time = 10, rate_limit_timeout = 7200,
33
33
  reconnect_time = 5,
34
34
  concurrency = 1, # Number of concurrent requests to handle
35
+ consumer_type = 'shared',
35
36
  ):
36
37
 
37
38
  self.taskgroup = taskgroup
@@ -42,6 +43,8 @@ class Consumer:
42
43
  self.schema = schema
43
44
  self.handler = handler
44
45
 
46
+ self.consumer_type = consumer_type
47
+
45
48
  self.rate_limit_retry_time = rate_limit_retry_time
46
49
  self.rate_limit_timeout = rate_limit_timeout
47
50
 
@@ -93,33 +96,11 @@ class Consumer:
93
96
  if self.metrics:
94
97
  self.metrics.state("stopped")
95
98
 
96
- try:
97
-
98
- logger.info(f"Subscribing to topic: {self.topic}")
99
-
100
- # Determine initial position
101
- if self.start_of_messages:
102
- initial_pos = 'earliest'
103
- else:
104
- initial_pos = 'latest'
105
-
106
- # Create consumer via backend
107
- self.consumer = await asyncio.to_thread(
108
- self.backend.create_consumer,
109
- topic = self.topic,
110
- subscription = self.subscriber,
111
- schema = self.schema,
112
- initial_position = initial_pos,
113
- consumer_type = 'shared',
114
- )
115
-
116
- except Exception as e:
117
-
118
- logger.error(f"Consumer subscription exception: {e}", exc_info=True)
119
- await asyncio.sleep(self.reconnect_time)
120
- continue
121
-
122
- logger.info(f"Successfully subscribed to topic: {self.topic}")
99
+ # Determine initial position
100
+ if self.start_of_messages:
101
+ initial_pos = 'earliest'
102
+ else:
103
+ initial_pos = 'latest'
123
104
 
124
105
  if self.metrics:
125
106
  self.metrics.state("running")
@@ -128,14 +109,30 @@ class Consumer:
128
109
 
129
110
  logger.info(f"Starting {self.concurrency} receiver threads")
130
111
 
131
- async with asyncio.TaskGroup() as tg:
132
-
133
- tasks = []
134
-
135
- for i in range(0, self.concurrency):
136
- tasks.append(
137
- tg.create_task(self.consume_from_queue())
112
+ # Create one backend consumer per concurrent task.
113
+ # Each gets its own connection — required for backends
114
+ # like RabbitMQ where connections are not thread-safe.
115
+ consumers = []
116
+ for i in range(self.concurrency):
117
+ try:
118
+ logger.info(f"Subscribing to topic: {self.topic} (worker {i})")
119
+ c = await asyncio.to_thread(
120
+ self.backend.create_consumer,
121
+ topic = self.topic,
122
+ subscription = self.subscriber,
123
+ schema = self.schema,
124
+ initial_position = initial_pos,
125
+ consumer_type = self.consumer_type,
138
126
  )
127
+ consumers.append(c)
128
+ logger.info(f"Successfully subscribed to topic: {self.topic} (worker {i})")
129
+ except Exception as e:
130
+ logger.error(f"Consumer subscription exception (worker {i}): {e}", exc_info=True)
131
+ raise
132
+
133
+ async with asyncio.TaskGroup() as tg:
134
+ for c in consumers:
135
+ tg.create_task(self.consume_from_queue(c))
139
136
 
140
137
  if self.metrics:
141
138
  self.metrics.state("stopped")
@@ -143,23 +140,31 @@ class Consumer:
143
140
  except Exception as e:
144
141
 
145
142
  logger.error(f"Consumer loop exception: {e}", exc_info=True)
146
- self.consumer.unsubscribe()
147
- self.consumer.close()
148
- self.consumer = None
143
+ for c in consumers:
144
+ try:
145
+ c.unsubscribe()
146
+ c.close()
147
+ except Exception:
148
+ pass
149
+ consumers = []
149
150
  await asyncio.sleep(self.reconnect_time)
150
151
  continue
151
152
 
152
- if self.consumer:
153
- self.consumer.unsubscribe()
154
- self.consumer.close()
153
+ finally:
154
+ for c in consumers:
155
+ try:
156
+ c.unsubscribe()
157
+ c.close()
158
+ except Exception:
159
+ pass
155
160
 
156
- async def consume_from_queue(self):
161
+ async def consume_from_queue(self, consumer):
157
162
 
158
163
  while self.running:
159
164
 
160
165
  try:
161
166
  msg = await asyncio.to_thread(
162
- self.consumer.receive,
167
+ consumer.receive,
163
168
  timeout_millis=2000
164
169
  )
165
170
  except Exception as e:
@@ -168,9 +173,9 @@ class Consumer:
168
173
  continue
169
174
  raise e
170
175
 
171
- await self.handle_one_from_queue(msg)
176
+ await self.handle_one_from_queue(msg, consumer)
172
177
 
173
- async def handle_one_from_queue(self, msg):
178
+ async def handle_one_from_queue(self, msg, consumer):
174
179
 
175
180
  expiry = time.time() + self.rate_limit_timeout
176
181
 
@@ -183,7 +188,7 @@ class Consumer:
183
188
 
184
189
  # Message failed to be processed, this causes it to
185
190
  # be retried
186
- self.consumer.negative_acknowledge(msg)
191
+ consumer.negative_acknowledge(msg)
187
192
 
188
193
  if self.metrics:
189
194
  self.metrics.process("error")
@@ -206,7 +211,7 @@ class Consumer:
206
211
  logger.debug("Message processed successfully")
207
212
 
208
213
  # Acknowledge successful processing of the message
209
- self.consumer.acknowledge(msg)
214
+ consumer.acknowledge(msg)
210
215
 
211
216
  if self.metrics:
212
217
  self.metrics.process("success")
@@ -233,7 +238,7 @@ class Consumer:
233
238
 
234
239
  # Message failed to be processed, this causes it to
235
240
  # be retried
236
- self.consumer.negative_acknowledge(msg)
241
+ consumer.negative_acknowledge(msg)
237
242
 
238
243
  if self.metrics:
239
244
  self.metrics.process("error")
@@ -0,0 +1,246 @@
1
+ """
2
+ Shared librarian client for services that need to communicate
3
+ with the librarian via pub/sub.
4
+
5
+ Provides request-response and streaming operations over the message
6
+ broker, with proper support for large documents via stream-document.
7
+
8
+ Usage:
9
+ self.librarian = LibrarianClient(
10
+ id=id, backend=self.pubsub, taskgroup=self.taskgroup, **params
11
+ )
12
+ await self.librarian.start()
13
+ content = await self.librarian.fetch_document_content(doc_id, user)
14
+ """
15
+
16
+ import asyncio
17
+ import base64
18
+ import logging
19
+ import uuid
20
+
21
+ from .consumer import Consumer
22
+ from .producer import Producer
23
+ from .metrics import ConsumerMetrics, ProducerMetrics
24
+
25
+ from ..schema import LibrarianRequest, LibrarianResponse, DocumentMetadata
26
+ from ..schema import librarian_request_queue, librarian_response_queue
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class LibrarianClient:
    """Client for librarian request-response over the message broker.

    Requests are published with a unique ``id`` message property.  Responses
    carrying the same ``id`` are routed either to a future (single-response
    requests) or to a queue (streamed requests).
    """

    def __init__(self, id, backend, taskgroup, **params):
        """Create the request producer and the response consumer.

        Args:
            id: Processor identifier; used for metrics and to build the
                response subscription name ``f"{id}-librarian"``.
            backend: Pub/sub backend handed to the producer and consumer.
            taskgroup: Task group handed to the response consumer.
            **params: May contain ``librarian_request_queue`` and
                ``librarian_response_queue`` to override the default queues.
        """

        request_topic = params.get(
            "librarian_request_queue", librarian_request_queue,
        )
        response_topic = params.get(
            "librarian_response_queue", librarian_response_queue,
        )

        self._producer = Producer(
            backend=backend,
            topic=request_topic,
            schema=LibrarianRequest,
            metrics=ProducerMetrics(
                processor=id, flow=None, name="librarian-request",
            ),
        )

        self._consumer = Consumer(
            taskgroup=taskgroup,
            backend=backend,
            flow=None,
            topic=response_topic,
            subscriber=f"{id}-librarian",
            schema=LibrarianResponse,
            handler=self._on_response,
            metrics=ConsumerMetrics(
                processor=id, flow=None, name="librarian-response",
            ),
            consumer_type='exclusive',
        )

        # Single-response requests: request_id -> asyncio.Future
        self._pending = {}
        # Streaming requests: request_id -> asyncio.Queue
        self._streams = {}

    async def start(self):
        """Start the librarian producer and consumer."""
        await self._producer.start()
        await self._consumer.start()

    async def _on_response(self, msg, consumer, flow):
        """Route librarian responses to the right waiter.

        Responses without an ``id`` property, or whose id matches no
        outstanding request, are dropped.
        """
        response = msg.value()
        request_id = msg.properties().get("id")

        if not request_id:
            return

        if request_id in self._pending:
            future = self._pending.pop(request_id)
            # The waiter may already have been cancelled; calling
            # set_result() on a finished future raises InvalidStateError.
            if not future.done():
                future.set_result(response)
        elif request_id in self._streams:
            await self._streams[request_id].put(response)

    @staticmethod
    def _raise_on_error(response):
        """Raise RuntimeError if the response carries a librarian error."""
        if response.error:
            raise RuntimeError(
                f"Librarian error: {response.error.type}: "
                f"{response.error.message}"
            )

    async def request(self, request, timeout=120):
        """Send a request to the librarian and wait for a single response.

        Args:
            request: The request object to publish.
            timeout: Seconds to wait for the response.

        Returns:
            The response object delivered for this request.

        Raises:
            RuntimeError: On timeout, or when the response carries an error.
        """
        request_id = str(uuid.uuid4())

        future = asyncio.get_running_loop().create_future()
        self._pending[request_id] = future

        try:
            await self._producer.send(
                request, properties={"id": request_id},
            )
            response = await asyncio.wait_for(future, timeout=timeout)
        except asyncio.TimeoutError as err:
            raise RuntimeError(
                "Timeout waiting for librarian response"
            ) from err
        finally:
            # Always drop the registration, including when send() fails or
            # the caller is cancelled, so abandoned futures do not pile up.
            self._pending.pop(request_id, None)

        self._raise_on_error(response)
        return response

    async def stream(self, request, timeout=120):
        """Send a request and collect streamed response chunks.

        Args:
            request: The request object to publish.
            timeout: Seconds to wait for *each* chunk (not for the whole
                stream).

        Returns:
            list: Every response received, ending with the one whose
            ``is_final`` is true.

        Raises:
            RuntimeError: On timeout, or when a chunk carries an error.
        """
        request_id = str(uuid.uuid4())

        inbox = asyncio.Queue()
        self._streams[request_id] = inbox

        try:
            await self._producer.send(
                request, properties={"id": request_id},
            )

            chunks = []
            while True:
                response = await asyncio.wait_for(
                    inbox.get(), timeout=timeout,
                )
                self._raise_on_error(response)
                chunks.append(response)
                if response.is_final:
                    return chunks

        except asyncio.TimeoutError as err:
            raise RuntimeError(
                "Timeout waiting for librarian stream"
            ) from err
        finally:
            self._streams.pop(request_id, None)

    @staticmethod
    def _decode_chunks(chunks):
        """Base64-decode each chunk's content and join the raw bytes.

        Chunks with empty content are skipped.  ``str`` content is encoded
        as UTF-8 before decoding.
        """
        parts = []
        for chunk in chunks:
            payload = chunk.content
            if not payload:
                continue
            if not isinstance(payload, bytes):
                payload = payload.encode("utf-8")
            parts.append(base64.b64decode(payload))
        # Single join instead of repeated ``+=`` keeps this linear in the
        # total document size.
        return b"".join(parts)

    async def fetch_document_content(self, document_id, user, timeout=120):
        """Fetch document content using streaming.

        Returns base64-encoded content.  Caller is responsible for decoding.
        """
        req = LibrarianRequest(
            operation="stream-document",
            document_id=document_id,
            user=user,
        )
        chunks = await self.stream(req, timeout=timeout)

        # Each chunk is base64-encoded on its own, so decode per chunk,
        # concatenate the raw bytes, then re-encode for the caller.
        return base64.b64encode(self._decode_chunks(chunks))

    async def fetch_document_text(self, document_id, user, timeout=120):
        """Fetch document content and decode as UTF-8 text."""
        content = await self.fetch_document_content(
            document_id, user, timeout=timeout,
        )
        return base64.b64decode(content).decode("utf-8")

    async def fetch_document_metadata(self, document_id, user, timeout=120):
        """Fetch document metadata from the librarian."""
        req = LibrarianRequest(
            operation="get-document-metadata",
            document_id=document_id,
            user=user,
        )
        response = await self.request(req, timeout=timeout)
        return response.document_metadata

    @staticmethod
    def _encode_content(content):
        """Return ``content`` (str or bytes) as a base64 text string."""
        if isinstance(content, str):
            content = content.encode("utf-8")
        return base64.b64encode(content).decode("utf-8")

    async def save_child_document(self, doc_id, parent_id, user, content,
                                  document_type="chunk", title=None,
                                  kind="text/plain", timeout=120):
        """Save a child document to the librarian.

        ``content`` may be ``str`` (encoded as UTF-8) or ``bytes``.
        ``title`` falls back to ``doc_id``.  Returns ``doc_id``.
        """
        doc_metadata = DocumentMetadata(
            id=doc_id,
            user=user,
            kind=kind,
            title=title or doc_id,
            parent_id=parent_id,
            document_type=document_type,
        )

        req = LibrarianRequest(
            operation="add-child-document",
            document_metadata=doc_metadata,
            content=self._encode_content(content),
        )

        await self.request(req, timeout=timeout)
        return doc_id

    async def save_document(self, doc_id, user, content, title=None,
                            document_type="answer", kind="text/plain",
                            timeout=120):
        """Save a document to the librarian.

        ``content`` may be ``str`` (encoded as UTF-8) or ``bytes``.
        ``title`` falls back to ``doc_id``.  Returns ``doc_id``.
        """
        doc_metadata = DocumentMetadata(
            id=doc_id,
            user=user,
            kind=kind,
            title=title or doc_id,
            document_type=document_type,
        )

        req = LibrarianRequest(
            operation="add-document",
            document_id=doc_id,
            document_metadata=doc_metadata,
            content=self._encode_content(content),
            user=user,
        )

        await self.request(req, timeout=timeout)
        return doc_id