trustgraph-base 2.2.14__tar.gz → 2.2.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/PKG-INFO +2 -1
  2. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/pyproject.toml +1 -0
  3. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/library.py +3 -2
  4. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/__init__.py +2 -1
  5. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/async_processor.py +9 -18
  6. trustgraph_base-2.2.16/trustgraph/base/chunking_service.py +104 -0
  7. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/consumer.py +52 -47
  8. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/flow_processor.py +0 -2
  9. trustgraph_base-2.2.16/trustgraph/base/librarian_client.py +246 -0
  10. trustgraph_base-2.2.16/trustgraph/base/pubsub.py +121 -0
  11. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/pulsar_backend.py +30 -129
  12. trustgraph_base-2.2.16/trustgraph/base/rabbitmq_backend.py +390 -0
  13. trustgraph_base-2.2.16/trustgraph/base/serialization.py +115 -0
  14. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/subscriber.py +1 -1
  15. trustgraph_base-2.2.16/trustgraph/base_version.py +1 -0
  16. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/clients/agent_client.py +1 -7
  17. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/clients/base.py +4 -21
  18. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/clients/config_client.py +3 -13
  19. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/clients/document_embeddings_client.py +1 -7
  20. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/clients/document_rag_client.py +0 -7
  21. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/clients/embeddings_client.py +1 -8
  22. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/clients/graph_embeddings_client.py +1 -7
  23. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/clients/graph_rag_client.py +0 -7
  24. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/clients/llm_client.py +1 -7
  25. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/clients/prompt_client.py +1 -7
  26. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/clients/row_embeddings_client.py +1 -7
  27. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/clients/triples_query_client.py +1 -7
  28. trustgraph_base-2.2.16/trustgraph/log_level.py +12 -0
  29. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/agent.py +6 -6
  30. trustgraph_base-2.2.16/trustgraph/messaging/translators/base.py +46 -0
  31. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/collection.py +4 -4
  32. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/config.py +6 -6
  33. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/diagnosis.py +6 -6
  34. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/document_loading.py +8 -8
  35. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/embeddings.py +6 -6
  36. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/embeddings_query.py +19 -19
  37. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/flow.py +6 -6
  38. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/knowledge.py +12 -12
  39. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/library.py +13 -13
  40. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/metadata.py +6 -6
  41. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/nlp_query.py +6 -6
  42. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/primitives.py +28 -28
  43. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/prompt.py +6 -6
  44. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/retrieval.py +12 -12
  45. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/rows_query.py +6 -6
  46. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/structured_query.py +6 -6
  47. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/text_completion.py +6 -6
  48. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/tool.py +6 -6
  49. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/triples.py +13 -13
  50. trustgraph_base-2.2.16/trustgraph/schema/core/topic.py +26 -0
  51. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/document.py +0 -1
  52. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/embeddings.py +0 -1
  53. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/graph.py +0 -1
  54. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/knowledge.py +3 -7
  55. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/nlp.py +0 -1
  56. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/object.py +0 -1
  57. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/rows.py +0 -1
  58. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/structured.py +0 -1
  59. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/agent.py +0 -1
  60. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/collection.py +3 -7
  61. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/config.py +4 -10
  62. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/flow.py +3 -7
  63. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/library.py +7 -8
  64. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/llm.py +0 -1
  65. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/lookup.py +0 -1
  66. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/nlp_query.py +0 -1
  67. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/prompt.py +0 -1
  68. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/query.py +5 -13
  69. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/retrieval.py +0 -1
  70. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/rows_query.py +0 -1
  71. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/structured_query.py +0 -1
  72. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph_base.egg-info/PKG-INFO +2 -1
  73. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph_base.egg-info/SOURCES.txt +3 -0
  74. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph_base.egg-info/requires.txt +1 -0
  75. trustgraph_base-2.2.14/trustgraph/base/chunking_service.py +0 -264
  76. trustgraph_base-2.2.14/trustgraph/base/pubsub.py +0 -110
  77. trustgraph_base-2.2.14/trustgraph/base_version.py +0 -1
  78. trustgraph_base-2.2.14/trustgraph/log_level.py +0 -20
  79. trustgraph_base-2.2.14/trustgraph/messaging/translators/base.py +0 -43
  80. trustgraph_base-2.2.14/trustgraph/schema/core/topic.py +0 -23
  81. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/README.md +0 -0
  82. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/setup.cfg +0 -0
  83. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/__init__.py +0 -0
  84. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/api.py +0 -0
  85. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/async_bulk_client.py +0 -0
  86. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/async_flow.py +0 -0
  87. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/async_metrics.py +0 -0
  88. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/async_socket_client.py +0 -0
  89. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/bulk_client.py +0 -0
  90. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/collection.py +0 -0
  91. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/config.py +0 -0
  92. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/exceptions.py +0 -0
  93. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/explainability.py +0 -0
  94. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/flow.py +0 -0
  95. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/knowledge.py +0 -0
  96. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/metrics.py +0 -0
  97. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/socket_client.py +0 -0
  98. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/api/types.py +0 -0
  99. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/agent_client.py +0 -0
  100. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/agent_service.py +0 -0
  101. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/backend.py +0 -0
  102. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/cassandra_config.py +0 -0
  103. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/collection_config_handler.py +0 -0
  104. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/consumer_spec.py +0 -0
  105. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/document_embeddings_client.py +0 -0
  106. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/document_embeddings_query_service.py +0 -0
  107. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/document_embeddings_store_service.py +0 -0
  108. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/dynamic_tool_service.py +0 -0
  109. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/embeddings_client.py +0 -0
  110. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/embeddings_service.py +0 -0
  111. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/flow.py +0 -0
  112. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/graph_embeddings_client.py +0 -0
  113. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/graph_embeddings_query_service.py +0 -0
  114. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/graph_embeddings_store_service.py +0 -0
  115. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/graph_rag_client.py +0 -0
  116. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/llm_service.py +0 -0
  117. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/logging.py +0 -0
  118. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/metrics.py +0 -0
  119. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/parameter_spec.py +0 -0
  120. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/producer.py +0 -0
  121. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/producer_spec.py +0 -0
  122. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/prompt_client.py +0 -0
  123. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/publisher.py +0 -0
  124. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/request_response_spec.py +0 -0
  125. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/row_embeddings_query_client.py +0 -0
  126. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/spec.py +0 -0
  127. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/structured_query_client.py +0 -0
  128. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/subscriber_spec.py +0 -0
  129. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/text_completion_client.py +0 -0
  130. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/tool_client.py +0 -0
  131. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/tool_service.py +0 -0
  132. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/tool_service_client.py +0 -0
  133. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/triples_client.py +0 -0
  134. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/triples_query_service.py +0 -0
  135. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/base/triples_store_service.py +0 -0
  136. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/clients/__init__.py +0 -0
  137. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/exceptions.py +0 -0
  138. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/knowledge/__init__.py +0 -0
  139. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/knowledge/defs.py +0 -0
  140. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/knowledge/document.py +0 -0
  141. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/knowledge/identifier.py +0 -0
  142. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/knowledge/organization.py +0 -0
  143. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/knowledge/publication.py +0 -0
  144. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/__init__.py +0 -0
  145. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/registry.py +0 -0
  146. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/messaging/translators/__init__.py +0 -0
  147. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/objects/__init__.py +0 -0
  148. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/objects/field.py +0 -0
  149. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/objects/object.py +0 -0
  150. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/provenance/__init__.py +0 -0
  151. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/provenance/agent.py +0 -0
  152. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/provenance/namespaces.py +0 -0
  153. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/provenance/triples.py +0 -0
  154. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/provenance/uris.py +0 -0
  155. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/provenance/vocabulary.py +0 -0
  156. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/rdf.py +0 -0
  157. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/__init__.py +0 -0
  158. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/core/__init__.py +0 -0
  159. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/core/metadata.py +0 -0
  160. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/core/primitives.py +0 -0
  161. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/knowledge/__init__.py +0 -0
  162. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/__init__.py +0 -0
  163. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/diagnosis.py +0 -0
  164. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/storage.py +0 -0
  165. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph/schema/services/tool_service.py +0 -0
  166. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph_base.egg-info/dependency_links.txt +0 -0
  167. {trustgraph_base-2.2.14 → trustgraph_base-2.2.16}/trustgraph_base.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: trustgraph-base
3
- Version: 2.2.14
3
+ Version: 2.2.16
4
4
  Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
5
5
  Author-email: "trustgraph.ai" <security@trustgraph.ai>
6
6
  Project-URL: Homepage, https://github.com/trustgraph-ai/trustgraph
@@ -12,5 +12,6 @@ Requires-Dist: pulsar-client
12
12
  Requires-Dist: prometheus-client
13
13
  Requires-Dist: requests
14
14
  Requires-Dist: python-logging-loki
15
+ Requires-Dist: pika
15
16
 
16
17
  See https://trustgraph.ai/
@@ -14,6 +14,7 @@ dependencies = [
14
14
  "prometheus-client",
15
15
  "requests",
16
16
  "python-logging-loki",
17
+ "pika",
17
18
  ]
18
19
  classifiers = [
19
20
  "Programming Language :: Python :: 3",
@@ -22,8 +22,9 @@ logger = logging.getLogger(__name__)
22
22
  # Lower threshold provides progress feedback and resumability on slower connections
23
23
  CHUNKED_UPLOAD_THRESHOLD = 2 * 1024 * 1024
24
24
 
25
- # Default chunk size (5MB - S3 multipart minimum)
26
- DEFAULT_CHUNK_SIZE = 5 * 1024 * 1024
25
+ # Default chunk size (3MB - stays under broker message size limits
26
+ # after base64 encoding ~4MB)
27
+ DEFAULT_CHUNK_SIZE = 3 * 1024 * 1024
27
28
 
28
29
 
29
30
  def to_value(x):
@@ -1,5 +1,5 @@
1
1
 
2
- from . pubsub import PulsarClient, get_pubsub
2
+ from . pubsub import get_pubsub, add_pubsub_args
3
3
  from . async_processor import AsyncProcessor
4
4
  from . consumer import Consumer
5
5
  from . producer import Producer
@@ -14,6 +14,7 @@ from . producer_spec import ProducerSpec
14
14
  from . subscriber_spec import SubscriberSpec
15
15
  from . request_response_spec import RequestResponseSpec
16
16
  from . llm_service import LlmService, LlmResult, LlmChunk
17
+ from . librarian_client import LibrarianClient
17
18
  from . chunking_service import ChunkingService
18
19
  from . embeddings_service import EmbeddingsService
19
20
  from . embeddings_client import EmbeddingsClientSpec
@@ -6,7 +6,6 @@
6
6
 
7
7
  import asyncio
8
8
  import argparse
9
- import _pulsar
10
9
  import time
11
10
  import uuid
12
11
  import logging
@@ -15,7 +14,7 @@ from prometheus_client import start_http_server, Info
15
14
 
16
15
  from .. schema import ConfigPush, config_push_queue
17
16
  from .. log_level import LogLevel
18
- from . pubsub import PulsarClient, get_pubsub
17
+ from . pubsub import get_pubsub, add_pubsub_args
19
18
  from . producer import Producer
20
19
  from . consumer import Consumer
21
20
  from . metrics import ProcessorMetrics, ConsumerMetrics
@@ -69,11 +68,12 @@ class AsyncProcessor:
69
68
  processor = self.id, flow = None, name = "config",
70
69
  )
71
70
 
72
- # Subscribe to config queue
71
+ # Subscribe to config queue — exclusive so every processor
72
+ # gets its own copy of config pushes (broadcast pattern)
73
73
  self.config_sub_task = Consumer(
74
74
 
75
75
  taskgroup = self.taskgroup,
76
- backend = self.pubsub_backend, # Changed from client to backend
76
+ backend = self.pubsub_backend,
77
77
  subscriber = config_subscriber_id,
78
78
  flow = None,
79
79
 
@@ -84,9 +84,8 @@ class AsyncProcessor:
84
84
 
85
85
  metrics = config_consumer_metrics,
86
86
 
87
- # This causes new subscriptions to view the entire history of
88
- # configuration
89
- start_of_messages = True
87
+ start_of_messages = True,
88
+ consumer_type = 'exclusive',
90
89
  )
91
90
 
92
91
  self.running = True
@@ -223,8 +222,8 @@ class AsyncProcessor:
223
222
  logger.info("Keyboard interrupt.")
224
223
  return
225
224
 
226
- except _pulsar.Interrupted:
227
- logger.info("Pulsar Interrupted.")
225
+ except KeyboardInterrupt:
226
+ logger.info("Interrupted.")
228
227
  return
229
228
 
230
229
  # Exceptions from a taskgroup come in as an exception group
@@ -250,15 +249,7 @@ class AsyncProcessor:
250
249
  @staticmethod
251
250
  def add_args(parser):
252
251
 
253
- # Pub/sub backend selection
254
- parser.add_argument(
255
- '--pubsub-backend',
256
- default=os.getenv('PUBSUB_BACKEND', 'pulsar'),
257
- choices=['pulsar', 'mqtt'],
258
- help='Pub/sub backend (default: pulsar, env: PUBSUB_BACKEND)',
259
- )
260
-
261
- PulsarClient.add_args(parser)
252
+ add_pubsub_args(parser)
262
253
  add_logging_args(parser)
263
254
 
264
255
  parser.add_argument(
@@ -0,0 +1,104 @@
1
+ """
2
+ Base chunking service that provides parameter specification functionality
3
+ for chunk-size and chunk-overlap parameters, and librarian client for
4
+ fetching large document content.
5
+ """
6
+
7
+ import asyncio
8
+ import base64
9
+ import logging
10
+
11
+ from .flow_processor import FlowProcessor
12
+ from .parameter_spec import ParameterSpec
13
+ from .librarian_client import LibrarianClient
14
+
15
+ # Module logger
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ class ChunkingService(FlowProcessor):
20
+ """Base service for chunking processors with parameter specification support"""
21
+
22
+ def __init__(self, **params):
23
+
24
+ id = params.get("id", "chunker")
25
+
26
+ # Call parent constructor
27
+ super(ChunkingService, self).__init__(**params)
28
+
29
+ # Register parameter specifications for chunk-size and chunk-overlap
30
+ self.register_specification(
31
+ ParameterSpec(name="chunk-size")
32
+ )
33
+
34
+ self.register_specification(
35
+ ParameterSpec(name="chunk-overlap")
36
+ )
37
+
38
+ # Librarian client
39
+ self.librarian = LibrarianClient(
40
+ id=id,
41
+ backend=self.pubsub,
42
+ taskgroup=self.taskgroup,
43
+ )
44
+
45
+ logger.debug("ChunkingService initialized with parameter specifications")
46
+
47
+ async def start(self):
48
+ await super(ChunkingService, self).start()
49
+ await self.librarian.start()
50
+
51
+ async def get_document_text(self, doc):
52
+ """
53
+ Get text content from a TextDocument, fetching from librarian if needed.
54
+
55
+ Args:
56
+ doc: TextDocument with either inline text or document_id
57
+
58
+ Returns:
59
+ str: The document text content
60
+ """
61
+ if doc.document_id and not doc.text:
62
+ logger.info(f"Fetching document {doc.document_id} from librarian...")
63
+ text = await self.librarian.fetch_document_text(
64
+ document_id=doc.document_id,
65
+ user=doc.metadata.user,
66
+ )
67
+ logger.info(f"Fetched {len(text)} characters from librarian")
68
+ return text
69
+ else:
70
+ return doc.text.decode("utf-8")
71
+
72
+ async def chunk_document(self, msg, consumer, flow, default_chunk_size, default_chunk_overlap):
73
+ """
74
+ Extract chunk parameters from flow and return effective values
75
+
76
+ Args:
77
+ msg: The message being processed
78
+ consumer: The consumer instance
79
+ flow: The flow object containing parameters
80
+ default_chunk_size: Default chunk size if not configured
81
+ default_chunk_overlap: Default chunk overlap if not configured
82
+
83
+ Returns:
84
+ tuple: (chunk_size, chunk_overlap) effective values
85
+ """
86
+
87
+ chunk_size = default_chunk_size
88
+ chunk_overlap = default_chunk_overlap
89
+
90
+ try:
91
+ cs = flow.parameters.get("chunk-size")
92
+ if cs is not None:
93
+ chunk_size = int(cs)
94
+ except Exception as e:
95
+ logger.warning(f"Could not parse chunk-size parameter: {e}")
96
+
97
+ try:
98
+ co = flow.parameters.get("chunk-overlap")
99
+ if co is not None:
100
+ chunk_overlap = int(co)
101
+ except Exception as e:
102
+ logger.warning(f"Could not parse chunk-overlap parameter: {e}")
103
+
104
+ return chunk_size, chunk_overlap
@@ -32,6 +32,7 @@ class Consumer:
32
32
  rate_limit_retry_time = 10, rate_limit_timeout = 7200,
33
33
  reconnect_time = 5,
34
34
  concurrency = 1, # Number of concurrent requests to handle
35
+ consumer_type = 'shared',
35
36
  ):
36
37
 
37
38
  self.taskgroup = taskgroup
@@ -42,6 +43,8 @@ class Consumer:
42
43
  self.schema = schema
43
44
  self.handler = handler
44
45
 
46
+ self.consumer_type = consumer_type
47
+
45
48
  self.rate_limit_retry_time = rate_limit_retry_time
46
49
  self.rate_limit_timeout = rate_limit_timeout
47
50
 
@@ -93,33 +96,11 @@ class Consumer:
93
96
  if self.metrics:
94
97
  self.metrics.state("stopped")
95
98
 
96
- try:
97
-
98
- logger.info(f"Subscribing to topic: {self.topic}")
99
-
100
- # Determine initial position
101
- if self.start_of_messages:
102
- initial_pos = 'earliest'
103
- else:
104
- initial_pos = 'latest'
105
-
106
- # Create consumer via backend
107
- self.consumer = await asyncio.to_thread(
108
- self.backend.create_consumer,
109
- topic = self.topic,
110
- subscription = self.subscriber,
111
- schema = self.schema,
112
- initial_position = initial_pos,
113
- consumer_type = 'shared',
114
- )
115
-
116
- except Exception as e:
117
-
118
- logger.error(f"Consumer subscription exception: {e}", exc_info=True)
119
- await asyncio.sleep(self.reconnect_time)
120
- continue
121
-
122
- logger.info(f"Successfully subscribed to topic: {self.topic}")
99
+ # Determine initial position
100
+ if self.start_of_messages:
101
+ initial_pos = 'earliest'
102
+ else:
103
+ initial_pos = 'latest'
123
104
 
124
105
  if self.metrics:
125
106
  self.metrics.state("running")
@@ -128,14 +109,30 @@ class Consumer:
128
109
 
129
110
  logger.info(f"Starting {self.concurrency} receiver threads")
130
111
 
131
- async with asyncio.TaskGroup() as tg:
132
-
133
- tasks = []
134
-
135
- for i in range(0, self.concurrency):
136
- tasks.append(
137
- tg.create_task(self.consume_from_queue())
112
+ # Create one backend consumer per concurrent task.
113
+ # Each gets its own connection — required for backends
114
+ # like RabbitMQ where connections are not thread-safe.
115
+ consumers = []
116
+ for i in range(self.concurrency):
117
+ try:
118
+ logger.info(f"Subscribing to topic: {self.topic} (worker {i})")
119
+ c = await asyncio.to_thread(
120
+ self.backend.create_consumer,
121
+ topic = self.topic,
122
+ subscription = self.subscriber,
123
+ schema = self.schema,
124
+ initial_position = initial_pos,
125
+ consumer_type = self.consumer_type,
138
126
  )
127
+ consumers.append(c)
128
+ logger.info(f"Successfully subscribed to topic: {self.topic} (worker {i})")
129
+ except Exception as e:
130
+ logger.error(f"Consumer subscription exception (worker {i}): {e}", exc_info=True)
131
+ raise
132
+
133
+ async with asyncio.TaskGroup() as tg:
134
+ for c in consumers:
135
+ tg.create_task(self.consume_from_queue(c))
139
136
 
140
137
  if self.metrics:
141
138
  self.metrics.state("stopped")
@@ -143,23 +140,31 @@ class Consumer:
143
140
  except Exception as e:
144
141
 
145
142
  logger.error(f"Consumer loop exception: {e}", exc_info=True)
146
- self.consumer.unsubscribe()
147
- self.consumer.close()
148
- self.consumer = None
143
+ for c in consumers:
144
+ try:
145
+ c.unsubscribe()
146
+ c.close()
147
+ except Exception:
148
+ pass
149
+ consumers = []
149
150
  await asyncio.sleep(self.reconnect_time)
150
151
  continue
151
152
 
152
- if self.consumer:
153
- self.consumer.unsubscribe()
154
- self.consumer.close()
153
+ finally:
154
+ for c in consumers:
155
+ try:
156
+ c.unsubscribe()
157
+ c.close()
158
+ except Exception:
159
+ pass
155
160
 
156
- async def consume_from_queue(self):
161
+ async def consume_from_queue(self, consumer):
157
162
 
158
163
  while self.running:
159
164
 
160
165
  try:
161
166
  msg = await asyncio.to_thread(
162
- self.consumer.receive,
167
+ consumer.receive,
163
168
  timeout_millis=2000
164
169
  )
165
170
  except Exception as e:
@@ -168,9 +173,9 @@ class Consumer:
168
173
  continue
169
174
  raise e
170
175
 
171
- await self.handle_one_from_queue(msg)
176
+ await self.handle_one_from_queue(msg, consumer)
172
177
 
173
- async def handle_one_from_queue(self, msg):
178
+ async def handle_one_from_queue(self, msg, consumer):
174
179
 
175
180
  expiry = time.time() + self.rate_limit_timeout
176
181
 
@@ -183,7 +188,7 @@ class Consumer:
183
188
 
184
189
  # Message failed to be processed, this causes it to
185
190
  # be retried
186
- self.consumer.negative_acknowledge(msg)
191
+ consumer.negative_acknowledge(msg)
187
192
 
188
193
  if self.metrics:
189
194
  self.metrics.process("error")
@@ -206,7 +211,7 @@ class Consumer:
206
211
  logger.debug("Message processed successfully")
207
212
 
208
213
  # Acknowledge successful processing of the message
209
- self.consumer.acknowledge(msg)
214
+ consumer.acknowledge(msg)
210
215
 
211
216
  if self.metrics:
212
217
  self.metrics.process("success")
@@ -233,7 +238,7 @@ class Consumer:
233
238
 
234
239
  # Message failed to be processed, this causes it to
235
240
  # be retried
236
- self.consumer.negative_acknowledge(msg)
241
+ consumer.negative_acknowledge(msg)
237
242
 
238
243
  if self.metrics:
239
244
  self.metrics.process("error")
@@ -6,8 +6,6 @@
6
6
  import json
7
7
  import logging
8
8
 
9
- from pulsar.schema import JsonSchema
10
-
11
9
  from .. schema import Error
12
10
  from .. schema import config_request_queue, config_response_queue
13
11
  from .. schema import config_push_queue
@@ -0,0 +1,246 @@
1
+ """
2
+ Shared librarian client for services that need to communicate
3
+ with the librarian via pub/sub.
4
+
5
+ Provides request-response and streaming operations over the message
6
+ broker, with proper support for large documents via stream-document.
7
+
8
+ Usage:
9
+ self.librarian = LibrarianClient(
10
+ id=id, backend=self.pubsub, taskgroup=self.taskgroup, **params
11
+ )
12
+ await self.librarian.start()
13
+ content = await self.librarian.fetch_document_content(doc_id, user)
14
+ """
15
+
16
+ import asyncio
17
+ import base64
18
+ import logging
19
+ import uuid
20
+
21
+ from .consumer import Consumer
22
+ from .producer import Producer
23
+ from .metrics import ConsumerMetrics, ProducerMetrics
24
+
25
+ from ..schema import LibrarianRequest, LibrarianResponse, DocumentMetadata
26
+ from ..schema import librarian_request_queue, librarian_response_queue
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class LibrarianClient:
    """Client for librarian request-response over the message broker.

    Requests are published on the librarian request queue and tagged with
    a unique ``id`` message property.  Responses arriving on the librarian
    response queue are matched back to the waiting caller through that
    same ``id`` property.
    """

    def __init__(self, id, backend, taskgroup, **params):
        """Create the producer/consumer pair used to talk to the librarian.

        Args:
            id: Processor identifier; used for metrics and to derive the
                subscriber name (``f"{id}-librarian"``).
            backend: Pub/sub backend handed to the producer and consumer.
            taskgroup: Task group handed to the consumer.
            **params: Optional overrides.  ``librarian_request_queue`` and
                ``librarian_response_queue`` replace the default queues.
        """

        librarian_request_q = params.get(
            "librarian_request_queue", librarian_request_queue,
        )
        librarian_response_q = params.get(
            "librarian_response_queue", librarian_response_queue,
        )

        librarian_request_metrics = ProducerMetrics(
            processor=id, flow=None, name="librarian-request",
        )

        self._producer = Producer(
            backend=backend,
            topic=librarian_request_q,
            schema=LibrarianRequest,
            metrics=librarian_request_metrics,
        )

        librarian_response_metrics = ConsumerMetrics(
            processor=id, flow=None, name="librarian-response",
        )

        self._consumer = Consumer(
            taskgroup=taskgroup,
            backend=backend,
            flow=None,
            topic=librarian_response_q,
            subscriber=f"{id}-librarian",
            schema=LibrarianResponse,
            handler=self._on_response,
            metrics=librarian_response_metrics,
            consumer_type='exclusive',
        )

        # Single-response requests: request_id -> asyncio.Future
        self._pending = {}
        # Streaming requests: request_id -> asyncio.Queue
        self._streams = {}

    async def start(self):
        """Start the librarian producer and consumer."""
        await self._producer.start()
        await self._consumer.start()

    @staticmethod
    def _raise_on_error(response):
        """Raise RuntimeError if the librarian response carries an error."""
        if response.error:
            raise RuntimeError(
                f"Librarian error: {response.error.type}: "
                f"{response.error.message}"
            )

    @staticmethod
    def _decode_chunks(chunks):
        """Base64-decode each chunk's content and join the raw bytes.

        Chunks with empty content are skipped.  Text content is encoded
        as UTF-8 before decoding; bytes content is decoded directly.
        """
        parts = []
        for chunk in chunks:
            data = chunk.content
            if not data:
                continue
            if isinstance(data, str):
                data = data.encode("utf-8")
            parts.append(base64.b64decode(data))
        # Join once rather than growing a bytes object chunk by chunk.
        return b"".join(parts)

    @staticmethod
    def _encode_content(content):
        """Return *content* (str or bytes) as base64-encoded text."""
        if isinstance(content, str):
            content = content.encode("utf-8")
        return base64.b64encode(content).decode("utf-8")

    async def _on_response(self, msg, consumer, flow):
        """Route librarian responses to the right waiter.

        Messages without an ``id`` property, or whose id matches no
        outstanding request, are ignored.
        """
        response = msg.value()
        request_id = msg.properties().get("id")

        if not request_id:
            return

        future = self._pending.pop(request_id, None)
        if future is not None:
            # The waiter may already have timed out or been cancelled;
            # calling set_result() on a finished future would raise
            # InvalidStateError inside the consumer handler.
            if not future.done():
                future.set_result(response)
            return

        stream_queue = self._streams.get(request_id)
        if stream_queue is not None:
            await stream_queue.put(response)

    async def request(self, request, timeout=120):
        """Send a request to the librarian and wait for a single response.

        Args:
            request: Request object to publish.
            timeout: Seconds to wait for the response.

        Returns:
            The librarian response.

        Raises:
            RuntimeError: If no response arrives within *timeout*, or the
                response carries an error.
        """
        request_id = str(uuid.uuid4())

        future = asyncio.get_running_loop().create_future()
        self._pending[request_id] = future

        try:
            await self._producer.send(
                request, properties={"id": request_id},
            )
            response = await asyncio.wait_for(future, timeout=timeout)
        except asyncio.TimeoutError as exc:
            raise RuntimeError(
                "Timeout waiting for librarian response"
            ) from exc
        finally:
            # Always drop the bookkeeping entry so that a failed send or
            # a cancellation does not leak it, not just a timeout.
            self._pending.pop(request_id, None)

        self._raise_on_error(response)
        return response

    async def stream(self, request, timeout=120):
        """Send a request and collect streamed response chunks.

        Args:
            request: Request object to publish.
            timeout: Seconds to wait for each individual chunk.

        Returns:
            List of responses, ending with the one flagged ``is_final``.

        Raises:
            RuntimeError: If a chunk does not arrive within *timeout*, or
                a chunk carries an error.
        """
        request_id = str(uuid.uuid4())

        q = asyncio.Queue()
        self._streams[request_id] = q

        try:
            await self._producer.send(
                request, properties={"id": request_id},
            )

            chunks = []
            while True:
                response = await asyncio.wait_for(q.get(), timeout=timeout)
                self._raise_on_error(response)
                chunks.append(response)
                if response.is_final:
                    return chunks

        except asyncio.TimeoutError as exc:
            raise RuntimeError(
                "Timeout waiting for librarian stream"
            ) from exc
        finally:
            # Single cleanup point for success, error and timeout paths.
            self._streams.pop(request_id, None)

    async def fetch_document_content(self, document_id, user, timeout=120):
        """Fetch document content using streaming.

        Returns base64-encoded content. Caller is responsible for decoding.
        """
        req = LibrarianRequest(
            operation="stream-document",
            document_id=document_id,
            user=user,
        )
        chunks = await self.stream(req, timeout=timeout)

        # Each chunk is base64-encoded on its own, so decode every chunk
        # to raw bytes, concatenate, then re-encode once for the caller.
        return base64.b64encode(self._decode_chunks(chunks))

    async def fetch_document_text(self, document_id, user, timeout=120):
        """Fetch document content and decode as UTF-8 text."""
        content = await self.fetch_document_content(
            document_id, user, timeout=timeout,
        )
        return base64.b64decode(content).decode("utf-8")

    async def fetch_document_metadata(self, document_id, user, timeout=120):
        """Fetch document metadata from the librarian."""
        req = LibrarianRequest(
            operation="get-document-metadata",
            document_id=document_id,
            user=user,
        )
        response = await self.request(req, timeout=timeout)
        return response.document_metadata

    async def save_child_document(self, doc_id, parent_id, user, content,
                                  document_type="chunk", title=None,
                                  kind="text/plain", timeout=120):
        """Save a child document to the librarian.

        *content* may be str or bytes.  The title defaults to *doc_id*.
        Returns *doc_id*.
        """
        doc_metadata = DocumentMetadata(
            id=doc_id,
            user=user,
            kind=kind,
            title=title or doc_id,
            parent_id=parent_id,
            document_type=document_type,
        )

        req = LibrarianRequest(
            operation="add-child-document",
            document_metadata=doc_metadata,
            content=self._encode_content(content),
        )

        await self.request(req, timeout=timeout)
        return doc_id

    async def save_document(self, doc_id, user, content, title=None,
                            document_type="answer", kind="text/plain",
                            timeout=120):
        """Save a document to the librarian.

        *content* may be str or bytes.  The title defaults to *doc_id*.
        Returns *doc_id*.
        """
        doc_metadata = DocumentMetadata(
            id=doc_id,
            user=user,
            kind=kind,
            title=title or doc_id,
            document_type=document_type,
        )

        req = LibrarianRequest(
            operation="add-document",
            document_id=doc_id,
            document_metadata=doc_metadata,
            content=self._encode_content(content),
            user=user,
        )

        await self.request(req, timeout=timeout)
        return doc_id