tensorwatch_api-0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tensorwatch_api-0.1.dist-info/METADATA ADDED
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: tensorwatch-api
3
+ Version: 0.1
4
+ Requires-Dist: pykafka
5
+ Requires-Dist: tensorwatchext
6
+ Requires-Dist: ipywidgets
7
+ Requires-Dist: ipympl
8
+ Dynamic: requires-dist
tensorwatch_api-0.1.dist-info/RECORD ADDED
@@ -0,0 +1,10 @@
1
+ twapi/Example_Senter.py,sha256=jPKi7f9teidzXKq-yXMW0sa2yzYGCob3WIHVi3J1uNA,1532
2
+ twapi/__init__.py,sha256=wYkftb8fUjtf62Qs3yXegUNkMn4bSTaSWLyRCvv4yt0,34
3
+ twapi/kafka_connector.py,sha256=RUcK9MYnfaMDO47QQC9-_SyqahRZmmoBtBwCSetAVFs,10829
4
+ twapi/pykafka_connector.py,sha256=7e5fHEZyqJo1a43mHTpqmscbQJZb7CjZhpN8kyRw-ok,12977
5
+ twapi/twapi.py,sha256=NkfcTz6u-YuJyCc9NsjwYuH6A2Pzkhn7lQVxZf0aXJQ,9158
6
+ tensorwatch_api-0.1.dist-info/METADATA,sha256=vdsLmWDb-RRbGvXv7fdc1ajqJoUQwFTydttNhlMqdg4,189
7
+ tensorwatch_api-0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
+ tensorwatch_api-0.1.dist-info/entry_points.txt,sha256=xIxCPp_fRVck1fVJE1kqhxAEVEycrxOk8TM48EXeHWs,59
9
+ tensorwatch_api-0.1.dist-info/top_level.txt,sha256=E48dj13nSJGzHKjYnAoCGtiUjM9AU2LZk3vvmdWtNhM,6
10
+ tensorwatch_api-0.1.dist-info/RECORD,,
tensorwatch_api-0.1.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
tensorwatch_api-0.1.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ twapi-senter = twapi.Example_Senter:main
tensorwatch_api-0.1.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
1
+ twapi
twapi/Example_Senter.py ADDED
@@ -0,0 +1,51 @@
1
+ from pykafka import KafkaClient
2
+ import json
3
+ import time
4
+ import random
5
+ import sys
6
+
7
+ def main():
8
+ """
9
+ Benchmark producer to test throughput and latency.
10
+ """
11
+ try:
12
+ client = KafkaClient(hosts="127.0.0.1:9093")
13
+ topic = client.topics['gemini2']
14
+ except Exception as e:
15
+ print(f"Failed to connect to Kafka: {e}")
16
+ sys.exit(1)
17
+
18
+ parse_type = "json"
19
+ num_messages = 200000 # Number of messages to send
20
+
21
+ with topic.get_sync_producer() as producer:
22
+ print("Starting benchmark...")
23
+ print(f"Sending {num_messages} messages to topic '{topic.name.decode()}'...")
24
+
25
+ start_time = time.time()
26
+
27
+ for i in range(num_messages):
28
+ message = {
29
+ 'seq': i,
30
+ 'send_time': time.time(),
31
+ 'data': random.randint(0, 1000)
32
+ }
33
+
34
+ if parse_type == "json":
35
+ data = json.dumps(message)
36
+ producer.produce(data.encode('utf-8'))
37
+
38
+ if (i + 1) % 1000 == 0:
39
+ print(f"Sent {i + 1}/{num_messages} messages...")
40
+
41
+ end_time = time.time()
42
+ duration = end_time - start_time
43
+ throughput = num_messages / duration if duration > 0 else float('inf')
44
+
45
+ print("\n--- BENCHMARK SUMMARY ---")
46
+ print(f"Sent {num_messages} messages in {duration:.2f} seconds.")
47
+ print(f"Producer throughput: {throughput:.2f} messages/sec.")
48
+ print("------------------------")
49
+
50
+ if __name__ == "__main__":
51
+ main()
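For orientation, here is a minimal consumer-side counterpart to the benchmark producer above (a sketch only, not shipped in the wheel). It assumes the same broker address 127.0.0.1:9093 and topic 'gemini2' used by Example_Senter.py, and that producer and consumer clocks are comparable so that receive_time - send_time is a meaningful latency:

from pykafka import KafkaClient
import json
import time

def consume_benchmark(max_messages=1000):
    client = KafkaClient(hosts="127.0.0.1:9093")
    # consumer_timeout_ms stops iteration if no message arrives for 5 seconds
    consumer = client.topics['gemini2'].get_simple_consumer(consumer_timeout_ms=5000)
    latencies = []
    for message in consumer:
        payload = json.loads(message.value.decode('utf-8'))
        latencies.append(time.time() - payload['send_time'])
        if len(latencies) >= max_messages:
            break
    if latencies:
        avg_ms = sum(latencies) / len(latencies) * 1000
        print(f"Consumed {len(latencies)} messages, average latency {avg_ms:.2f} ms")

if __name__ == "__main__":
    consume_benchmark()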
twapi/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .twapi import twapi
twapi/kafka_connector.py ADDED
@@ -0,0 +1,255 @@
1
+ import time
2
+ import threading
3
+ from queue import Queue
4
+ import random
5
+ import json
6
+ import pickle
7
+ from typing import Dict
8
+ from confluent_kafka import Consumer
9
+ from tensorwatch import Watcher
10
+ from probables import CountMinSketch
11
+ import logging
12
+
13
+ # Optional Parsers
14
+ try:
15
+ import xmltodict
16
+ except ImportError:
17
+ xmltodict = None
18
+
19
+ try:
20
+ import avro.schema
21
+ from avro.io import DatumReader, BinaryDecoder
22
+ import io
23
+ except ImportError:
24
+ avro = None
25
+
26
+ try:
27
+ from protobuf_to_dict import protobuf_to_dict
28
+ from google.protobuf import message
29
+ except ImportError:
30
+ protobuf_to_dict = None
31
+
32
+ class KafkaConnector(threading.Thread):
33
+ """
34
+ A Kafka consumer that runs in a separate thread to consume messages from a Kafka topic.
35
+ It supports various message formats and integrates with TensorWatch for real-time data visualization.
36
+ """
37
+ def __init__(self, hosts="localhost:9092", topic=None, parsetype=None, avro_schema=None, queue_length=50000,
38
+ cluster_size=1, consumer_config=None, poll=1.0, auto_offset="earliest", group_id="mygroup",
39
+ decode="utf-8", schema_path=None, protobuf_message=None, random_sampling=None, countmin_width=None,
40
+ countmin_depth=None, twapi_instance=None):
41
+ """
42
+ Initializes the KafkaConnector.
43
+
44
+ Args:
45
+ hosts (str): Comma-separated list of Kafka brokers.
46
+ topic (str): The Kafka topic to consume from.
47
+ parsetype (str): The format of the messages (e.g., "json", "pickle", "xml", "avro", "protobuf").
48
+ avro_schema (str): The Avro schema for message deserialization.
49
+ queue_length (int): The maximum number of messages to store in the internal queue.
50
+ cluster_size (int): The number of consumer threads to run.
51
+ consumer_config (dict): A dictionary of Kafka consumer configuration settings.
52
+ poll (float): The timeout for polling for new messages from Kafka.
53
+ auto_offset (str): The offset reset policy.
54
+ group_id (str): The consumer group ID.
55
+ decode (str): The encoding to use for decoding messages.
56
+ schema_path (str): The path to the Avro schema file.
57
+ protobuf_message (str): The name of the Protobuf message class.
58
+ random_sampling (int): The percentage of messages to randomly drop (0-100); higher values keep fewer messages.
59
+ countmin_width (int): The width of the Count-Min Sketch.
60
+ countmin_depth (int): The depth of the Count-Min Sketch.
61
+ twapi_instance: An instance of the TensorWatch API for updating metrics.
62
+ """
63
+ super().__init__()
64
+ self.hosts = hosts or "localhost:9092"
65
+ self.topic = topic
66
+ self.cluster_size = cluster_size
67
+ self.decode = decode
68
+ self.parsetype = parsetype
69
+ self.protobuf_message = protobuf_message
70
+ self.queue_length = queue_length
71
+ self.data = Queue(maxsize=queue_length)
72
+ self.cms = {} # Count-Min Sketch table
73
+ self.countmin_width = countmin_width
74
+ self.countmin_depth = countmin_depth
75
+ self.random_sampling = random_sampling
76
+ self.poll = poll
77
+ self.consumer_config = consumer_config or {
78
+ "bootstrap.servers": self.hosts,
79
+ "group.id": group_id,
80
+ "auto.offset.reset": auto_offset,
81
+ }
82
+ self._quit = threading.Event()
83
+ self.size = 0
84
+ self.watcher = Watcher()
85
+ self.schema = None
86
+ self.reader = None
87
+
88
+ self.twapi_instance = twapi_instance
89
+ self.latencies = []
90
+ self.received_count = 0
91
+ self.last_report_time = time.time()
92
+ self.first_message_sent = False
93
+
94
+ # Load Avro Schema if needed
95
+ if parsetype == "avro" and avro:
96
+ try:
97
+ self.schema = avro.schema.parse(avro_schema)
98
+ self.reader = DatumReader(self.schema)
99
+ except Exception as e:
100
+ logging.error(f"Avro Schema Error: {e}, Avro may not work")
101
+ print(f"Avro Schema Error: {e}, Avro may not work")
102
+ return
103
+
104
+ # Load Protobuf if needed
105
+ if parsetype == "protobuf" and protobuf_to_dict:
106
+ try:
107
+ import importlib
108
+ module = importlib.import_module(protobuf_message)
109
+ self.protobuf_class = getattr(module, protobuf_message)
110
+
111
+ except Exception as e:
112
+ logging.error(f"Protobuf Import Error: {e}")
113
+ print(f"Protobuf Import Error: {e}")
114
+ self.protobuf_class = None
115
+
116
+ self.start()
117
+
118
+ def myparser(self, message):
119
+ """
120
+ Parses a message based on the specified format.
121
+
122
+ Args:
123
+ message: The message to parse.
124
+
125
+ Returns:
126
+ The parsed message, or None if parsing fails.
127
+ """
128
+ try:
129
+ if self.parsetype is None or self.parsetype.lower() == "json":
130
+ return json.loads(message)
131
+ elif self.parsetype.lower() == "pickle":
132
+ return pickle.loads(message)
133
+ elif self.parsetype.lower() == "xml" and xmltodict:
134
+ return xmltodict.parse(message)["root"]
135
+ elif self.parsetype.lower() == "protobuf" and protobuf_to_dict:
136
+ if self.protobuf_class:
137
+ dynamic_message = self.protobuf_class()
138
+ dynamic_message.ParseFromString(message)
139
+ return protobuf_to_dict(dynamic_message)
140
+ elif self.parsetype.lower() == "avro" and avro:
141
+ decoder = BinaryDecoder(io.BytesIO(message))
142
+ return self.reader.read(decoder)
143
+ except Exception as e:
144
+ logging.error(f"Parsing Error ({self.parsetype}): {e}")
145
+ print(f"Parsing Error ({self.parsetype}): {e}")
146
+ return None
147
+
148
+ def process_message(self, msg):
149
+ """
150
+ Processes a single message from Kafka. This includes parsing, calculating latency,
151
+ and adding the message to the data queue.
152
+ """
153
+ receive_time = time.time()
154
+ try:
155
+ # Apply random sampling if configured
156
+ if self.random_sampling and self.random_sampling > random.randint(0, 100):
157
+ return
158
+
159
+ message = msg.value().decode(self.decode)
160
+ parsed_message = self.myparser(message)
161
+
162
+ # Calculate and record latency if send_time is in the message
163
+ if parsed_message and isinstance(parsed_message, dict) and 'send_time' in parsed_message:
164
+ self.received_count += 1
165
+ send_time = parsed_message['send_time']
166
+ latency = receive_time - send_time
167
+ self.latencies.append(latency)
168
+ parsed_message['latency'] = latency
169
+ parsed_message['receive_time'] = receive_time
170
+
171
+ # Add the parsed message to the queue if it's not full
172
+ if parsed_message and not self.data.full():
173
+ self.data.put(parsed_message, block=False)
174
+ # Notify the twapi_instance on the first message
175
+ if not self.first_message_sent and self.twapi_instance:
176
+ logging.info("First message received, enabling apply button.")
177
+ self.twapi_instance.enable_apply_button()
178
+ self.first_message_sent = True
179
+ elif self.data.full():
180
+ logging.warning("Queue is full, dropping message.")
181
+
182
+ # Update Count-Min Sketch if configured
183
+ if isinstance(parsed_message, dict) and self.countmin_width and self.countmin_depth:
184
+ for key, value in parsed_message.items():
185
+ self.cms.setdefault(key, CountMinSketch(width=self.countmin_width, depth=self.countmin_depth))
186
+ self.cms[key].add(str(value))
187
+
188
+ self.size += 1
189
+ except Exception as e:
190
+ logging.error(f"Message Processing Error: {e}, Message: {message}")
191
+ print(f"Message Processing Error: {e}, Message: {message}")
192
+
193
+ def consumer_loop(self):
194
+ """
195
+ The main loop for the Kafka consumer. It polls for messages, processes them,
196
+ and handles errors.
197
+ """
198
+ logging.info(f"Starting consumer loop for topic '{self.topic}'")
199
+ consumer = Consumer(self.consumer_config)
200
+ consumer.subscribe([self.topic])
201
+
202
+ while not self._quit.is_set():
203
+ msg = consumer.poll(self.poll)
204
+ if msg and not msg.error():
205
+ self.process_message(msg)
206
+ elif msg and msg.error():
207
+ logging.error(f"Kafka Error: {msg.error()}")
208
+ print(f"Kafka Error: {msg.error()}")
209
+
210
+ consumer.close()
211
+ logging.info("Consumer loop stopped")
212
+
213
+ def run(self):
214
+ """
215
+ Starts the consumer threads and the main watcher loop.
216
+ """
217
+ logging.info(f"Starting {self.cluster_size} consumer threads")
218
+ threads = [threading.Thread(target=self.consumer_loop, daemon=True) for _ in range(self.cluster_size)]
219
+ for thread in threads:
220
+ thread.start()
221
+
222
+ while not self._quit.is_set():
223
+ # Observe the data queue with TensorWatch
224
+ if not self.data.empty():
225
+ self.watcher.observe(data=list(self.data.queue), size=self.size, cms=self.cms)
226
+
227
+ # --- BENCHMARK REPORTING ---
228
+ current_time = time.time()
229
+ if current_time - self.last_report_time > 5.0: # Report every 5 seconds
230
+ if self.latencies:
231
+ avg_latency = sum(self.latencies) / len(self.latencies)
232
+ max_latency = max(self.latencies)
233
+ min_latency = min(self.latencies)
234
+
235
+ time_since_last_report = current_time - self.last_report_time
236
+ throughput = self.received_count / time_since_last_report if time_since_last_report > 0 else 0
237
+
238
+ stats_str = (f"Recv Throughput: {throughput:.2f} msgs/s | "
239
+ f"Send-Recv Latency (ms): "
240
+ f"Avg: {avg_latency*1000:.2f}, "
241
+ f"Min: {min_latency*1000:.2f}, "
242
+ f"Max: {max_latency*1000:.2f}")
243
+ logging.info(f"Benchmark stats: {stats_str}")
244
+ print(stats_str)
245
+
246
+ # Update the TensorWatch API with the latest metrics
247
+ if self.twapi_instance:
248
+ self.twapi_instance.update_metrics(stats_str)
249
+
250
+ # Reset stats for the next interval
251
+ self.latencies = []
252
+ self.received_count = 0
253
+ self.last_report_time = current_time
254
+
255
+ time.sleep(0.4)
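As a usage note (a sketch under assumptions, not part of the package): KafkaConnector starts consuming as soon as it is constructed, because __init__ ends with self.start(). The broker address and topic below are placeholders, and confluent-kafka and pyprobables, which this module imports, are not listed in Requires-Dist above, so they would need to be installed separately:

from twapi.kafka_connector import KafkaConnector
import time

connector = KafkaConnector(
    hosts="localhost:9092",   # placeholder bootstrap servers
    topic="gemini2",          # placeholder topic
    parsetype="json",         # matches the JSON messages sent by Example_Senter
    cluster_size=1,           # one consumer thread
)
time.sleep(10)                               # let the consumer thread gather data
print(f"Messages processed: {connector.size}")
connector._quit.set()                        # no quit() method here; set the event directly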
twapi/pykafka_connector.py ADDED
@@ -0,0 +1,293 @@
1
+ from pykafka import KafkaClient
2
+ from pykafka.common import OffsetType
3
+ from tensorwatch import Watcher
4
+ import threading
5
+ from queue import Queue
6
+ import json
7
+ import pickle
8
+ import time
9
+ import random
10
+ import logging
11
+ import io
12
+
13
+ # Optional Parsers
14
+ try:
15
+ import xmltodict
16
+ except ImportError:
17
+ xmltodict = None
18
+
19
+ try:
20
+ import avro.schema
21
+ from avro.io import DatumReader, BinaryDecoder
22
+ except ImportError:
23
+ avro = None
24
+
25
+ try:
26
+ from protobuf_to_dict import protobuf_to_dict
27
+ except ImportError:
28
+ protobuf_to_dict = None
29
+
30
+ from probables import CountMinSketch
31
+
32
+ class pykafka_connector(threading.Thread):
33
+ """
34
+ A Kafka consumer that uses the pykafka library to consume messages from a Kafka topic.
35
+ It runs in a separate thread and supports various message formats.
36
+ """
37
+ def __init__(self, hosts: str = None, topic: str = None, parsetype: str = None, queue_length: int = None, cluster_size: int = 1,
38
+ consumer_group: bytes = b'default', auto_offset_reset: OffsetType = OffsetType.EARLIEST,
39
+ fetch_message_max_bytes: int = 1024 * 1024, num_consumer_fetchers: int = 1,
40
+ auto_commit_enable: bool = False, auto_commit_interval_ms: int = 1000,
41
+ queued_max_messages: int = 2000, fetch_min_bytes: int = 1,
42
+ consumer_timeout_ms: int = -1, decode: str = "utf-8",
43
+ scema_path: str = None, random_sampling: int = None, countmin_width: int = None,
44
+ countmin_depth: int = None, twapi_instance=None, parser_extra=None, probuf_message=None, zookeeper_hosts:str='127.0.0.1:2181'):
45
+ """
46
+ Initializes the pykafka_connector.
47
+
48
+ Args:
49
+ hosts (str): Comma-separated list of Kafka brokers.
50
+ topic (str): The Kafka topic to consume from.
51
+ parsetype (str): The format of the messages (e.g., "json", "pickle", "xml", "avro", "protobuf").
52
+ queue_length (int): The maximum number of messages to store in the internal queue.
53
+ cluster_size (int): The number of consumer threads to run.
54
+ consumer_group (bytes): The consumer group ID.
55
+ auto_offset_reset (OffsetType): The offset reset policy.
56
+ fetch_message_max_bytes (int): The maximum size of a message to fetch.
57
+ num_consumer_fetchers (int): The number of fetcher threads.
58
+ auto_commit_enable (bool): Whether to enable auto-commit.
59
+ auto_commit_interval_ms (int): The auto-commit interval in milliseconds.
60
+ queued_max_messages (int): The maximum number of messages to queue.
61
+ fetch_min_bytes (int): The minimum number of bytes to fetch.
62
+ consumer_timeout_ms (int): The consumer timeout in milliseconds.
63
+ decode (str): The encoding to use for decoding messages.
64
+ scema_path (str): The path to the Avro or Protobuf schema.
65
+ random_sampling (int): The percentage of messages to randomly drop (0-100); higher values keep fewer messages.
66
+ countmin_width (int): The width of the Count-Min Sketch.
67
+ countmin_depth (int): The depth of the Count-Min Sketch.
68
+ twapi_instance: An instance of the TensorWatch API for updating metrics.
69
+ parser_extra (str): Extra information for the parser (e.g., Avro schema, Protobuf module).
70
+ probuf_message (str): The name of the Protobuf message class.
71
+ zookeeper_hosts (str): Comma-separated list of Zookeeper hosts.
72
+ """
73
+ super().__init__()
74
+ self.hosts = hosts or "127.0.0.1:9092"
75
+ self.topic = topic
76
+ self.cluster_size = cluster_size
77
+ self.decode = decode
78
+ self.parsetype = parsetype
79
+ self.scema_path = scema_path
80
+ self.random_sampling = random_sampling
81
+ self.parser_extra = parser_extra
82
+ self.probuf_message = probuf_message
83
+ self.queue_length = queue_length
84
+ self.data = Queue(maxsize=queue_length or 50000)
85
+ self._quit = threading.Event()
86
+ self.size = 0
87
+ self.watcher = Watcher()
88
+ self.cms = {}
89
+ self.countmin_depth = countmin_depth
90
+ self.countmin_width = countmin_width
91
+
92
+ # pykafka specific settings
93
+ self.consumer_group = consumer_group
94
+ self.auto_offset_reset = auto_offset_reset
95
+ self.fetch_message_max_bytes = fetch_message_max_bytes
96
+ self.num_consumer_fetchers = num_consumer_fetchers
97
+ self.auto_commit_enable = auto_commit_enable
98
+ self.auto_commit_interval_ms = auto_commit_interval_ms
99
+ self.queued_max_messages = queued_max_messages
100
+ self.fetch_min_bytes = fetch_min_bytes
101
+ self.consumer_timeout_ms = consumer_timeout_ms
102
+ self.zookeeper_hosts = zookeeper_hosts
103
+
104
+ # twapi integration
105
+ self.twapi_instance = twapi_instance
106
+ self.latencies = []
107
+ self.received_count = 0
108
+ self.last_report_time = time.time()
109
+ self.first_message_sent = False
110
+
111
+ # Parsers initialization
112
+ self.reader = None
113
+ self.mymodule = None
114
+ if self.parsetype:
115
+ if self.parsetype.lower() == 'avro' and avro:
116
+ try:
117
+ schema = avro.schema.parse(parser_extra)
118
+ self.reader = DatumReader(schema)
119
+ except Exception as e:
120
+ print(f"Avro schema error or avro not installed: {e}")
121
+ elif self.parsetype.lower() == 'protobuf' and protobuf_to_dict:
122
+ try:
123
+ import sys
124
+ import importlib
125
+ if scema_path:
126
+ sys.path.append(scema_path)
127
+ mymodule = importlib.import_module(parser_extra)
128
+ method_to_call = getattr(mymodule, probuf_message)
129
+ self.mymodule = method_to_call
130
+ except Exception as e:
131
+ print(f"Error importing protobuf: {e}")
132
+
133
+ self.start()
134
+
135
+ def myparser(self, message):
136
+ """
137
+ Parses a message based on the specified format.
138
+
139
+ Args:
140
+ message: The message to parse.
141
+
142
+ Returns:
143
+ The parsed message, or None if parsing fails.
144
+ """
145
+ try:
146
+ if self.parsetype is None or self.parsetype.lower() == 'json':
147
+ return json.loads(message)
148
+ elif self.parsetype.lower() == 'pickle':
149
+ return pickle.loads(message)
150
+ elif self.parsetype.lower() == 'xml' and xmltodict:
151
+ return xmltodict.parse(message).get("root")
152
+ elif self.parsetype.lower() == 'protobuf' and self.mymodule:
153
+ dynamic_message = self.mymodule()
154
+ dynamic_message.ParseFromString(message)
155
+ return protobuf_to_dict(dynamic_message)
156
+ elif self.parsetype.lower() == 'avro' and self.reader:
157
+ message_bytes = io.BytesIO(message)
158
+ decoder = BinaryDecoder(message_bytes)
159
+ return self.reader.read(decoder)
160
+ except Exception as e:
161
+ logging.error(f"Parsing Error ({self.parsetype}): {e}")
162
+ return None
163
+
164
+ def process_message(self, message_bytes):
165
+ """
166
+ Processes a single message from Kafka. This includes parsing, calculating latency,
167
+ and adding the message to the data queue.
168
+ """
169
+ receive_time = time.time()
170
+ try:
171
+ # Randomly drop messages when sampling is configured (random_sampling is the drop percentage)
172
+ if self.random_sampling and self.random_sampling > random.randint(0, 100):
173
+ return
174
+
175
+ parsed_message = self.myparser(message_bytes)
176
+ if parsed_message is None:
177
+ return
178
+
179
+ # Calculate and record latency if send_time is in the message
180
+ if isinstance(parsed_message, dict) and 'send_time' in parsed_message:
181
+ self.received_count += 1
182
+ send_time = parsed_message['send_time']
183
+ latency = receive_time - send_time
184
+ self.latencies.append(latency)
185
+ parsed_message['latency'] = latency
186
+ parsed_message['receive_time'] = receive_time
187
+
188
+ # Add the parsed message to the queue if it's not full
189
+ if not self.data.full():
190
+ self.data.put(parsed_message, block=False)
191
+ # Notify the twapi_instance on the first message
192
+ if not self.first_message_sent and self.twapi_instance:
193
+ logging.info("First message received, enabling apply button.")
194
+ self.twapi_instance.enable_apply_button()
195
+ self.first_message_sent = True
196
+ else:
197
+ logging.warning("Queue is full, dropping message.")
198
+
199
+ # Update Count-Min Sketch if configured
200
+ if isinstance(parsed_message, dict) and self.countmin_width and self.countmin_depth:
201
+ for key, value in parsed_message.items():
202
+ self.cms.setdefault(key, CountMinSketch(width=self.countmin_width, depth=self.countmin_depth))
203
+ self.cms[key].add(str(value))
204
+
205
+ self.size += 1
206
+ except Exception as e:
207
+ logging.error(f"Message Processing Error: {e}")
208
+
209
+ def consumer_loop(self):
210
+ """
211
+ The main loop for the Kafka consumer. It creates a Kafka client and consumes messages
212
+ from the specified topic.
213
+ """
214
+ logging.info(f"Starting pykafka consumer loop for topic '{self.topic}'")
215
+ client = KafkaClient(hosts=self.hosts)
216
+ topic = client.topics[self.topic]
217
+
218
+ # Use a balanced consumer if cluster_size is greater than 1
219
+ if self.cluster_size > 1:
220
+ consumer = topic.get_balanced_consumer(
221
+ consumer_group=self.consumer_group,
222
+ auto_commit_enable=self.auto_commit_enable,
223
+ auto_offset_reset=self.auto_offset_reset,
224
+ num_consumer_fetchers=self.num_consumer_fetchers,
225
+ auto_commit_interval_ms=self.auto_commit_interval_ms,
226
+ queued_max_messages=self.queued_max_messages,
227
+ fetch_min_bytes=self.fetch_min_bytes,
228
+ zookeeper_connect=self.zookeeper_hosts
229
+ )
230
+ else:
231
+ consumer = topic.get_simple_consumer(
232
+ auto_offset_reset=self.auto_offset_reset,
233
+ consumer_timeout_ms=self.consumer_timeout_ms,
234
+ fetch_message_max_bytes=self.fetch_message_max_bytes,
235
+ auto_commit_enable=self.auto_commit_enable,
236
+ auto_commit_interval_ms=self.auto_commit_interval_ms,
237
+ queued_max_messages=self.queued_max_messages,
238
+ fetch_min_bytes=self.fetch_min_bytes
239
+ )
240
+
241
+ for message in consumer:
242
+ if self._quit.is_set():
243
+ break
244
+ if message is not None:
245
+ self.process_message(message.value)
246
+
247
+ consumer.stop()
248
+ logging.info("Consumer loop stopped")
249
+
250
+ def run(self):
251
+ """
252
+ Starts the consumer threads and the main watcher loop.
253
+ """
254
+ logging.info(f"Starting {self.cluster_size} pykafka consumer threads")
255
+ threads = [threading.Thread(target=self.consumer_loop, daemon=True) for _ in range(self.cluster_size)]
256
+ for thread in threads:
257
+ thread.start()
258
+
259
+ while not self._quit.is_set():
260
+ # Observe the data queue with TensorWatch
261
+ if not self.data.empty():
262
+ self.watcher.observe(data=list(self.data.queue), size=self.size, cms=self.cms)
263
+
264
+ # --- BENCHMARK REPORTING ---
265
+ current_time = time.time()
266
+ if current_time - self.last_report_time > 5.0: # Report every 5 seconds
267
+ if self.latencies:
268
+ avg_latency = sum(self.latencies) / len(self.latencies)
269
+ max_latency = max(self.latencies)
270
+ min_latency = min(self.latencies)
271
+ time_since_last_report = current_time - self.last_report_time
272
+ throughput = self.received_count / time_since_last_report if time_since_last_report > 0 else 0
273
+
274
+ stats_str = (f"Recv Throughput: {throughput:.2f} msgs/s | "
275
+ f"Send-Recv Latency (ms): "
276
+ f"Avg: {avg_latency*1000:.2f}, "
277
+ f"Min: {min_latency*1000:.2f}, "
278
+ f"Max: {max_latency*1000:.2f}")
279
+
280
+ # Update the TensorWatch API with the latest metrics
281
+ if self.twapi_instance:
282
+ self.twapi_instance.update_metrics(stats_str)
283
+
284
+ # Reset stats for the next interval
285
+ self.latencies = []
286
+ self.received_count = 0
287
+ self.last_report_time = current_time
288
+
289
+ time.sleep(0.4)
290
+
291
+ def quit(self):
292
+ """Stops the consumer thread."""
293
+ self._quit.set()
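A corresponding sketch for the pykafka-based connector (assumed broker and topic as above; not part of the package). cluster_size=1 selects get_simple_consumer(), while larger values switch to a balanced consumer and therefore also need reachable Zookeeper hosts:

from twapi.pykafka_connector import pykafka_connector
import time

connector = pykafka_connector(
    hosts="127.0.0.1:9093",   # placeholder broker
    topic="gemini2",          # placeholder topic
    parsetype="json",
    cluster_size=1,           # simple consumer; >1 uses get_balanced_consumer + Zookeeper
)
time.sleep(10)
print(f"Messages processed: {connector.size}")
connector.quit()              # signals the watcher loop and consumer threads to stop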
twapi/twapi.py ADDED
@@ -0,0 +1,197 @@
1
+ import tensorwatch as tw
2
+ from . import kafka_connector as kc
3
+ from . import pykafka_connector as pyc
4
+ from IPython.display import display
5
+ from ipywidgets import widgets
6
+ import asyncio
7
+ import time
8
+ import logging
9
+ import matplotlib.pyplot as plt
10
+
11
+ class twapi:
12
+ """TensorWatch API Wrapper for Kafka Streaming and Visualization"""
13
+
14
+ def __init__(self):
15
+ """Initializes the twapi class, setting up the UI widgets and event handlers."""
16
+ self.default_value = 10
17
+ self.visualizer = None # Initialize visualizer as None
18
+ self.client = tw.WatcherClient()
19
+ self.out = widgets.Output(layout={})
20
+
21
+ # Initialize UI widgets
22
+ self.update_interval = 0.5 # Delay in seconds
23
+ self.my_slider = widgets.IntSlider(value=self.default_value, min=1, max=100, step=1, description="Window Size:")
24
+ self.my_slider2 = widgets.IntSlider(value=self.default_value, min=1, max=100, step=1, description="Window Width:")
25
+ self.datebutton = widgets.Checkbox(value=False, description="Date")
26
+ self.offsetbutton = widgets.Checkbox(value=False, description="Use Offset")
27
+ self.dimhistorybutton = widgets.Checkbox(value=True, description="Dim History")
28
+ self.colorpicker = widgets.ColorPicker(value="blue", description="Pick a Color")
29
+
30
+ self.button_reset = widgets.Button(description="Reset", tooltip="Reset stream settings")
31
+ self.button_apply = widgets.Button(description="Please wait", tooltip="Apply changes to the visualization", disabled=True)
32
+
33
+ # Group widgets for a cleaner UI
34
+ left_box = widgets.VBox([self.my_slider, self.my_slider2, self.colorpicker])
35
+ right_box = widgets.VBox([self.offsetbutton, self.dimhistorybutton, self.datebutton])
36
+ self.options_box = widgets.HBox([left_box, right_box])
37
+ self.accordion = widgets.Accordion(children=[self.options_box])
38
+ self.accordion.set_title(0, 'Visualization Options')
39
+
40
+ # Event handlers
41
+ self._last_update = time.time()
42
+ self.button_reset.on_click(self.reset)
43
+ self.button_apply.on_click(self.apply_with_debounce)
44
+ self.metrics_label = widgets.Label(value="")
45
+
46
+ # Observe widget changes directly
47
+ self.my_slider.observe(self.apply_with_debounce, names='value')
48
+ self.my_slider2.observe(self.apply_with_debounce, names='value')
49
+ self.colorpicker.observe(self.apply_with_debounce, names='value')
50
+
51
+ def stream(self, expr):
52
+ """Creates a TensorWatch stream from an expression."""
53
+ self.expr = expr
54
+ try:
55
+ self.streamdata = self.client.create_stream(expr=expr)
56
+ logging.debug("Stream created successfully")
57
+ except Exception as e:
58
+ logging.error(f"Error creating stream: {e}")
59
+ print(f"Error creating stream: {e}")
60
+ return self
61
+
62
+ def apply_with_debounce(self, _=None):
63
+ """Debounced apply function to prevent too frequent updates."""
64
+ now = time.time()
65
+ if now - self._last_update > self.update_interval:
66
+ self.update_visualizer()
67
+ self._last_update = now
68
+ if self.button_apply.description == "Start":
69
+ self.button_apply.description = "Apply Changes"
70
+
71
+ def update_visualizer(self, _=None):
72
+ """Updates the TensorWatch visualizer with the latest widget values."""
73
+ if not hasattr(self, 'streamdata') or not self.streamdata:
74
+ self.out.clear_output(wait=True)
75
+ with self.out:
76
+ print("Stream data not available or empty yet. Please wait for data.")
77
+ return
78
+
79
+ try:
80
+ # Always clear output before drawing
81
+ self.out.clear_output(wait=True)
82
+
83
+ # Close previous visualizer if it exists to free resources
84
+ if self.visualizer:
85
+ self.visualizer.close()
86
+ plt.close('all') # Also close any lingering matplotlib figures
87
+
88
+ # Create a new visualizer with the current settings
89
+ self.visualizer = tw.Visualizer(
90
+ self.streamdata,
91
+ vis_type="line",
92
+ window_width=self.my_slider2.value,
93
+ window_size=self.my_slider.value,
94
+ Date=self.datebutton.value,
95
+ useOffset=self.offsetbutton.value,
96
+ dim_history=self.dimhistorybutton.value,
97
+ color=self.colorpicker.value,
98
+ )
99
+ with self.out:
100
+ self.visualizer.show()
101
+
102
+ except Exception as e:
103
+ self.out.clear_output(wait=True)
104
+ with self.out:
105
+ print(f"Error updating visualizer: {e}")
106
+
107
+ def enable_apply_button(self):
108
+ """Enables the apply button and changes its description to 'Start'."""
109
+ logging.debug("Enabling apply button.")
110
+ self.button_apply.disabled = False
111
+ self.button_apply.description = "Start"
112
+
113
+ def reset(self, _=None):
114
+ """Resets all widget values to their defaults and clears the visualization."""
115
+ self.my_slider.value = self.default_value
116
+ self.my_slider2.value = self.default_value
117
+ self.datebutton.value = False
118
+ self.offsetbutton.value = False
119
+ self.dimhistorybutton.value = True
120
+ self.colorpicker.value = "blue"
121
+
122
+ # Clear the output and close the visualizer
123
+ self.out.clear_output()
124
+ if self.visualizer:
125
+ self.visualizer.close()
126
+ plt.close('all')
127
+ self.visualizer = None
128
+
129
+ def draw(self):
130
+ """Displays the UI for controlling the visualization."""
131
+ ui = widgets.VBox([
132
+ widgets.HBox([self.button_reset, self.button_apply]),
133
+ self.accordion,
134
+ self.out
135
+ ])
136
+ display(ui)
137
+
138
+ def draw_with_metrics(self):
139
+ """Displays the UI for controlling the visualization with a metrics label."""
140
+ ui = widgets.VBox([
141
+ self.metrics_label,
142
+ widgets.HBox([self.button_reset, self.button_apply]),
143
+ self.accordion,
144
+ self.out
145
+ ])
146
+ display(ui)
147
+
148
+ def update_metrics(self, metrics):
149
+ """Updates the metrics label with the provided text."""
150
+ self.metrics_label.value = metrics
151
+
152
+ def connector(self, topic, host, parsetype="json", cluster_size=1, conn_type="kafka", queue_length=50000,
153
+ group_id="mygroup", avro_schema=None, schema_path=None, protobuf_message=None, parser_extra=None,
154
+ random_sampling=None, countmin_width=None, countmin_depth=None):
155
+ """
156
+ Creates and returns a Kafka or PyKafka connector.
157
+
158
+ Args:
159
+ topic (str): The Kafka topic to consume from.
160
+ host (str): The Kafka broker host.
161
+ parsetype (str): The message format (e.g., 'json', 'pickle', 'avro').
162
+ cluster_size (int): The number of consumer threads.
163
+ conn_type (str): The type of connector to use ('kafka' or 'pykafka').
164
+ queue_length (int): The maximum size of the message queue.
165
+ group_id (str): The Kafka consumer group ID.
166
+ avro_schema (str): The Avro schema for 'kafka' connector.
167
+ schema_path (str): The path to the schema file.
168
+ protobuf_message (str): The name of the Protobuf message class.
169
+ parser_extra (str): Extra data for the parser (e.g., Avro schema for 'pykafka').
170
+ random_sampling (int): The percentage of messages to sample.
171
+ countmin_width (int): The width of the Count-Min Sketch.
172
+ countmin_depth (int): The depth of the Count-Min Sketch.
173
+
174
+ Returns:
175
+ A KafkaConnector or pykafka_connector instance.
176
+ """
177
+ if conn_type == "kafka":
178
+ return kc.KafkaConnector(
179
+ topic=topic, hosts=host, parsetype=parsetype, cluster_size=cluster_size,
180
+ twapi_instance=self, queue_length=queue_length, group_id=group_id,
181
+ avro_schema=avro_schema, schema_path=schema_path, protobuf_message=protobuf_message,
182
+ random_sampling=random_sampling, countmin_width=countmin_width,
183
+ countmin_depth=countmin_depth)
184
+ elif conn_type == "pykafka":
185
+ return pyc.pykafka_connector(
186
+ topic=topic, hosts=host, parsetype=parsetype, cluster_size=cluster_size,twapi_instance=self,
187
+ queue_length=queue_length, consumer_group=bytes(group_id, 'utf-8'),
188
+ parser_extra=parser_extra, scema_path=schema_path, probuf_message=protobuf_message,
189
+ random_sampling=random_sampling, countmin_width=countmin_width,
190
+ countmin_depth=countmin_depth)
191
+ else:
192
+ raise ValueError("Invalid connector type. Choose 'kafka' or 'pykafka'.")
193
+
194
+ async def some_async_function(self):
195
+ """Example of an async function that can be called."""
196
+ await asyncio.sleep(1)
197
+ print("Async function completed")