wiliot-certificate 1.3.0a1__py3-none-any.whl → 1.4.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. brg_certificate/__init__.py +0 -0
  2. brg_certificate/ag/energous_v0_defines.py +925 -0
  3. brg_certificate/ag/energous_v1_defines.py +931 -0
  4. brg_certificate/ag/energous_v2_defines.py +925 -0
  5. brg_certificate/ag/energous_v3_defines.py +925 -0
  6. brg_certificate/ag/energous_v4_defines.py +925 -0
  7. brg_certificate/ag/fanstel_lan_v0_defines.py +925 -0
  8. brg_certificate/ag/fanstel_lte_v0_defines.py +925 -0
  9. brg_certificate/ag/fanstel_wifi_v0_defines.py +925 -0
  10. brg_certificate/ag/minew_lte_v0_defines.py +925 -0
  11. brg_certificate/ag/wlt_cmd_if.html +102 -0
  12. brg_certificate/ag/wlt_types.html +6114 -0
  13. brg_certificate/ag/wlt_types_ag.py +7840 -0
  14. brg_certificate/ag/wlt_types_ag_jsons/brg2brg_ota.json +142 -0
  15. brg_certificate/ag/wlt_types_ag_jsons/brg2gw_hb.json +785 -0
  16. brg_certificate/ag/wlt_types_ag_jsons/brg2gw_hb_sleep.json +139 -0
  17. brg_certificate/ag/wlt_types_ag_jsons/calibration.json +394 -0
  18. brg_certificate/ag/wlt_types_ag_jsons/custom.json +515 -0
  19. brg_certificate/ag/wlt_types_ag_jsons/datapath.json +672 -0
  20. brg_certificate/ag/wlt_types_ag_jsons/energy2400.json +550 -0
  21. brg_certificate/ag/wlt_types_ag_jsons/energySub1g.json +595 -0
  22. brg_certificate/ag/wlt_types_ag_jsons/externalSensor.json +598 -0
  23. brg_certificate/ag/wlt_types_ag_jsons/interface.json +938 -0
  24. brg_certificate/ag/wlt_types_ag_jsons/powerManagement.json +1234 -0
  25. brg_certificate/ag/wlt_types_ag_jsons/side_info_sensor.json +105 -0
  26. brg_certificate/ag/wlt_types_ag_jsons/signal_indicator_data.json +77 -0
  27. brg_certificate/ag/wlt_types_ag_jsons/unified_echo_ext_pkt.json +61 -0
  28. brg_certificate/ag/wlt_types_ag_jsons/unified_echo_pkt.json +110 -0
  29. brg_certificate/brg_certificate.py +191 -0
  30. brg_certificate/brg_certificate_cli.py +47 -0
  31. brg_certificate/cert_common.py +828 -0
  32. brg_certificate/cert_config.py +395 -0
  33. brg_certificate/cert_data_sim.py +188 -0
  34. brg_certificate/cert_defines.py +337 -0
  35. brg_certificate/cert_gw_sim.py +285 -0
  36. brg_certificate/cert_mqtt.py +373 -0
  37. brg_certificate/cert_prints.py +181 -0
  38. brg_certificate/cert_protobuf.py +88 -0
  39. brg_certificate/cert_results.py +300 -0
  40. brg_certificate/cert_utils.py +358 -0
  41. brg_certificate/certificate_sanity_test_list.txt +36 -0
  42. brg_certificate/certificate_test_list.txt +43 -0
  43. brg_certificate/config/eclipse.json +10 -0
  44. brg_certificate/config/hivemq.json +10 -0
  45. brg_certificate/config/mosquitto.json +10 -0
  46. brg_certificate/config/mosquitto.md +95 -0
  47. brg_certificate/config/wiliot-dev.json +10 -0
  48. brg_certificate/restore_brg.py +59 -0
  49. brg_certificate/tests/calibration/interval_test/interval_test.json +13 -0
  50. brg_certificate/tests/calibration/interval_test/interval_test.py +28 -0
  51. brg_certificate/tests/calibration/output_power_test/output_power_test.json +13 -0
  52. brg_certificate/tests/calibration/output_power_test/output_power_test.py +28 -0
  53. brg_certificate/tests/calibration/pattern_test/pattern_test.json +13 -0
  54. brg_certificate/tests/calibration/pattern_test/pattern_test.py +70 -0
  55. brg_certificate/tests/datapath/adaptive_pacer_algo_test/adaptive_pacer_algo_test.json +13 -0
  56. brg_certificate/tests/datapath/adaptive_pacer_algo_test/adaptive_pacer_algo_test.py +76 -0
  57. brg_certificate/tests/datapath/num_of_tags_test/num_of_tags_test.json +13 -0
  58. brg_certificate/tests/datapath/num_of_tags_test/num_of_tags_test.py +83 -0
  59. brg_certificate/tests/datapath/output_power_test/output_power_test.json +13 -0
  60. brg_certificate/tests/datapath/output_power_test/output_power_test.py +27 -0
  61. brg_certificate/tests/datapath/pacer_interval_ble5_test/pacer_interval_ble5_test.json +13 -0
  62. brg_certificate/tests/datapath/pacer_interval_ble5_test/pacer_interval_ble5_test.py +43 -0
  63. brg_certificate/tests/datapath/pacer_interval_tags_count_test/pacer_interval_tags_count_test.json +13 -0
  64. brg_certificate/tests/datapath/pacer_interval_tags_count_test/pacer_interval_tags_count_test.py +63 -0
  65. brg_certificate/tests/datapath/pacer_interval_test/pacer_interval_test.json +13 -0
  66. brg_certificate/tests/datapath/pacer_interval_test/pacer_interval_test.py +50 -0
  67. brg_certificate/tests/datapath/pattern_test/pattern_test.json +13 -0
  68. brg_certificate/tests/datapath/pattern_test/pattern_test.py +28 -0
  69. brg_certificate/tests/datapath/pkt_filter_ble5_test/pkt_filter_ble5_test.json +13 -0
  70. brg_certificate/tests/datapath/pkt_filter_ble5_test/pkt_filter_ble5_test.py +51 -0
  71. brg_certificate/tests/datapath/pkt_filter_gen3_test/pkt_filter_gen3_test.json +13 -0
  72. brg_certificate/tests/datapath/pkt_filter_gen3_test/pkt_filter_gen3_test.py +54 -0
  73. brg_certificate/tests/datapath/pkt_filter_test/pkt_filter_test.json +13 -0
  74. brg_certificate/tests/datapath/pkt_filter_test/pkt_filter_test.py +55 -0
  75. brg_certificate/tests/datapath/rssi_threshold_test/rssi_threshold_test.json +13 -0
  76. brg_certificate/tests/datapath/rssi_threshold_test/rssi_threshold_test.py +73 -0
  77. brg_certificate/tests/datapath/rx_channel_test/rx_channel_test.json +13 -0
  78. brg_certificate/tests/datapath/rx_channel_test/rx_channel_test.py +41 -0
  79. brg_certificate/tests/datapath/rx_rate_gen2_test/rx_rate_gen2_test.json +21 -0
  80. brg_certificate/tests/datapath/rx_rate_gen2_test/rx_rate_gen2_test.py +184 -0
  81. brg_certificate/tests/datapath/rx_rate_gen3_test/rx_rate_gen3_test.json +21 -0
  82. brg_certificate/tests/datapath/rx_rate_gen3_test/rx_rate_gen3_test.py +210 -0
  83. brg_certificate/tests/datapath/stress_gen3_test/stress_gen3_test.json +30 -0
  84. brg_certificate/tests/datapath/stress_gen3_test/stress_gen3_test.py +203 -0
  85. brg_certificate/tests/datapath/stress_test/stress_test.json +30 -0
  86. brg_certificate/tests/datapath/stress_test/stress_test.py +210 -0
  87. brg_certificate/tests/datapath/tx_repetition_algo_test/tx_repetition_algo_test.json +13 -0
  88. brg_certificate/tests/datapath/tx_repetition_algo_test/tx_repetition_algo_test.py +113 -0
  89. brg_certificate/tests/datapath/tx_repetition_test/tx_repetition_test.json +13 -0
  90. brg_certificate/tests/datapath/tx_repetition_test/tx_repetition_test.py +79 -0
  91. brg_certificate/tests/edge_mgmt/actions_test/actions_test.json +13 -0
  92. brg_certificate/tests/edge_mgmt/actions_test/actions_test.py +432 -0
  93. brg_certificate/tests/edge_mgmt/brg2brg_ota_ble5_test/brg2brg_ota_ble5_test.json +13 -0
  94. brg_certificate/tests/edge_mgmt/brg2brg_ota_ble5_test/brg2brg_ota_ble5_test.py +94 -0
  95. brg_certificate/tests/edge_mgmt/brg2brg_ota_test/brg2brg_ota_test.json +13 -0
  96. brg_certificate/tests/edge_mgmt/brg2brg_ota_test/brg2brg_ota_test.py +87 -0
  97. brg_certificate/tests/edge_mgmt/leds_test/leds_test.json +13 -0
  98. brg_certificate/tests/edge_mgmt/leds_test/leds_test.py +210 -0
  99. brg_certificate/tests/edge_mgmt/ota_test/ota_test.json +13 -0
  100. brg_certificate/tests/edge_mgmt/ota_test/ota_test.py +83 -0
  101. brg_certificate/tests/edge_mgmt/stat_test/stat_test.json +13 -0
  102. brg_certificate/tests/edge_mgmt/stat_test/stat_test.py +48 -0
  103. brg_certificate/tests/energy2400/duty_cycle_test/duty_cycle_test.json +13 -0
  104. brg_certificate/tests/energy2400/duty_cycle_test/duty_cycle_test.py +26 -0
  105. brg_certificate/tests/energy2400/output_power_test/output_power_test.json +13 -0
  106. brg_certificate/tests/energy2400/output_power_test/output_power_test.py +27 -0
  107. brg_certificate/tests/energy2400/pattern_test/pattern_test.json +13 -0
  108. brg_certificate/tests/energy2400/pattern_test/pattern_test.py +28 -0
  109. brg_certificate/tests/energy2400/signal_indicator_ble5_test/signal_indicator_ble5_test.json +13 -0
  110. brg_certificate/tests/energy2400/signal_indicator_ble5_test/signal_indicator_ble5_test.py +398 -0
  111. brg_certificate/tests/energy2400/signal_indicator_sub1g_2_4_test/signal_indicator_sub1g_2_4_test.json +13 -0
  112. brg_certificate/tests/energy2400/signal_indicator_sub1g_2_4_test/signal_indicator_sub1g_2_4_test.py +153 -0
  113. brg_certificate/tests/energy2400/signal_indicator_test/signal_indicator_test.json +13 -0
  114. brg_certificate/tests/energy2400/signal_indicator_test/signal_indicator_test.py +264 -0
  115. brg_certificate/tests/energy_sub1g/duty_cycle_test/duty_cycle_test.json +13 -0
  116. brg_certificate/tests/energy_sub1g/duty_cycle_test/duty_cycle_test.py +27 -0
  117. brg_certificate/tests/energy_sub1g/pattern_test/pattern_test.json +13 -0
  118. brg_certificate/tests/energy_sub1g/pattern_test/pattern_test.py +26 -0
  119. brg_certificate/tests/energy_sub1g/signal_indicator_functionality_test/signal_indicator_functionality_test.json +13 -0
  120. brg_certificate/tests/energy_sub1g/signal_indicator_functionality_test/signal_indicator_functionality_test.py +397 -0
  121. brg_certificate/tests/energy_sub1g/signal_indicator_test/signal_indicator_test.json +13 -0
  122. brg_certificate/tests/energy_sub1g/signal_indicator_test/signal_indicator_test.py +27 -0
  123. brg_certificate/wltPb_pb2.py +72 -0
  124. brg_certificate/wltPb_pb2.pyi +227 -0
  125. brg_certificate/wlt_types.py +114 -0
  126. gw_certificate/api/extended_api.py +7 -1531
  127. gw_certificate/api_if/200/data.json +106 -0
  128. gw_certificate/api_if/200/logs.json +12 -0
  129. gw_certificate/api_if/200/status.json +47 -0
  130. gw_certificate/api_if/201/data.json +98 -0
  131. gw_certificate/api_if/201/logs.json +12 -0
  132. gw_certificate/api_if/201/status.json +53 -0
  133. gw_certificate/api_if/202/data.json +83 -0
  134. gw_certificate/api_if/202/logs.json +12 -0
  135. gw_certificate/api_if/202/status.json +60 -0
  136. gw_certificate/api_if/203/data.json +85 -0
  137. gw_certificate/api_if/203/logs.json +12 -0
  138. gw_certificate/api_if/203/status.json +63 -0
  139. gw_certificate/api_if/204/data.json +85 -0
  140. gw_certificate/api_if/204/logs.json +12 -0
  141. gw_certificate/api_if/204/status.json +63 -0
  142. gw_certificate/api_if/205/data.json +85 -0
  143. gw_certificate/api_if/205/logs.json +12 -0
  144. gw_certificate/api_if/205/status.json +63 -0
  145. gw_certificate/api_if/api_validation.py +0 -2
  146. gw_certificate/common/analysis_data_bricks.py +18 -1413
  147. gw_certificate/common/debug.py +0 -21
  148. gw_certificate/common/utils.py +1 -212
  149. gw_certificate/common/utils_defines.py +0 -87
  150. gw_certificate/gw_certificate.py +9 -7
  151. gw_certificate/gw_certificate_cli.py +39 -23
  152. gw_certificate/interface/4.4.52_app.zip +0 -0
  153. gw_certificate/interface/4.4.52_sd_bl_app.zip +0 -0
  154. gw_certificate/interface/ble_simulator.py +0 -32
  155. gw_certificate/interface/if_defines.py +1 -0
  156. gw_certificate/interface/mqtt.py +96 -19
  157. gw_certificate/interface/nrfutil-linux +0 -0
  158. gw_certificate/interface/nrfutil-mac +0 -0
  159. gw_certificate/interface/nrfutil.exe +0 -0
  160. gw_certificate/interface/pkt_generator.py +0 -82
  161. gw_certificate/interface/uart_if.py +73 -43
  162. gw_certificate/templates/results.html +1 -1
  163. gw_certificate/tests/__init__.py +1 -2
  164. gw_certificate/tests/actions.py +134 -9
  165. gw_certificate/tests/connection.py +10 -5
  166. gw_certificate/tests/downlink.py +2 -4
  167. gw_certificate/tests/generic.py +62 -12
  168. gw_certificate/tests/registration.py +78 -27
  169. gw_certificate/tests/static/generated_packet_table.py +12 -48
  170. gw_certificate/tests/static/packet_table.csv +10048 -10048
  171. gw_certificate/tests/static/references.py +2 -1
  172. gw_certificate/tests/static/uplink_defines.py +0 -7
  173. gw_certificate/tests/throughput.py +7 -12
  174. gw_certificate/tests/uplink.py +83 -43
  175. {wiliot_certificate-1.3.0a1.dist-info → wiliot_certificate-1.4.0a2.dist-info}/METADATA +59 -8
  176. wiliot_certificate-1.4.0a2.dist-info/RECORD +198 -0
  177. {wiliot_certificate-1.3.0a1.dist-info → wiliot_certificate-1.4.0a2.dist-info}/WHEEL +1 -1
  178. wiliot_certificate-1.4.0a2.dist-info/entry_points.txt +3 -0
  179. wiliot_certificate-1.4.0a2.dist-info/top_level.txt +2 -0
  180. gw_certificate/interface/packet_error.py +0 -22
  181. wiliot_certificate-1.3.0a1.dist-info/RECORD +0 -51
  182. wiliot_certificate-1.3.0a1.dist-info/entry_points.txt +0 -2
  183. wiliot_certificate-1.3.0a1.dist-info/top_level.txt +0 -1
  184. {wiliot_certificate-1.3.0a1.dist-info → wiliot_certificate-1.4.0a2.dist-info}/LICENSE +0 -0
@@ -1,938 +1,12 @@
1
1
  import logging
2
2
  from importlib import reload
3
3
  import os
4
- import shutil
5
- import subprocess
6
- import datetime
7
- from itertools import combinations, chain
8
- from time import sleep
9
- from pathlib import Path
10
4
  import inspect
11
5
  from appdirs import user_data_dir
12
6
 
13
- import plotly.express as px
14
- import pandas as pd
15
- import numpy as np
16
- from plotly.offline import plot
17
- from os.path import exists
18
- import tabulate
7
+ from gw_certificate.common.debug import debug_print
8
+ from gw_certificate.common.utils import current_timestamp
19
9
 
20
- from gw_certificate.common.debug import debug_print, is_databricks
21
- from gw_certificate.common.utils import convert_timestamp_to_datetime, current_timestamp, mstimestamp_to_timezone
22
-
23
-
24
- if is_databricks():
25
- logging.getLogger("py4j.java_gateway").setLevel(logging.ERROR)
26
-
27
- class WiliotTableError(Exception):
28
- pass
29
-
30
- class WiliotDatabricksUtils:
31
-
32
- def __init__(self, spark):
33
- self.spark = spark
34
- self.is_databricks = is_databricks()
35
-
36
- def get_seen_tags(self, table, start_time, end_time, tags_list=None, bridges_list=None, gateways_list=None,external_rawdata_path=None):
37
- """
38
- does an SQL query of the packet data table between specified timestamps
39
- filters the data for specified tags/bridges/gateways (if specified)
40
- returns relevant values from the table
41
- :type table: str
42
- :param table: name of data table
43
- :type start_time: float
44
- :param start_time: time filter start timestamp (UTC milliseconds)
45
- :type end_time: float
46
- :param end_time: time filter end timestamp (UTC milliseconds)
47
- :type tags_list: list
48
- :param tags_list: list of tags to filter from the data
49
- :type bridges_list: list
50
- :param bridges_list: list of bridges to filter from the data
51
- :type gateways_list: list
52
- :param gateways_list: list of gateways to filter from the data
53
- :rtype: pandas DataFrame
54
- :return: dataframe of data from table
55
- """
56
- # TODO - debug the SQL Query with different types of lists
57
- if bridges_list is not None:
58
- if len(bridges_list) == 1:
59
- bridges_list = list(bridges_list) + [""]
60
- bridges_list = tuple(bridges_list)
61
- if gateways_list is not None:
62
- if len(gateways_list) == 1:
63
- gateways_list = list(gateways_list) + [""]
64
- gateways_list = tuple(gateways_list)
65
- if tags_list is not None:
66
- if len(tags_list) == 1:
67
- tags_list = list(tags_list) + [""]
68
- tags_list = tuple(tags_list)
69
- # adding search by date to improve search time (most tables in data bricks partition is by date)
70
- if external_rawdata_path:
71
- query_data = pd.read_csv(external_rawdata_path)
72
- return query_data
73
- query_start_datetime = convert_timestamp_to_datetime(str(start_time))
74
- query_end_datetime = convert_timestamp_to_datetime(str(end_time))
75
- start_datetime = query_start_datetime - datetime.timedelta(hours=24)
76
- end_datetime = query_end_datetime + datetime.timedelta(hours=24)
77
- start_date = datetime.datetime.strftime(start_datetime, '%Y%m%d')
78
- end_date = datetime.datetime.strftime(end_datetime, '%Y%m%d')
79
- sql_method = ''
80
- if 'enriched' in table:
81
- sql_method = f"""
82
- select gatewayId, decryptedData, sequenceId, timestamp, nonce, mic, encryptedData, rawPacket, rssi, bridgeId, tagId, externalId, packet_counter, rxPacketRate, packetVersion, flowVersion, dco, gpio, charge_time, internal_tmp, nfpkt, temp_sensor_dc, assetId
83
- from {table}
84
- where date between {start_date} and {end_date}
85
- and timestamp between {start_time} and {end_time}
86
- """
87
- else:
88
- sql_method = f"""
89
- select gatewayId, decryptedData, sequenceId, timestamp, nonce, mic, encryptedData, rawPacket, rssi, bridgeId, tagId, externalId, packet_counter, rxPacketRate, packetVersion, flowVersion, dco, gpio, charge_time, internal_tmp, nfpkt, temp_sensor_dc
90
- from {table}
91
- where date between {start_date} and {end_date}
92
- and timestamp between {start_time} and {end_time}
93
- """
94
- if tags_list is not None and tags_list != ():
95
- sql_method = sql_method + f"""and externalId in {tags_list}
96
- """
97
- if bridges_list is not None and bridges_list != ():
98
- sql_method = sql_method + f"""and bridgeId in {bridges_list}
99
- """
100
- if gateways_list is not None and gateways_list != ():
101
- sql_method = sql_method + f"""and gatewayId in {gateways_list}
102
- """
103
- if self.spark is not None:
104
- debug_print('Running SQL query...', center=True)
105
- debug_print(sql_method)
106
- query_data = self.spark.sql(sql_method)
107
- query_data = query_data.toPandas()
108
- return query_data
109
- else:
110
- raise EnvironmentError("SQL query can only run in databricks")
111
-
112
- def get_seen_events(self, table, start_time, end_time, bridges_list=None, gateways_list=None, platform=False):
113
- """
114
- does an SQL query of the packet data table between specified timestamps
115
- filters the data for specified tags/bridges/gateways (if specified)
116
- returns relevant values from the table
117
- :type table: str
118
- :param table: name of data table
119
- :type start_time: float
120
- :param start_time: time filter start timestamp (UTC milliseconds)
121
- :type end_time: float
122
- :param end_time: time filter end timestamp (UTC milliseconds)
123
- :type bridges_list: list
124
- :param bridges_list: list of bridges to filter the data by
125
- :type gateways_list: list
126
- :param gateways_list: list of gateways to filter the data by
127
- :type platform: bool
128
- :param platform: platform/management (true/false)
129
- :rtype: pandas DataFrame
130
- :return: dataframe of data from table
131
- """
132
- # TODO - debug the SQL Query with different types of lists
133
- devices_list = ()
134
- if bridges_list is not None:
135
- if len(bridges_list) == 1:
136
- devices_list = list(bridges_list) + [""]
137
- devices_list = tuple(devices_list)
138
- if gateways_list is not None:
139
- if len(gateways_list) == 1 and devices_list == ():
140
- devices_list = list(gateways_list) + [""]
141
- elif devices_list is not None:
142
- devices_list = devices_list + tuple(gateways_list)
143
- devices_list = tuple(devices_list)
144
-
145
- # adding search by date to improve search time (most tables in data bricks partition is by date)
146
- query_start_datetime = convert_timestamp_to_datetime(str(start_time))
147
- query_end_datetime = convert_timestamp_to_datetime(str(end_time))
148
- start_datetime = query_start_datetime - datetime.timedelta(hours=36)
149
- end_datetime = query_end_datetime + datetime.timedelta(hours=36)
150
- start_date = datetime.datetime.strftime(start_datetime, '%Y%m%d')
151
- end_date = datetime.datetime.strftime(end_datetime, '%Y%m%d')
152
- sql_method = f"""
153
- select *
154
- from {table}
155
- where date between {start_date} and {end_date}
156
- and start between {start_time} and {end_time}
157
- """
158
- if not platform:
159
- connectivity_event = "name = 'NTWK'"
160
- id_filter = "id"
161
- else:
162
- connectivity_event = "eventName = 'connectivity'"
163
- id_filter = "assetId"
164
-
165
- if devices_list is not None:
166
- sql_method = sql_method + f"""and ({connectivity_event} or {id_filter} in {devices_list})
167
- """
168
- else:
169
- sql_method = sql_method + f"""and {connectivity_event}"""
170
- if self.spark is not None:
171
- debug_print('Running SQL query...', center=True)
172
- debug_print(sql_method)
173
- query_data = self.spark.sql(sql_method)
174
- query_data = query_data.toPandas()
175
- return query_data
176
- else:
177
- raise EnvironmentError("SQL query can only run in databricks")
178
-
179
- def get_sequence_id_data(self, table, start_time, end_time, gateways_list=None):
180
- """
181
- does an SQL query of the packet data table between specified timestamps
182
- returns only sequence id
183
- :type table: str
184
- :param table: name of data table
185
- :type start_time: float
186
- :param start_time: time filter start timestamp (UTC milliseconds)
187
- :type end_time: float
188
- :param end_time: time filter end timestamp (UTC milliseconds)
189
- :type gateways_list: list
190
- :param gateways_list: list of gateways to filter the data by
191
- :rtype: pandas DataFrame
192
- :return: dataframe of data from table
193
- """
194
-
195
- if gateways_list is not None:
196
- if len(gateways_list) == 1:
197
- gateways_list = list(gateways_list) + [""]
198
- gateways_list = tuple(gateways_list)
199
- # adding search by date to improve search time (most tables in data bricks partition is by date)
200
- query_start_datetime = convert_timestamp_to_datetime(str(start_time))
201
- query_end_datetime = convert_timestamp_to_datetime(str(end_time))
202
- start_datetime = query_start_datetime - datetime.timedelta(hours=36)
203
- end_datetime = query_end_datetime + datetime.timedelta(hours=36)
204
- start_date = datetime.datetime.strftime(start_datetime, '%Y%m%d')
205
- end_date = datetime.datetime.strftime(end_datetime, '%Y%m%d')
206
- sql_method = f"""
207
- select sequenceId, timestamp, gatewayId, gatewayName
208
- from {table}
209
- where date between {start_date} and {end_date}
210
- and timestamp between {start_time} and {end_time}
211
- """
212
- if gateways_list is not None:
213
- sql_method = sql_method + f"""and gatewayId in {gateways_list}
214
- """
215
- if self.spark is not None:
216
- debug_print('Running SQL query...', center=True)
217
- debug_print(sql_method)
218
- query_data = self.spark.sql(sql_method)
219
- query_data = query_data.toPandas()
220
- return query_data
221
- else:
222
- raise EnvironmentError("SQL query can only run in databricks")
223
-
224
- def get_statistics_data(self, table, start_time, end_time, gateways_list=None):
225
- """
226
- does an SQL query of the statistics data table between specified timestamps
227
- :type table: str
228
- :param table: name of data table
229
- :type start_time: float
230
- :param start_time: time filter start timestamp (UTC milliseconds)
231
- :type end_time: float
232
- :param end_time: time filter end timestamp (UTC milliseconds)
233
- :type gateways_list: list
234
- :param gateways_list: list of gateways to filter the data by
235
- :rtype: pandas DataFrame
236
- :return: dataframe of data from table
237
- """
238
-
239
- if gateways_list is not None:
240
- if len(gateways_list) == 1:
241
- gateways_list = list(gateways_list) + [""]
242
- gateways_list = tuple(gateways_list)
243
- # adding search by date to improve search time (most tables in data bricks partition is by date)
244
- query_start_datetime = convert_timestamp_to_datetime(str(start_time))
245
- query_end_datetime = convert_timestamp_to_datetime(str(end_time))
246
- start_datetime = query_start_datetime - datetime.timedelta(hours=36)
247
- end_datetime = query_end_datetime + datetime.timedelta(hours=36)
248
- start_date = datetime.datetime.strftime(start_datetime, '%Y%m%d')
249
- end_date = datetime.datetime.strftime(end_datetime, '%Y%m%d')
250
- sql_method = f"""
251
- select *
252
- from {table}
253
- where date between {start_date} and {end_date}
254
- and timestamp between {start_time} and {end_time}
255
- """
256
- if gateways_list is not None:
257
- sql_method = sql_method + f"""and gatewayId in {gateways_list}
258
- """
259
- if self.spark is not None:
260
- debug_print('Running SQL query...', center=True)
261
- debug_print(sql_method)
262
- query_data = self.spark.sql(sql_method)
263
- query_data = query_data.toPandas()
264
- return query_data
265
- else:
266
- raise EnvironmentError("SQL query can only run in databricks")
267
-
268
- def get_heartbeat_data(self, table, start_time, end_time, gateways_list=None, bridges_list=None):
269
- """
270
- does an SQL query of the statistics data table between specified timestamps
271
- :type table: str
272
- :param table: name of data table
273
- :type start_time: float
274
- :param start_time: time filter start timestamp (UTC milliseconds)
275
- :type end_time: float
276
- :param end_time: time filter end timestamp (UTC milliseconds)
277
- :type gateways_list: list
278
- :param gateways_list: list of gateways to filter the data by
279
- :rtype: pandas DataFrame
280
- :return: dataframe of data from table
281
- """
282
-
283
- if gateways_list is not None:
284
- if len(gateways_list) == 1:
285
- gateways_list = list(gateways_list) + [""]
286
- gateways_list = tuple(gateways_list)
287
-
288
- if bridges_list is not None:
289
- if len(bridges_list) == 1:
290
- bridges_list = list(bridges_list) + [""]
291
- bridges_list = tuple(bridges_list)
292
-
293
- # adding search by date to improve search time (most tables in data bricks partition is by date)
294
- query_start_datetime = convert_timestamp_to_datetime(str(start_time))
295
- query_end_datetime = convert_timestamp_to_datetime(str(end_time))
296
- start_datetime = query_start_datetime - datetime.timedelta(hours=36)
297
- end_datetime = query_end_datetime + datetime.timedelta(hours=36)
298
- start_date = datetime.datetime.strftime(start_datetime, '%Y%m%d')
299
- end_date = datetime.datetime.strftime(end_datetime, '%Y%m%d')
300
- sql_method = f"""
301
- select date, timestamp, time, gatewayId, packetCount, bridgeId, connectedTagsCount, receivedPktsCount, receivedWiliotPktsCount, badCRCPktsCount, rssi, txQueueWatermark, effectivePacerIncrement, isDynamic, hbType
302
- from {table}
303
- where date between {start_date} and {end_date}
304
- and timestamp between {start_time} and {end_time}
305
- """
306
- if gateways_list is not None:
307
- sql_method = sql_method + f"and gatewayId in {gateways_list}\n"
308
- if bridges_list is not None:
309
- sql_method = sql_method + f"and bridgeId in {bridges_list}\n"
310
- if self.spark is not None:
311
- debug_print('Running SQL query...', center=True)
312
- debug_print(sql_method)
313
- query_data = self.spark.sql(sql_method)
314
- query_data = query_data.toPandas()
315
- return query_data
316
- else:
317
- raise EnvironmentError("SQL query can only run in databricks")
318
-
319
-
320
- def get_num_seen_bridges(self, table, start_time, end_time, gateways_list=None):
321
- """
322
- gets number of unique bridges seen by each gateways (bridge has to send data packets)
323
- :type table: str
324
- :param table: name of data table
325
- :type start_time: float
326
- :param start_time: time filter start timestamp (UTC milliseconds)
327
- :type end_time: float
328
- :param end_time: time filter end timestamp (UTC milliseconds)
329
- :type gateways_list: list
330
- :param gateways_list: list of gateways to filter the data by
331
- :return: dictionary of number of seen bridges per gateway
332
- """
333
- if gateways_list is not None:
334
- if len(gateways_list) == 1:
335
- gateways_list = list(gateways_list) + [""]
336
- gateways_list = tuple(gateways_list)
337
- # adding search by date to improve search time (most tables in data bricks partition is by date)
338
- query_start_datetime = convert_timestamp_to_datetime(str(start_time))
339
- query_end_datetime = convert_timestamp_to_datetime(str(end_time))
340
- start_datetime = query_start_datetime - datetime.timedelta(hours=36)
341
- end_datetime = query_end_datetime + datetime.timedelta(hours=36)
342
- start_date = datetime.datetime.strftime(start_datetime, '%Y%m%d')
343
- end_date = datetime.datetime.strftime(end_datetime, '%Y%m%d')
344
- sql_method = f"""
345
- select gatewayId, count(distinct bridgeId) as countBridge
346
- from {table}
347
- where date between {start_date} and {end_date}
348
- and timestamp between {start_time} and {end_time}
349
- """
350
- if gateways_list is not None:
351
- sql_method = sql_method + f"""and gatewayId in {gateways_list}
352
- """
353
- sql_method = sql_method + "group by gatewayId"
354
- if self.spark is not None:
355
- debug_print('Running SQL query...', center=True)
356
- debug_print(sql_method)
357
- query_data = self.spark.sql(sql_method)
358
- query_data = query_data.toPandas()
359
- query_data = dict(zip(query_data['gatewayId'], query_data['countBridge']))
360
- return query_data
361
- else:
362
- raise EnvironmentError("SQL query can only run in databricks")
363
-
364
-
365
- def get_sequence_id_loss(self, packets_table, statistics_table, start_time, end_time, gateways_list=None, sync_tables=True):
366
- """
367
- calculates sequence ID loss (takes reboot into consideration) per GW in table
368
- :type packets_table: str | DataFrame
369
- :param packets_table: name of data table | data table DataFrame
370
- :type statistics_table: str | DataFrame
371
- :param statistics_table: name of statistics table | statistics table DataFrame
372
- :type start_time: float
373
- :param start_time: time filter start timestamp (UTC milliseconds)
374
- :type end_time: float
375
- :param end_time: time filter end timestamp (UTC milliseconds)
376
- :type gateways_list: list
377
- :param gateways_list: list of gateways to filter the data by
378
- :type sync_tables: bool
379
- :param sync_tables: sync table start/end by timestamp (relevant for running directly after DB table updates)
380
- :return: dictionary of statistics
381
- """
382
-
383
- RESET_THRESHOLD = 1000
384
-
385
- def sync_data_and_statistics_tables(data_table, statistics_table, gw_list):
386
- """
387
- sync data and statistics table by timestamps
388
- """
389
- def get_first_stat_timestamp(statistics_table):
390
- first_stat_timestamp = 0
391
- for gw in statistics_table['gatewayId'].unique():
392
- gw_first_stat_timestamp = np.sort(statistics_table[statistics_table['gatewayId'] == gw]['timestamp'])[0]
393
- if gw_first_stat_timestamp > first_stat_timestamp:
394
- first_stat_timestamp = gw_first_stat_timestamp
395
- return first_stat_timestamp
396
-
397
- # 'sync' statistics table and data table latest timestamps:
398
- max_data_timestamp = data_table['timestamp'].max()
399
- max_stats_timestamp = statistics_table['timestamp'].max()
400
- last_synced = min(max_data_timestamp, max_stats_timestamp)
401
- # truncate end of data tables to timeslot
402
- data_table = data_table[data_table['timestamp']<= last_synced]
403
- statistics_table = statistics_table[statistics_table['timestamp'] <= last_synced]
404
-
405
- first_stat_timestamp = get_first_stat_timestamp(statistics_table)
406
- stat_start_timestamp = first_stat_timestamp - (datetime.timedelta(seconds = 60).total_seconds() * 1000)
407
- data_start_timestamp = data_table['timestamp'].min()
408
- while data_start_timestamp > stat_start_timestamp:
409
- statistics_table = statistics_table[statistics_table['timestamp']>first_stat_timestamp]
410
- first_stat_timestamp = get_first_stat_timestamp(statistics_table)
411
- stat_start_timestamp = first_stat_timestamp - (datetime.timedelta(seconds = 60).total_seconds() * 1000)
412
-
413
- return data_table, statistics_table
414
-
415
-
416
-
417
- def get_stats_for_continuous_sequence_ids(sequence):
418
- """
419
- generate statistics for contiuous sequence IDs
420
- :type sequence: list
421
- :param sequence: array of sequence ID
422
- :rtype: dict
423
- :return: dictionary of statistics
424
- """
425
- stats = {}
426
-
427
- # remove duplicates
428
- sequence = np.unique(sequence)
429
- # sort by descending order
430
- sequence = -np.sort(-sequence)
431
-
432
- s_max = np.max(sequence)
433
- s_min = np.min(sequence)
434
- s_rec = len(np.unique(sequence)) # TODO - Compare with len(sequence) / num of coupled packets from table
435
- s_expected = (s_max - s_min)+1
436
- stats['maxSequenceId'] = s_max
437
- stats['minSequenceId'] = s_min
438
- stats['receivedSequenceIds'] = s_rec
439
- stats['expectedSequenceIds'] = s_expected
440
- return stats
441
-
442
- def process_sequences(df_array, num_mgmt_packets=0):
443
- """
444
- generate statistics for (normalized by duration) for array of sequence IDs (compensated for GW Reboots)
445
- :type df_array: list of DataFrames
446
- :param df_array: list of DataFrames with continuous sequence IDs
447
- :type num_mgmt_packets: int
448
- :param num_mgmt_packets: number of management packets
449
- :rtype: dict
450
- :return: dictionary of statistics
451
- """
452
- total_stats = {}
453
- total_received_packets = num_mgmt_packets
454
- total_expected_packets = 0
455
- for df in df_array:
456
- stats = get_stats_for_continuous_sequence_ids(df['sequenceId'])
457
- total_received_packets += stats['receivedSequenceIds']
458
- total_expected_packets += stats['expectedSequenceIds']
459
- if total_expected_packets == 0:
460
- breakpoint()
461
- loss_percentage = (1 - (total_received_packets / total_expected_packets) )* 100
462
- total_stats['totalManagementPackets'] = num_mgmt_packets
463
- total_stats['totalDataPackets'] = total_received_packets - num_mgmt_packets
464
- total_stats['totalReceivedPackets'] = total_received_packets
465
- total_stats['totalExpectedPackets'] = total_expected_packets
466
- total_stats['lossPercentage'] = round(loss_percentage, 3)
467
- total_stats['numResets'] = len(df_array)-1
468
- return total_stats
469
-
470
- results = {}
471
- if isinstance(packets_table, pd.DataFrame):
472
- data = packets_table
473
- else:
474
- data = self.get_sequence_id_data(packets_table, start_time, end_time, gateways_list).sort_values(by='timestamp', ascending=False)
475
- gw_list = data['gatewayId'].unique()
476
- if gw_list is None:
477
- return None
478
- if isinstance(statistics_table, pd.DataFrame):
479
- statistics_data = statistics_table
480
- else:
481
- statistics_data = self.get_statistics_data(statistics_table, start_time, end_time, gw_list)
482
- if sync_tables:
483
- data, statistics_data = sync_data_and_statistics_tables(data, statistics_data, gw_list)
484
-
485
- for gw in gw_list:
486
- gw_name = data[data['gatewayId'] == gw]['gatewayName'].iloc[0]
487
- num_mgmt_packets = statistics_data[statistics_data['gatewayId'] == gw]['managementPktCount'].sum()
488
- gw_results = []
489
- gw_df = data[data['gatewayId'] == gw].reset_index()
490
- gw_df['diff'] = gw_df['sequenceId'].diff()
491
- gw_df['reset'] = np.where(gw_df['diff']>RESET_THRESHOLD, True, False)
492
- gw_resets = gw_df[gw_df['reset'] == True]
493
- sequences = np.array_split(gw_df, gw_resets.index)
494
- gw_results = process_sequences(sequences, num_mgmt_packets)
495
- gw_results.update({'gwName': gw_name})
496
- results[gw] = gw_results
497
- return results
498
-
499
-
500
-
501
- def get_amount_of_unique_tags_per_data_path(self, table, start_time, end_time, gateways_list=None,
502
- tags_to_ignore=None, bridges_list=None):
503
- """
504
- does an SQL query of the packet data table between specified timestamps
505
- returns amount of unique externalIds per data path (bridge->gw) in the given timeframe
506
- :type table: str
507
- :param table: name of data table
508
- :type start_time: float
509
- :param start_time: time filter start timestamp (UTC milliseconds)
510
- :type end_time: float
511
- :param end_time: time filter end timestamp (UTC milliseconds)
512
- :type tags_to_ignore: list
513
- :param tags_to_ignore: list of tags to ignore in the query (will not be counted)
514
- :type gateways_list: list
515
- :param gateways_list: list of gateways to filter the data by
516
- :type bridges_list: list
517
- :param bridges_list: list of bridges to filter the data by
518
- :rtype: pandas DataFrame
519
- :return: dataframe of data from table
520
- """
521
- if bridges_list is not None:
522
- if len(bridges_list) == 1:
523
- bridges_list = list(bridges_list) + [""]
524
- bridges_list = tuple(bridges_list)
525
- if gateways_list is not None:
526
- if len(gateways_list) == 1:
527
- gateways_list = list(gateways_list) + [""]
528
- gateways_list = tuple(gateways_list)
529
- if tags_to_ignore is not None:
530
- if len(tags_to_ignore) == 1:
531
- tags_to_ignore = list(tags_to_ignore) + [""]
532
- tags_to_ignore = tuple(tags_to_ignore)
533
- # adding search by date to improve search time (most tables in data bricks partition is by date)
534
- query_start_datetime = convert_timestamp_to_datetime(str(start_time))
535
- query_end_datetime = convert_timestamp_to_datetime(str(end_time))
536
- start_datetime = query_start_datetime - datetime.timedelta(hours=36)
537
- end_datetime = query_end_datetime + datetime.timedelta(hours=36)
538
- start_date = datetime.datetime.strftime(start_datetime, '%Y%m%d')
539
- end_date = datetime.datetime.strftime(end_datetime, '%Y%m%d')
540
- sql_method = f"""
541
- select gatewayId, bridgeId, count(distinct externalId)
542
- from {table}
543
- where date between {start_date} and {end_date}
544
- and timestamp between {start_time} and {end_time}
545
- """
546
- if tags_to_ignore is not None and tags_to_ignore != ():
547
- sql_method = sql_method + f"""and externalId not in {tags_to_ignore}
548
- """
549
- if bridges_list is not None and bridges_list != ():
550
- sql_method = sql_method + f"""and bridgeId in {bridges_list}
551
- """
552
- if gateways_list is not None and gateways_list != ():
553
- sql_method = sql_method + f"""and gatewayId in {gateways_list}
554
- """
555
- sql_method = sql_method + f"""group by gatewayId, bridgeId order by gatewayId, bridgeId"""
556
- if self.spark is not None:
557
- debug_print('Running SQL query...', center=True)
558
- debug_print(sql_method)
559
- query_data = self.spark.sql(sql_method)
560
- query_data = query_data.toPandas()
561
- return query_data
562
- else:
563
- raise EnvironmentError("SQL query can only run in databricks")
564
-
565
- def get_amount_of_unique_tags_per_brg(self, table, start_time, end_time, tags_to_ignore=None, bridges_list=None):
566
- """
567
- does an SQL query of the packet data table between specified timestamps
568
- returns amount of unique externalIds per data path (bridge->gw) in the given timeframe
569
- :type table: str
570
- :param table: name of data table
571
- :type start_time: float
572
- :param start_time: time filter start timestamp (UTC milliseconds)
573
- :type end_time: float
574
- :param end_time: time filter end timestamp (UTC milliseconds)
575
- :type tags_to_ignore: list
576
- :param tags_to_ignore: list of tags to ignore in the query (will not be counted)
577
- :type gateways_list: list
578
- :param gateways_list: list of gateways to filter the data by
579
- :type bridges_list: list
580
- :param bridges_list: list of bridges to filter the data by
581
- :rtype: pandas DataFrame
582
- :return: dataframe of data from table
583
- """
584
- if bridges_list is not None:
585
- if len(bridges_list) == 1:
586
- bridges_list = list(bridges_list) + [""]
587
- bridges_list = tuple(bridges_list)
588
- if tags_to_ignore is not None:
589
- if len(tags_to_ignore) == 1:
590
- tags_to_ignore = list(tags_to_ignore) + [""]
591
- tags_to_ignore = tuple(tags_to_ignore)
592
- # adding search by date to improve search time (most tables in data bricks partition is by date)
593
- query_start_datetime = convert_timestamp_to_datetime(str(start_time))
594
- query_end_datetime = convert_timestamp_to_datetime(str(end_time))
595
- start_datetime = query_start_datetime - datetime.timedelta(hours=36)
596
- end_datetime = query_end_datetime + datetime.timedelta(hours=36)
597
- start_date = datetime.datetime.strftime(start_datetime, '%Y%m%d')
598
- end_date = datetime.datetime.strftime(end_datetime, '%Y%m%d')
599
- sql_method = f"""
600
- select gatewayId, bridgeId, count(distinct externalId)
601
- from {table}
602
- where date between {start_date} and {end_date}
603
- and timestamp between {start_time} and {end_time}
604
- """
605
- if tags_to_ignore is not None and tags_to_ignore != ():
606
- sql_method = sql_method + f"""and externalId not in {tags_to_ignore}
607
- """
608
- if bridges_list is not None and bridges_list != ():
609
- sql_method = sql_method + f"""and bridgeId in {bridges_list}
610
- """
611
- sql_method = sql_method + f"""group by bridgeId order by bridgeId"""
612
- if self.spark is not None:
613
- debug_print('Running SQL query...', center=True)
614
- debug_print(sql_method)
615
- query_data = self.spark.sql(sql_method)
616
- query_data = query_data.toPandas()
617
- return query_data
618
- else:
619
- raise EnvironmentError("SQL query can only run in databricks")
620
-
621
- def get_amount_of_unique_tags_per_gw(self, table, start_time, end_time, gateways_list=None, tags_to_ignore=None):
622
- """
623
- does an SQL query of the packet data table between specified timestamps
624
- returns amount of unique externalIds per data path (bridge->gw) in the given timeframe
625
- :type table: str
626
- :param table: name of data table
627
- :type start_time: float
628
- :param start_time: time filter start timestamp (UTC milliseconds)
629
- :type end_time: float
630
- :param end_time: time filter end timestamp (UTC milliseconds)
631
- :type tags_to_ignore: list
632
- :param tags_to_ignore: list of tags to ignore in the query (will not be counted)
633
- :type gateways_list: list
634
- :param gateways_list: list of gateways to filter the data by
635
- :rtype: pandas DataFrame
636
- :return: dataframe of data from table
637
- """
638
-
639
- if gateways_list is not None:
640
- if len(gateways_list) == 1:
641
- gateways_list = list(gateways_list) + [""]
642
- gateways_list = tuple(gateways_list)
643
- if tags_to_ignore is not None:
644
- if len(tags_to_ignore) == 1:
645
- tags_to_ignore = list(tags_to_ignore) + [""]
646
- tags_to_ignore = tuple(tags_to_ignore)
647
- # adding search by date to improve search time (most tables in data bricks partition is by date)
648
- query_start_datetime = convert_timestamp_to_datetime(str(start_time))
649
- query_end_datetime = convert_timestamp_to_datetime(str(end_time))
650
- start_datetime = query_start_datetime - datetime.timedelta(hours=36)
651
- end_datetime = query_end_datetime + datetime.timedelta(hours=36)
652
- start_date = datetime.datetime.strftime(start_datetime, '%Y%m%d')
653
- end_date = datetime.datetime.strftime(end_datetime, '%Y%m%d')
654
- sql_method = f"""
655
- select gatewayId, count(distinct externalId)
656
- from {table}
657
- where date between {start_date} and {end_date}
658
- and timestamp between {start_time} and {end_time}
659
- """
660
- if tags_to_ignore is not None and tags_to_ignore != ():
661
- sql_method = sql_method + f"""and externalId not in {tags_to_ignore}
662
- """
663
- if gateways_list is not None and gateways_list != ():
664
- sql_method = sql_method + f"""and gatewayId in {gateways_list}
665
- """
666
- sql_method = sql_method + f"""group by gatewayId order by gatewayId"""
667
- if self.spark is not None:
668
- debug_print('Running SQL query...', center=True)
669
- debug_print(sql_method)
670
- query_data = self.spark.sql(sql_method)
671
- query_data = query_data.toPandas()
672
- return query_data
673
- else:
674
- raise EnvironmentError("SQL query can only run in databricks")
675
-
676
- def get_last_data(self, table, mins=60, tags_list=None, bridges_list=None, gateways_list=None):
677
- """
678
- function querys back specified minutes from data table (counts back from last entry in table)
679
- :type table: str
680
- :param table: databricks data table name
681
- :type mins: int
682
- :param mins: minutes to query back, defaults to 1 hour
683
- :param tags_list: tags list
684
- :type tags_list: list
685
- :param tags_list: list of tags to filter from the data
686
- :type bridges_list: list
687
- :param bridges_list: list of bridger to filter from the data
688
- :type gateways_list: list
689
- :param gateways_list: list of gateways to filter from the data
690
- :rtype: pandas DataFrame
691
- :return: dataframe of data from table
692
- """
693
- end_timestamp = self.get_last_entry_timestamp(table)
694
- end_datetime = mstimestamp_to_timezone(end_timestamp)
695
- start_timestamp = end_timestamp - (mins * 60 * 1000)
696
- start_datetime = mstimestamp_to_timezone(start_timestamp)
697
- debug_print(f'Getting last {mins} mins from {table}')
698
- debug_print(f'Last entry at {end_timestamp}')
699
- debug_print(f'querying {start_datetime} -> {end_datetime}')
700
- return self.get_seen_tags(table, start_timestamp, end_timestamp, tags_list, bridges_list, gateways_list)
701
-
702
- def get_last_entry_timestamp(self, table):
703
- """
704
- function gets name of data table and returns the timestamp of the last entry (in milliseconds)
705
- :type table: str
706
- :param table: name of table
707
- """
708
- today = datetime.datetime.now()
709
- yesterday = today-datetime.timedelta(days=1)
710
- today_date = datetime.datetime.strftime(today, '%Y%m%d')
711
- yesterday_date = datetime.datetime.strftime(yesterday, '%Y%m%d')
712
-
713
- # Try to get last timestamp from last day
714
- sql_method = f"""
715
- select MAX (timestamp)
716
- from {table}
717
- where date between {yesterday_date} and {today_date}
718
- """
719
- if self.spark is not None:
720
- query_data = self.spark.sql(sql_method)
721
- query_data = query_data.toPandas()
722
- last_entry_ts = query_data.iloc[0][0]
723
- if not np.isnan(last_entry_ts):
724
- return query_data.iloc[0][0]
725
- else:
726
- # Query all packets in table
727
- sql_method = ''.join(sql_method.split('\n')[:-2])
728
- query_data = self.spark.sql(sql_method)
729
- query_data = query_data.toPandas()
730
- last_entry_ts = query_data.iloc[0][0]
731
- if not np.isnan(last_entry_ts):
732
- return query_data.iloc[0][0]
733
- else:
734
- raise WiliotTableError(f'Cannot get last entry, no entries in data table {table}')
735
- else:
736
- raise EnvironmentError("Unable to detect dbutils function")
737
-
738
- def wait_for_data(self, table, requested_timestamp, timeout_mins=70):
739
- # TODO - implement date search
740
- """
741
- function waits for data timed at requested_timestamp (or later) to appear in the data table.
742
- :type table: str
743
- :param table: data table name
744
- :type requested_timestamp: int
745
- :param requested_timestamp: timestamp to wait to appear in the table
746
- """
747
- last_entry_timestamp = self.get_last_entry_timestamp(table)
748
- table_ready = last_entry_timestamp >= requested_timestamp
749
- start = current_timestamp()
750
- while not table_ready:
751
- if (current_timestamp() - start) >= 1000 * 60 * timeout_mins:
752
- debug_print(f"""{timeout_mins} minute timeout reached! Table still does not have the requested data
753
- Run test again or get raw data with wait_for_data=False""")
754
- exit_notebook(f'Wait for table timeout reached after {timeout_mins} minutes')
755
- debug_print(
756
- f'Waiting for table to get data. \n'
757
- f'Requested timestamp {requested_timestamp} | ({mstimestamp_to_timezone(requested_timestamp)}) \n'
758
- f'Table latest timestamp {last_entry_timestamp} | ({mstimestamp_to_timezone(last_entry_timestamp)})')
759
- mins_behind = (requested_timestamp - last_entry_timestamp) / 60000
760
- debug_print(f'Data table {mins_behind} minutes behind , trying again in 30 seconds', center=True)
761
- sleep(30)
762
- last_entry_timestamp = self.get_last_entry_timestamp(table)
763
- table_ready = last_entry_timestamp >= requested_timestamp
764
- debug_print(
765
- f'Table is updated. \n'
766
- f'Requested timestamp {requested_timestamp}, table latest timestamp {last_entry_timestamp} \n'
767
- f'Table latest timestamp {last_entry_timestamp} | ({mstimestamp_to_timezone(last_entry_timestamp)})')
768
- mins_ahead = (last_entry_timestamp - requested_timestamp) / 60000
769
- debug_print(f'Data table {mins_ahead} minutes ahead', center=True)
770
-
771
- def get_seen_edge_devices_from_packets(self, table, start_time, end_time):
772
- """
773
- does an SQL query of the packet data table between specified timestamps
774
- returns dictionary of seen gateways and bridges from slice of packet table
775
- :type table: str
776
- :param table: name of data table
777
- :type start_time: float
778
- :param start_time: time filter start timestamp (UTC milliseconds)
779
- :type end_time: float
780
- :param end_time: time filter end timestamp (UTC milliseconds)
781
- :rtype: pandas DataFrame
782
- :return: dataframe of data from table
783
- """
784
- # adding search by date to improve search time (most tables in data bricks partition is by date)
785
- query_start_datetime = convert_timestamp_to_datetime(str(start_time))
786
- query_end_datetime = convert_timestamp_to_datetime(str(end_time))
787
- start_datetime = query_start_datetime - datetime.timedelta(hours=24)
788
- end_datetime = query_end_datetime + datetime.timedelta(hours=24)
789
- start_date = datetime.datetime.strftime(start_datetime, '%Y%m%d')
790
- end_date = datetime.datetime.strftime(end_datetime, '%Y%m%d')
791
- sql_method = f"""
792
- select gatewayId, bridgeId, gatewayName, max(timestamp) as timestamp
793
- from {table}
794
- where date between {start_date} and {end_date}
795
- and timestamp between {start_time} and {end_time}
796
- group by gatewayId, gatewayName, bridgeId
797
- """
798
- if self.spark is not None:
799
- debug_print('Running SQL query...', center=True)
800
- debug_print(sql_method)
801
- query_data = self.spark.sql(sql_method).toPandas()
802
- query_data['gatewayId'] = query_data['gatewayId'].str.upper()
803
- query_data['bridgeId'] = query_data['bridgeId'].str.upper()
804
- return query_data
805
- else:
806
- raise EnvironmentError("SQL query can only run in databricks")
807
-
808
-
809
- def process_tagstats(params, tag_stats_df, working_directory, test_no):
810
- """
811
- function gets tag stats dataframe and creates downloadable cdf graph for specified parameters
812
- :type params: list
813
- :param params: tag stats metrics to create graphs for
814
- :type tag_stats_df: pandas DataFrame
815
- :param tag_stats_df: tag stats DataFrame (can be created using get_tagstats_from_test)
816
- :type working_directory: string
817
- :param working_directory: directory to save the graphs HTML files
818
- :type test_no: string
819
- :param test_no: test number
820
- """
821
- if params is None:
822
- debug_print('No parameters specified. Creating graphs for all possible parameters.')
823
- params = ['num_packets', 'num_cycles', 'sprinkler_counter_mean', 'sprinkler_counter_std',
824
- 'sprinkler_counter_min', 'sprinkler_counter_max', 'tbp_mean', 'tbp_std', 'tbp_min', 'tbp_max',
825
- 'tbp_num_vals', 'per_mean', 'per_std', 'rssi_mean', 'rssi_std', 'rssi_min', 'rssi_max', 'ttfp',
826
- 'ttfp_seconds', 'end_time', 'duration', 'rx_rate_normalized', 'rx_rate', 'charge_time_min',
827
- 'charge_time_max', 'packet_counter_min', 'packet_counter_max', 'packet_counter_first',
828
- 'estimated_packet_counter_resets', 'estimated_total_packet_rate', 'estimated_total_per',
829
- 'externalId']
830
- tests_list = tag_stats_df['testId'].unique()
831
- for param in params:
832
- debug_print(f"""*****************************{param}*****************************""")
833
- graph = pd.DataFrame()
834
- for test in tests_list:
835
- df = tag_stats_df[tag_stats_df.testId == test]
836
- test_curveset = cdf_dataframe(df, param, test)
837
- graph = pd.concat([test_curveset, graph])
838
- debug_print(f"""test {test} added to graph""")
839
- fig = px.scatter(graph, x=param, y='TagCount', color=['testName'])
840
- fig.update_layout(title=param)
841
- fig.update_yaxes()
842
- if param == 'ttfp_seconds':
843
- fig.update_xaxes(title_text='Time to first packet [seconds]')
844
- if param == 'estimated_total_packet_rate':
845
- fig.update_xaxes(title_text='Estimated total packet rate [counters/second]')
846
- filepath = f"""{working_directory}{test_no}_{param}_graph.html"""
847
- open(filepath, 'a').close()
848
- fig.write_html(filepath)
849
- # debug_print(filepath)
850
- # p = plot(fig, output_type='div')
851
- # display_html(p)
852
- create_download_link(filepath, file_name=f"""{param} graph""")
853
-
854
- def file_exists(path):
855
- """
856
- function checks if file exists
857
- :type path: str
858
- :param path: path to file
859
- :rtype: bool
860
- :return: file exists
861
- """
862
- path = path.replace('/dbfs', 'dbfs:')
863
- try:
864
- db_utils().fs.head(path)
865
- return True
866
- except Exception:
867
- try:
868
- return exists(path)
869
- except Exception:
870
- return False
871
-
872
- def massagedata(dataset, param):
873
- curveset = pd.DataFrame(columns=[param, 'TagCount'])
874
- count = 0
875
- reftme_stmp = None
876
- for index, row in dataset.iterrows():
877
- if pd.isna(row[param]):
878
- continue
879
- if reftme_stmp is None:
880
- reftme_stmp = row[param]
881
- if reftme_stmp == row[param]:
882
- count += 1
883
- else:
884
- curveset = curveset.append({param: reftme_stmp, 'TagCount': count}, ignore_index=True)
885
- reftme_stmp = row[param]
886
- count += 1
887
- curveset = curveset.append({param: reftme_stmp, 'TagCount': count}, ignore_index=True)
888
- return curveset
889
-
890
- def display_html(html):
891
- """
892
- Use databricks displayHTML from an external package
893
- :type html: string
894
- :param html: html document to display
895
- """
896
- for frame in inspect.getouterframes(inspect.currentframe()):
897
- global_names = set(frame.frame.f_globals)
898
- # Use multiple functions to reduce risk of mismatch
899
- if all(v in global_names for v in ["displayHTML", "display", "spark"]):
900
- return frame.frame.f_globals["displayHTML"](html)
901
- raise EnvironmentError("Unable to detect displayHTML function")
902
-
903
- def display_print(todisplay, console_only=False, **kwargs):
904
- """
905
- Use databricks display func from an external package.
906
- uses tabulate library to display dataframe in case running locally
907
- :type todisplay: pandas DataFrame
908
- :param todisplay: variable to display
909
- :type console_only: bool
910
- :param console_only: if true, prints the data table to console (even if running in DB notebook) using tabulate
911
- """
912
- if not console_only:
913
- for frame in inspect.getouterframes(inspect.currentframe()):
914
- # call dbutils display
915
- global_names = set(frame.frame.f_globals)
916
- if all(v in global_names for v in ["display"]):
917
- try:
918
- return frame.frame.f_globals["display"](todisplay)
919
- except ValueError:
920
- debug_print('ValueError when reading DataFrame! Trying to print in console', center=True)
921
- except KeyError:
922
- debug_print('KeyError when reading DataFrame! Trying to print in console', center=True)
923
- except TypeError:
924
- debug_print('TypeError when reading DataFrame! Trying to print in console', center=True)
925
- if isinstance(todisplay, pd.DataFrame) or console_only:
926
- debug_print('\n' + tabulate.tabulate(todisplay, **kwargs))
927
- return None
928
- raise EnvironmentError("Unable to detect Display function")
929
-
930
-def db_utils_rm(dbfs_path):
-    """
-    helper function to remove files/folders from dbfs
-    """
-    dbfs_path = dbfs_path.replace('/dbfs', 'dbfs:')
-    return db_utils().fs.rm(dbfs_path)
 
 def db_utils():
     """
@@ -946,227 +20,6 @@ def db_utils():
             return frame.frame.f_globals["dbutils"]()
     raise EnvironmentError("Unable to detect dbutils function")
 
-def get_secret(scope, key):
-    """
-    get databricks secret, return None if not running in databricks
-    :param scope: secret scope
-    :param key: secret key
-    """
-    try:
-        secret = db_utils().secrets.get(scope=scope, key=key)
-        return secret
-    except EnvironmentError:
-        raise(EnvironmentError('Cannot get secret when not running in databricks!'))
-
-
-def create_download_link(dbfs_path, file_name='file'):
-    """
-    accepts path to dbfs file, and creates a download link to the file in the notebook
-    the function only works with files saved in /dbfs/FileStore
-    :type dbfs_path: string
-    :param dbfs_path: path to dbfs path (accepts either spark API or file API format)
-    :type file_name: string
-    :param file_name: name of file (this will be the name of the link created)
-    """
-    if is_databricks():
-        if not ('/dbfs/FileStore' in dbfs_path or 'dbfs:/FileStore' in dbfs_path):
-            raise ValueError('the path must start with /dbfs/FileStore or dbfs:/FileStore!')
-        dbfs_path = dbfs_path.replace('/dbfs/FileStore', '/files')
-        dbfs_path = dbfs_path.replace('dbfs:/FileStore', '/files')
-        display_html(f"""\n<a href="{dbfs_path}" download>Download {file_name} </a>""")
-        debug_print(f"""File available at {dbfs_path}""")
-
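`create_download_link` relies on the Databricks convention that files written under DBFS `FileStore` are served by the workspace under the `/files/` URL prefix. A small sketch of just the path translation (standalone, no dbutils required; the function name is illustrative):

```python
def filestore_to_url(dbfs_path):
    """Translate a FileStore path (file API or Spark API form) to a notebook-relative URL."""
    for prefix in ('/dbfs/FileStore', 'dbfs:/FileStore'):
        if dbfs_path.startswith(prefix):
            return dbfs_path.replace(prefix, '/files', 1)
    raise ValueError('the path must start with /dbfs/FileStore or dbfs:/FileStore!')

# Example: /dbfs/FileStore/reports/run1.csv -> /files/reports/run1.csv
assert filestore_to_url('/dbfs/FileStore/reports/run1.csv') == '/files/reports/run1.csv'
```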
-def cdf_dataframe(dataset, param, dataset_name, groupby=None, comp=False):
-    """
-    function accepts dataframe as input the function returns dataframe of CDF
-    complementary cumulative distribution function (of tag count) according to param,
-    and labels it by dataset_name
-    :type dataset: pandas DataFrame
-    :param dataset: dataset to create CDF
-    :type param: string
-    :param param: parameter to create the CDF
-    :type dataset_name: string
-    :param dataset_name: name to add to the result
-    :type groupby: str
-    :param groupby: added parameter (in dataset) to group results by
-    :type comp: bool
-    :param comp: whether to create CDF (comp = False) or CCDF (comp = True)
-    """
-    curveset = pd.DataFrame(columns=[param, 'TagCount', groupby])
-    dataset = dataset.sort_values(param, ascending=True)
-    if groupby is None:
-        iterations = [None]
-    else:
-        iterations = dataset[groupby].unique()
-    for group in iterations:
-        group_curveset = pd.DataFrame(columns=[param, groupby])
-        if group is not None:
-            group_dataset = dataset[dataset[groupby] == group]
-        else:
-            group_dataset = dataset
-        values = group_dataset[param].unique()
-        group_curveset[param] = values
-        if group is not None:
-            group_curveset[groupby] = group
-        for index, value in group_curveset[param].items():
-            biggerthan = group_dataset[group_dataset[param] > value][param].count()
-            smallerorequals = group_dataset[group_dataset[param] <= value][param].count()
-            if comp:
-                group_curveset.at[index, 'TagCount'] = biggerthan
-            else:
-                group_curveset.at[index, 'TagCount'] = smallerorequals
-        if comp:
-            group_curveset['testName'] = dataset_name + '_CCDF'
-        else:
-            group_curveset['testName'] = dataset_name + '_CDF'
-        curveset = pd.concat([curveset, group_curveset])
-
-    return curveset
-
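`cdf_dataframe` counts, for every distinct value of `param`, how many rows fall at or below it (CDF) or strictly above it (CCDF), one group at a time. The same idea can be expressed more compactly with pandas value counts; a hedged, vectorized sketch for a single (ungrouped) dataset, not the package's implementation:

```python
import pandas as pd

def cdf_counts(dataset, param, comp=False):
    """Return one row per unique `param` value with the cumulative tag count.

    comp=False -> CDF (rows with value <= x); comp=True -> CCDF (rows with value > x).
    """
    counts = dataset[param].value_counts().sort_index()
    cdf = counts.cumsum()
    tag_count = (len(dataset) - cdf) if comp else cdf
    return pd.DataFrame({param: counts.index, 'TagCount': tag_count.values})

# Example
df = pd.DataFrame({'rssi': [1, 2, 2, 3]})
print(cdf_counts(df, 'rssi'))             # TagCount: 1, 3, 4
print(cdf_counts(df, 'rssi', comp=True))  # TagCount: 3, 1, 0
```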
-def multi_plot_dataframe(dataset, param, dataset_name, graph_type, tags_physical_param, groupby=None, comp=False):
-
-    """
-    function accepts dataframe as input the function returns dataframe of CDF
-    complementary cumulative distribution function (of tag count) according to param,
-    and labels it by dataset_name
-    :type dataset: pandas DataFrame
-    :param dataset: dataset to create CDF
-    :type param: string
-    :param param: parameter to create the CDF
-    :type dataset_name: string
-    :param dataset_name: name to add to the result
-    :type groupby: str
-    :param groupby: added parameter (in dataset) to group results by
-    :type comp: bool
-    :param comp: whether to create CDF (comp = False) or CCDF (comp = True)
-    """
-    curveset = pd.DataFrame(columns=[param, 'TagCount', groupby])
-    dataset = dataset.sort_values(param, ascending=True)
-    if groupby is None:
-        iterations = [None]
-    else:
-        debug_print(f'dataset is: {dataset}')
-        debug_print(f'dataset[groupby] is: {dataset[groupby]}')
-        iterations = dataset[groupby].unique()
-    for group in iterations:
-        columns_group_curvset = [param, groupby, 'testId'] + tags_physical_param
-        group_curveset = pd.DataFrame(columns=columns_group_curvset)
-        if group is not None:
-            group_dataset = dataset[dataset[groupby] == group]
-        else:
-            group_dataset = dataset
-
-        group_curveset[[param, 'testId'] + tags_physical_param] = group_dataset[[param, 'testId'] + tags_physical_param]
-
-        if group is not None:
-            group_curveset[groupby] = group
-        for index, value in group_curveset[param].items():
-            biggerthan = group_dataset[group_dataset[param] > value][param].count()
-            smallerorequals = group_dataset[group_dataset[param] <= value][param].count()
-            if comp:
-                group_curveset.at[index, 'TagCount'] = biggerthan
-            else:
-                group_curveset.at[index, 'TagCount'] = smallerorequals
-
-        if graph_type == 'overall_analysis':
-            curveset_to_plot = group_curveset
-
-        if graph_type == 'location_analysis':
-            location_curvset = group_curveset.groupby(['testId', 'location']).apply(sort_and_add_index, param=param, comp=comp)
-            location_curvset.reset_index(drop=True, inplace=True)
-            curveset_to_plot = location_curvset
-
-        if graph_type == 'position_analysis':
-            position_curveset = group_curveset
-            position_curveset['index'] = group_curveset.groupby(['surface', 'orientation', 'testId'])[param].rank(method='first').astype(int)
-            curveset_to_plot = position_curveset
-
-        if comp:
-            curveset_to_plot['testName'] = dataset_name + '_CCDF'
-        else:
-            curveset_to_plot['testName'] = dataset_name + '_CDF'
-        curveset = pd.concat([curveset, curveset_to_plot])
-
-    return curveset
-
-def sort_and_add_index(group, param, comp=False):
-    group['index'] = range(1, len(group) + 1)
-    return group.sort_values(param, ascending=not comp)
-
-def powerset(iterable):
-    s = list(iterable)
-    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))
-
-def create_zip(dir_path):
-    """
-    gets path to FileStore directory and creates a download link to a zip file of the files in given directory
-    zip file will be saved inside the dir_path directory
-    :type dir_path: string
-    :param dir_path: path to FileStore directory to zip
-    """
-    if dir_path[-1] == '/':
-        dir_path = dir_path[:-1]
-    dir_name = dir_path.rsplit('/', 1)[-1]
-    if file_exists(dir_path + '/' + dir_name + '.zip'):
-        db_utils().fs.rm(dir_path.replace('/dbfs', 'dbfs:') + '/' + dir_name + '.zip')
-    zip_tmp_path = shutil.make_archive('/tmp/zips/tmpzip', 'zip', dir_path)
-    zip_download_path = dir_path.replace('/dbfs', 'dbfs:') + '/' + dir_name + '.zip'
-    db_utils().fs.mv(f'file:{zip_tmp_path}', zip_download_path)
-    create_download_link(zip_download_path, f'{dir_name}.zip')
-
-def exit_notebook(message):
-    """
-    closes notebook and prints out error message
-    :type message: str
-    :param message: error message
-    """
-    debug_print(message, center=True)
-    try:
-        db_utils().notebook.exit(message)
-    except EnvironmentError as e:
-        raise Exception(message)
-
-def save_df(df, path, name=None, withindex=False, silent=False):
-    """
-    saves DataFrame to path, displays the DataFrame and creates a download link
-    :type df: pandas Dataframe
-    :param df: dataframe to save
-    :type path: str
-    :param path: path to save can be entered either as filename to save or directory to save in
-    :type name: str
-    :param name: name of dataframe (this displays in the download link and as a header before the link)
-    :type withindex: bool
-    :param withindex: flag to choose if to export the dataframe with/without index
-    :type silent: bool
-    :param silent: if true does not generate HTML link
-    """
-    # save as dataframe.csv if no filename given
-    if path[-1] == '/':
-        path = path + 'dataframe'
-    if path[-4:] != '.csv':
-        path = path + '.csv'
-    if name is None:
-        name = path.split()[-1]
-    if is_databricks():
-        path = path.replace(' ', '_')
-    debug_print(f'Saving DataFrame at {path}')
-    if not is_databricks():
-        df.to_csv(path, index=withindex)
-        return True
-    try:
-        df.to_csv(path, index=withindex)
-    except OSError:
-        mkdirs_path = path.replace('/dbfs', 'dbfs:').rsplit('/', 1)[0]
-        db_utils().fs.mkdirs(mkdirs_path)
-        df.to_csv(path, index=withindex)
-    if not silent:
-        if is_databricks():
-            display_html(f'<h1>{name}</h1>')
-            create_download_link(dbfs_path=path, file_name=name)
-            display_print(pd.read_csv(path), headers="keys")
-        else:
-            debug_print(f'{name} available at {path}')
-
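`save_df` accepts either a directory (trailing `/`) or a filename and normalizes it before writing. A short sketch of just that normalization rule (illustrative helper name, not part of the package):

```python
def normalize_csv_path(path):
    """Apply save_df()'s path rules: default filename, enforce .csv suffix."""
    if path.endswith('/'):
        path += 'dataframe'   # directory given -> default file name
    if not path.endswith('.csv'):
        path += '.csv'
    return path

assert normalize_csv_path('/tmp/results/') == '/tmp/results/dataframe.csv'
assert normalize_csv_path('/tmp/results/summary') == '/tmp/results/summary.csv'
```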
 def initialize_logger(working_directory=None):
     """
     initializes the logger to print to log and to logfile, which by default is named by the current timestamp (ms)
@@ -1187,269 +40,21 @@ def initialize_logger(working_directory=None):
     except Exception:
         pass
     logger_filename = int(current_timestamp())
-    if is_databricks():
-        db_utils().fs.put(f'file:/databricks/driver/{logger_filename}.log', '', overwrite=True)
-        logging.getLogger("py4j.java_gateway").setLevel(logging.ERROR)
-        logging.basicConfig(level=logging.DEBUG,
-                            format='%(asctime)s | %(levelname)s | %(message)s',
-                            handlers=[
-                                logging.FileHandler(f'{logger_filename}.log', 'a'),
-                                # logging.handlers.RotatingFileHandler
-                                logging.StreamHandler()
-                            ], force=True)
-        logging.getLogger().handlers[0].setLevel(logging.DEBUG)
-        logging.getLogger().handlers[1].setLevel(logging.INFO)
-        debug_print(f'logger initialized at {logger_filename}', center=True)
-        debug_print(f'logfile located at file:/databricks/driver/{logger_filename}.log')
-    else:
-        if working_directory is None:
-            working_directory = os.path.join(user_data_dir(), 'wiliot', 'deployment_tools')
-        if not os.path.exists(working_directory):
-            os.makedirs(working_directory)
-        logging.basicConfig(level=logging.DEBUG,
-                            format='%(asctime)s | %(levelname)s | %(message)s',
-                            handlers=[
-                                logging.FileHandler(f'{working_directory}/{logger_filename}.log', 'a'),
-                                # logging.handlers.RotatingFileHandler
-                                logging.StreamHandler()
-                            ])
-        # filter stream to show info and up
-        logging.getLogger().handlers[0].setLevel(logging.DEBUG)
-        logging.getLogger().handlers[1].setLevel(logging.INFO)
-        debug_print(f'logger initialized at {logger_filename}', center=True)
-        debug_print(f'logfile located at {working_directory}/{logger_filename}.log')
+    if working_directory is None:
+        working_directory = os.path.join(user_data_dir(), 'wiliot', 'deployment_tools')
+    if not os.path.exists(working_directory):
+        os.makedirs(working_directory)
+    logging.basicConfig(level=logging.DEBUG,
+                        format='%(asctime)s | %(levelname)s | %(message)s',
+                        handlers=[
+                            logging.FileHandler(f'{working_directory}/{logger_filename}.log', 'a'),
+                            # logging.handlers.RotatingFileHandler
+                            logging.StreamHandler()
+                        ])
+    # filter stream to show info and up
+    logging.getLogger().handlers[0].setLevel(logging.DEBUG)
+    logging.getLogger().handlers[1].setLevel(logging.INFO)
+    debug_print(f'logger initialized at {logger_filename}', center=True)
+    debug_print(f'logfile located at {working_directory}/{logger_filename}.log')
     logging.getLogger().setLevel(logging.DEBUG)
     return logger_filename
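After this change `initialize_logger` always configures the root logger with two handlers: a file handler (DEBUG and up, written under the deployment_tools working directory) and a console handler (INFO and up). A minimal sketch mirroring the added handler layout, so calling code knows what to expect; `example.log` is a placeholder path, not a path used by the package:

```python
import logging

# Same handler layout as the added code: file gets DEBUG+, console gets INFO+.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s | %(levelname)s | %(message)s',
                    handlers=[logging.FileHandler('example.log', 'a'),
                              logging.StreamHandler()])
logging.getLogger().handlers[0].setLevel(logging.DEBUG)
logging.getLogger().handlers[1].setLevel(logging.INFO)

logging.debug('written to example.log only')            # filtered out by the console handler
logging.info('written to example.log and the console')
```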
-
-def create_logfile(logger_filename, working_directory, copy_filename):
-    """
-    function copies the logfile for recent run to the test working directory and creates link for download
-    :type logger_filename: str
-    :param logger_filename: filename of log file
-    :type working_directory: str
-    :param working_directory: directory to copy logfile to
-    :type copy_filename: str
-    :param copy_filename: name of logfile when copied
-    """
-    if not is_databricks():
-        debug_print('Cannot create logfile, logfile available already!')
-    debug_print('Creating Logfile...', center=True)
-    debug_print(f'cp {logger_filename}.log {copy_filename}.log')
-    subprocess.run(f'cp {logger_filename}.log {copy_filename}.log', shell=True)
-    copy_directory = working_directory.replace('/dbfs', 'dbfs:')
-    debug_print(f'copy directory {copy_directory}')
-    db_utils().fs.mkdirs(f'{copy_directory}')
-    debug_print(f'mkdirs {copy_directory}')
-    db_utils().fs.cp(f'file:/databricks/driver/{copy_filename}.log', copy_directory)
-    debug_print(f'cp file:/databricks/driver/{copy_filename}.log, copy_directory')
-    create_download_link(working_directory + f'{copy_filename}.log', f'{copy_filename}.log')
-    debug_print(f"create download link - {working_directory} + {copy_filename}.log")
-
-def get_packet_table_name(owner, env, platform=False, is_enriched=False):
-    """
-    function gets ownerId and environment and returns the name of the packet table (in databricks)
-    :type owner: str
-    :param owner: ownerId
-    :type env: str
-    :param env: wiliot environment (prod/test/dev)
-    :type platform: bool
-    :param platform: wiliot platform
-    :rtype: str
-    :return: data table name
-    """
-    env = 'prod' if env is None else env
-    data_table = ''
-    if is_enriched:
-        data_table = owner + '' + f'_enriched_packets_data_{env}'
-    else:
-        data_table = owner + '' + f'_packet_data_{env}'
-    data_table = '_' + data_table
-    return data_table
-
-def get_event_table_name(owner, env=None, platform=False):
-    """
-    function gets ownerId and environment and returns the name of the event table (in databricks)
-    :type owner: str
-    :param owner: ownerId
-    :type env: str
-    :param env: wiliot environment (prod/test/dev)
-    :type platform: bool
-    :param platform: wiliot platform
-    :rtype: str
-    :return: data table name
-    """
-    env = 'prod' if env is None else env
-    if not platform:
-        event_table = owner + '' + f'_event_data_{env}'
-    else:
-        event_table = owner + '' + f'_assets_metrics_data_{env}'
-    if owner.isnumeric():
-        event_table = '_' + event_table
-    return event_table
-
-def get_heartbeat_table_name(env=None):
-    """
-    function gets environment and returns the name of the heartbeat table (in databricks)
-    :type owner: str
-    :param owner: ownerId
-    :type env: str
-    :param env: wiliot environment (prod/test/dev)
-    :rtype: str
-    :return: heartbeat table name
-    """
-    env = 'prod' if env is None else env
-    hearbeat_table = f'_network_heartbeat_statistics_{env}'
-    return hearbeat_table
-
-def get_statistics_table_name(env=None):
-    """
-    function gets environment and returns the name of the statistics table (in databricks)
-    :type owner: str
-    :param owner: ownerId
-    :type env: str
-    :param env: wiliot environment (prod/test/dev)
-    :rtype: str
-    :return: statistics table name
-    """
-    env = 'prod' if env is None else env
-    hearbeat_table = f'_network_data_statistics_{env}'
-    return hearbeat_table
-
-def get_configuration_table_name(owner, env=None):
-    """
-    function gets ownerId and environment and returns the name of the configuration table (in databricks)
-    :type owner: str
-    :param owner: ownerId
-    :type env: str
-    :param env: wiliot environment (prod/test/dev)
-    :rtype: str
-    :return: configuration table name
-    """
-    env = 'prod' if env is None else env
-    configuration_table = owner + '' + f'_network_configuration_{env}'
-    if owner.isnumeric():
-        configuration_table = '_' + configuration_table
-    return configuration_table
-
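The removed table-name helpers all build Databricks table names from the owner id and environment with the same pattern, `<owner>_<kind>_<env>`, prefixed with an underscore when the owner is numeric (or always, for the packet table). An illustrative sketch of the convention (the helper name and example values are made up):

```python
def event_table_name(owner, env='prod', platform=False):
    """Mirror of get_event_table_name()'s naming rule (illustrative only)."""
    kind = '_assets_metrics_data_' if platform else '_event_data_'
    name = f'{owner}{kind}{env}'
    return '_' + name if owner.isnumeric() else name

assert event_table_name('123456789', 'prod') == '_123456789_event_data_prod'
assert event_table_name('acme', 'test', platform=True) == 'acme_assets_metrics_data_test'
```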
-def process_graph(fig, graph_name, display_graph=True, save_graph=False, directory=None, pass_as_is=False):
-    """
-    function processes graph - displays and saves the graph (according to entered flags)
-    :type fig: plotly Figure
-    :param fig: graph to display
-    :type graph_name: str
-    :param graph_name: name of graph, and the filename by which it will be saved
-    :type display_graph: bool
-    :param display_graph: flag to choose if to display the graph in DataBricks Notebook
-    :type save_graph: bool
-    :param save_graph: flag to choose if to save the graph
-    :type directory: str
-    :param directory: directory to save graph
-    :type pass_as_is: bool
-    :param pass_as_is: if true the fig entered is already a plot
-    """
-    debug_print(f'Processing Graph - {graph_name}', center=True)
-    if save_graph:
-        if directory is None:
-            exit_notebook('Need to supply directory to save graph!')
-        if directory[-1] != '/':
-            directory = directory + '/'
-        filepath = f"""{directory}{graph_name}.html"""
-        Path(directory).mkdir(parents=True, exist_ok=True)
-        open(filepath, 'a').close()
-        fig.write_html(filepath)
-        create_download_link(filepath, file_name=graph_name)
-    if display_graph:
-        if is_databricks():
-            if not pass_as_is:
-                fig = plot(fig, output_type='div')
-            display_html(fig)
-        else:
-            fig.show(renderer="browser")
-
-def get_spark():
-    try:
-        from pyspark.sql import SparkSession
-        return SparkSession.builder.getOrCreate()
-    except NameError:
-        raise NameError('Spark is not installed!')
-
-def flag_bugged_tags(rawdata_df):
-    """
-    function gets raw data dataframe, adds 'isBugged' column
-    'isBugged' is true for each packet of a bugged tag, false otherwise
-    :type rawdata_df: pandas DataFrame
-    :param rawdata_df: raw packet data
-    """
-    rawdata_df['isBugged'] = None
-    unique_tags = rawdata_df['externalId'].unique()
-    for tag in unique_tags:
-        tmp_df = rawdata_df.loc[rawdata_df['externalId'] == tag]
-        tmp_df = tmp_df[['packet_counter', 'timestamp']]
-        tmp_df = tmp_df.sort_values(by=['timestamp'])
-        prev_packet_counter = 0
-        cycles = 0
-        orig_cycles = tmp_df['packet_counter'].unique()
-        for timestamp in tmp_df['timestamp'].unique():
-            # TODO - see when this function throws error
-            try:
-                packet_counter = tmp_df.loc[tmp_df['timestamp'] == timestamp, 'packet_counter'].unique().item()
-            except Exception:
-                continue
-            if packet_counter + 256 * cycles < prev_packet_counter:
-                cycles = cycles + 1
-            prev_packet_counter = packet_counter + 256 * cycles
-            tmp_df.loc[tmp_df['timestamp'] == timestamp, 'packet_counter'] = prev_packet_counter
-        tmp_df2 = tmp_df.diff(axis=0)
-        tmp_df2['rate'] = (1000 * tmp_df2['packet_counter'] / tmp_df2['timestamp'])
-        max_rate = tmp_df2['rate'].max()
-        is_bugged = False
-        if max_rate > 6:
-            is_bugged = True
-            debug_print(f'{tag} is bugged! Flagging packets')
-        rawdata_df.loc[rawdata_df['externalId'] == tag, 'isBugged'] = is_bugged
-
-
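`flag_bugged_tags` unwraps the 8-bit `packet_counter` (which rolls over at 256) into a monotonically increasing count before computing a packets-per-second rate; a tag is flagged when that rate exceeds 6. A standalone sketch of just the unwrapping step (list based, no pandas, illustrative helper name):

```python
def unwrap_packet_counter(counters):
    """Undo 8-bit rollover: each time the raw counter drops, add another 256 cycle."""
    unwrapped, cycles, prev = [], 0, 0
    for raw in counters:
        if raw + 256 * cycles < prev:
            cycles += 1
        prev = raw + 256 * cycles
        unwrapped.append(prev)
    return unwrapped

# 250, 255, then rollover to 3, 10 -> 259, 266 after unwrapping
assert unwrap_packet_counter([250, 255, 3, 10]) == [250, 255, 259, 266]
```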
-def parse_commastring(string):
-    """
-    parse string with comma or comma+space separated values to list
-    :type string: str
-    :param string: input
-    :rtype: list
-    :return: list of values
-    """
-    if type(string) == float:
-        try:
-            if np.isnan(string):
-                return None
-        except Exception as e:
-            pass
-    if type(string) == list:
-        return string
-    if string is None:
-        return list()
-    cmd_list = ''.join(string.split()).split(',')
-    return cmd_list
-
-
-def parse_commastring_array(array):
-    """
-    parses Pandas array (DataFrame column) to list of all unique values
-    :type array: Pandas array
-    :param array: array
-    :rtype: list
-    :return: list of unique values
-    """
-    if len(array) == 1:
-        return parse_commastring(array[0])
-    result = list()
-    for item in array:
-        item_values = parse_commastring(item)
-        if len(item_values) == 1:
-            item_values
-            if item_values[0] not in result:
-                result.extend(item_values)
-        else:
-            for value in item_values:
-                if value not in result:
-                    result.extend([value])
-    return result
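For reference, a hedged restatement of the core behaviour of the removed comma-string helper (the original also passes lists through unchanged, maps NaN to None, and `parse_commastring_array` collects unique values across a column):

```python
def parse_commastring_sketch(value):
    """Minimal re-statement of parse_commastring()'s main path (illustrative only)."""
    if value is None:
        return []
    if isinstance(value, list):
        return value
    return ''.join(value.split()).split(',')

assert parse_commastring_sketch('a, b,c') == ['a', 'b', 'c']
assert parse_commastring_sketch(None) == []
assert parse_commastring_sketch(['x']) == ['x']
```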