teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (240)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +306 -0
  4. teradataml/__init__.py +10 -3
  5. teradataml/_version.py +1 -1
  6. teradataml/analytics/__init__.py +3 -2
  7. teradataml/analytics/analytic_function_executor.py +299 -16
  8. teradataml/analytics/analytic_query_generator.py +92 -0
  9. teradataml/analytics/byom/__init__.py +3 -2
  10. teradataml/analytics/json_parser/metadata.py +13 -3
  11. teradataml/analytics/json_parser/utils.py +13 -6
  12. teradataml/analytics/meta_class.py +40 -1
  13. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  14. teradataml/analytics/sqle/__init__.py +11 -2
  15. teradataml/analytics/table_operator/__init__.py +4 -3
  16. teradataml/analytics/uaf/__init__.py +21 -2
  17. teradataml/analytics/utils.py +66 -1
  18. teradataml/analytics/valib.py +1 -1
  19. teradataml/automl/__init__.py +1502 -323
  20. teradataml/automl/custom_json_utils.py +139 -61
  21. teradataml/automl/data_preparation.py +247 -307
  22. teradataml/automl/data_transformation.py +32 -12
  23. teradataml/automl/feature_engineering.py +325 -86
  24. teradataml/automl/model_evaluation.py +44 -35
  25. teradataml/automl/model_training.py +122 -153
  26. teradataml/catalog/byom.py +8 -8
  27. teradataml/clients/pkce_client.py +1 -1
  28. teradataml/common/__init__.py +2 -1
  29. teradataml/common/constants.py +72 -0
  30. teradataml/common/deprecations.py +13 -7
  31. teradataml/common/garbagecollector.py +152 -120
  32. teradataml/common/messagecodes.py +11 -2
  33. teradataml/common/messages.py +4 -1
  34. teradataml/common/sqlbundle.py +26 -4
  35. teradataml/common/utils.py +225 -14
  36. teradataml/common/wrapper_utils.py +1 -1
  37. teradataml/context/context.py +82 -2
  38. teradataml/data/SQL_Fundamentals.pdf +0 -0
  39. teradataml/data/complaints_test_tokenized.csv +353 -0
  40. teradataml/data/complaints_tokens_model.csv +348 -0
  41. teradataml/data/covid_confirm_sd.csv +83 -0
  42. teradataml/data/dataframe_example.json +27 -1
  43. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  44. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  45. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  46. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  47. teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
  48. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  49. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  50. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  51. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  52. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  53. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  54. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  55. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  56. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  57. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  58. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  59. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  60. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  61. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  62. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  63. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  64. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  65. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  66. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  67. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  68. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  69. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  70. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  71. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  72. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  73. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  74. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  75. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  76. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  77. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  78. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  79. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  80. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  81. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  82. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  83. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  84. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  85. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  86. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  87. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  88. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  89. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  90. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  91. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  92. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  93. teradataml/data/dwt2d_dataTable.csv +65 -0
  94. teradataml/data/dwt_dataTable.csv +8 -0
  95. teradataml/data/dwt_filterTable.csv +3 -0
  96. teradataml/data/finance_data4.csv +13 -0
  97. teradataml/data/grocery_transaction.csv +19 -0
  98. teradataml/data/idwt2d_dataTable.csv +5 -0
  99. teradataml/data/idwt_dataTable.csv +8 -0
  100. teradataml/data/idwt_filterTable.csv +3 -0
  101. teradataml/data/interval_data.csv +5 -0
  102. teradataml/data/jsons/paired_functions.json +14 -0
  103. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  104. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  105. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  106. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  107. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  108. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  109. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  110. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  111. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  112. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  113. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  114. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  115. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  116. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  117. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  118. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  119. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  120. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  121. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  122. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  123. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  124. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  125. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  126. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  127. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  128. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  129. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  130. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  131. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  132. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  133. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  134. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  135. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  136. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  137. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  138. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  139. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  140. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  141. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  142. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  143. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  144. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  145. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  146. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  147. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  148. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  149. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  150. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  151. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  152. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  153. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  154. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  155. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  156. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  157. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  158. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  159. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  160. teradataml/data/load_example_data.py +8 -2
  161. teradataml/data/medical_readings.csv +101 -0
  162. teradataml/data/naivebayestextclassifier_example.json +1 -1
  163. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  164. teradataml/data/patient_profile.csv +101 -0
  165. teradataml/data/peppers.png +0 -0
  166. teradataml/data/real_values.csv +14 -0
  167. teradataml/data/sax_example.json +8 -0
  168. teradataml/data/scripts/deploy_script.py +1 -1
  169. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  170. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  171. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  172. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  173. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
  174. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  175. teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
  176. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  177. teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
  178. teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
  179. teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
  180. teradataml/data/star_pivot.csv +8 -0
  181. teradataml/data/target_udt_data.csv +8 -0
  182. teradataml/data/templates/open_source_ml.json +3 -1
  183. teradataml/data/teradataml_example.json +20 -1
  184. teradataml/data/timestamp_data.csv +4 -0
  185. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  186. teradataml/data/uaf_example.json +55 -1
  187. teradataml/data/unpivot_example.json +15 -0
  188. teradataml/data/url_data.csv +9 -0
  189. teradataml/data/vectordistance_example.json +4 -0
  190. teradataml/data/windowdfft.csv +16 -0
  191. teradataml/dataframe/copy_to.py +1 -1
  192. teradataml/dataframe/data_transfer.py +5 -3
  193. teradataml/dataframe/dataframe.py +1002 -201
  194. teradataml/dataframe/fastload.py +3 -3
  195. teradataml/dataframe/functions.py +867 -0
  196. teradataml/dataframe/row.py +160 -0
  197. teradataml/dataframe/setop.py +2 -2
  198. teradataml/dataframe/sql.py +840 -33
  199. teradataml/dataframe/window.py +1 -1
  200. teradataml/dbutils/dbutils.py +878 -34
  201. teradataml/dbutils/filemgr.py +48 -1
  202. teradataml/geospatial/geodataframe.py +1 -1
  203. teradataml/geospatial/geodataframecolumn.py +1 -1
  204. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  205. teradataml/lib/aed_0_1.dll +0 -0
  206. teradataml/opensource/__init__.py +1 -1
  207. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  208. teradataml/opensource/_lightgbm.py +950 -0
  209. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  210. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  211. teradataml/opensource/sklearn/__init__.py +0 -1
  212. teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
  213. teradataml/options/__init__.py +9 -23
  214. teradataml/options/configure.py +42 -4
  215. teradataml/options/display.py +2 -2
  216. teradataml/plot/axis.py +4 -4
  217. teradataml/scriptmgmt/UserEnv.py +13 -9
  218. teradataml/scriptmgmt/lls_utils.py +77 -23
  219. teradataml/store/__init__.py +13 -0
  220. teradataml/store/feature_store/__init__.py +0 -0
  221. teradataml/store/feature_store/constants.py +291 -0
  222. teradataml/store/feature_store/feature_store.py +2223 -0
  223. teradataml/store/feature_store/models.py +1505 -0
  224. teradataml/store/vector_store/__init__.py +1586 -0
  225. teradataml/table_operators/Script.py +2 -2
  226. teradataml/table_operators/TableOperator.py +106 -20
  227. teradataml/table_operators/query_generator.py +3 -0
  228. teradataml/table_operators/table_operator_query_generator.py +3 -1
  229. teradataml/table_operators/table_operator_util.py +102 -56
  230. teradataml/table_operators/templates/dataframe_register.template +69 -0
  231. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  232. teradataml/telemetry_utils/__init__.py +0 -0
  233. teradataml/telemetry_utils/queryband.py +52 -0
  234. teradataml/utils/dtypes.py +4 -2
  235. teradataml/utils/validators.py +34 -2
  236. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
  237. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
  238. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  239. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  240. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
@@ -28,21 +28,22 @@ def splitter(strr, delim=",", convert_to="str"):
     return [convert_to_type(i, convert_to) for i in strr.split(delim)]
 
 # Arguments to the Script.
-if len(sys.argv) != 4:
-    # 4 arguments command line arguments should be passed to this file.
+if len(sys.argv) != 3:
+    # 3 command line arguments should be passed to this file.
     # 1: file to be run
-    # 2. Comma separated indices of partition columns.
-    # 3. Comma separated types of all the data columns.
-    # 4. Data columns information separted by "--" where each data column information is in the form
-    #    "<arg_name>-<comma separated data indices>-<comma separated data types>".
-    sys.exit("4 arguments command line arguments should be passed: file to be run,"
-             " comma separated indices of partition columns, comma separated types of all columns,"
-             " data columns information separated by '--' where each data column information is"
-             " in the form '<arg_name>-<comma separated data indices>-<comma separated data types>'.")
-
-db = sys.argv[0].split("/")[1]
-data_partition_column_indices = splitter(sys.argv[1], convert_to="int")  # indices are integers.
-data_column_types = splitter(sys.argv[2], delim="--")
+    # 2. Model file prefix for lake system, None otherwise.
+    # 3. Flag to check the system type. True, means Lake, Enterprise otherwise.
+    sys.exit("3 arguments command line arguments should be passed: file to be run,"
+             " model file prefix used only for lake system and flag to check lake or enterprise.")
+
+is_lake_system = eval(sys.argv[2])
+if not is_lake_system:
+    db = sys.argv[0].split("/")[1]
+else:
+    model_file_prefix = sys.argv[1]
+
+data_partition_column_indices = <partition_cols_indices>
+data_column_types = <types_of_data_cols>
 
 data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
 
@@ -52,7 +53,8 @@ data_args_indices_types = OrderedDict()
 # Data related arguments values - prepare dictionary and populate data later.
 data_args_values = {}
 
-for data_arg in sys.argv[3].split("--"):
+data_args_info_str = <data_args_info_str>
+for data_arg in data_args_info_str.split("--"):
     arg_name, indices, types = data_arg.split("-")
     indices = splitter(indices, convert_to="int")
     types = splitter(types)
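
These first two hunks replace command-line argument passing with template placeholders: <partition_cols_indices>, <types_of_data_cols>, and <data_args_info_str> are literal tokens that the client substitutes before the script is installed. A minimal sketch of that substitution, assuming plain string replacement; the values and snippet below are illustrative, not the package's actual generator code:

    # Hypothetical illustration of rendering the template placeholders
    # shown in the hunks above (values are made up for the example).
    template = (
        "data_partition_column_indices = <partition_cols_indices>\n"
        "data_column_types = <types_of_data_cols>\n"
        "data_args_info_str = <data_args_info_str>\n"
    )
    script = (
        template
        .replace("<partition_cols_indices>", "[0]")
        .replace("<types_of_data_cols>", "['int', 'float', 'float']")
        .replace("<data_args_info_str>", "'X-1,2-float,float--y-0-int'")
    )
    print(script)  # the rendered script is then installed and run in-database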
@@ -79,6 +81,11 @@ while 1:
                     data_partition_column_values.append(
                         convert_to_type(values[val], typee=data_partition_column_types[i])
                     )
+
+                # Prepare the corresponding model file name and extract model.
+                partition_join = "_".join([str(x) for x in data_partition_column_values])
+                # Replace '-' with '_' as '-' because partition_columns can be negative.
+                partition_join = partition_join.replace("-", "_")
 
             # Prepare data dictionary containing only arguments related to data.
             for arg_name in data_args_values:
@@ -105,4 +112,15 @@ all_args = {**data_args_values, **params}
 module_ = importlib.import_module(module_name)
 sklearn_model = getattr(module_, func_name)(**all_args)
 
-print(*(data_partition_column_values + [base64.b64encode(pickle.dumps(sklearn_model))]), sep=DELIMITER)
+model_str = pickle.dumps(sklearn_model)
+
+if is_lake_system:
+    model_file_path = f"/tmp/{model_file_prefix}_{partition_join}.pickle"
+
+    # Write to file in Vantage, to be used in predict/scoring.
+    with open(model_file_path, "wb") as fp:
+        fp.write(model_str)
+
+model_data = model_file_path if is_lake_system else base64.b64encode(model_str)
+
+print(*(data_partition_column_values + [model_data]), sep=DELIMITER)
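
Net effect of this file's hunks: on a Lake system the fitted estimator is pickled to a file named after the partition values and the path is emitted in the last output column, while on Enterprise the pickle is base64-encoded and emitted inline. A consumer that captured that column could recover the estimator roughly like this; the helper is hypothetical, not part of teradataml's API:

    import base64
    import pickle

    def load_fitted_model(model_field, is_lake_system):
        # model_field is the last column the fit script printed:
        # a file path on Lake, base64-encoded pickle text on Enterprise.
        if is_lake_system:
            # Lake: the script wrote /tmp/<prefix>_<partition>.pickle
            # and printed the path.
            with open(model_field, "rb") as fp:
                return pickle.load(fp)
        # Enterprise: the pickled model itself travelled as base64.
        return pickle.loads(base64.b64decode(model_field))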
@@ -3,146 +3,164 @@ import math
 import sys
 import numpy as np
 import base64
+from contextlib import contextmanager
+import os
 
 DELIMITER = '\t'
 
-def get_values_list(values, types):
-    ret_vals = []
-    for i, val in enumerate(values):
-        ret_vals.append(convert_to_type(val, types[i]))
-    return ret_vals
-
-def convert_to_type(val, typee):
-    if typee == 'int':
-        return int(val) if val != "" else np.nan
-    if typee == 'float':
-        if isinstance(val, str):
-            val = val.replace(' ', '')
-        return float(val) if val != "" else np.nan
-    if typee == 'bool':
-        return eval(val) if val != "" else None
-    return str(val) if val != "" else None
-
-def splitter(strr, delim=",", convert_to="str"):
+@contextmanager
+def suppress_stderr():
     """
-    Split the string based on delimiter and convert to the type specified.
+    Function to suppress the warnings(lake systems treats warnings as errors).
     """
-    if strr == "None":
-        return []
-    return [convert_to_type(i, convert_to) for i in strr.split(delim)]
-
-
-# Arguments to the Script
-if len(sys.argv) != 9:
-    # 9 arguments command line arguments should be passed to this file.
-    # 1: file to be run
-    # 2. function name
-    # 3. No of feature columns.
-    # 4. No of class labels.
-    # 5. No of group columns.
-    # 6. Comma separated indices of partition columns.
-    # 7. Comma separated types of all the data columns.
-    # 8. Model file prefix to generated model file using partition columns.
-    # 9. Flag to check the system type. True, means Lake, Enterprise otherwise.
-    sys.exit("9 arguments command line arguments should be passed: file to be run,"
-             " function name, no of feature columns, no of class labels, no of group columns,"
-             " comma separated indices of partition columns, comma separated types of all columns,"
-             " model file prefix to generated model file using partition columns and flag to check"
-             " lake or enterprise.")
-
-
-is_lake_system = eval(sys.argv[8])
-if not is_lake_system:
-    db = sys.argv[0].split("/")[1]
-function_name = sys.argv[1]
-n_f_cols = int(sys.argv[2])
-n_c_labels = int(sys.argv[3])
-n_g_cols = int(sys.argv[4])
-data_column_types = splitter(sys.argv[6], delim="--")
-data_partition_column_indices = splitter(sys.argv[5], convert_to="int")  # indices are integers.
-model_file_prefix = sys.argv[7]
-
-data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
-
-model = None
-data_partition_column_values = []
-
-# Data Format (n_features, k_labels, one data_partition_column):
-# feature1, feature2, ..., featuren, label1, label2, ... labelk, data_partition_column1, ...,
-# data_partition_columnn.
-# labels are optional.
-
-features = []
-labels = []
-groups = []
-while 1:
-    try:
-        line = input()
-        if line == '':  # Exit if user provides blank line
+    with open(os.devnull, "w") as devnull:
+        old_stderr = sys.stderr
+        sys.stderr = devnull
+        try:
+            yield
+        finally:
+            sys.stderr = old_stderr
+
+## On Lake system warnings raised by script are treated as a errors.
+## Hence, to suppress it putting the under suppress_stderr().
+with suppress_stderr():
+    def get_values_list(values, types):
+        ret_vals = []
+        for i, val in enumerate(values):
+            ret_vals.append(convert_to_type(val, types[i]))
+        return ret_vals
+
+    def convert_to_type(val, typee):
+        if typee == 'int':
+            return int(val) if val != "" else np.nan
+        if typee == 'float':
+            if isinstance(val, str):
+                val = val.replace(' ', '')
+            return float(val) if val != "" else np.nan
+        if typee == 'bool':
+            return eval(val) if val != "" else None
+        return str(val) if val != "" else None
+
+    def splitter(strr, delim=",", convert_to="str"):
+        """
+        Split the string based on delimiter and convert to the type specified.
+        """
+        if strr == "None":
+            return []
+        return [convert_to_type(i, convert_to) for i in strr.split(delim)]
+
+
+    # Arguments to the Script
+    if len(sys.argv) != 9:
+        # 9 arguments command line arguments should be passed to this file.
+        # 1: file to be run
+        # 2. function name
+        # 3. No of feature columns.
+        # 4. No of class labels.
+        # 5. No of group columns.
+        # 6. Comma separated indices of partition columns.
+        # 7. Comma separated types of all the data columns.
+        # 8. Model file prefix to generated model file using partition columns.
+        # 9. Flag to check the system type. True, means Lake, Enterprise otherwise.
+        sys.exit("9 arguments command line arguments should be passed: file to be run,"
+                 " function name, no of feature columns, no of class labels, no of group columns,"
+                 " comma separated indices of partition columns, comma separated types of all columns,"
+                 " model file prefix to generated model file using partition columns and flag to check"
+                 " lake or enterprise.")
+
+
+    is_lake_system = eval(sys.argv[8])
+    if not is_lake_system:
+        db = sys.argv[0].split("/")[1]
+    function_name = sys.argv[1]
+    n_f_cols = int(sys.argv[2])
+    n_c_labels = int(sys.argv[3])
+    n_g_cols = int(sys.argv[4])
+    data_column_types = splitter(sys.argv[6], delim="--")
+    data_partition_column_indices = splitter(sys.argv[5], convert_to="int")  # indices are integers.
+    model_file_prefix = sys.argv[7]
+
+    data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
+
+    model = None
+    data_partition_column_values = []
+
+    # Data Format (n_features, k_labels, one data_partition_column):
+    # feature1, feature2, ..., featuren, label1, label2, ... labelk, data_partition_column1, ...,
+    # data_partition_columnn.
+    # labels are optional.
+
+    features = []
+    labels = []
+    groups = []
+    while 1:
+        try:
+            line = input()
+            if line == '':  # Exit if user provides blank line
+                break
+            else:
+                values = line.split(DELIMITER)
+                values = get_values_list(values, data_column_types)
+                if not data_partition_column_values:
+                    # Partition column values is same for all rows. Hence, only read once.
+                    for i, val in enumerate(data_partition_column_indices):
+                        data_partition_column_values.append(
+                            convert_to_type(values[val], typee=data_partition_column_types[i])
+                        )
+
+                    # Prepare the corresponding model file name and extract model.
+                    partition_join = "_".join([str(x) for x in data_partition_column_values])
+                    # Replace '-' with '_' as '-' because partition_columns can be negative.
+                    partition_join = partition_join.replace("-", "_")
+
+                    model_file_path = f"{model_file_prefix}_{partition_join}" \
+                        if is_lake_system else \
+                        f"./{db}/{model_file_prefix}_{partition_join}"
+
+                    with open(model_file_path, "rb") as fp:
+                        model = pickle.loads(fp.read())
+
+                    if not model:
+                        sys.exit("Model file is not installed in Vantage.")
+
+                start = 0
+                if n_f_cols > 0:
+                    features.append(values[:n_f_cols])
+                    start = start + n_f_cols
+                if n_c_labels > 0:
+                    labels.append(values[start:(start+n_c_labels)])
+                    start = start + n_c_labels
+                if n_g_cols > 0:
+                    groups.append(values[start:(start+n_g_cols)])
+
+        except EOFError:  # Exit if reached EOF or CTRL-D
             break
-    else:
-        values = line.split(DELIMITER)
-        values = get_values_list(values, data_column_types)
-        if not data_partition_column_values:
-            # Partition column values is same for all rows. Hence, only read once.
-            for i, val in enumerate(data_partition_column_indices):
-                data_partition_column_values.append(
-                    convert_to_type(values[val], typee=data_partition_column_types[i])
-                )
-
-            # Prepare the corresponding model file name and extract model.
-            partition_join = "_".join([str(x) for x in data_partition_column_values])
-            # Replace '-' with '_' as '-' because partition_columns can be negative.
-            partition_join = partition_join.replace("-", "_")
-
-            model_file_path = f"{model_file_prefix}_{partition_join}" \
-                if is_lake_system else \
-                f"./{db}/{model_file_prefix}_{partition_join}"
-
-            with open(model_file_path, "rb") as fp:
-                model = pickle.loads(fp.read())
-
-            if not model:
-                sys.exit("Model file is not installed in Vantage.")
-
-        start = 0
-        if n_f_cols > 0:
-            features.append(values[:n_f_cols])
-            start = start + n_f_cols
-        if n_c_labels > 0:
-            labels.append(values[start:(start+n_c_labels)])
-            start = start + n_c_labels
-        if n_g_cols > 0:
-            groups.append(values[start:(start+n_g_cols)])
-
-    except EOFError:  # Exit if reached EOF or CTRL-D
-        break
-
-if len(features) == 0:
-    sys.exit(0)
-
-features = np.array(features) if len(features) > 0 else None
-labels = np.array(labels).flatten() if len(labels) > 0 else None
-groups = np.array(groups).flatten() if len(groups) > 0 else None
-
-if function_name == "split":
-    # Printing both train and test data instead of just indices unlike sklearn.
-    # Generator is created based on split_id and type of split (train/test) in client.
-    split_id = 1
-    for train_idx, test_idx in model.split(features, labels, groups):
-        X_train, X_test = features[train_idx], features[test_idx]
-        y_train, y_test = labels[train_idx], labels[test_idx]
-        for X, y in zip(X_train, y_train):
-            print(*(data_partition_column_values + [split_id, "train"] +
-                    ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val
-                     for val in X] + [y]
-                    ), sep=DELIMITER)
-        for X, y in zip(X_test, y_test):
-            print(*(data_partition_column_values + [split_id, "test"] +
-                    ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val
-                     for val in X] + [y]
-                    ), sep=DELIMITER)
-        split_id += 1
-else:
-    val = getattr(model, function_name)(features, labels, groups)
-    print(*(data_partition_column_values + [val]), sep=DELIMITER)
+
+    if len(features) == 0:
+        sys.exit(0)
+
+    features = np.array(features) if len(features) > 0 else None
+    labels = np.array(labels).flatten() if len(labels) > 0 else None
+    groups = np.array(groups).flatten() if len(groups) > 0 else None
+
+    if function_name == "split":
+        # Printing both train and test data instead of just indices unlike sklearn.
+        # Generator is created based on split_id and type of split (train/test) in client.
+        split_id = 1
+        for train_idx, test_idx in model.split(features, labels, groups):
+            X_train, X_test = features[train_idx], features[test_idx]
+            y_train, y_test = labels[train_idx], labels[test_idx]
+            for X, y in zip(X_train, y_train):
+                print(*(data_partition_column_values + [split_id, "train"] +
+                        ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val
+                         for val in X] + [y]
+                        ), sep=DELIMITER)
+            for X, y in zip(X_test, y_test):
+                print(*(data_partition_column_values + [split_id, "test"] +
+                        ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val
+                         for val in X] + [y]
+                        ), sep=DELIMITER)
+            split_id += 1
+    else:
+        val = getattr(model, function_name)(features, labels, groups)
+        print(*(data_partition_column_values + [val]), sep=DELIMITER)
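
The suppress_stderr() helper added above is a plain contextlib context manager that swaps sys.stderr for os.devnull while the block runs, then restores it. A standalone sketch of the same pattern; the demo warning is illustrative:

    import os
    import sys
    import warnings
    from contextlib import contextmanager

    @contextmanager
    def suppress_stderr():
        # Redirect stderr to /dev/null for the duration of the block, then
        # restore it (Lake systems treat stderr output as errors).
        with open(os.devnull, "w") as devnull:
            old_stderr = sys.stderr
            sys.stderr = devnull
            try:
                yield
            finally:
                sys.stderr = old_stderr

    # A warning emitted inside the block goes to /dev/null instead of stderr.
    with suppress_stderr():
        warnings.warn("this would otherwise surface on stderr")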
@@ -116,7 +116,7 @@ while 1:
 
             f_ = values[:n_f_cols]
             if f_:
-                output = getattr(model, func_name)(np.array([f_]), **arguments)
+                output = getattr(model, func_name)([f_], **arguments)
             else:
                 output = getattr(model, func_name)(**arguments)
             result_list = f_
@@ -110,10 +110,19 @@ while 1:
 if len(features) == 0:
     sys.exit(0)
 
+
+model_name = model.__class__.__name__
+np_func_list = ["MultiOutputClassifier", "GaussianMixture"]
+
+if model_name in np_func_list:
+    features = np.array(features)
+
 if labels:
-    val = getattr(model, func_name)(np.array(features), np.array(labels))
+    if model_name in np_func_list:
+        labels = np.array(labels)
+    val = getattr(model, func_name)(features, labels)
 else:
-    val = getattr(model, func_name)(np.array(features))
+    val = getattr(model, func_name)(features)
 
 result_val = ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val]
-print(*(data_partition_column_values + result_val), sep=DELIMITER)
+print(*(data_partition_column_values + result_val), sep=DELIMITER)
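
This last change works because most scikit-learn estimators accept a plain list of rows directly, so the np.array conversion is now applied only for the models named in np_func_list. A small illustration of list input being accepted as-is; the model and data here are examples, not from the package:

    from sklearn.linear_model import LinearRegression

    features = [[0.0], [1.0], [2.0]]   # plain Python lists, no np.array needed
    labels = [0.0, 1.0, 2.0]

    model = LinearRegression().fit(features, labels)
    print(model.score(features, labels))   # 1.0; list input works directly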