teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.

Files changed (200)
  1. teradataml/LICENSE.pdf +0 -0
  2. teradataml/README.md +112 -0
  3. teradataml/__init__.py +6 -3
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/__init__.py +3 -2
  6. teradataml/analytics/analytic_function_executor.py +224 -16
  7. teradataml/analytics/analytic_query_generator.py +92 -0
  8. teradataml/analytics/byom/__init__.py +3 -2
  9. teradataml/analytics/json_parser/metadata.py +1 -0
  10. teradataml/analytics/json_parser/utils.py +6 -4
  11. teradataml/analytics/meta_class.py +40 -1
  12. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  13. teradataml/analytics/sqle/__init__.py +10 -2
  14. teradataml/analytics/table_operator/__init__.py +3 -2
  15. teradataml/analytics/uaf/__init__.py +21 -2
  16. teradataml/analytics/utils.py +62 -1
  17. teradataml/analytics/valib.py +1 -1
  18. teradataml/automl/__init__.py +1502 -323
  19. teradataml/automl/custom_json_utils.py +139 -61
  20. teradataml/automl/data_preparation.py +245 -306
  21. teradataml/automl/data_transformation.py +32 -12
  22. teradataml/automl/feature_engineering.py +313 -82
  23. teradataml/automl/model_evaluation.py +44 -35
  24. teradataml/automl/model_training.py +109 -146
  25. teradataml/catalog/byom.py +8 -8
  26. teradataml/clients/pkce_client.py +1 -1
  27. teradataml/common/constants.py +37 -0
  28. teradataml/common/deprecations.py +13 -7
  29. teradataml/common/garbagecollector.py +151 -120
  30. teradataml/common/messagecodes.py +4 -1
  31. teradataml/common/messages.py +2 -1
  32. teradataml/common/sqlbundle.py +1 -1
  33. teradataml/common/utils.py +97 -11
  34. teradataml/common/wrapper_utils.py +1 -1
  35. teradataml/context/context.py +72 -2
  36. teradataml/data/complaints_test_tokenized.csv +353 -0
  37. teradataml/data/complaints_tokens_model.csv +348 -0
  38. teradataml/data/covid_confirm_sd.csv +83 -0
  39. teradataml/data/dataframe_example.json +10 -0
  40. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  41. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  42. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  43. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  44. teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
  45. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  46. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  47. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  48. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  49. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  51. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  52. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  53. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  54. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  55. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  56. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  57. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  58. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  59. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  60. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  61. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  62. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  63. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  64. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  65. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  66. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  67. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  68. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  69. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  70. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  71. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  72. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  73. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  74. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  75. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  76. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  77. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  78. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  79. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  80. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  81. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  82. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  83. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  84. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  85. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  86. teradataml/data/dwt2d_dataTable.csv +65 -0
  87. teradataml/data/dwt_dataTable.csv +8 -0
  88. teradataml/data/dwt_filterTable.csv +3 -0
  89. teradataml/data/finance_data4.csv +13 -0
  90. teradataml/data/grocery_transaction.csv +19 -0
  91. teradataml/data/idwt2d_dataTable.csv +5 -0
  92. teradataml/data/idwt_dataTable.csv +8 -0
  93. teradataml/data/idwt_filterTable.csv +3 -0
  94. teradataml/data/interval_data.csv +5 -0
  95. teradataml/data/jsons/paired_functions.json +14 -0
  96. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  97. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  98. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  99. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  100. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  101. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  102. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  103. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  104. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  105. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  106. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  107. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  108. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  109. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  110. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  111. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  112. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  113. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  114. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  115. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  116. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  117. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  118. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  119. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  120. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  121. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  122. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  123. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  124. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  125. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  126. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  127. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  128. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  129. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  130. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  131. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  132. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  133. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  134. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  135. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  136. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  137. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  138. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  139. teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
  140. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  141. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  142. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  143. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  144. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  145. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
  146. teradataml/data/load_example_data.py +8 -2
  147. teradataml/data/naivebayestextclassifier_example.json +1 -1
  148. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  149. teradataml/data/peppers.png +0 -0
  150. teradataml/data/real_values.csv +14 -0
  151. teradataml/data/sax_example.json +8 -0
  152. teradataml/data/scripts/deploy_script.py +1 -1
  153. teradataml/data/scripts/sklearn/sklearn_fit.py +17 -10
  154. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +2 -2
  155. teradataml/data/scripts/sklearn/sklearn_function.template +30 -7
  156. teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
  157. teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
  158. teradataml/data/scripts/sklearn/sklearn_transform.py +55 -4
  159. teradataml/data/star_pivot.csv +8 -0
  160. teradataml/data/templates/open_source_ml.json +2 -1
  161. teradataml/data/teradataml_example.json +20 -1
  162. teradataml/data/timestamp_data.csv +4 -0
  163. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  164. teradataml/data/uaf_example.json +55 -1
  165. teradataml/data/unpivot_example.json +15 -0
  166. teradataml/data/url_data.csv +9 -0
  167. teradataml/data/windowdfft.csv +16 -0
  168. teradataml/dataframe/copy_to.py +1 -1
  169. teradataml/dataframe/data_transfer.py +5 -3
  170. teradataml/dataframe/dataframe.py +474 -41
  171. teradataml/dataframe/fastload.py +3 -3
  172. teradataml/dataframe/functions.py +339 -0
  173. teradataml/dataframe/row.py +160 -0
  174. teradataml/dataframe/setop.py +2 -2
  175. teradataml/dataframe/sql.py +658 -20
  176. teradataml/dataframe/window.py +1 -1
  177. teradataml/dbutils/dbutils.py +322 -16
  178. teradataml/geospatial/geodataframe.py +1 -1
  179. teradataml/geospatial/geodataframecolumn.py +1 -1
  180. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  181. teradataml/lib/aed_0_1.dll +0 -0
  182. teradataml/opensource/sklearn/_sklearn_wrapper.py +154 -69
  183. teradataml/options/__init__.py +3 -1
  184. teradataml/options/configure.py +14 -2
  185. teradataml/options/display.py +2 -2
  186. teradataml/plot/axis.py +4 -4
  187. teradataml/scriptmgmt/UserEnv.py +10 -6
  188. teradataml/scriptmgmt/lls_utils.py +3 -2
  189. teradataml/table_operators/Script.py +2 -2
  190. teradataml/table_operators/TableOperator.py +106 -20
  191. teradataml/table_operators/table_operator_util.py +88 -41
  192. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  193. teradataml/telemetry_utils/__init__.py +0 -0
  194. teradataml/telemetry_utils/queryband.py +52 -0
  195. teradataml/utils/validators.py +1 -1
  196. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +115 -2
  197. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +200 -140
  198. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
  199. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
  200. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
teradataml/data/scripts/deploy_script.py

@@ -60,7 +60,7 @@ if not len(features):
     sys.exit(0)
 
 X = np.array(features)
-y = np.array(labels)
+y = np.array(labels).ravel()
 
 clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
 clf.fit(X, y)
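The one-line change above flattens the label array: rows read from stdin arrive as one-element lists, so `np.array(labels)` has shape `(n, 1)`, while scikit-learn estimators such as SVC expect a 1-D `y` (a column vector triggers a `DataConversionWarning`). A minimal sketch of the effect, with made-up data rather than anything from the package:

```python
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Rows as the script would accumulate them from stdin (hypothetical values).
features = [[5.1, 3.5], [4.9, 3.0], [6.2, 2.9], [5.9, 3.2]]
labels = [[0], [0], [1], [1]]      # one single-element list per row

X = np.array(features)             # shape (4, 2)
y = np.array(labels).ravel()       # shape (4,) instead of (4, 1)

clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
clf.fit(X, y)                      # fits cleanly with a 1-D target
```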
teradataml/data/scripts/sklearn/sklearn_fit.py

@@ -138,22 +138,29 @@ if not len(features):
 # Fit/partial_fit the model to the data.
 if function_name == "partial_fit":
     if labels and classes:
-        model.partial_fit(np.array(features), np.array(labels), classes=classes)
+        model.partial_fit(features, labels, classes=classes)
     elif labels:
-        model.partial_fit(np.array(features), np.array(labels))
+        model.partial_fit(features, labels)
     elif classes:
-        model.partial_fit(np.array(features), classes=classes)
+        model.partial_fit(features, classes=classes)
     else:
-        model.partial_fit(np.array(features))
+        model.partial_fit(features)
 elif function_name == "fit":
-    # For IsotonicRegression, fit() accepts training target as
-    # y: array-like of shape (n_samples,).
+    model_name = model.__class__.__name__
+    np_func_list = ["OneVsRestClassifier", "LabelBinarizer", "TSNE"]
     if labels:
-        labels = np.array(labels).reshape(-1) \
-            if model.__class__.__name__ == "IsotonicRegression" else np.array(labels)
-        model.fit(np.array(features), labels)
+        # For IsotonicRegression, fit() accepts training target as
+        # y: array-like of shape (n_samples,).
+        if model_name in ["IsotonicRegression", "LinearSVC"]:
+            labels = np.array(labels).reshape(-1)
+        if model_name in np_func_list:
+            labels = np.array(labels)
+            features = np.array(features)
+        model.fit(features, labels)
     else:
-        model.fit(np.array(features))
+        if model_name in np_func_list:
+            features = np.array(features)
+        model.fit(features)
 
 model_str = pickle.dumps(model)
 
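For context on the `partial_fit` branch above: scikit-learn's incremental estimators learn batch by batch, and `classes=` is only needed on the first call so the model knows the full label set before it has seen every class. A self-contained sketch with `SGDClassifier` (illustrative data, not the script's):

```python
import numpy as np
from sklearn.linear_model import SGDClassifier

model = SGDClassifier(random_state=0)

# First batch: declare every class the stream may ever contain.
model.partial_fit(np.array([[0.0, 1.0], [1.0, 0.0]]),
                  np.array([0, 1]), classes=np.array([0, 1]))

# Later batches update the same model without refitting from scratch.
model.partial_fit(np.array([[0.2, 0.9], [0.9, 0.1]]), np.array([0, 1]))

print(model.predict(np.array([[0.1, 0.8]])))   # e.g. [0]
```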
teradataml/data/scripts/sklearn/sklearn_fit_predict.py

@@ -110,9 +110,9 @@ if not len(features):
 
 # write code to call fit_predict with features and labels when n_c_labels > 0
 if n_c_labels > 0:
-    predictions = model.fit_predict(np.array(features), np.array(labels))
+    predictions = model.fit_predict(features, labels)
 else:
-    predictions = model.fit_predict(np.array(features))
+    predictions = model.fit_predict(features)
 
 # Export results to the Database through standard output
 for i in range(len(predictions)):
teradataml/data/scripts/sklearn/sklearn_function.template

@@ -28,19 +28,26 @@ def splitter(strr, delim=",", convert_to="str"):
     return [convert_to_type(i, convert_to) for i in strr.split(delim)]
 
 # Arguments to the Script.
-if len(sys.argv) != 4:
-    # 4 command line arguments should be passed to this file.
+if len(sys.argv) != 6:
+    # 6 command line arguments should be passed to this file.
     # 1. File to be run.
     # 2. Comma separated indices of partition columns.
     # 3. Comma separated types of all the data columns.
     # 4. Data columns information separated by "--" where each data column information is in the form
     #    "<arg_name>-<comma separated data indices>-<comma separated data types>".
-    sys.exit("4 command line arguments should be passed: file to be run,"
+    # 5. Flag to check the system type. True means Lake, Enterprise otherwise.
+    # 6. Model file prefix for lake system, None otherwise.
+    sys.exit("6 command line arguments should be passed: file to be run,"
              " comma separated indices of partition columns, comma separated types of all columns,"
              " data columns information separated by '--' where each data column information is"
-             " in the form '<arg_name>-<comma separated data indices>-<comma separated data types>'.")
-
-db = sys.argv[0].split("/")[1]
+             " in the form '<arg_name>-<comma separated data indices>-<comma separated data types>',"
+             " flag to check lake or enterprise and model file prefix used only for lake system.")
+
+is_lake_system = eval(sys.argv[4])
+if not is_lake_system:
+    db = sys.argv[0].split("/")[1]
+else:
+    model_file_prefix = sys.argv[5]
 
 data_partition_column_indices = splitter(sys.argv[1], convert_to="int")  # indices are integers.
 data_column_types = splitter(sys.argv[2], delim="--")
 
@@ -79,6 +86,11 @@ while 1:
         data_partition_column_values.append(
             convert_to_type(values[val], typee=data_partition_column_types[i])
         )
+
+    # Prepare the corresponding model file name and extract the model.
+    partition_join = "_".join([str(x) for x in data_partition_column_values])
+    # Replace '-' with '_' because partition column values can be negative.
+    partition_join = partition_join.replace("-", "_")
 
     # Prepare data dictionary containing only arguments related to data.
     for arg_name in data_args_values:
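The suffix built here keys each partition's model file. Worked through with hypothetical partition values, including the negative case the `replace` guards against:

```python
# Hypothetical partition column values for one partition of the data.
data_partition_column_values = [3, -1]

partition_join = "_".join([str(x) for x in data_partition_column_values])
# partition_join == '3_-1'

# '-' would be awkward in a file name, so map it to '_'.
partition_join = partition_join.replace("-", "_")
# partition_join == '3__1'
```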
@@ -105,4 +117,15 @@ all_args = {**data_args_values, **params}
 module_ = importlib.import_module(module_name)
 sklearn_model = getattr(module_, func_name)(**all_args)
 
-print(*(data_partition_column_values + [base64.b64encode(pickle.dumps(sklearn_model))]), sep=DELIMITER)
+model_str = pickle.dumps(sklearn_model)
+
+if is_lake_system:
+    model_file_path = f"/tmp/{model_file_prefix}_{partition_join}.pickle"
+
+    # Write to file in Vantage, to be used in predict/scoring.
+    with open(model_file_path, "wb") as fp:
+        fp.write(model_str)
+
+model_data = model_file_path if is_lake_system else base64.b64encode(model_str)
+
+print(*(data_partition_column_values + [model_data]), sep=DELIMITER)
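The hunk above emits the fitted model either as a pickle file path under /tmp (Lake) or as base64-encoded pickle bytes (Enterprise). Whatever consumes that output has to undo the matching encoding; a hedged sketch of that consumer side, under the same `is_lake_system` convention (names are illustrative, not the template's exact variables):

```python
import base64
import pickle

def load_partition_model(model_data, is_lake_system):
    """Recover the fitted estimator emitted by the fit template."""
    if is_lake_system:
        # Lake: model_data is a '/tmp/<prefix>_<partition>.pickle' path.
        with open(model_data, "rb") as fp:
            return pickle.load(fp)
    # Enterprise: model_data is the base64-encoded pickle bytes.
    return pickle.loads(base64.b64decode(model_data))
```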
teradataml/data/scripts/sklearn/sklearn_neighbors.py

@@ -116,7 +116,7 @@ while 1:
 
         f_ = values[:n_f_cols]
         if f_:
-            output = getattr(model, func_name)(np.array([f_]), **arguments)
+            output = getattr(model, func_name)([f_], **arguments)
         else:
            output = getattr(model, func_name)(**arguments)
         result_list = f_
teradataml/data/scripts/sklearn/sklearn_score.py

@@ -110,10 +110,19 @@ while 1:
 if len(features) == 0:
     sys.exit(0)
 
+
+model_name = model.__class__.__name__
+np_func_list = ["MultiOutputClassifier", "GaussianMixture"]
+
+if model_name in np_func_list:
+    features = np.array(features)
+
 if labels:
-    val = getattr(model, func_name)(np.array(features), np.array(labels))
+    if model_name in np_func_list:
+        labels = np.array(labels)
+    val = getattr(model, func_name)(features, labels)
 else:
-    val = getattr(model, func_name)(np.array(features))
+    val = getattr(model, func_name)(features)
 
 result_val = ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val]
-print(*(data_partition_column_values + result_val), sep=DELIMITER)
+print(*(data_partition_column_values + result_val), sep=DELIMITER)
teradataml/data/scripts/sklearn/sklearn_transform.py

@@ -92,11 +92,14 @@ data_partition_column_types = [data_column_types[idx] for idx in data_partition_
 model = None
 data_partition_column_values = []
 
+missing_indicator_input = []
+
 # Data Format:
 # feature1, feature2, ..., featuren, label1, label2, ... labelk, data_partition_column1, ...,
 # data_partition_columnn.
 # label is optional (it is present when label_exists is not "None")
 
+model_name = ""
 while 1:
     try:
         line = input()
@@ -128,9 +131,33 @@ while 1:
             sys.exit("Model file is not installed in Vantage.")
 
         f_ = values[:n_f_cols]
+
+        model_name = model.__class__.__name__
+        np_func_list = ["ClassifierChain", "EllipticEnvelope", "MinCovDet",
+                        "FeatureAgglomeration", "LabelBinarizer", "MultiLabelBinarizer"]
+
+        # MissingIndicator requires processing the entire dataset simultaneously,
+        # rather than on a row-by-row basis.
+
+        # Error raised during row-by-row processing -
+        # "ValueError: MissingIndicator does not support data with dtype <U13.
+        # Please provide either a numeric array (with a floating point or
+        # integer dtype) or categorical data represented either as an array
+        # with integer dtype or an array of string values with an object dtype."
+        if model_name == "MissingIndicator" and func_name == "transform":
+            missing_indicator_input.append(f_)
+            continue
+
+        f__ = np.array([f_]) if model_name in np_func_list or \
+            (model_name == "SimpleImputer" and func_name == "inverse_transform") \
+            else [f_]
+
         if n_c_labels > 0:
             # Labels are present in last column.
             l_ = values[n_f_cols:n_f_cols+n_c_labels]
+
+            l__ = np.array([l_]) if model_name in np_func_list or \
+                (model_name == "SimpleImputer" and func_name == "inverse_transform") \
+                else [l_]
             # predict() now takes 'y' also for it to return the labels from script. Skipping 'y'
             # in function call. Generally, 'y' is passed to return y along with actual output.
             try:
@@ -138,17 +165,17 @@ while 1:
                 # used 'in' in the if condition, as model.__module__ gives
                 # 'sklearn.cross_decomposition._pls'.
                 if "cross_decomposition" in model.__module__:
-                    trans_values = getattr(model, func_name)(X=np.array([f_]), Y=np.array([l_]))
+                    trans_values = getattr(model, func_name)(X=f__, Y=l__)
                 else:
-                    trans_values = getattr(model, func_name)(X=np.array([f_]), y=np.array([l_]))
+                    trans_values = getattr(model, func_name)(X=f__, y=l__)
 
             except TypeError as ex:
                 # Functions which do not accept 'y', like predict_proba(), raise an error like
                 # "TypeError: predict_proba() takes 2 positional arguments but 3 were given".
-                trans_values = getattr(model, func_name)(np.array([f_]))
+                trans_values = getattr(model, func_name)(f__)
         else:
             # If class labels do not exist in data, don't read labels, read just features.
-            trans_values = getattr(model, func_name)(np.array([f_]))
+            trans_values = getattr(model, func_name)(f__)
 
         result_list = f_
         if n_c_labels > 0 and func_name in ["predict", "decision_function"]:
@@ -169,3 +196,27 @@ while 1:
 
     except EOFError:  # Exit if reached EOF or CTRL-D
         break
+
+
+# MissingIndicator needs processing of the whole dataset at the same time, instead of row by row.
+# Hence, handle it outside of the while loop.
+if model_name == "MissingIndicator" and func_name == "transform":
+    m_out = model.transform(missing_indicator_input)
+
+    for j, vals in enumerate(missing_indicator_input):
+
+        m_out_list = get_output_data(trans_values=m_out[j], func_name=func_name,
+                                     model_obj=model, n_c_labels=n_c_labels)
+
+        result_list = missing_indicator_input[j] + m_out_list
+
+        for i, val in enumerate(result_list):
+            if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))):
+                result_list[i] = ""
+            # MissingIndicator returns boolean values. Convert them to 0/1.
+            elif val == False:
+                result_list[i] = 0
+            elif val == True:
+                result_list[i] = 1
+
+        print(*(data_partition_column_values + result_list), sep=DELIMITER)
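`MissingIndicator` is a whole-dataset transformer: with the default `features='missing-only'` it must scan every row to decide which columns contain missing values at all, and single string-typed rows fail outright (the `ValueError` quoted earlier in the diff). A standalone illustration of the buffered call and the boolean-to-0/1 conversion; it uses `fit_transform` for self-containment, whereas the script calls `transform` on an already-fitted model:

```python
import numpy as np
from sklearn.impute import MissingIndicator

# Buffered rows, as missing_indicator_input would hold them (made-up data).
rows = [[1.0, np.nan], [2.0, 3.0], [np.nan, 4.0]]

indicator = MissingIndicator(features="all")
mask = indicator.fit_transform(np.array(rows))   # boolean matrix

# Emit 0/1 instead of False/True, matching the script's output convention.
for row, flags in zip(rows, mask):
    print(row + [int(f) for f in flags])
```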
teradataml/data/star_pivot.csv

@@ -0,0 +1,8 @@
+country,state,yr,qtr,sales,cogs,rating
+USA,CA,2001,Q1,30,15,A
+USA,NY,2001,Q1,45,25,D
+USA,CA,2001,Q2,50,20,A
+USA,CA,2001,Q2,5,5,B
+Canada,ON,2001,Q2,10,0,B
+Canada,BC,2001,Q3,15,0,A
+Canada,BC,2001,Q3,10,0,A
teradataml/data/templates/open_source_ml.json

@@ -2,7 +2,8 @@
   "env_specs": [
     {
       "env_name": "openml_env",
-      "libs": "scikit-learn",
+      "libs": ["scikit-learn==1.5.1", "joblib==1.4.2", "numpy==2.0.0",
+               "scipy==1.14.0", "threadpoolctl==3.5.0"],
       "desc": "DONT DELETE: OpenML environment"
     }
   ]
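Pinning `libs` to exact versions makes the auto-created OpenML environment reproducible instead of floating with the latest scikit-learn release. If you manage a similar environment yourself on a Lake system, the spec maps onto teradataml's user-environment API roughly as below; this is a sketch, so verify the `create_env`/`install_lib` signatures against your teradataml version:

```python
import json
from teradataml import create_env

# Parse a spec shaped like the template above (path is illustrative).
with open("open_source_ml.json") as f:
    spec = json.load(f)["env_specs"][0]

# Assumed API per teradataml docs: create the env, then install pinned libs.
env = create_env(env_name=spec["env_name"], desc=spec["desc"])
env.install_lib(spec["libs"])
```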
teradataml/data/teradataml_example.json

@@ -1348,6 +1348,25 @@
         "radio":"FLOAT",
         "newspaper":"FLOAT",
         "sales":"FLOAT"
+    },
+    "timestamp_data":{
+        "id": "INTEGER",
+        "timestamp_col": "VARCHAR(50)",
+        "timestamp_col1": "BIGINT",
+        "format_col": "VARCHAR(50)",
+        "timezone_col": "VARCHAR(50)"
+    },
+    "interval_data":{
+        "id": "INTEGER",
+        "int_col": "BIGINT",
+        "value_col": "VARCHAR(30)",
+        "value_col1": "VARCHAR(30)",
+        "str_col1": "VARCHAR(30)",
+        "str_col2": "VARCHAR(30)"
+    },
+    "url_data": {
+        "id": "INTEGER",
+        "urls": "VARCHAR(60)",
+        "part": "VARCHAR(20)"
     }
-
 }
teradataml/data/timestamp_data.csv

@@ -0,0 +1,4 @@
+id,timestamp_col,timestamp_col1,format_col,timezone_col
+0,"2015-01-08 00:00:12.2",123456,"YYYY-MM-DD HH24:MI:SS.FF6","GMT"
+1,"2015-01-08 13:00",878986,"YYYY-MM-DD HH24:MI","America Pacific"
+2,"2015-01-08 00:00:12.2+10:00",45678910234,"YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM","GMT+10"
teradataml/data/titanic_dataset_unpivoted.csv

@@ -0,0 +1,19 @@
+passenger,AttributeName,AttributeValue,survived
+61,pclass,3,0
+1000,pclass,3,1
+40,pclass,3,1
+21,pclass,2,0
+61,gender,male,0
+1000,gender,,1
+40,gender,female,1
+21,gender,male,0
+2,pclass,1,1
+16,pclass,2,1
+7,pclass,1,0
+2,gender,female,1
+16,gender,female,1
+7,gender,male,0
+10,pclass,2,1
+4,pclass,1,1
+10,gender,female,1
+4,gender,female,1
teradataml/data/uaf_example.json

@@ -471,5 +471,59 @@
         "CONF_OFF_v": "FLOAT",
         "CONF_LOW_v": "FLOAT",
         "CONF_HI_v": "FLOAT"
+    },
+    "dwt_dataTable":{
+        "id": "INTEGER",
+        "rowi": "INTEGER",
+        "v": "FLOAT"
+    },
+    "dwt_filterTable":{
+        "id": "INTEGER",
+        "seq": "INTEGER",
+        "lo": "FLOAT",
+        "hi": "FLOAT"
+    },
+    "idwt_dataTable":{
+        "id": "INTEGER",
+        "rowi": "INTEGER",
+        "approx": "FLOAT",
+        "detail": "FLOAT"
+    },
+    "idwt_filterTable":{
+        "id": "INTEGER",
+        "seq": "INTEGER",
+        "lo": "FLOAT",
+        "hi": "FLOAT"
+    },
+    "dwt2d_dataTable":{
+        "id": "INTEGER",
+        "x": "INTEGER",
+        "y": "INTEGER",
+        "v": "FLOAT"
+    },
+    "idwt2d_dataTable":{
+        "id": "INTEGER",
+        "x": "INTEGER",
+        "y": "INTEGER",
+        "v": "FLOAT"
+    },
+    "covid_confirm_sd":{
+        "city": "VARCHAR(15)",
+        "row_axis": "INTEGER",
+        "cnumber": "INTEGER"
+    },
+    "real_values":{
+        "TD_TIMECODE": "TIMESTAMP(0)",
+        "id": "INTEGER",
+        "val": "FLOAT",
+        "<PTI_CLAUSE>": "(TIMESTAMP(0), DATE '2020-01-01', HOURS(1), COLUMNS(id), nonsequenced)"
+    },
+    "windowdfft":{
+        "id": "INTEGER",
+        "row_i": "INTEGER",
+        "v1": "FLOAT",
+        "v2": "FLOAT",
+        "v3": "FLOAT",
+        "v4": "FLOAT"
     }
-}
+}
teradataml/data/unpivot_example.json

@@ -6,5 +6,20 @@
         "temp": "integer",
         "pressure": "real",
         "dewpoint": "varchar(30)"
+    },
+    "titanic_dataset_unpivoted":{
+        "passenger": "integer",
+        "AttributeName": "varchar(30)",
+        "AttributeValue": "varchar(30)",
+        "survived": "integer"
+    },
+    "star_pivot":{
+        "country": "varchar(30)",
+        "state": "varchar(30)",
+        "yr": "integer",
+        "qtr": "varchar(30)",
+        "sales": "integer",
+        "cogs": "integer",
+        "rating": "varchar(30)"
     }
 }
teradataml/data/url_data.csv

@@ -0,0 +1,9 @@
+"id","urls","part"
+0,"http://example.com:8080/path","FILE"
+1,"ftp://example.net:21/path","PATH"
+2,"https://example.net/path4/path5/path6?query4=value4#fragment3","REF"
+3,"https://www.facebook.com","HOST"
+4,"https://teracloud-pod-services-pod-account-service.dummyvalue.production.pods.teracloud.ninja/v1/accounts/acc-dummyvalue/user-environment-service/api/v1/","QUERY"
+5,"http://pg.example.ml/path150#fragment90","AUTHORITY"
+6,"smtp://user:password@smtp.example.com:21/file.txt","USERINFO"
+7,"https://www.google.com","PROTOCOL"
teradataml/data/windowdfft.csv

@@ -0,0 +1,16 @@
+id,row_i,v1,v2,v3,v4
+3,1,0.0,1.4,1.0,1.0
+3,2,1.0,2.4,2.0,2.0
+3,3,2.0,3.4,3.0,3.0
+3,4,3.0,4.6,4.0,4.0
+3,5,0.0,5.9,5.0,5.0
+3,6,1.0,6.7,6.0,6.0
+3,7,2.0,7.7,7.0,7.0
+3,8,3.0,8.7,8.0,8.0
+3,9,0.0,9.9,9.0,9.0
+3,10,1.0,10.2,10.0,10.0
+3,11,2.0,11.2,11.0,11.0
+3,12,3.0,12.2,12.0,12.0
+3,13,1.0,10.2,13.0,13.0
+3,14,2.0,11.2,14.0,14.0
+3,15,3.0,12.2,15.0,15.0
teradataml/dataframe/copy_to.py

@@ -30,7 +30,7 @@ from teradatasql import OperationalError
 from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
 from teradataml.utils.utils import execute_sql
 from teradataml.utils.validators import _Validators
-from teradatasqlalchemy.telemetry.queryband import collect_queryband
+from teradataml.telemetry_utils.queryband import collect_queryband
 
 
 @collect_queryband(queryband="CpToSql")
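This import swap, repeated across the package, points call sites at the new vendored `teradataml/telemetry_utils/queryband.py` (added in this release, +52 lines) instead of teradatasqlalchemy, dropping that cross-package coupling. The module body isn't shown in this diff; purely as an illustration of the decorator shape the call sites depend on (a stand-in, not the real implementation):

```python
import functools

def collect_queryband(queryband=None):
    """Stand-in sketch: tag an API entry point with a query-band label."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # A real implementation would record `queryband` (e.g. via a
            # SET QUERY_BAND statement) before the wrapped call runs, so the
            # SQL it issues can be attributed to the teradataml API used.
            return func(*args, **kwargs)
        return wrapper
    return decorator

@collect_queryband(queryband="CpToSql")
def copy_to_sql(*args, **kwargs):      # signature elided; illustration only
    ...
```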
teradataml/dataframe/data_transfer.py

@@ -27,7 +27,7 @@ from teradataml.dataframe.copy_to import copy_to_sql, _create_table_object, \
 from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
 from teradataml.dbutils.dbutils import _create_table, _execute_query_and_generate_pandas_df
 from teradataml.utils.validators import _Validators
-from teradatasqlalchemy.telemetry.queryband import collect_queryband
+from teradataml.telemetry_utils.queryband import collect_queryband
 
 
 @collect_queryband(queryband="fstExprt")
@@ -1958,7 +1958,8 @@ class _DataTransferUtils():
             dt_obj = _DataTransferUtils(df)
             ins_query = dt_obj._table_exists()
         """
-        return con.dialect.has_table(get_connection(), self.table_name, self.schema_name)
+        return con.dialect.has_table(get_connection(), self.table_name, self.schema_name,
+                                     table_only=True)
 
     def _get_fully_qualified_table_name(self, table_name=None, schema_name=None):
         """
@@ -2144,7 +2145,8 @@ class _DataTransferUtils():
         # drop the tables created by FastloadCSV.
         if not self.save_errors:
             for table in fastloadcsv_err_tables:
-                if conn.dialect.has_table(conn, table_name=table, schema=self.schema_name):
+                if conn.dialect.has_table(conn, table_name=table, schema=self.schema_name,
+                                          table_only=True):
                     UtilFuncs._drop_table(self._get_fully_qualified_table_name(table))
             err_warn_dict.update({"fastloadcsv_error_tables": []})
         return err_warn_dict
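Both `has_table` call sites above now pass `table_only=True`, the teradatasqlalchemy dialect flag that restricts the existence check to tables, so a view sharing the name no longer satisfies (or spoils) the lookup. A hedged usage sketch against an active teradataml session (the flag itself is taken from the diff; the other names are standard teradataml context helpers):

```python
from teradataml import get_connection, get_context

engine = get_context()                 # SQLAlchemy engine for the session
exists = engine.dialect.has_table(
    get_connection(),                  # live connection, as in the diff
    "sales",                           # hypothetical table name
    schema="my_db",
    table_only=True,                   # ignore any view named "sales"
)
print(exists)
```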