teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (263)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +183 -0
  4. teradataml/__init__.py +6 -3
  5. teradataml/_version.py +2 -2
  6. teradataml/analytics/__init__.py +3 -2
  7. teradataml/analytics/analytic_function_executor.py +275 -40
  8. teradataml/analytics/analytic_query_generator.py +92 -0
  9. teradataml/analytics/byom/__init__.py +3 -2
  10. teradataml/analytics/json_parser/metadata.py +1 -0
  11. teradataml/analytics/json_parser/utils.py +17 -21
  12. teradataml/analytics/meta_class.py +40 -1
  13. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  14. teradataml/analytics/sqle/__init__.py +10 -2
  15. teradataml/analytics/table_operator/__init__.py +3 -2
  16. teradataml/analytics/uaf/__init__.py +21 -2
  17. teradataml/analytics/utils.py +62 -1
  18. teradataml/analytics/valib.py +1 -1
  19. teradataml/automl/__init__.py +1553 -319
  20. teradataml/automl/custom_json_utils.py +139 -61
  21. teradataml/automl/data_preparation.py +276 -319
  22. teradataml/automl/data_transformation.py +163 -81
  23. teradataml/automl/feature_engineering.py +402 -239
  24. teradataml/automl/feature_exploration.py +9 -2
  25. teradataml/automl/model_evaluation.py +48 -51
  26. teradataml/automl/model_training.py +291 -189
  27. teradataml/catalog/byom.py +8 -8
  28. teradataml/catalog/model_cataloging_utils.py +1 -1
  29. teradataml/clients/auth_client.py +133 -0
  30. teradataml/clients/pkce_client.py +1 -1
  31. teradataml/common/aed_utils.py +3 -2
  32. teradataml/common/constants.py +48 -6
  33. teradataml/common/deprecations.py +13 -7
  34. teradataml/common/garbagecollector.py +156 -120
  35. teradataml/common/messagecodes.py +6 -1
  36. teradataml/common/messages.py +3 -1
  37. teradataml/common/sqlbundle.py +1 -1
  38. teradataml/common/utils.py +103 -11
  39. teradataml/common/wrapper_utils.py +1 -1
  40. teradataml/context/context.py +121 -31
  41. teradataml/data/advertising.csv +201 -0
  42. teradataml/data/bank_marketing.csv +11163 -0
  43. teradataml/data/bike_sharing.csv +732 -0
  44. teradataml/data/boston2cols.csv +721 -0
  45. teradataml/data/breast_cancer.csv +570 -0
  46. teradataml/data/complaints_test_tokenized.csv +353 -0
  47. teradataml/data/complaints_tokens_model.csv +348 -0
  48. teradataml/data/covid_confirm_sd.csv +83 -0
  49. teradataml/data/customer_segmentation_test.csv +2628 -0
  50. teradataml/data/customer_segmentation_train.csv +8069 -0
  51. teradataml/data/dataframe_example.json +10 -0
  52. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
  53. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
  54. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
  55. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
  56. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  57. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
  58. teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
  59. teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
  60. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
  61. teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
  62. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  63. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
  64. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
  65. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
  66. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  67. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  68. teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
  69. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
  70. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
  71. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
  72. teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
  73. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
  74. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  75. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  76. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  77. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
  78. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
  79. teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
  80. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  81. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  82. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  83. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  84. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  85. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  86. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  87. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  88. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  89. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  90. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  91. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  92. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  93. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  94. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  95. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  96. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  97. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  98. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  99. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  100. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  101. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  102. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  103. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  104. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  105. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  106. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  107. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  108. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  109. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  110. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  111. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  112. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  113. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  114. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  115. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  116. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  117. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  118. teradataml/data/dwt2d_dataTable.csv +65 -0
  119. teradataml/data/dwt_dataTable.csv +8 -0
  120. teradataml/data/dwt_filterTable.csv +3 -0
  121. teradataml/data/finance_data4.csv +13 -0
  122. teradataml/data/glm_example.json +28 -1
  123. teradataml/data/grocery_transaction.csv +19 -0
  124. teradataml/data/housing_train_segment.csv +201 -0
  125. teradataml/data/idwt2d_dataTable.csv +5 -0
  126. teradataml/data/idwt_dataTable.csv +8 -0
  127. teradataml/data/idwt_filterTable.csv +3 -0
  128. teradataml/data/insect2Cols.csv +61 -0
  129. teradataml/data/interval_data.csv +5 -0
  130. teradataml/data/jsons/paired_functions.json +14 -0
  131. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
  132. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  133. teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
  134. teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
  135. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
  136. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
  137. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
  138. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  139. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  140. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
  141. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  142. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  143. teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
  144. teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
  145. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
  146. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
  147. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
  148. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  149. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  150. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  151. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
  152. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
  153. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
  154. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  155. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  156. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  157. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  158. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  159. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  160. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  161. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  162. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  163. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  164. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  165. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  166. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  167. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  168. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  169. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  170. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  171. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  172. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  173. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  174. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  175. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  176. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  177. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  178. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  179. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  180. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  181. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  182. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  183. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  184. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  185. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  186. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  187. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  188. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  189. teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
  190. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  191. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  192. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  193. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  194. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  195. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
  196. teradataml/data/kmeans_example.json +5 -0
  197. teradataml/data/kmeans_table.csv +10 -0
  198. teradataml/data/load_example_data.py +8 -2
  199. teradataml/data/naivebayestextclassifier_example.json +1 -1
  200. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  201. teradataml/data/onehot_encoder_train.csv +4 -0
  202. teradataml/data/openml_example.json +29 -0
  203. teradataml/data/peppers.png +0 -0
  204. teradataml/data/real_values.csv +14 -0
  205. teradataml/data/sax_example.json +8 -0
  206. teradataml/data/scale_attributes.csv +3 -0
  207. teradataml/data/scale_example.json +52 -1
  208. teradataml/data/scale_input_part_sparse.csv +31 -0
  209. teradataml/data/scale_input_partitioned.csv +16 -0
  210. teradataml/data/scale_input_sparse.csv +11 -0
  211. teradataml/data/scale_parameters.csv +3 -0
  212. teradataml/data/scripts/deploy_script.py +21 -2
  213. teradataml/data/scripts/sklearn/sklearn_fit.py +40 -37
  214. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +22 -30
  215. teradataml/data/scripts/sklearn/sklearn_function.template +42 -24
  216. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
  217. teradataml/data/scripts/sklearn/sklearn_neighbors.py +19 -28
  218. teradataml/data/scripts/sklearn/sklearn_score.py +32 -32
  219. teradataml/data/scripts/sklearn/sklearn_transform.py +85 -42
  220. teradataml/data/star_pivot.csv +8 -0
  221. teradataml/data/templates/open_source_ml.json +2 -1
  222. teradataml/data/teradataml_example.json +97 -1
  223. teradataml/data/timestamp_data.csv +4 -0
  224. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  225. teradataml/data/uaf_example.json +55 -1
  226. teradataml/data/unpivot_example.json +15 -0
  227. teradataml/data/url_data.csv +9 -0
  228. teradataml/data/windowdfft.csv +16 -0
  229. teradataml/data/ztest_example.json +16 -0
  230. teradataml/dataframe/copy_to.py +9 -4
  231. teradataml/dataframe/data_transfer.py +125 -64
  232. teradataml/dataframe/dataframe.py +575 -57
  233. teradataml/dataframe/dataframe_utils.py +47 -9
  234. teradataml/dataframe/fastload.py +273 -90
  235. teradataml/dataframe/functions.py +339 -0
  236. teradataml/dataframe/row.py +160 -0
  237. teradataml/dataframe/setop.py +2 -2
  238. teradataml/dataframe/sql.py +740 -18
  239. teradataml/dataframe/window.py +1 -1
  240. teradataml/dbutils/dbutils.py +324 -18
  241. teradataml/geospatial/geodataframe.py +1 -1
  242. teradataml/geospatial/geodataframecolumn.py +1 -1
  243. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  244. teradataml/lib/aed_0_1.dll +0 -0
  245. teradataml/opensource/sklearn/_sklearn_wrapper.py +254 -122
  246. teradataml/options/__init__.py +16 -5
  247. teradataml/options/configure.py +39 -6
  248. teradataml/options/display.py +2 -2
  249. teradataml/plot/axis.py +4 -4
  250. teradataml/scriptmgmt/UserEnv.py +26 -19
  251. teradataml/scriptmgmt/lls_utils.py +120 -16
  252. teradataml/table_operators/Script.py +4 -5
  253. teradataml/table_operators/TableOperator.py +160 -26
  254. teradataml/table_operators/table_operator_util.py +88 -41
  255. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  256. teradataml/telemetry_utils/__init__.py +0 -0
  257. teradataml/telemetry_utils/queryband.py +52 -0
  258. teradataml/utils/validators.py +41 -3
  259. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +191 -6
  260. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +263 -185
  261. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
  262. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
  263. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
@@ -6,35 +6,22 @@ import base64
 
 DELIMITER = '\t'
 
-
-def get_value(value):
-    ret_val = value
-    try:
-        ret_val = round(float("".join(value.split())), 2)
-    except Exception as ex:
-        # If the value can't be converted to float, then it is string.
-        pass
-    return ret_val
-
-
-def get_values_list(values, ignore_none=True):
+def get_values_list(values, types):
     ret_vals = []
-    for val in values:
-        if val == "" and ignore_none:
-            # Empty cell value in the database table.
-            continue
-        ret_vals.append(get_value(val))
-
+    for i, val in enumerate(values):
+        ret_vals.append(convert_to_type(val, types[i]))
     return ret_vals
 
 def convert_to_type(val, typee):
     if typee == 'int':
-        return int(val)
+        return int(val) if val != "" else np.nan
     if typee == 'float':
-        return float(val)
+        if isinstance(val, str):
+            val = val.replace(' ', '')
+        return float(val) if val != "" else np.nan
     if typee == 'bool':
-        return eval(val)
-    return str(val)
+        return eval(val) if val != "" else None
+    return str(val) if val != "" else None
 
 def splitter(strr, delim=",", convert_to="str"):
     """
@@ -54,13 +41,14 @@ if len(sys.argv) != 9:
     # 4. No of class labels.
     # 5. No of group columns.
    # 6. Comma separated indices of partition columns.
-    # 7. Comma separated types of the partition columns.
+    # 7. Comma separated types of all the data columns.
     # 8. Model file prefix to generated model file using partition columns.
     # 9. Flag to check the system type. True, means Lake, Enterprise otherwise.
     sys.exit("9 arguments command line arguments should be passed: file to be run,"
              " function name, no of feature columns, no of class labels, no of group columns,"
-             " comma separated indices and types of partition columns, model file prefix to"
-             " generated model file using partition columns and flag to check lake or enterprise.")
+             " comma separated indices of partition columns, comma separated types of all columns,"
+             " model file prefix to generated model file using partition columns and flag to check"
+             " lake or enterprise.")
 
 
 is_lake_system = eval(sys.argv[8])
@@ -70,10 +58,11 @@ function_name = sys.argv[1]
 n_f_cols = int(sys.argv[2])
 n_c_labels = int(sys.argv[3])
 n_g_cols = int(sys.argv[4])
-data_partition_column_types = splitter(sys.argv[6])
+data_column_types = splitter(sys.argv[6], delim="--")
 data_partition_column_indices = splitter(sys.argv[5], convert_to="int") # indices are integers.
 model_file_prefix = sys.argv[7]
 
+data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
 
 model = None
 data_partition_column_values = []
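For orientation on the new argument contract above: column types for all data columns now arrive as one "--"-delimited string, and the partition-column types are derived from the comma separated indices. A small sketch with illustrative values (not taken from the package):

    # Hypothetical sys.argv payloads for the reworked scripts.
    argv_types = "float--float--int--str--str"   # e.g. sys.argv[6]
    argv_indices = "3,4"                         # e.g. sys.argv[5]

    data_column_types = argv_types.split("--")
    data_partition_column_indices = [int(i) for i in argv_indices.split(",")]
    data_partition_column_types = [data_column_types[idx]
                                   for idx in data_partition_column_indices]
    print(data_partition_column_types)           # ['str', 'str']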
@@ -93,6 +82,7 @@ while 1:
             break
         else:
             values = line.split(DELIMITER)
+            values = get_values_list(values, data_column_types)
             if not data_partition_column_values:
                 # Partition column values is same for all rows. Hence, only read once.
                 for i, val in enumerate(data_partition_column_indices):
@@ -117,13 +107,13 @@ while 1:
 
             start = 0
             if n_f_cols > 0:
-                features.append(get_values_list(values[:n_f_cols]))
+                features.append(values[:n_f_cols])
                 start = start + n_f_cols
             if n_c_labels > 0:
-                labels.append(get_values_list(values[start:(start+n_c_labels)]))
+                labels.append(values[start:(start+n_c_labels)])
                 start = start + n_c_labels
             if n_g_cols > 0:
-                groups.append(get_values_list(values[start:(start+n_g_cols)]))
+                groups.append(values[start:(start+n_g_cols)])
 
     except EOFError: # Exit if reached EOF or CTRL-D
         break
@@ -144,14 +134,14 @@ if function_name == "split":
         y_train, y_test = labels[train_idx], labels[test_idx]
         for X, y in zip(X_train, y_train):
             print(*(data_partition_column_values + [split_id, "train"] +
-                    ['' if (val is None or math.isnan(val) or math.isinf(val)) else val
+                    ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val
                      for val in X] + [y]
-                    ),sep=DELIMITER)
+                    ), sep=DELIMITER)
         for X, y in zip(X_test, y_test):
             print(*(data_partition_column_values + [split_id, "test"] +
-                    ['' if (val is None or math.isnan(val) or math.isinf(val)) else val
+                    ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val
                     for val in X] + [y]
-                    ),sep=DELIMITER)
+                    ), sep=DELIMITER)
         split_id += 1
 else:
     val = getattr(model, function_name)(features, labels, groups)
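The widened emptiness check above exists because string values can now reach this point (empty cells become None/np.nan while other cells keep their declared type), and math.isnan() raises TypeError on strings. A short standalone illustration, not from the package:

    import math

    def mask(val):
        # Strings pass through untouched; None and non-string NaN/inf become ''.
        return '' if (val is None or
                      (not isinstance(val, str) and
                       (math.isnan(val) or math.isinf(val)))) else val

    print([mask(v) for v in [1.5, float("nan"), "CA", None]])  # [1.5, '', 'CA', '']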
@@ -12,34 +12,22 @@ from scipy.sparse.csr import csr_matrix
 DELIMITER = '\t'
 
 
-def get_value(value):
-    ret_val = value
-    try:
-        ret_val = float(value.replace(' ', ''))
-    except Exception as ex:
-        # If the value can't be converted to float, then it is string.
-        pass
-    return ret_val
-
-
-def get_values_list(values, ignore_none=True):
+def get_values_list(values, types):
     ret_vals = []
-    for val in values:
-        if val == "" and ignore_none:
-            # Empty cell value in the database table.
-            continue
-        ret_vals.append(get_value(val))
-
+    for i, val in enumerate(values):
+        ret_vals.append(convert_to_type(val, types[i]))
     return ret_vals
 
 def convert_to_type(val, typee):
     if typee == 'int':
-        return int(val)
+        return int(val) if val != "" else np.nan
     if typee == 'float':
-        return float(val)
+        if isinstance(val, str):
+            val = val.replace(' ', '')
+        return float(val) if val != "" else np.nan
     if typee == 'bool':
-        return eval(val)
-    return str(val)
+        return eval(val) if val != "" else None
+    return str(val) if val != "" else None
 
 def splitter(strr, delim=",", convert_to="str"):
     """
@@ -57,15 +45,15 @@ if len(sys.argv) < 7:
     # 2. function name.
     # 3. No of feature columns.
     # 4. Comma separated indices of partition columns.
-    # 5. Comma separated types of the partition columns.
+    # 5. Comma separated types of all the data columns.
     # 6. Model file prefix to generate model file using partition columns.
     # 7. Flag to check the system type. True, means Lake, Enterprise otherwise.
     # 8. OPTIONAL - Arguments in string format like "return_distance True-bool",
     #    "n_neighbors 3-int", "radius 3.4-float" etc.
     sys.exit("At least 7 arguments should be passed to this file - file to be run, function name, "\
-             "no of feature columns, comma separated indices and types of partition columns, "\
-             "model file prefix to generate model file using partition columns, flag to check "\
-             "lake or enterprise and optional arguments in string format.")
+             "no of feature columns, comma separated indices of partition columns, comma "\
+             "separated types of all columns, model file prefix to generate model file using "\
+             "partition columns, flag to check lake or enterprise and optional arguments in string format.")
 
 convert_to_int = lambda x: int(x) if x != "None" else None
 
@@ -74,7 +62,7 @@ if not is_lake_system:
     db = sys.argv[0].split("/")[1]
 func_name = sys.argv[1]
 n_f_cols = convert_to_int(sys.argv[2])
-data_partition_column_types = splitter(sys.argv[4])
+data_column_types = splitter(sys.argv[4], delim="--")
 data_partition_column_indices = splitter(sys.argv[3], convert_to="int") # indices are integers.
 model_file_prefix = sys.argv[5]
 # Extract arguments from string.
@@ -83,6 +71,8 @@ for i in range(7, len(sys.argv), 2):
     value = sys.argv[i + 1].split("-", 1)
     arguments[sys.argv[i]] = convert_to_type(value[0], value[1])
 
+data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
+
 model = None
 data_partition_column_values = []
 
@@ -101,6 +91,7 @@ while 1:
             break
         else:
             values = line.split(DELIMITER)
+            values = get_values_list(values, data_column_types)
             if not data_partition_column_values:
                 # Partition column values is same for all rows. Hence, only read once.
                 for i, val in enumerate(data_partition_column_indices):
@@ -123,9 +114,9 @@ while 1:
             if not model:
                 sys.exit("Model file is not installed in Vantage.")
 
-            f_ = get_values_list(values[:n_f_cols])
+            f_ = values[:n_f_cols]
             if f_:
-                output = getattr(model, func_name)(np.array([f_]), **arguments)
+                output = getattr(model, func_name)([f_], **arguments)
             else:
                 output = getattr(model, func_name)(**arguments)
             result_list = f_
@@ -6,34 +6,22 @@ import numpy as np
 DELIMITER = '\t'
 
 
-def get_value(value):
-    ret_val = value
-    try:
-        ret_val = float("".join(value.split()))
-    except Exception as ex:
-        # If the value can't be converted to float, then it is string.
-        pass
-    return ret_val
-
-
-def get_values_list(values, ignore_none=True):
+def get_values_list(values, types):
     ret_vals = []
-    for val in values:
-        if val == "" and ignore_none:
-            # Empty cell value in the database table.
-            continue
-        ret_vals.append(get_value(val))
-
+    for i, val in enumerate(values):
+        ret_vals.append(convert_to_type(val, types[i]))
     return ret_vals
 
 def convert_to_type(val, typee):
     if typee == 'int':
-        return int(val)
+        return int(val) if val != "" else np.nan
     if typee == 'float':
-        return float(val)
+        if isinstance(val, str):
+            val = val.replace(' ', '')
+        return float(val) if val != "" else np.nan
     if typee == 'bool':
-        return bool(val)
-    return str(val)
+        return eval(val) if val != "" else None
+    return str(val) if val != "" else None
 
 def splitter(strr, delim=",", convert_to="str"):
     """
@@ -51,13 +39,13 @@ if len(sys.argv) != 8:
     # 3. No of feature columns.
     # 4. No of class labels.
     # 5. Comma separated indices of partition columns.
-    # 6. Comma separated types of the partition columns.
+    # 6. Comma separated types of all the data columns.
     # 7. Model file prefix to generated model file using partition columns.
     # 8. Flag to check the system type. True, means Lake, Enterprise otherwise.
     sys.exit("8 arguments should be passed to this file - file to be run, function name, "\
-             "no of feature columns, no of class labels, comma separated indices and types of "\
-             "partition columns, model file prefix to generate model file using partition "\
-             "columns and flag to check lake or enterprise.")
+             "no of feature columns, no of class labels, comma separated indices "
+             "of partition columns, comma separated types of all columns, model file prefix to "\
+             "generate model file using partition columns and flag to check lake or enterprise.")
 
 is_lake_system = eval(sys.argv[7])
 if not is_lake_system:
@@ -65,10 +53,12 @@ if not is_lake_system:
 func_name = sys.argv[1]
 n_f_cols = int(sys.argv[2])
 n_c_labels = int(sys.argv[3])
-data_partition_column_types = splitter(sys.argv[5])
+data_column_types = splitter(sys.argv[5], delim="--")
 data_partition_column_indices = splitter(sys.argv[4], convert_to="int") # indices are integers.
 model_file_prefix = sys.argv[6]
 
+data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
+
 model = None
 
 # Data Format (n_features, k_labels, one data_partition_column):
@@ -87,9 +77,10 @@ while 1:
             break
         else:
             values = line.split(DELIMITER)
-            features.append(get_values_list(values[:n_f_cols]))
+            values = get_values_list(values, data_column_types)
+            features.append(values[:n_f_cols])
             if n_c_labels > 0:
-                labels.append(get_values_list(values[n_f_cols:(n_f_cols+n_c_labels)]))
+                labels.append(values[n_f_cols:(n_f_cols+n_c_labels)])
 
             if not data_partition_column_values:
                 # Partition column values is same for all rows. Hence, only read once.
@@ -119,10 +110,19 @@ while 1:
 if len(features) == 0:
     sys.exit(0)
 
+
+model_name = model.__class__.__name__
+np_func_list = ["MultiOutputClassifier", "GaussianMixture"]
+
+if model_name in np_func_list:
+    features = np.array(features)
+
 if labels:
-    val = getattr(model, func_name)(np.array(features), np.array(labels))
+    if model_name in np_func_list:
+        labels = np.array(labels)
+    val = getattr(model, func_name)(features, labels)
 else:
-    val = getattr(model, func_name)(np.array(features))
+    val = getattr(model, func_name)(features)
 
-result_val = ['' if (val is None or math.isnan(val) or math.isinf(val)) else val]
-print(*(data_partition_column_values + result_val), sep=DELIMITER)
+result_val = ['' if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))) else val]
+print(*(data_partition_column_values + result_val), sep=DELIMITER)
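The np_func_list special-casing above keeps features and labels as plain Python lists except for the two estimators the diff singles out, which are handed ndarrays. A rough, hedged sketch of that dispatch, assuming scikit-learn is installed (the data is illustrative, not from the package):

    import numpy as np
    from sklearn.mixture import GaussianMixture

    features = [[1.0, 2.0], [1.1, 2.1], [5.0, 6.0], [5.2, 6.1]]
    model = GaussianMixture(n_components=2, random_state=0)

    # Mirrors the new logic: only estimators in np_func_list get ndarray input.
    np_func_list = ["MultiOutputClassifier", "GaussianMixture"]
    X = np.array(features) if model.__class__.__name__ in np_func_list else features
    print(model.fit(X).converged_)  # True once EM converges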
@@ -4,41 +4,24 @@ import os
 import sys
 import numpy as np
 
-# The below import is needed to convert sparse matrix to dense array as sparse matrices are NOT
-# supported in Vantage.
-# This is in scipy 1.10.0. Might vary based on scipy version.
-from scipy.sparse import csr_matrix
-
 DELIMITER = '\t'
 
-def get_value(value):
-    ret_val = value
-    try:
-        ret_val = float(value.replace(' ', ''))
-    except Exception as ex:
-        # If the value can't be converted to float, then it is string.
-        pass
-    return ret_val
-
-
-def get_values_list(values, ignore_none=True):
+def get_values_list(values, types):
     ret_vals = []
-    for val in values:
-        if val == "" and ignore_none:
-            # Empty cell value in the database table.
-            continue
-        ret_vals.append(get_value(val))
-
+    for i, val in enumerate(values):
+        ret_vals.append(convert_to_type(val, types[i]))
     return ret_vals
 
 def convert_to_type(val, typee):
     if typee == 'int':
-        return int(val)
+        return int(val) if val != "" else np.nan
     if typee == 'float':
-        return float(val)
+        if isinstance(val, str):
+            val = val.replace(' ', '')
+        return float(val) if val != "" else np.nan
     if typee == 'bool':
-        return eval(val)
-    return str(val)
+        return eval(val) if val != "" else None
+    return str(val) if val != "" else None
 
 def splitter(strr, delim=",", convert_to="str"):
     """
@@ -54,7 +37,7 @@ def get_output_data(trans_values, func_name, model_obj, n_c_labels):
     # supported in Vantage.
     module_name = model_obj.__module__.split("._")[0]
 
-    if isinstance(trans_values, csr_matrix):
+    if type(trans_values).__name__ in ["csr_matrix", "csc_matrix"]:
         trans_values = trans_values.toarray()
 
     if module_name == "sklearn.cross_decomposition" and n_c_labels > 0 and func_name == "transform":
@@ -86,13 +69,13 @@ if len(sys.argv) != 8:
     # 3. No of feature columns.
     # 4. No of class labels.
     # 5. Comma separated indices of partition columns.
-    # 6. Comma separated types of the partition columns.
+    # 6. Comma separated types of all the data columns.
     # 7. Model file prefix to generated model file using partition columns.
     # 8. Flag to check the system type. True, means Lake, Enterprise otherwise.
     sys.exit("8 arguments should be passed to this file - file to be run, function name, "\
-             "no of feature columns, no of class labels, comma separated indices and types of "\
-             "partition columns, model file prefix to generate model file using partition "\
-             "columns and flag to check lake or enterprise.")
+             "no of feature columns, no of class labels, comma separated indices of partition "\
+             "columns, comma separated types of all columns, model file prefix to generate model "\
+             "file using partition columns and flag to check lake or enterprise.")
 
 is_lake_system = eval(sys.argv[7])
 if not is_lake_system:
@@ -100,18 +83,23 @@ if not is_lake_system:
 func_name = sys.argv[1]
 n_f_cols = int(sys.argv[2])
 n_c_labels = int(sys.argv[3])
-data_partition_column_types = splitter(sys.argv[5])
+data_column_types = splitter(sys.argv[5], delim="--")
 data_partition_column_indices = splitter(sys.argv[4], convert_to="int") # indices are integers.
 model_file_prefix = sys.argv[6]
 
+data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
+
 model = None
 data_partition_column_values = []
 
+missing_indicator_input = []
+
 # Data Format:
 # feature1, feature2, ..., featuren, label1, label2, ... labelk, data_partition_column1, ...,
 # data_partition_columnn.
 # label is optional (it is present when label_exists is not "None")
 
+model_name = ""
 while 1:
     try:
         line = input()
@@ -119,6 +107,7 @@ while 1:
             break
         else:
            values = line.split(DELIMITER)
+            values = get_values_list(values, data_column_types)
             if not data_partition_column_values:
                 # Partition column values is same for all rows. Hence, only read once.
                 for i, val in enumerate(data_partition_column_indices):
@@ -141,10 +130,34 @@ while 1:
             if not model:
                 sys.exit("Model file is not installed in Vantage.")
 
-            f_ = get_values_list(values[:n_f_cols])
+            f_ = values[:n_f_cols]
+
+            model_name = model.__class__.__name__
+            np_func_list = ["ClassifierChain", "EllipticEnvelope", "MinCovDet",
+                            "FeatureAgglomeration", "LabelBinarizer", "MultiLabelBinarizer"]
+
+            # MissingIndicator requires processing the entire dataset simultaneously,
+            # rather than on a row-by-row basis.
+
+            # Error getting during row-by-row processing -
+            # "ValueError: MissingIndicator does not support data with dtype <U13.
+            # Please provide either a numeric array (with a floating point or
+            # integer dtype) or categorical data represented ei
+            if model_name == "MissingIndicator" and func_name == "transform":
+                missing_indicator_input.append(f_)
+                continue
+
+            f__ = np.array([f_]) if model_name in np_func_list or \
+                  (model_name == "SimpleImputer" and func_name == "inverse_transform")\
+                  else [f_]
+
             if n_c_labels > 0:
                 # Labels are present in last column.
-                l_ = get_values_list(values[n_f_cols:n_f_cols+n_c_labels])
+                l_ = values[n_f_cols:n_f_cols+n_c_labels]
+
+                l__ = np.array([l_]) if model_name in np_func_list or \
+                      (model_name == "SimpleImputer" and func_name == "inverse_transform")\
+                      else [l_]
                 # predict() now takes 'y' also for it to return the labels from script. Skipping 'y'
                 # in function call. Generally, 'y' is passed to return y along with actual output.
                 try:
@@ -152,17 +165,17 @@ while 1:
                     # used 'in' in if constion, as model.__module__ is giving
                     # 'sklearn.cross_decomposition._pls'.
                     if "cross_decomposition" in model.__module__:
-                        trans_values = getattr(model, func_name)(X=np.array([f_]), Y=np.array([l_]))
+                        trans_values = getattr(model, func_name)(X=f__, Y=l__)
                     else:
-                        trans_values = getattr(model, func_name)(X=np.array([f_]), y=np.array([l_]))
+                        trans_values = getattr(model, func_name)(X=f__, y=l__)
 
                 except TypeError as ex:
                     # Function which does not accept 'y' like predict_proba() raises error like
                     # "TypeError: predict_proba() takes 2 positional arguments but 3 were given".
-                    trans_values = getattr(model, func_name)(np.array([f_]))
+                    trans_values = getattr(model, func_name)(f__)
             else:
                 # If class labels do not exist in data, don't read labels, read just features.
-                trans_values = getattr(model, func_name)(np.array([f_]))
+                trans_values = getattr(model, func_name)(f__)
 
             result_list = f_
             if n_c_labels > 0 and func_name in ["predict", "decision_function"]:
@@ -170,10 +183,40 @@ while 1:
             result_list += get_output_data(trans_values=trans_values, func_name=func_name,
                                            model_obj=model, n_c_labels=n_c_labels)
 
-            print(*(data_partition_column_values +
-                    ['' if (val is None or math.isnan(val) or math.isinf(val))
-                     else val for val in result_list]),
-                  sep=DELIMITER)
+            for i, val in enumerate(result_list):
+                if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))):
+                    result_list[i] = ""
+                # MissingIndicator returns boolean values. Convert them to 0/1.
+                elif val == False:
+                    result_list[i] = 0
+                elif val == True:
+                    result_list[i] = 1
+
+            print(*(data_partition_column_values + result_list), sep=DELIMITER)
 
     except EOFError: # Exit if reached EOF or CTRL-D
        break
+
+
+# MissingIndicator needs processing of all the dataset at the same time, instead of row by row.
+# Hence, handling it outside of the while loop
+if model_name == "MissingIndicator" and func_name == "transform":
+    m_out = model.transform(missing_indicator_input)
+
+    for j, vals in enumerate(missing_indicator_input):
+
+        m_out_list = get_output_data(trans_values=m_out[j], func_name=func_name,
+                                     model_obj=model, n_c_labels=n_c_labels)
+
+        result_list = missing_indicator_input[j] + m_out_list
+
+        for i, val in enumerate(result_list):
+            if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))):
+                result_list[i] = ""
+            # MissingIndicator returns boolean values. Convert them to 0/1.
+            elif val == False:
+                result_list[i] = 0
+            elif val == True:
+                result_list[i] = 1
+
+        print(*(data_partition_column_values + result_list), sep=DELIMITER)
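A standalone sketch of the batch MissingIndicator path added above, assuming scikit-learn is available; here fit_transform stands in for the pre-fitted model the script loads from Vantage, and the data is illustrative:

    import numpy as np
    from sklearn.impute import MissingIndicator

    # Rows are collected first; MissingIndicator cannot run row by row.
    missing_indicator_input = [[1.0, np.nan], [np.nan, 3.0], [4.0, 5.0]]

    model = MissingIndicator(features="all")
    m_out = model.fit_transform(np.array(missing_indicator_input))

    # Booleans become 0/1, matching the result post-processing in the script.
    print(m_out.astype(int))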
@@ -0,0 +1,8 @@
+country,state,yr,qtr,sales,cogs,rating
+USA,CA,2001,Q1,30,15,A
+USA,NY,2001,Q1,45,25,D
+USA,CA,2001,Q2,50,20,A
+USA,CA,2001,Q2,5,5,B
+Canada,ON,2001,Q2,10,0,B
+Canada,BC,2001,Q3,15,0,A
+Canada,BC,2001,Q3,10,0,A
@@ -2,7 +2,8 @@
   "env_specs": [
     {
       "env_name": "openml_env",
-      "libs": "scikit-learn",
+      "libs": ["scikit-learn==1.5.1", "joblib==1.4.2", "numpy==2.0.0",
+               "scipy==1.14.0", "threadpoolctl==3.5.0"],
       "desc": "DONT DELETE: OpenML environment"
     }
   ]
@@ -1271,6 +1271,102 @@
        "height":"INTEGER",
        "weight":"INTEGER",
        "bmi":"INTEGER"
+    },
+    "breast_cancer":{
+        "id":"BIGINT",
+        "diagnosis":"VARCHAR(20)",
+        "radius_mean":"FLOAT",
+        "texture_mean":"FLOAT",
+        "perimeter_mean":"FLOAT",
+        "area_mean":"FLOAT",
+        "smoothness_mean":"FLOAT",
+        "compactness_mean":"FLOAT",
+        "concavity_mean":"FLOAT",
+        "concave_points_mean":"FLOAT",
+        "symmetry_mean":"FLOAT",
+        "fractal_dimension_mean":"FLOAT",
+        "radius_se":"FLOAT",
+        "texture_se":"FLOAT",
+        "perimeter_se":"FLOAT",
+        "area_se":"FLOAT",
+        "smoothness_se":"FLOAT",
+        "compactness_se":"FLOAT",
+        "concavity_se":"FLOAT",
+        "concave_points_se":"FLOAT",
+        "symmetry_se":"FLOAT",
+        "fractal_dimension_se":"FLOAT",
+        "radius_worst":"FLOAT",
+        "texture_worst":"FLOAT",
+        "perimeter_worst":"FLOAT",
+        "area_worst":"FLOAT",
+        "smoothness_worst":"FLOAT",
+        "compactness_worst":"FLOAT",
+        "concavity_worst":"FLOAT",
+        "concave_points_worst":"FLOAT",
+        "symmetry_worst":"FLOAT",
+        "fractal_dimension_worst":"FLOAT"
+    },
+    "bike_sharing" :{
+        "instant":"BIGINT",
+        "dteday":"DATE FORMAT 'dd-mm-yyyy'",
+        "season":"BIGINT",
+        "yr":"BIGINT",
+        "mnth":"BIGINT",
+        "holiday":"BIGINT",
+        "weekday":"BIGINT",
+        "workingday":"BIGINT",
+        "weathersit":"BIGINT",
+        "temp":"FLOAT",
+        "atemp":"FLOAT",
+        "hum":"FLOAT",
+        "windspeed":"FLOAT",
+        "casual":"BIGINT",
+        "registered":"BIGINT",
+        "cnt":"BIGINT"
+    },
+    "bank_marketing":{
+        "age":"BIGINT",
+        "job":"VARCHAR(20)",
+        "marital":"VARCHAR(20)",
+        "education":"VARCHAR(20)",
+        "default_value":"VARCHAR(20)",
+        "balance":"BIGINT",
+        "housing":"VARCHAR(20)",
+        "loan":"VARCHAR(20)",
+        "contact":"VARCHAR(20)",
+        "day_of_month":"BIGINT",
+        "month_of_year":"VARCHAR(20)",
+        "duration":"BIGINT",
+        "campaign":"BIGINT",
+        "pdays":"BIGINT",
+        "previous":"BIGINT",
+        "poutcome":"VARCHAR(20)",
+        "deposit":"VARCHAR(20)"
+    },
+    "advertising":{
+        "TV":"FLOAT",
+        "radio":"FLOAT",
+        "newspaper":"FLOAT",
+        "sales":"FLOAT"
+    },
+    "timestamp_data":{
+        "id": "INTEGER",
+        "timestamp_col": "VARCHAR(50)",
+        "timestamp_col1": "BIGINT",
+        "format_col": "VARCHAR(50)",
+        "timezone_col": "VARCHAR(50)"
+    },
+    "interval_data":{
+        "id": "INTEGER",
+        "int_col": "BIGINT",
+        "value_col": "VARCHAR(30)",
+        "value_col1": "VARCHAR(30)",
+        "str_col1": "VARCHAR(30)",
+        "str_col2": "VARCHAR(30)"
+    },
+    "url_data": {
+        "id": "INTEGER",
+        "urls": "VARCHAR(60)",
+        "part": "VARCHAR(20)"
     }
-
 }