teradataml 20.0.0.1-py3-none-any.whl → 20.0.0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (240)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +306 -0
  4. teradataml/__init__.py +10 -3
  5. teradataml/_version.py +1 -1
  6. teradataml/analytics/__init__.py +3 -2
  7. teradataml/analytics/analytic_function_executor.py +299 -16
  8. teradataml/analytics/analytic_query_generator.py +92 -0
  9. teradataml/analytics/byom/__init__.py +3 -2
  10. teradataml/analytics/json_parser/metadata.py +13 -3
  11. teradataml/analytics/json_parser/utils.py +13 -6
  12. teradataml/analytics/meta_class.py +40 -1
  13. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  14. teradataml/analytics/sqle/__init__.py +11 -2
  15. teradataml/analytics/table_operator/__init__.py +4 -3
  16. teradataml/analytics/uaf/__init__.py +21 -2
  17. teradataml/analytics/utils.py +66 -1
  18. teradataml/analytics/valib.py +1 -1
  19. teradataml/automl/__init__.py +1502 -323
  20. teradataml/automl/custom_json_utils.py +139 -61
  21. teradataml/automl/data_preparation.py +247 -307
  22. teradataml/automl/data_transformation.py +32 -12
  23. teradataml/automl/feature_engineering.py +325 -86
  24. teradataml/automl/model_evaluation.py +44 -35
  25. teradataml/automl/model_training.py +122 -153
  26. teradataml/catalog/byom.py +8 -8
  27. teradataml/clients/pkce_client.py +1 -1
  28. teradataml/common/__init__.py +2 -1
  29. teradataml/common/constants.py +72 -0
  30. teradataml/common/deprecations.py +13 -7
  31. teradataml/common/garbagecollector.py +152 -120
  32. teradataml/common/messagecodes.py +11 -2
  33. teradataml/common/messages.py +4 -1
  34. teradataml/common/sqlbundle.py +26 -4
  35. teradataml/common/utils.py +225 -14
  36. teradataml/common/wrapper_utils.py +1 -1
  37. teradataml/context/context.py +82 -2
  38. teradataml/data/SQL_Fundamentals.pdf +0 -0
  39. teradataml/data/complaints_test_tokenized.csv +353 -0
  40. teradataml/data/complaints_tokens_model.csv +348 -0
  41. teradataml/data/covid_confirm_sd.csv +83 -0
  42. teradataml/data/dataframe_example.json +27 -1
  43. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  44. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  45. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  46. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  47. teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
  48. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  49. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  50. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  51. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  52. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  53. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  54. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  55. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  56. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  57. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  58. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  59. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  60. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  61. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  62. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  63. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  64. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  65. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  66. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  67. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  68. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  69. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  70. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  71. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  72. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  73. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  74. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  75. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  76. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  77. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  78. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  79. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  80. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  81. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  82. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  83. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  84. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  85. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  86. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  87. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  88. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  89. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  90. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  91. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  92. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  93. teradataml/data/dwt2d_dataTable.csv +65 -0
  94. teradataml/data/dwt_dataTable.csv +8 -0
  95. teradataml/data/dwt_filterTable.csv +3 -0
  96. teradataml/data/finance_data4.csv +13 -0
  97. teradataml/data/grocery_transaction.csv +19 -0
  98. teradataml/data/idwt2d_dataTable.csv +5 -0
  99. teradataml/data/idwt_dataTable.csv +8 -0
  100. teradataml/data/idwt_filterTable.csv +3 -0
  101. teradataml/data/interval_data.csv +5 -0
  102. teradataml/data/jsons/paired_functions.json +14 -0
  103. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  104. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  105. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  106. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  107. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  108. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  109. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  110. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  111. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  112. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  113. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  114. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  115. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  116. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  117. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  118. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  119. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  120. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  121. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  122. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  123. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  124. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  125. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  126. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  127. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  128. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  129. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  130. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  131. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  132. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  133. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  134. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  135. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  136. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  137. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  138. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  139. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  140. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  141. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  142. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  143. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  144. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  145. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  146. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  147. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  148. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  149. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  150. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  151. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  152. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  153. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  154. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  155. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  156. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  157. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  158. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  159. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  160. teradataml/data/load_example_data.py +8 -2
  161. teradataml/data/medical_readings.csv +101 -0
  162. teradataml/data/naivebayestextclassifier_example.json +1 -1
  163. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  164. teradataml/data/patient_profile.csv +101 -0
  165. teradataml/data/peppers.png +0 -0
  166. teradataml/data/real_values.csv +14 -0
  167. teradataml/data/sax_example.json +8 -0
  168. teradataml/data/scripts/deploy_script.py +1 -1
  169. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  170. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  171. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  172. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  173. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
  174. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  175. teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
  176. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  177. teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
  178. teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
  179. teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
  180. teradataml/data/star_pivot.csv +8 -0
  181. teradataml/data/target_udt_data.csv +8 -0
  182. teradataml/data/templates/open_source_ml.json +3 -1
  183. teradataml/data/teradataml_example.json +20 -1
  184. teradataml/data/timestamp_data.csv +4 -0
  185. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  186. teradataml/data/uaf_example.json +55 -1
  187. teradataml/data/unpivot_example.json +15 -0
  188. teradataml/data/url_data.csv +9 -0
  189. teradataml/data/vectordistance_example.json +4 -0
  190. teradataml/data/windowdfft.csv +16 -0
  191. teradataml/dataframe/copy_to.py +1 -1
  192. teradataml/dataframe/data_transfer.py +5 -3
  193. teradataml/dataframe/dataframe.py +1002 -201
  194. teradataml/dataframe/fastload.py +3 -3
  195. teradataml/dataframe/functions.py +867 -0
  196. teradataml/dataframe/row.py +160 -0
  197. teradataml/dataframe/setop.py +2 -2
  198. teradataml/dataframe/sql.py +840 -33
  199. teradataml/dataframe/window.py +1 -1
  200. teradataml/dbutils/dbutils.py +878 -34
  201. teradataml/dbutils/filemgr.py +48 -1
  202. teradataml/geospatial/geodataframe.py +1 -1
  203. teradataml/geospatial/geodataframecolumn.py +1 -1
  204. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  205. teradataml/lib/aed_0_1.dll +0 -0
  206. teradataml/opensource/__init__.py +1 -1
  207. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  208. teradataml/opensource/_lightgbm.py +950 -0
  209. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  210. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  211. teradataml/opensource/sklearn/__init__.py +0 -1
  212. teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
  213. teradataml/options/__init__.py +9 -23
  214. teradataml/options/configure.py +42 -4
  215. teradataml/options/display.py +2 -2
  216. teradataml/plot/axis.py +4 -4
  217. teradataml/scriptmgmt/UserEnv.py +13 -9
  218. teradataml/scriptmgmt/lls_utils.py +77 -23
  219. teradataml/store/__init__.py +13 -0
  220. teradataml/store/feature_store/__init__.py +0 -0
  221. teradataml/store/feature_store/constants.py +291 -0
  222. teradataml/store/feature_store/feature_store.py +2223 -0
  223. teradataml/store/feature_store/models.py +1505 -0
  224. teradataml/store/vector_store/__init__.py +1586 -0
  225. teradataml/table_operators/Script.py +2 -2
  226. teradataml/table_operators/TableOperator.py +106 -20
  227. teradataml/table_operators/query_generator.py +3 -0
  228. teradataml/table_operators/table_operator_query_generator.py +3 -1
  229. teradataml/table_operators/table_operator_util.py +102 -56
  230. teradataml/table_operators/templates/dataframe_register.template +69 -0
  231. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  232. teradataml/telemetry_utils/__init__.py +0 -0
  233. teradataml/telemetry_utils/queryband.py +52 -0
  234. teradataml/utils/dtypes.py +4 -2
  235. teradataml/utils/validators.py +34 -2
  236. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
  237. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
  238. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  239. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  240. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
@@ -0,0 +1,101 @@
+ patient_id,record_timestamp,glucose,blood_pressure,insulin,diabetes_pedigree_function,outcome
+ 0,2024-04-10 11:10:59,148,72,0,0.627,1
+ 1,2024-04-10 11:10:59,85,66,0,0.351,0
+ 2,2024-04-10 11:10:59,183,64,0,0.672,1
+ 3,2024-04-10 11:10:59,89,66,94,0.167,0
+ 4,2024-04-10 11:10:59,137,40,168,2.288,1
+ 5,2024-04-10 11:10:59,116,74,0,0.201,0
+ 6,2024-04-10 11:10:59,78,50,88,0.248,1
+ 7,2024-04-10 11:10:59,115,0,0,0.134,0
+ 8,2024-04-10 11:10:59,197,70,543,0.158,1
+ 9,2024-04-10 11:10:59,125,96,0,0.232,1
+ 10,2024-04-10 11:10:59,110,92,0,0.191,0
+ 11,2024-04-10 11:10:59,168,74,0,0.537,1
+ 12,2024-04-10 11:10:59,139,80,0,1.441,0
+ 13,2024-04-10 11:10:59,189,60,846,0.398,1
+ 14,2024-04-10 11:10:59,166,72,175,0.587,1
+ 15,2024-04-10 11:10:59,100,0,0,0.484,1
+ 16,2024-04-10 11:10:59,118,84,230,0.551,1
+ 17,2024-04-10 11:10:59,107,74,0,0.254,1
+ 18,2024-04-10 11:10:59,103,30,83,0.183,0
+ 19,2024-04-10 11:10:59,115,70,96,0.529,1
+ 20,2024-04-10 11:10:59,126,88,235,0.704,0
+ 21,2024-04-10 11:10:59,99,84,0,0.388,0
+ 22,2024-04-10 11:10:59,196,90,0,0.451,1
+ 23,2024-04-10 11:10:59,119,80,0,0.263,1
+ 24,2024-04-10 11:10:59,143,94,146,0.254,1
+ 25,2024-04-10 11:10:59,125,70,115,0.205,1
+ 26,2024-04-10 11:10:59,147,76,0,0.257,1
+ 27,2024-04-10 11:10:59,97,66,140,0.487,0
+ 28,2024-04-10 11:10:59,145,82,110,0.245,0
+ 29,2024-04-10 11:10:59,117,92,0,0.337,0
+ 30,2024-04-10 11:10:59,109,75,0,0.546,0
+ 31,2024-04-10 11:10:59,158,76,245,0.851,1
+ 32,2024-04-10 11:10:59,88,58,54,0.267,0
+ 33,2024-04-10 11:10:59,92,92,0,0.188,0
+ 34,2024-04-10 11:10:59,122,78,0,0.512,0
+ 35,2024-04-10 11:10:59,103,60,192,0.966,0
+ 36,2024-04-10 11:10:59,138,76,0,0.42,0
+ 37,2024-04-10 11:10:59,102,76,0,0.665,1
+ 38,2024-04-10 11:10:59,90,68,0,0.503,1
+ 39,2024-04-10 11:10:59,111,72,207,1.39,1
+ 40,2024-04-10 11:10:59,180,64,70,0.271,0
+ 41,2024-04-10 11:10:59,133,84,0,0.696,0
+ 42,2024-04-10 11:10:59,106,92,0,0.235,0
+ 43,2024-04-10 11:10:59,171,110,240,0.721,1
+ 44,2024-04-10 11:10:59,159,64,0,0.294,0
+ 45,2024-04-10 11:10:59,180,66,0,1.893,1
+ 46,2024-04-10 11:10:59,146,56,0,0.564,0
+ 47,2024-04-10 11:10:59,71,70,0,0.586,0
+ 48,2024-04-10 11:10:59,103,66,0,0.344,1
+ 49,2024-04-10 11:10:59,105,0,0,0.305,0
+ 50,2024-04-10 11:10:59,103,80,82,0.491,0
+ 51,2024-04-10 11:10:59,101,50,36,0.526,0
+ 52,2024-04-10 11:10:59,88,66,23,0.342,0
+ 53,2024-04-10 11:10:59,176,90,300,0.467,1
+ 54,2024-04-10 11:10:59,150,66,342,0.718,0
+ 55,2024-04-10 11:10:59,73,50,0,0.248,0
+ 56,2024-04-10 11:10:59,187,68,304,0.254,1
+ 57,2024-04-10 11:10:59,100,88,110,0.962,0
+ 58,2024-04-10 11:10:59,146,82,0,1.781,0
+ 59,2024-04-10 11:10:59,105,64,142,0.173,0
+ 60,2024-04-10 11:10:59,84,0,0,0.304,0
+ 61,2024-04-10 11:10:59,133,72,0,0.27,1
+ 62,2024-04-10 11:10:59,44,62,0,0.587,0
+ 63,2024-04-10 11:10:59,141,58,128,0.699,0
+ 64,2024-04-10 11:10:59,114,66,0,0.258,1
+ 65,2024-04-10 11:10:59,99,74,0,0.203,0
+ 66,2024-04-10 11:10:59,109,88,0,0.855,1
+ 67,2024-04-10 11:10:59,109,92,0,0.845,0
+ 68,2024-04-10 11:10:59,95,66,38,0.334,0
+ 69,2024-04-10 11:10:59,146,85,100,0.189,0
+ 70,2024-04-10 11:10:59,100,66,90,0.867,1
+ 71,2024-04-10 11:10:59,139,64,140,0.411,0
+ 72,2024-04-10 11:10:59,126,90,0,0.583,1
+ 73,2024-04-10 11:10:59,129,86,270,0.231,0
+ 74,2024-04-10 11:10:59,79,75,0,0.396,0
+ 75,2024-04-10 11:10:59,0,48,0,0.14,0
+ 76,2024-04-10 11:10:59,62,78,0,0.391,0
+ 77,2024-04-10 11:10:59,95,72,0,0.37,0
+ 78,2024-04-10 11:10:59,131,0,0,0.27,1
+ 79,2024-04-10 11:10:59,112,66,0,0.307,0
+ 80,2024-04-10 11:10:59,113,44,0,0.14,0
+ 81,2024-04-10 11:10:59,74,0,0,0.102,0
+ 82,2024-04-10 11:10:59,83,78,71,0.767,0
+ 83,2024-04-10 11:10:59,101,65,0,0.237,0
+ 84,2024-04-10 11:10:59,137,108,0,0.227,1
+ 85,2024-04-10 11:10:59,110,74,125,0.698,0
+ 86,2024-04-10 11:10:59,106,72,0,0.178,0
+ 87,2024-04-10 11:10:59,100,68,71,0.324,0
+ 88,2024-04-10 11:10:59,136,70,110,0.153,1
+ 89,2024-04-10 11:10:59,107,68,0,0.165,0
+ 90,2024-04-10 11:10:59,80,55,0,0.258,0
+ 91,2024-04-10 11:10:59,123,80,176,0.443,0
+ 92,2024-04-10 11:10:59,81,78,48,0.261,0
+ 93,2024-04-10 11:10:59,134,72,0,0.277,1
+ 94,2024-04-10 11:10:59,142,82,64,0.761,0
+ 95,2024-04-10 11:10:59,144,72,228,0.255,0
+ 96,2024-04-10 11:10:59,92,62,0,0.13,0
+ 97,2024-04-10 11:10:59,71,48,76,0.323,0
+ 98,2024-04-10 11:10:59,93,50,64,0.356,0
+ 99,2024-04-10 11:10:59,122,90,220,0.325,1
@@ -1,7 +1,7 @@
  {

  "token_table" :{
- "doc_id" : "varchar(30)",
+ "doc_id" : "INTEGER",
  "token" : "varchar(30)",
  "category" : "varchar(30)"
  }
@@ -17,5 +17,16 @@
  "doc_id" : "integer",
  "text_data" : "varchar(3000)",
  "category" : "varchar(20)"
+ },
+ "complaints_test_tokenized":{
+ "doc_id": "integer",
+ "doc_name": "varchar(30)",
+ "sn": "integer",
+ "token": "varchar(1024) CHARACTER SET UNICODE NOT CASESPECIFIC"
+ },
+ "complaints_tokens_model": {
+ "token": "varchar(100)",
+ "category": "varchar(100)",
+ "prob": "double precision"
  }
  }
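
The two tables added above pair with the new complaints_test_tokenized.csv and complaints_tokens_model.csv data files. A minimal usage sketch for pulling them into a session (the example-set name "naivebayestextclassifierpredict" is inferred from the JSON file name and may differ):

from teradataml import create_context, load_example_data, DataFrame

create_context(host="<host>", username="<user>", password="<password>")
# Create the example tables whose schemas are declared in the JSON above.
load_example_data("naivebayestextclassifierpredict",
                  ["complaints_test_tokenized", "complaints_tokens_model"])
model = DataFrame("complaints_tokens_model")
print(model.tdtypes)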
@@ -0,0 +1,101 @@
+ patient_id,record_timestamp,pregnancies,age,bmi,skin_thickness
+ 0,2024-04-10 11:10:59,6,50,33.6,35
+ 1,2024-04-10 11:10:59,1,31,26.6,29
+ 2,2024-04-10 11:10:59,8,32,23.3,0
+ 3,2024-04-10 11:10:59,1,21,28.1,23
+ 4,2024-04-10 11:10:59,0,33,43.1,35
+ 5,2024-04-10 11:10:59,5,30,25.6,0
+ 6,2024-04-10 11:10:59,3,26,31.0,32
+ 7,2024-04-10 11:10:59,10,29,35.3,0
+ 8,2024-04-10 11:10:59,2,53,30.5,45
+ 9,2024-04-10 11:10:59,8,54,0.0,0
+ 10,2024-04-10 11:10:59,4,30,37.6,0
+ 11,2024-04-10 11:10:59,10,34,38.0,0
+ 12,2024-04-10 11:10:59,10,57,27.1,0
+ 13,2024-04-10 11:10:59,1,59,30.1,23
+ 14,2024-04-10 11:10:59,5,51,25.8,19
+ 15,2024-04-10 11:10:59,7,32,30.0,0
+ 16,2024-04-10 11:10:59,0,31,45.8,47
+ 17,2024-04-10 11:10:59,7,31,29.6,0
+ 18,2024-04-10 11:10:59,1,33,43.3,38
+ 19,2024-04-10 11:10:59,1,32,34.6,30
+ 20,2024-04-10 11:10:59,3,27,39.3,41
+ 21,2024-04-10 11:10:59,8,50,35.4,0
+ 22,2024-04-10 11:10:59,7,41,39.8,0
+ 23,2024-04-10 11:10:59,9,29,29.0,35
+ 24,2024-04-10 11:10:59,11,51,36.6,33
+ 25,2024-04-10 11:10:59,10,41,31.1,26
+ 26,2024-04-10 11:10:59,7,43,39.4,0
+ 27,2024-04-10 11:10:59,1,22,23.2,15
+ 28,2024-04-10 11:10:59,13,57,22.2,19
+ 29,2024-04-10 11:10:59,5,38,34.1,0
+ 30,2024-04-10 11:10:59,5,60,36.0,26
+ 31,2024-04-10 11:10:59,3,28,31.6,36
+ 32,2024-04-10 11:10:59,3,22,24.8,11
+ 33,2024-04-10 11:10:59,6,28,19.9,0
+ 34,2024-04-10 11:10:59,10,45,27.6,31
+ 35,2024-04-10 11:10:59,4,33,24.0,33
+ 36,2024-04-10 11:10:59,11,35,33.2,0
+ 37,2024-04-10 11:10:59,9,46,32.9,37
+ 38,2024-04-10 11:10:59,2,27,38.2,42
+ 39,2024-04-10 11:10:59,4,56,37.1,47
+ 40,2024-04-10 11:10:59,3,26,34.0,25
+ 41,2024-04-10 11:10:59,7,37,40.2,0
+ 42,2024-04-10 11:10:59,7,48,22.7,18
+ 43,2024-04-10 11:10:59,9,54,45.4,24
+ 44,2024-04-10 11:10:59,7,40,27.4,0
+ 45,2024-04-10 11:10:59,0,25,42.0,39
+ 46,2024-04-10 11:10:59,1,29,29.7,0
+ 47,2024-04-10 11:10:59,2,22,28.0,27
+ 48,2024-04-10 11:10:59,7,31,39.1,32
+ 49,2024-04-10 11:10:59,7,24,0.0,0
+ 50,2024-04-10 11:10:59,1,22,19.4,11
+ 51,2024-04-10 11:10:59,1,26,24.2,15
+ 52,2024-04-10 11:10:59,5,30,24.4,21
+ 53,2024-04-10 11:10:59,8,58,33.7,34
+ 54,2024-04-10 11:10:59,7,42,34.7,42
+ 55,2024-04-10 11:10:59,1,21,23.0,10
+ 56,2024-04-10 11:10:59,7,41,37.7,39
+ 57,2024-04-10 11:10:59,0,31,46.8,60
+ 58,2024-04-10 11:10:59,0,44,40.5,0
+ 59,2024-04-10 11:10:59,0,22,41.5,41
+ 60,2024-04-10 11:10:59,2,21,0.0,0
+ 61,2024-04-10 11:10:59,8,39,32.9,0
+ 62,2024-04-10 11:10:59,5,36,25.0,0
+ 63,2024-04-10 11:10:59,2,24,25.4,34
+ 64,2024-04-10 11:10:59,7,42,32.8,0
+ 65,2024-04-10 11:10:59,5,32,29.0,27
+ 66,2024-04-10 11:10:59,0,38,32.5,30
+ 67,2024-04-10 11:10:59,2,54,42.7,0
+ 68,2024-04-10 11:10:59,1,25,19.6,13
+ 69,2024-04-10 11:10:59,4,27,28.9,27
+ 70,2024-04-10 11:10:59,2,28,32.9,20
+ 71,2024-04-10 11:10:59,5,26,28.6,35
+ 72,2024-04-10 11:10:59,13,42,43.4,0
+ 73,2024-04-10 11:10:59,4,23,35.1,20
+ 74,2024-04-10 11:10:59,1,22,32.0,30
+ 75,2024-04-10 11:10:59,1,22,24.7,20
+ 76,2024-04-10 11:10:59,7,41,32.6,0
+ 77,2024-04-10 11:10:59,5,27,37.7,33
+ 78,2024-04-10 11:10:59,0,26,43.2,0
+ 79,2024-04-10 11:10:59,2,24,25.0,22
+ 80,2024-04-10 11:10:59,3,22,22.4,13
+ 81,2024-04-10 11:10:59,2,22,0.0,0
+ 82,2024-04-10 11:10:59,7,36,29.3,26
+ 83,2024-04-10 11:10:59,0,22,24.6,28
+ 84,2024-04-10 11:10:59,5,37,48.8,0
+ 85,2024-04-10 11:10:59,2,27,32.4,29
+ 86,2024-04-10 11:10:59,13,45,36.6,54
+ 87,2024-04-10 11:10:59,2,26,38.5,25
+ 88,2024-04-10 11:10:59,15,43,37.1,32
+ 89,2024-04-10 11:10:59,1,24,26.5,19
+ 90,2024-04-10 11:10:59,1,21,19.1,0
+ 91,2024-04-10 11:10:59,4,34,32.0,15
+ 92,2024-04-10 11:10:59,7,42,46.7,40
+ 93,2024-04-10 11:10:59,4,60,23.8,0
+ 94,2024-04-10 11:10:59,2,21,24.7,18
+ 95,2024-04-10 11:10:59,6,40,33.9,27
+ 96,2024-04-10 11:10:59,2,24,31.6,28
+ 97,2024-04-10 11:10:59,1,22,20.4,18
+ 98,2024-04-10 11:10:59,6,23,28.7,30
+ 99,2024-04-10 11:10:59,1,31,49.7,51
Binary file
@@ -0,0 +1,14 @@
+ "TD_TIMECODE","id","val"
+ 2020-01-01 08:00:00,33,1.2e+02
+ 2020-02-01 08:00:00,33,1.95e+02
+ 2020-03-01 08:00:00,33,8e+02
+ 2020-04-01 08:00:00,33,6.6e+01
+ 2020-05-01 08:00:00,33,1.44e+02
+ 2020-06-01 08:00:00,33,2.1e+04
+ 2020-07-01 08:00:00,33,3.2e+02
+ 2020-08-01 08:00:00,33,1.44e+02
+ 2020-09-01 08:00:00,33,2.2e+02
+ 2020-10-01 08:00:00,33,2.1e+02
+ 2020-11-01 08:00:00,33,1.34e+02
+ 2020-12-01 08:00:00,33,1.84e+02
+ 2020-12-02 08:00:00,33,1.98e+02
@@ -5,5 +5,13 @@
  "expenditure": "integer",
  "income": "integer",
  "investment": "integer"
+ },
+ "finance_data4":{
+ "id": "integer",
+ "period": "integer",
+ "expenditure": "float",
+ "income": "float",
+ "investment": "float"
+
  }
  }
@@ -60,7 +60,7 @@ if not len(features):
      sys.exit(0)

  X = np.array(features)
- y = np.array(labels)
+ y = np.array(labels).ravel()

  clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
  clf.fit(X, y)
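
The .ravel() fix matters because scikit-learn estimators expect y as a 1-D array: labels collected row by row form a column vector, which SVC.fit() only accepts with a DataConversionWarning. A minimal sketch of the difference:

import numpy as np

labels = [[0], [1], [1]]         # one single-element list per input row
y_2d = np.array(labels)          # shape (3, 1): column vector, triggers the warning
y_1d = np.array(labels).ravel()  # shape (3,): the flat vector fit() expects
print(y_2d.shape, y_1d.shape)    # (3, 1) (3,)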
@@ -0,0 +1,157 @@
+ import pandas as pd
+ import pickle
+ import json
+ import numpy as np
+ import ast
+ import sys
+ from collections import OrderedDict
+ import base64
+ from importlib import import_module
+ import sys
+
+ DELIMITER = "\t"
+
+ def convert_to_type(val, typee):
+     if typee == 'int':
+         return int(val) if val != "" else np.nan
+     if typee == 'float':
+         if isinstance(val, str):
+             val = val.replace(' ', '')
+         return float(val) if val != "" else np.nan
+     if typee == 'bool':
+         return eval(val) if val != "" else None
+     return str(val) if val != "" else None
+
+ def splitter(strr, delim=",", convert_to="str"):
+     """
+     Split the string based on delimiter and convert to the type specified.
+     """
+     if strr == "None":
+         return []
+     return [convert_to_type(i, convert_to) for i in strr.split(delim)]
+
+
+ is_lake_system = eval(sys.argv[2])
+ model_file_prefix = sys.argv[1]
+ if not is_lake_system:
+     db = sys.argv[0].split("/")[1]
+
+ ### Start of data related arguments processing
+ data_partition_column_values = []
+ data_present = False
+ partition_join = ""
+ model = None
+
+ # Data related arguments information of indices and types.
+ data_args_indices_types = OrderedDict()
+
+ func_name = <func_name>
+ module_name = <module_name>
+ class_name = <class_name>
+ all_col_names = <all_col_names>
+ all_col_types = <types_of_data_cols>
+ data_partition_column_indices = <partition_cols_indices>
+ data_partition_column_types = [all_col_types[idx] for idx in data_partition_column_indices]
+
+ # Data related arguments values - prepare dictionary and populate data later.
+ data_args_values = {}
+
+ data_args_info_str = <data_args_info_str>
+
+ for data_arg in data_args_info_str.split("--"):
+     _arg_name, _indices, _types = data_arg.split("-")
+     _indices = splitter(_indices, convert_to="int")
+     types = [type_ for idx, type_ in enumerate(all_col_types) if idx in _indices]
+
+     data_args_indices_types[_arg_name] = {"indices": _indices, "types": types}
+     data_args_values[_arg_name] = []  # Keep empty for each data arg name; populate data later.
+
+ ### End of data related arguments processing
+
+
+ ### Start of other arguments processing
+ params = json.loads('<params>')
+ ### End of other arguments processing
+
+
+ # Read data - columns information is passed as command line argument and stored in
+ # data_args_indices_types dictionary.
+ while 1:
+     try:
+         line = input()
+         if line == '':  # Exit if user provides blank line
+             break
+         else:
+             data_present = True
+             values = line.split(DELIMITER)
+             if not data_partition_column_values:
+                 # Partition column values are the same for all rows. Hence, only read once.
+                 for i, val in enumerate(data_partition_column_indices):
+                     data_partition_column_values.append(
+                         convert_to_type(values[val], typee=data_partition_column_types[i])
+                     )
+
+                 # Prepare the corresponding model file name and extract model.
+                 partition_join = "_".join([str(x) for x in data_partition_column_values])
+                 # Replace '-' with '_' because partition column values can be negative.
+                 partition_join = partition_join.replace("-", "_")
+
+                 model_file_path = f"{model_file_prefix}_{partition_join}"\
+                     if is_lake_system else \
+                     f"./{db}/{model_file_prefix}_{partition_join}"
+
+             # Prepare data dictionary containing only arguments related to data.
+             for arg_name in data_args_values:
+                 data_indices = data_args_indices_types[arg_name]["indices"]
+                 types = data_args_indices_types[arg_name]["types"]
+                 cur_row = []
+                 for idx, data_idx in enumerate(data_indices):
+                     cur_row.append(convert_to_type(values[data_idx], types[idx]))
+                 data_args_values[arg_name].append(cur_row)
+     except EOFError:  # Exit if reached EOF or CTRL-D
+         break
+
+ if not data_present:
+     sys.exit(0)
+
+ for key, value in data_args_values.items():
+     col_names = [all_col_names[idx] for idx in data_args_indices_types[key]["indices"]]
+     data_args_values[key] = pd.DataFrame(value, columns=col_names)
+
+ # If the reference argument (a Dataset object) is present in params, then it contains
+ # the prefix of the file path which contains the reference Dataset object.
+ if "reference" in params.keys() and params["reference"] is not None:
+     reference_dataset_file_prefix = params["reference"]
+     reference_arg_file_path = f"{reference_dataset_file_prefix}_{partition_join}"\
+         if is_lake_system else \
+         f"./{db}/{reference_dataset_file_prefix}_{partition_join}"
+     with open(reference_arg_file_path, "rb") as f:
+         params["reference"] = pickle.load(f)
+
+ if not func_name:
+     # Create Dataset object if no function of the Dataset class is called.
+     lib = import_module(module_name)
+     class_instance = getattr(lib, class_name)
+     obj = class_instance(**{**data_args_values, **params})
+ else:
+     # If a function of the Dataset object is called, call it on the model object.
+     with open(model_file_path, "rb") as fp:
+         model = pickle.loads(fp.read())
+
+     if not model:
+         sys.exit("Model file is not installed in Vantage.")
+
+     obj = getattr(model, func_name)(**{**data_args_values, **params})
+
+ model_str = pickle.dumps(obj)
+
+ if is_lake_system:
+     model_file_path = f"/tmp/{model_file_prefix}_{partition_join}.pickle"
+
+ # Save Dataset object to binary file.
+ with open(model_file_path, "wb") as f:
+     f.write(model_str)
+
+ model_data = model_file_path if is_lake_system else base64.b64encode(model_str)
+
+ print(*(data_partition_column_values + [model_data]), sep=DELIMITER)
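
Tokens such as <func_name> and <params> make dataset.template invalid Python until they are substituted. A sketch of how such a rendering step could look; the substitution map below is illustrative only, not teradataml's actual mechanism:

with open("dataset.template") as f:
    template = f.read()

# Hypothetical values, quoted so the rendered file is valid Python.
substitutions = {
    "<func_name>": '""',  # empty string -> construct a new lightgbm Dataset
    "<module_name>": '"lightgbm"',
    "<class_name>": '"Dataset"',
    "<all_col_names>": '["f0", "f1", "label"]',
    "<types_of_data_cols>": '["float", "float", "int"]',
    "<partition_cols_indices>": "[2]",
    "<data_args_info_str>": '"data-0,1-float,float--label-2-int"',
    "<params>": "{}",  # lands inside json.loads('...') in the template
}
for token, value in substitutions.items():
    template = template.replace(token, value)

with open("rendered_dataset.py", "w") as f:
    f.write(template)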
@@ -0,0 +1,247 @@
+ import base64
+ import io
+ import math
+ import os
+ import pickle
+ import sys
+
+ import numpy as np
+
+ DELIMITER = '\t'
+
+ def get_values_list(values, types):
+     ret_vals = []
+     for i, val in enumerate(values):
+         ret_vals.append(convert_to_type(val, types[i]))
+     return ret_vals
+
+ def convert_to_type(val, typee):
+     if typee == 'int':
+         return int(val) if val != "" else np.nan
+     if typee == 'float':
+         if isinstance(val, str):
+             val = val.replace(' ', '')
+         return float(val) if val != "" else np.nan
+     if typee == 'bool':
+         return eval(val) if val != "" else None
+     return str(val) if val != "" else None
+
+ def splitter(strr, delim=",", convert_to="str"):
+     """
+     Split the string based on delimiter and convert to the type specified.
+     """
+     if strr == "None":
+         return []
+     return [convert_to_type(i, convert_to) for i in strr.split(delim)]
+
+ def should_convert(t_val, py_type):
+     """
+     Function to check type of value and whether value is nan or infinity.
+     """
+     return not isinstance(t_val, eval(py_type)) and not math.isinf(t_val) and not math.isnan(t_val)
+
+ def convert_value(t_val, py_type):
+     """
+     Function to convert value to specified python type.
+     """
+     return convert_to_type(t_val, py_type) if should_convert(t_val, py_type) else t_val
+
+ # Process output returned by sklearn function.
+ def get_output_data(trans_values, func_name, n_c_labels, n_out_columns):
+     # Converting sparse matrix to dense array as sparse matrices are NOT
+     # supported in Vantage.
+     # module_name = model_obj.__module__.split("._")[0]
+
+     # Convert the transformed values into the corresponding return column's
+     # Python type.
+     if (return_columns_python_types is None or not isinstance(trans_values, np.ndarray)):
+         trans_values_list = trans_values
+     else:
+         # Conversion.
+         trans_values_list = []
+         for trans_value in trans_values.tolist():
+             if not isinstance(trans_value, list):
+                 trans_value = [trans_value]
+
+             converted_list = []
+             if len(return_columns_python_types) == len(trans_value):
+                 for t_val, py_type in zip(trans_value, return_columns_python_types):
+                     converted_list.append(convert_value(t_val, py_type))
+             # transform() has only 1 Python return type, but it can actually return more than 1 column.
+             else:
+                 for t_val in trans_value:
+                     converted_list.append(convert_value(t_val, return_columns_python_types[0]))
+
+             trans_values_list.append(converted_list)
+
+     if type(trans_values_list).__name__ in ["csr_matrix", "csc_matrix"]:
+         trans_values_list = trans_values_list.toarray()
+
+     if isinstance(trans_values_list[0], np.ndarray) \
+             or isinstance(trans_values_list[0], list) \
+             or isinstance(trans_values_list[0], tuple):
+         # Here, the value returned by sklearn function is list type.
+         opt_list = list(trans_values_list[0])
+
+         if len(opt_list) < n_out_columns:
+             # If the output list is shorter than the required number of columns, append
+             # empty strings to the list.
+             opt_list += [""] * (n_out_columns - len(opt_list))
+
+         return opt_list
+
+     # Only one element is returned by the function.
+     return [trans_values_list[0]]
+
+ # Arguments to the Script
+ if len(sys.argv) != 10:
+     # 10 command line arguments should be passed to this file:
+     # 1. file to be run
+     # 2. function name (e.g. predict, fit etc.)
+     # 3. no of feature columns
+     # 4. no of class labels
+     # 5. comma separated indices of partition columns
+     # 6. comma separated types of all the data columns
+     # 7. model file prefix to generate model file using partition columns
+     # 8. number of columns to be returned by sklearn's transform function
+     # 9. flag to check the system type: True means Lake, Enterprise otherwise
+     # 10. Python types of returned/transformed columns
+     sys.exit("10 arguments should be passed to this file - file to be run, function name, "\
+              "no of feature columns, no of class labels, comma separated indices of partition "\
+              "columns, comma separated types of all columns, model file prefix to generate model "\
+              "file using partition columns, number of columns to be returned by sklearn's "\
+              "transform function, flag to check lake or enterprise and Python types of "\
+              "returned/transformed columns.")
+
+ is_lake_system = eval(sys.argv[8])
+ if not is_lake_system:
+     db = sys.argv[0].split("/")[1]
+ func_name = sys.argv[1]
+ n_f_cols = int(sys.argv[2])
+ n_c_labels = int(sys.argv[3])
+ data_column_types = splitter(sys.argv[5], delim="--")
+ data_partition_column_indices = splitter(sys.argv[4], convert_to="int")  # indices are integers.
+ model_file_prefix = sys.argv[6]
+ # sys.argv[9] will contain a string of python datatypes with '--'
+ # separator OR a single datatype OR None in string format.
+ ret_col_argv = sys.argv[9]
+ if ret_col_argv == "None":
+     return_columns_python_types = eval(ret_col_argv)
+ else:
+     return_columns_python_types = splitter(ret_col_argv, delim="--")
+
+ no_of_output_columns = int(sys.argv[7])
+
+ data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
+
+ model = None
+ data_partition_column_values = []
+
+ all_x_rows = []
+ all_y_rows = []
+
+ # Data Format:
+ # feature1, feature2, ..., featuren, label1, label2, ... labelk, data_partition_column1, ...,
+ # data_partition_columnn.
+ # label is optional (it is present when label_exists is not "None")
+
+ model_name = ""
+ while 1:
+     try:
+         line = input()
+         if line == '':  # Exit if user provides blank line
+             break
+         else:
+             values = line.split(DELIMITER)
+             values = get_values_list(values, data_column_types)
+             if not data_partition_column_values:
+                 # Partition column values are the same for all rows. Hence, only read once.
+                 for i, val in enumerate(data_partition_column_indices):
+                     data_partition_column_values.append(
+                         convert_to_type(values[val], typee=data_partition_column_types[i])
+                     )
+
+                 # Prepare the corresponding model file name and extract model.
+                 partition_join = "_".join([str(x) for x in data_partition_column_values])
+                 # Replace '-' with '_' because partition column values can be negative.
+                 partition_join = partition_join.replace("-", "_")
+
+                 model_file_path = f"{model_file_prefix}_{partition_join}" \
+                     if is_lake_system else \
+                     f"./{db}/{model_file_prefix}_{partition_join}"
+
+                 with open(model_file_path, "rb") as fp:
+                     model = pickle.loads(fp.read())
+
+                 if not model:
+                     sys.exit("Model file is not installed in Vantage.")
+
+             f_ = values[:n_f_cols]
+             f__ = np.array([f_])
+
+             if n_c_labels > 0:
+                 l_ = values[n_f_cols:n_f_cols+n_c_labels]
+                 l__ = np.array([l_])
+
+             if func_name == "refit":
+                 # refit() needs all data at once. Hence, read all data at once and call refit().
+                 all_x_rows.append(f_)
+                 all_y_rows.append(l_)
+                 continue
+
+             # Because the 'predict' function does not accept 'y' as input, we need to handle it separately.
+             if n_c_labels > 0 and func_name not in ["predict"]:
+                 # Labels are present in last column.
+                 trans_values = getattr(model, func_name)(f__, l__, **params)
+             else:
+                 # If class labels do not exist in data, don't read labels, read just features.
+                 trans_values = getattr(model, func_name)(f__, **params)
+
+             result_list = f_
+             if n_c_labels > 0 and func_name in ["predict", "decision_function"]:
+                 result_list += l_
+             result_list += get_output_data(trans_values=trans_values, func_name=func_name,
+                                            n_c_labels=n_c_labels, n_out_columns=no_of_output_columns)
+
+             for i, val in enumerate(result_list):
+                 if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))):
+                     result_list[i] = ""
+                 elif val == False:
+                     result_list[i] = 0
+                 elif val == True:
+                     result_list[i] = 1
+
+             print(*(data_partition_column_values + result_list), sep=DELIMITER)
+
+     except EOFError:  # Exit if reached EOF or CTRL-D
+         break
+
+
+ if func_name == "refit":
+     result = ""
+     stdout = None
+     try:
+         stdout = sys.stdout
+         new_stdout = io.StringIO()
+         sys.stdout = new_stdout
+         trained_model = getattr(model, func_name)(all_x_rows, all_y_rows, **params)
+         result = new_stdout.getvalue()
+     except Exception:
+         raise
+     finally:
+         sys.stdout = stdout
+
+     model_str = pickle.dumps(trained_model)
+
+
+     if is_lake_system:
+         model_file_path = f"/tmp/{model_file_prefix}_{partition_join}.pickle"
+
+     # Write to trained model file in Vantage.
+     with open(model_file_path, "wb") as fp:
+         fp.write(model_str)
+
+     model_data = model_file_path if is_lake_system else base64.b64encode(model_str)
+     console_output = base64.b64encode(result.encode())
+
+     print(*(data_partition_column_values + [model_data, console_output]), sep="..")
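
In the refit branch the script emits the retrained model and the captured console output base64-encoded, joined by "..". A sketch of recovering both on the consumer side (illustrative; not teradataml's actual retrieval code):

import base64
import pickle

def decode_refit_row(model_field: bytes, console_field: bytes):
    # Enterprise path: model_field is the base64-encoded pickle of the model.
    model = pickle.loads(base64.b64decode(model_field))
    console_output = base64.b64decode(console_field).decode()
    return model, console_output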