teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of teradataml might be problematic.
Files changed (88)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +196 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +79 -4
  6. teradataml/analytics/json_parser/metadata.py +12 -3
  7. teradataml/analytics/json_parser/utils.py +7 -2
  8. teradataml/analytics/sqle/__init__.py +1 -0
  9. teradataml/analytics/table_operator/__init__.py +1 -1
  10. teradataml/analytics/uaf/__init__.py +1 -1
  11. teradataml/analytics/utils.py +4 -0
  12. teradataml/automl/data_preparation.py +3 -2
  13. teradataml/automl/feature_engineering.py +15 -7
  14. teradataml/automl/model_training.py +39 -33
  15. teradataml/common/__init__.py +2 -1
  16. teradataml/common/constants.py +35 -0
  17. teradataml/common/garbagecollector.py +2 -1
  18. teradataml/common/messagecodes.py +8 -2
  19. teradataml/common/messages.py +3 -1
  20. teradataml/common/sqlbundle.py +25 -3
  21. teradataml/common/utils.py +134 -9
  22. teradataml/context/context.py +20 -10
  23. teradataml/data/SQL_Fundamentals.pdf +0 -0
  24. teradataml/data/dataframe_example.json +18 -2
  25. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  26. teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
  27. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  28. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  29. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  30. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  31. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  32. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  33. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  34. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  35. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  36. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  37. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  38. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  39. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  40. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  41. teradataml/data/medical_readings.csv +101 -0
  42. teradataml/data/patient_profile.csv +101 -0
  43. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  44. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  45. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  46. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  47. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  48. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  49. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  50. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  51. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  52. teradataml/data/target_udt_data.csv +8 -0
  53. teradataml/data/templates/open_source_ml.json +3 -2
  54. teradataml/data/vectordistance_example.json +4 -0
  55. teradataml/dataframe/dataframe.py +543 -175
  56. teradataml/dataframe/functions.py +553 -25
  57. teradataml/dataframe/sql.py +184 -15
  58. teradataml/dbutils/dbutils.py +556 -18
  59. teradataml/dbutils/filemgr.py +48 -1
  60. teradataml/lib/aed_0_1.dll +0 -0
  61. teradataml/opensource/__init__.py +1 -1
  62. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  63. teradataml/opensource/_lightgbm.py +950 -0
  64. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  65. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  66. teradataml/opensource/sklearn/__init__.py +0 -1
  67. teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
  68. teradataml/options/__init__.py +7 -23
  69. teradataml/options/configure.py +29 -3
  70. teradataml/scriptmgmt/UserEnv.py +3 -3
  71. teradataml/scriptmgmt/lls_utils.py +74 -21
  72. teradataml/store/__init__.py +13 -0
  73. teradataml/store/feature_store/__init__.py +0 -0
  74. teradataml/store/feature_store/constants.py +291 -0
  75. teradataml/store/feature_store/feature_store.py +2223 -0
  76. teradataml/store/feature_store/models.py +1505 -0
  77. teradataml/store/vector_store/__init__.py +1586 -0
  78. teradataml/table_operators/query_generator.py +3 -0
  79. teradataml/table_operators/table_operator_query_generator.py +3 -1
  80. teradataml/table_operators/table_operator_util.py +37 -38
  81. teradataml/table_operators/templates/dataframe_register.template +69 -0
  82. teradataml/utils/dtypes.py +4 -2
  83. teradataml/utils/validators.py +33 -1
  84. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
  85. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
  86. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  87. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  88. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
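
The headline additions in this release are the new teradataml/store package (a feature store under store/feature_store and a vector store under store/vector_store) and LightGBM support under teradataml/opensource alongside the existing scikit-learn integration. As a hedged sketch of what the new layout suggests, feature-store classes would be imported roughly as follows; the class names are inferred from the new module files in this diff, not verified against the released API:

# Hedged sketch only: FeatureStore/FeatureGroup are assumed package-level
# re-exports of the new teradataml/store/feature_store modules in this diff.
from teradataml import create_context
from teradataml import FeatureStore, FeatureGroup  # assumption, not verified

create_context(host="<host>", username="<user>", password="<password>")

# Hypothetical signature: a repository/schema name that backs the store.
fs = FeatureStore(repo="demo_repo")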
teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json
@@ -11,7 +11,7 @@
  "MaxInputFiles": 1,
  "Input": [
  {
- "Type": "SERIES",
+ "Type": ["SERIES", "ART"],
  "Description": [
  "This section outlines the syntax associated with invoking the TD_DICKEY_FULLER function. The function takes in a single logical-runtime series as an input. The series is only permitted to have univariate elements. "
  ],
@@ -47,19 +47,15 @@
  "PermittedValues": [
  "NONE",
  "DRIFT",
- "TREND",
- "DRIFTNTREND",
- "FORMULA"
+ "SQUARED",
+ "DRIFTNTREND"
  ],
  "Description": [
  "An enumerated type with values of: NONE, DRIFT, TREND, DRIFTNTREND, or FORMULA, which influences the type of regression that will be run for the test.",
- "NONE - Random Walk",
- "DRIFT - Random Walk with Drift",
- "TREND - Random Walk with Linear Trend",
- "DRIFTNTREND - Random Walk with Drift and Trend",
- "DRIFTNTREND & MAXLAGS - Random Walk with Drift and Trend and auxiliary lags",
- "FORMULA & MAXLAGS - Random Walk with roll-your-own on Drift and Trend; plus auxiliary lags"
- ]
+ "NONE: Random walk",
+ "DRIFT: Random walk with drift",
+ "DRIFTNTREND: Random walk with drift and trend",
+ "SQUARED: Random walk with drift, trend, and quadratic trend."]
  },
  {
  "Name": "MAXLAGS",
@@ -67,19 +63,14 @@
  "Optional": true,
  "LowerBound": 0,
  "LowerBoundType": "INCLUSIVE",
+ "UpperBound": 100,
+ "UpperBoundType": "INCLUSIVE",
+ "DefaultValue": 0,
  "AllowNaN": false,
  "Description": [
  "The presence of the MAXLAGS parameter means the data scientist wishes to run the augmented Dickey-Fuller test. This is the maximum number of lags that will be used to form the regression equation. "
  ],
  "LangName": "max_lags"
- },
- {
- "Name": "DRIFT_TREND_FORMULA",
- "Type": "<td_formula>",
- "Optional": true,
- "Description": [
- "A Teradata formula string that stores the formula used to represent the drift and trend portions of the regression. The formula is only valid when used in conjunction with ALGORITHM (FORMULA). It uses the Teradata formula syntax and is expected to be of the form: b_1 + b_2X_1 + b_3X_1^2 + … etc; which the UAF function interprets as: b_1 + b_2t + b_3t^2 + … etc "
- ]
  }
  ],
  "InputFmt": false,
teradataml/data/jsons/uaf/17.20/TD_SAX.json
@@ -47,6 +47,7 @@
  "GLOBAL",
  "SLIDING"
  ],
+ "DefaultValue": "GLOBAL",
  "Description": [
  "[Optional] If not specified, the GLOBAL type is the default.",
  "Specifies the window type used in the SAX transformation."
@@ -56,11 +57,12 @@
  "Name": "OUTPUT_TYPE",
  "Type": "string",
  "Optional": true,
- "PermittedValues": [
+ "PermittedValues": [
  "STRING",
  "BITMAP",
  "O_CHARS"
  ],
+ "DefaultValue": "STRING",
  "Description": [
  "[Optional] If not specified, the STRING type is the default.",
  "The output format of the result can be string, char or bitmap."
teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json
@@ -137,12 +137,14 @@
  {
  "Name" : "WINDOW",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
  {
  "Name" : "SIZE",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -231,6 +233,7 @@
  {
  "Name" : "EXPONENTIAL",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -256,6 +259,7 @@
  {
  "Name" : "GAUSSIAN",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -272,6 +276,7 @@
  {
  "Name" : "GENERAL_COSINE",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -289,6 +294,7 @@
  {
  "Name" : "GENERAL_GAUSSIAN",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -298,7 +304,7 @@
  "Optional" : true,
  "Description": [
  "The gaussian shape, and the value is 1. Required parameter when WINDOW(TYPE(GENERAL_GUASSIAN) is specified."
- ]
+ ]
  },
  {
  "Name" : "SIGMA",
@@ -306,13 +312,14 @@
  "Optional" : true,
  "Description": [
  "The standard deviation value. Required parameter when WINDOW(TYPE(GENERAL_GUASSIAN) is specified."
- ]
+ ]
  }
  ]
  },
  {
  "Name" : "GENERAL_HAMMING",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -322,13 +329,14 @@
  "Optional" : true,
  "Description": [
  "The value of the window coefficient. Required parameter when WINDOW( TYPE( GENERAL_HAMMING ) is specified."
- ]
+ ]
  }
  ]
  },
  {
  "Name" : "KAISER",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -338,12 +346,13 @@
  "Optional" : true,
  "Description": [
  "The value for the shape between the main lobe width and side lobe level. Required parameter when WINDOW(TYPE(KAISER)) is specified."
- ]
+ ]
  }
  ]
  },
  {
  "Name" : "TAYLOR",
+ "Optional" : true,
  "Type" : "record",
  "Description": "",
  "NestedParams" :
@@ -381,6 +390,7 @@
  {
  "Name" : "TUKEY",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -397,4 +407,4 @@
  ]
  }
  ]
- }
+ }
teradataml/data/medical_readings.csv
@@ -0,0 +1,101 @@
+ patient_id,record_timestamp,glucose,blood_pressure,insulin,diabetes_pedigree_function,outcome
+ 0,2024-04-10 11:10:59,148,72,0,0.627,1
+ 1,2024-04-10 11:10:59,85,66,0,0.351,0
+ 2,2024-04-10 11:10:59,183,64,0,0.672,1
+ 3,2024-04-10 11:10:59,89,66,94,0.167,0
+ 4,2024-04-10 11:10:59,137,40,168,2.288,1
+ 5,2024-04-10 11:10:59,116,74,0,0.201,0
+ 6,2024-04-10 11:10:59,78,50,88,0.248,1
+ 7,2024-04-10 11:10:59,115,0,0,0.134,0
+ 8,2024-04-10 11:10:59,197,70,543,0.158,1
+ 9,2024-04-10 11:10:59,125,96,0,0.232,1
+ 10,2024-04-10 11:10:59,110,92,0,0.191,0
+ 11,2024-04-10 11:10:59,168,74,0,0.537,1
+ 12,2024-04-10 11:10:59,139,80,0,1.441,0
+ 13,2024-04-10 11:10:59,189,60,846,0.398,1
+ 14,2024-04-10 11:10:59,166,72,175,0.587,1
+ 15,2024-04-10 11:10:59,100,0,0,0.484,1
+ 16,2024-04-10 11:10:59,118,84,230,0.551,1
+ 17,2024-04-10 11:10:59,107,74,0,0.254,1
+ 18,2024-04-10 11:10:59,103,30,83,0.183,0
+ 19,2024-04-10 11:10:59,115,70,96,0.529,1
+ 20,2024-04-10 11:10:59,126,88,235,0.704,0
+ 21,2024-04-10 11:10:59,99,84,0,0.388,0
+ 22,2024-04-10 11:10:59,196,90,0,0.451,1
+ 23,2024-04-10 11:10:59,119,80,0,0.263,1
+ 24,2024-04-10 11:10:59,143,94,146,0.254,1
+ 25,2024-04-10 11:10:59,125,70,115,0.205,1
+ 26,2024-04-10 11:10:59,147,76,0,0.257,1
+ 27,2024-04-10 11:10:59,97,66,140,0.487,0
+ 28,2024-04-10 11:10:59,145,82,110,0.245,0
+ 29,2024-04-10 11:10:59,117,92,0,0.337,0
+ 30,2024-04-10 11:10:59,109,75,0,0.546,0
+ 31,2024-04-10 11:10:59,158,76,245,0.851,1
+ 32,2024-04-10 11:10:59,88,58,54,0.267,0
+ 33,2024-04-10 11:10:59,92,92,0,0.188,0
+ 34,2024-04-10 11:10:59,122,78,0,0.512,0
+ 35,2024-04-10 11:10:59,103,60,192,0.966,0
+ 36,2024-04-10 11:10:59,138,76,0,0.42,0
+ 37,2024-04-10 11:10:59,102,76,0,0.665,1
+ 38,2024-04-10 11:10:59,90,68,0,0.503,1
+ 39,2024-04-10 11:10:59,111,72,207,1.39,1
+ 40,2024-04-10 11:10:59,180,64,70,0.271,0
+ 41,2024-04-10 11:10:59,133,84,0,0.696,0
+ 42,2024-04-10 11:10:59,106,92,0,0.235,0
+ 43,2024-04-10 11:10:59,171,110,240,0.721,1
+ 44,2024-04-10 11:10:59,159,64,0,0.294,0
+ 45,2024-04-10 11:10:59,180,66,0,1.893,1
+ 46,2024-04-10 11:10:59,146,56,0,0.564,0
+ 47,2024-04-10 11:10:59,71,70,0,0.586,0
+ 48,2024-04-10 11:10:59,103,66,0,0.344,1
+ 49,2024-04-10 11:10:59,105,0,0,0.305,0
+ 50,2024-04-10 11:10:59,103,80,82,0.491,0
+ 51,2024-04-10 11:10:59,101,50,36,0.526,0
+ 52,2024-04-10 11:10:59,88,66,23,0.342,0
+ 53,2024-04-10 11:10:59,176,90,300,0.467,1
+ 54,2024-04-10 11:10:59,150,66,342,0.718,0
+ 55,2024-04-10 11:10:59,73,50,0,0.248,0
+ 56,2024-04-10 11:10:59,187,68,304,0.254,1
+ 57,2024-04-10 11:10:59,100,88,110,0.962,0
+ 58,2024-04-10 11:10:59,146,82,0,1.781,0
+ 59,2024-04-10 11:10:59,105,64,142,0.173,0
+ 60,2024-04-10 11:10:59,84,0,0,0.304,0
+ 61,2024-04-10 11:10:59,133,72,0,0.27,1
+ 62,2024-04-10 11:10:59,44,62,0,0.587,0
+ 63,2024-04-10 11:10:59,141,58,128,0.699,0
+ 64,2024-04-10 11:10:59,114,66,0,0.258,1
+ 65,2024-04-10 11:10:59,99,74,0,0.203,0
+ 66,2024-04-10 11:10:59,109,88,0,0.855,1
+ 67,2024-04-10 11:10:59,109,92,0,0.845,0
+ 68,2024-04-10 11:10:59,95,66,38,0.334,0
+ 69,2024-04-10 11:10:59,146,85,100,0.189,0
+ 70,2024-04-10 11:10:59,100,66,90,0.867,1
+ 71,2024-04-10 11:10:59,139,64,140,0.411,0
+ 72,2024-04-10 11:10:59,126,90,0,0.583,1
+ 73,2024-04-10 11:10:59,129,86,270,0.231,0
+ 74,2024-04-10 11:10:59,79,75,0,0.396,0
+ 75,2024-04-10 11:10:59,0,48,0,0.14,0
+ 76,2024-04-10 11:10:59,62,78,0,0.391,0
+ 77,2024-04-10 11:10:59,95,72,0,0.37,0
+ 78,2024-04-10 11:10:59,131,0,0,0.27,1
+ 79,2024-04-10 11:10:59,112,66,0,0.307,0
+ 80,2024-04-10 11:10:59,113,44,0,0.14,0
+ 81,2024-04-10 11:10:59,74,0,0,0.102,0
+ 82,2024-04-10 11:10:59,83,78,71,0.767,0
+ 83,2024-04-10 11:10:59,101,65,0,0.237,0
+ 84,2024-04-10 11:10:59,137,108,0,0.227,1
+ 85,2024-04-10 11:10:59,110,74,125,0.698,0
+ 86,2024-04-10 11:10:59,106,72,0,0.178,0
+ 87,2024-04-10 11:10:59,100,68,71,0.324,0
+ 88,2024-04-10 11:10:59,136,70,110,0.153,1
+ 89,2024-04-10 11:10:59,107,68,0,0.165,0
+ 90,2024-04-10 11:10:59,80,55,0,0.258,0
+ 91,2024-04-10 11:10:59,123,80,176,0.443,0
+ 92,2024-04-10 11:10:59,81,78,48,0.261,0
+ 93,2024-04-10 11:10:59,134,72,0,0.277,1
+ 94,2024-04-10 11:10:59,142,82,64,0.761,0
+ 95,2024-04-10 11:10:59,144,72,228,0.255,0
+ 96,2024-04-10 11:10:59,92,62,0,0.13,0
+ 97,2024-04-10 11:10:59,71,48,76,0.323,0
+ 98,2024-04-10 11:10:59,93,50,64,0.356,0
+ 99,2024-04-10 11:10:59,122,90,220,0.325,1
teradataml/data/patient_profile.csv
@@ -0,0 +1,101 @@
+ patient_id,record_timestamp,pregnancies,age,bmi,skin_thickness
+ 0,2024-04-10 11:10:59,6,50,33.6,35
+ 1,2024-04-10 11:10:59,1,31,26.6,29
+ 2,2024-04-10 11:10:59,8,32,23.3,0
+ 3,2024-04-10 11:10:59,1,21,28.1,23
+ 4,2024-04-10 11:10:59,0,33,43.1,35
+ 5,2024-04-10 11:10:59,5,30,25.6,0
+ 6,2024-04-10 11:10:59,3,26,31.0,32
+ 7,2024-04-10 11:10:59,10,29,35.3,0
+ 8,2024-04-10 11:10:59,2,53,30.5,45
+ 9,2024-04-10 11:10:59,8,54,0.0,0
+ 10,2024-04-10 11:10:59,4,30,37.6,0
+ 11,2024-04-10 11:10:59,10,34,38.0,0
+ 12,2024-04-10 11:10:59,10,57,27.1,0
+ 13,2024-04-10 11:10:59,1,59,30.1,23
+ 14,2024-04-10 11:10:59,5,51,25.8,19
+ 15,2024-04-10 11:10:59,7,32,30.0,0
+ 16,2024-04-10 11:10:59,0,31,45.8,47
+ 17,2024-04-10 11:10:59,7,31,29.6,0
+ 18,2024-04-10 11:10:59,1,33,43.3,38
+ 19,2024-04-10 11:10:59,1,32,34.6,30
+ 20,2024-04-10 11:10:59,3,27,39.3,41
+ 21,2024-04-10 11:10:59,8,50,35.4,0
+ 22,2024-04-10 11:10:59,7,41,39.8,0
+ 23,2024-04-10 11:10:59,9,29,29.0,35
+ 24,2024-04-10 11:10:59,11,51,36.6,33
+ 25,2024-04-10 11:10:59,10,41,31.1,26
+ 26,2024-04-10 11:10:59,7,43,39.4,0
+ 27,2024-04-10 11:10:59,1,22,23.2,15
+ 28,2024-04-10 11:10:59,13,57,22.2,19
+ 29,2024-04-10 11:10:59,5,38,34.1,0
+ 30,2024-04-10 11:10:59,5,60,36.0,26
+ 31,2024-04-10 11:10:59,3,28,31.6,36
+ 32,2024-04-10 11:10:59,3,22,24.8,11
+ 33,2024-04-10 11:10:59,6,28,19.9,0
+ 34,2024-04-10 11:10:59,10,45,27.6,31
+ 35,2024-04-10 11:10:59,4,33,24.0,33
+ 36,2024-04-10 11:10:59,11,35,33.2,0
+ 37,2024-04-10 11:10:59,9,46,32.9,37
+ 38,2024-04-10 11:10:59,2,27,38.2,42
+ 39,2024-04-10 11:10:59,4,56,37.1,47
+ 40,2024-04-10 11:10:59,3,26,34.0,25
+ 41,2024-04-10 11:10:59,7,37,40.2,0
+ 42,2024-04-10 11:10:59,7,48,22.7,18
+ 43,2024-04-10 11:10:59,9,54,45.4,24
+ 44,2024-04-10 11:10:59,7,40,27.4,0
+ 45,2024-04-10 11:10:59,0,25,42.0,39
+ 46,2024-04-10 11:10:59,1,29,29.7,0
+ 47,2024-04-10 11:10:59,2,22,28.0,27
+ 48,2024-04-10 11:10:59,7,31,39.1,32
+ 49,2024-04-10 11:10:59,7,24,0.0,0
+ 50,2024-04-10 11:10:59,1,22,19.4,11
+ 51,2024-04-10 11:10:59,1,26,24.2,15
+ 52,2024-04-10 11:10:59,5,30,24.4,21
+ 53,2024-04-10 11:10:59,8,58,33.7,34
+ 54,2024-04-10 11:10:59,7,42,34.7,42
+ 55,2024-04-10 11:10:59,1,21,23.0,10
+ 56,2024-04-10 11:10:59,7,41,37.7,39
+ 57,2024-04-10 11:10:59,0,31,46.8,60
+ 58,2024-04-10 11:10:59,0,44,40.5,0
+ 59,2024-04-10 11:10:59,0,22,41.5,41
+ 60,2024-04-10 11:10:59,2,21,0.0,0
+ 61,2024-04-10 11:10:59,8,39,32.9,0
+ 62,2024-04-10 11:10:59,5,36,25.0,0
+ 63,2024-04-10 11:10:59,2,24,25.4,34
+ 64,2024-04-10 11:10:59,7,42,32.8,0
+ 65,2024-04-10 11:10:59,5,32,29.0,27
+ 66,2024-04-10 11:10:59,0,38,32.5,30
+ 67,2024-04-10 11:10:59,2,54,42.7,0
+ 68,2024-04-10 11:10:59,1,25,19.6,13
+ 69,2024-04-10 11:10:59,4,27,28.9,27
+ 70,2024-04-10 11:10:59,2,28,32.9,20
+ 71,2024-04-10 11:10:59,5,26,28.6,35
+ 72,2024-04-10 11:10:59,13,42,43.4,0
+ 73,2024-04-10 11:10:59,4,23,35.1,20
+ 74,2024-04-10 11:10:59,1,22,32.0,30
+ 75,2024-04-10 11:10:59,1,22,24.7,20
+ 76,2024-04-10 11:10:59,7,41,32.6,0
+ 77,2024-04-10 11:10:59,5,27,37.7,33
+ 78,2024-04-10 11:10:59,0,26,43.2,0
+ 79,2024-04-10 11:10:59,2,24,25.0,22
+ 80,2024-04-10 11:10:59,3,22,22.4,13
+ 81,2024-04-10 11:10:59,2,22,0.0,0
+ 82,2024-04-10 11:10:59,7,36,29.3,26
+ 83,2024-04-10 11:10:59,0,22,24.6,28
+ 84,2024-04-10 11:10:59,5,37,48.8,0
+ 85,2024-04-10 11:10:59,2,27,32.4,29
+ 86,2024-04-10 11:10:59,13,45,36.6,54
+ 87,2024-04-10 11:10:59,2,26,38.5,25
+ 88,2024-04-10 11:10:59,15,43,37.1,32
+ 89,2024-04-10 11:10:59,1,24,26.5,19
+ 90,2024-04-10 11:10:59,1,21,19.1,0
+ 91,2024-04-10 11:10:59,4,34,32.0,15
+ 92,2024-04-10 11:10:59,7,42,46.7,40
+ 93,2024-04-10 11:10:59,4,60,23.8,0
+ 94,2024-04-10 11:10:59,2,21,24.7,18
+ 95,2024-04-10 11:10:59,6,40,33.9,27
+ 96,2024-04-10 11:10:59,2,24,31.6,28
+ 97,2024-04-10 11:10:59,1,22,20.4,18
+ 98,2024-04-10 11:10:59,6,23,28.7,30
+ 99,2024-04-10 11:10:59,1,31,49.7,51
teradataml/data/scripts/lightgbm/dataset.template
@@ -0,0 +1,157 @@
+ import pandas as pd
+ import pickle
+ import json
+ import numpy as np
+ import ast
+ import sys
+ from collections import OrderedDict
+ import base64
+ from importlib import import_module
+ import sys
+
+ DELIMITER = "\t"
+
+ def convert_to_type(val, typee):
+     if typee == 'int':
+         return int(val) if val != "" else np.nan
+     if typee == 'float':
+         if isinstance(val, str):
+             val = val.replace(' ', '')
+         return float(val) if val != "" else np.nan
+     if typee == 'bool':
+         return eval(val) if val != "" else None
+     return str(val) if val != "" else None
+
+ def splitter(strr, delim=",", convert_to="str"):
+     """
+     Split the string based on delimiter and convert to the type specified.
+     """
+     if strr == "None":
+         return []
+     return [convert_to_type(i, convert_to) for i in strr.split(delim)]
+
+
+ is_lake_system = eval(sys.argv[2])
+ model_file_prefix = sys.argv[1]
+ if not is_lake_system:
+     db = sys.argv[0].split("/")[1]
+
+ ### Start of data related arguments processing
+ data_partition_column_values = []
+ data_present = False
+ partition_join = ""
+ model = None
+
+ # Data related arguments information of indices and types.
+ data_args_indices_types = OrderedDict()
+
+ func_name = <func_name>
+ module_name = <module_name>
+ class_name = <class_name>
+ all_col_names = <all_col_names>
+ all_col_types = <types_of_data_cols>
+ data_partition_column_indices = <partition_cols_indices>
+ data_partition_column_types = [all_col_types[idx] for idx in data_partition_column_indices]
+
+ # Data related arguments values - prepare dictionary and populate data later.
+ data_args_values = {}
+
+ data_args_info_str = <data_args_info_str>
+
+ for data_arg in data_args_info_str.split("--"):
+     _arg_name, _indices, _types = data_arg.split("-")
+     _indices = splitter(_indices, convert_to="int")
+     types = [type_ for idx, type_ in enumerate(all_col_types) if idx in _indices]
+
+     data_args_indices_types[_arg_name] = {"indices": _indices, "types": types}
+     data_args_values[_arg_name] = []  # Keeping empty for each data arg name and populate data later.
+
+ ### End of data related arguments processing
+
+
+ ### Start of other arguments processing
+ params = json.loads('<params>')
+ ### End of other arguments processing
+
+
+ # Read data - columns information is passed as command line argument and stored in
+ # data_args_indices_types dictionary.
+ while 1:
+     try:
+         line = input()
+         if line == '':  # Exit if user provides blank line
+             break
+         else:
+             data_present = True
+             values = line.split(DELIMITER)
+             if not data_partition_column_values:
+                 # Partition column values is same for all rows. Hence, only read once.
+                 for i, val in enumerate(data_partition_column_indices):
+                     data_partition_column_values.append(
+                         convert_to_type(values[val], typee=data_partition_column_types[i])
+                     )
+
+                 # Prepare the corresponding model file name and extract model.
+                 partition_join = "_".join([str(x) for x in data_partition_column_values])
+                 # Replace '-' with '_' as '-' because partition_columns can be negative.
+                 partition_join = partition_join.replace("-", "_")
+
+                 model_file_path = f"{model_file_prefix}_{partition_join}" \
+                     if is_lake_system else \
+                     f"./{db}/{model_file_prefix}_{partition_join}"
+
+             # Prepare data dictionary containing only arguments related to data.
+             for arg_name in data_args_values:
+                 data_indices = data_args_indices_types[arg_name]["indices"]
+                 types = data_args_indices_types[arg_name]["types"]
+                 cur_row = []
+                 for idx, data_idx in enumerate(data_indices):
+                     cur_row.append(convert_to_type(values[data_idx], types[idx]))
+                 data_args_values[arg_name].append(cur_row)
+     except EOFError:  # Exit if reached EOF or CTRL-D
+         break
+
+ if not data_present:
+     sys.exit(0)
+
+ for key, value in data_args_values.items():
+     col_names = [all_col_names[idx] for idx in data_args_indices_types[key]["indices"]]
+     data_args_values[key] = pd.DataFrame(value, columns=col_names)
+
+ # If reference argument (is a Dataset object) present in params, then it contains
+ # the prefix of the file path which contains the reference Dataset object.
+ if "reference" in params.keys() and params["reference"] is not None:
+     reference_dataset_file_prefix = params["reference"]
+     reference_arg_file_path = f"{reference_dataset_file_prefix}_{partition_join}" \
+         if is_lake_system else \
+         f"./{db}/{reference_dataset_file_prefix}_{partition_join}"
+     with open(reference_arg_file_path, "rb") as f:
+         params["reference"] = pickle.load(f)
+
+ if not func_name:
+     # Create DataSet object if no function of Dataset class is called.
+     lib = import_module(module_name)
+     class_instance = getattr(lib, class_name)
+     obj = class_instance(**{**data_args_values, **params})
+ else:
+     # If function of Dataset object is called, then call the function on model object.
+     with open(model_file_path, "rb") as fp:
+         model = pickle.loads(fp.read())
+
+     if not model:
+         sys.exit("Model file is not installed in Vantage.")
+
+     obj = getattr(model, func_name)(**{**data_args_values, **params})
+
+ model_str = pickle.dumps(obj)
+
+ if is_lake_system:
+     model_file_path = f"/tmp/{model_file_prefix}_{partition_join}.pickle"
+
+ # Save DataSet object to binary file
+ with open(model_file_path, "wb") as f:
+     f.write(model_str)
+
+ model_data = model_file_path if is_lake_system else base64.b64encode(model_str)
+
+ print(*(data_partition_column_values + [model_data]), sep=DELIMITER)
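
The <func_name>-style placeholders are substituted by teradataml before the script is installed, so the template is not runnable as-is. Its two parsing helpers can be checked in isolation, though; the functions below are copied verbatim from the template, and the sample input mimics one tab-delimited stdin row:

import numpy as np

# convert_to_type and splitter copied verbatim from the template above.
def convert_to_type(val, typee):
    if typee == 'int':
        return int(val) if val != "" else np.nan
    if typee == 'float':
        if isinstance(val, str):
            val = val.replace(' ', '')
        return float(val) if val != "" else np.nan
    if typee == 'bool':
        return eval(val) if val != "" else None
    return str(val) if val != "" else None

def splitter(strr, delim=",", convert_to="str"):
    if strr == "None":
        return []
    return [convert_to_type(i, convert_to) for i in strr.split(delim)]

# A hypothetical stdin row: partition id, one feature, one empty field.
values = "7\t1.5\t".split("\t")
print(convert_to_type(values[0], "int"))    # 7
print(convert_to_type(values[1], "float"))  # 1.5
print(convert_to_type(values[2], "float"))  # nan (empty fields become np.nan)
print(splitter("0,2", convert_to="int"))    # [0, 2]: how column indices arrive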