teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +71 -0
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +51 -24
- teradataml/analytics/json_parser/utils.py +11 -17
- teradataml/automl/__init__.py +103 -48
- teradataml/automl/data_preparation.py +55 -37
- teradataml/automl/data_transformation.py +131 -69
- teradataml/automl/feature_engineering.py +117 -185
- teradataml/automl/feature_exploration.py +9 -2
- teradataml/automl/model_evaluation.py +13 -25
- teradataml/automl/model_training.py +214 -75
- teradataml/catalog/model_cataloging_utils.py +1 -1
- teradataml/clients/auth_client.py +133 -0
- teradataml/common/aed_utils.py +3 -2
- teradataml/common/constants.py +11 -6
- teradataml/common/garbagecollector.py +5 -0
- teradataml/common/messagecodes.py +3 -1
- teradataml/common/messages.py +2 -1
- teradataml/common/utils.py +6 -0
- teradataml/context/context.py +49 -29
- teradataml/data/advertising.csv +201 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
- teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
- teradataml/data/glm_example.json +28 -1
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
- teradataml/data/kmeans_example.json +5 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +29 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +52 -1
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scripts/deploy_script.py +20 -1
- teradataml/data/scripts/sklearn/sklearn_fit.py +23 -27
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +20 -28
- teradataml/data/scripts/sklearn/sklearn_function.template +13 -18
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +18 -27
- teradataml/data/scripts/sklearn/sklearn_score.py +20 -29
- teradataml/data/scripts/sklearn/sklearn_transform.py +30 -38
- teradataml/data/teradataml_example.json +77 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/copy_to.py +8 -3
- teradataml/dataframe/data_transfer.py +120 -61
- teradataml/dataframe/dataframe.py +102 -17
- teradataml/dataframe/dataframe_utils.py +47 -9
- teradataml/dataframe/fastload.py +272 -89
- teradataml/dataframe/sql.py +84 -0
- teradataml/dbutils/dbutils.py +2 -2
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +102 -55
- teradataml/options/__init__.py +13 -4
- teradataml/options/configure.py +27 -6
- teradataml/scriptmgmt/UserEnv.py +19 -16
- teradataml/scriptmgmt/lls_utils.py +117 -14
- teradataml/table_operators/Script.py +2 -3
- teradataml/table_operators/TableOperator.py +58 -10
- teradataml/utils/validators.py +40 -2
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +78 -6
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/RECORD +108 -90
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +0 -0
|
@@ -79,7 +79,7 @@
|
|
|
79
79
|
"lowerBoundType": "EXCLUSIVE",
|
|
80
80
|
"upperBoundType": "INCLUSIVE",
|
|
81
81
|
"allowNaN": false,
|
|
82
|
-
"name": "
|
|
82
|
+
"name": "NumParallelTrees",
|
|
83
83
|
"alternateNames": [],
|
|
84
84
|
"isRequired": false,
|
|
85
85
|
"defaultValue": 1000,
|
|
@@ -97,10 +97,10 @@
|
|
|
97
97
|
"lowerBoundType": "EXCLUSIVE",
|
|
98
98
|
"upperBoundType": "INCLUSIVE",
|
|
99
99
|
"allowNaN": false,
|
|
100
|
-
"name": "
|
|
100
|
+
"name": "NumBoostRounds",
|
|
101
101
|
"alternateNames": [],
|
|
102
102
|
"isRequired": false,
|
|
103
|
-
"defaultValue":
|
|
103
|
+
"defaultValue": 10,
|
|
104
104
|
"rDescription": "Specify number of iterations within a boosted trees to be loaded from model table",
|
|
105
105
|
"description": "Specify number of iterations within a boosted trees to be loaded from model table",
|
|
106
106
|
"datatype": "NUMERIC",
|
|
@@ -177,6 +177,19 @@
|
|
|
177
177
|
"rName": "output.responses",
|
|
178
178
|
"useInR": true,
|
|
179
179
|
"rOrderNum": 9
|
|
180
|
+
},
|
|
181
|
+
{
|
|
182
|
+
"defaultValue": false,
|
|
183
|
+
"name": "Detailed",
|
|
184
|
+
"alternateNames": [],
|
|
185
|
+
"isRequired": false,
|
|
186
|
+
"rDescription": "Specifies whether to output detailed information of each prediction.",
|
|
187
|
+
"description": "Specifies whether to output detailed information of each prediction.",
|
|
188
|
+
"datatype": "BOOLEAN",
|
|
189
|
+
"allowsLists": false,
|
|
190
|
+
"rName": "detailed",
|
|
191
|
+
"useInR": true,
|
|
192
|
+
"rOrderNum": 10
|
|
180
193
|
}
|
|
181
194
|
]
|
|
182
|
-
}
|
|
195
|
+
}
|
|
@@ -1,26 +1,27 @@
|
|
|
1
1
|
{
|
|
2
2
|
"json_schema_major_version": "1",
|
|
3
|
-
"json_schema_minor_version": "
|
|
4
|
-
"json_content_version": "
|
|
3
|
+
"json_schema_minor_version": "2",
|
|
4
|
+
"json_content_version": "2",
|
|
5
5
|
"function_name": "TD_ZTest",
|
|
6
|
-
"
|
|
6
|
+
"function_alias_name": "TD_ZTest",
|
|
7
|
+
"function_version": "2.0",
|
|
8
|
+
"commence_db_version" : "17.20",
|
|
9
|
+
"change_db_version" : "17.20.03.XX",
|
|
7
10
|
"function_type": "fastpath",
|
|
8
11
|
"function_category": "Hypothesis Testing",
|
|
9
|
-
"function_alias_name": "TD_ZTest",
|
|
10
12
|
"function_r_name": "aa.td_ztest",
|
|
11
13
|
"short_description": "hypothesis test function to perform ztest analysis on a data set.",
|
|
12
14
|
"long_description": "fastpath function to perform ztest analysis on a data set.",
|
|
13
15
|
"input_tables": [
|
|
14
16
|
{
|
|
15
|
-
"isOrdered": false,
|
|
16
|
-
"partitionByOne": false,
|
|
17
17
|
"name": "InputTable",
|
|
18
18
|
"alternateNames": [],
|
|
19
19
|
"isRequired": true,
|
|
20
|
-
"rDescription": "The input table for ztest analysis",
|
|
21
|
-
"description": "The input table for ztest analysis.",
|
|
22
20
|
"datatype": "TABLE_ALIAS",
|
|
23
|
-
"
|
|
21
|
+
"partitionByOne": false,
|
|
22
|
+
"isOrdered": false,
|
|
23
|
+
"description": "The input table for ztest analysis.",
|
|
24
|
+
"rDescription": "The input table for ztest analysis",
|
|
24
25
|
"rName": "data",
|
|
25
26
|
"useInR": true,
|
|
26
27
|
"rOrderNum": 1
|
|
@@ -28,27 +29,30 @@
|
|
|
28
29
|
],
|
|
29
30
|
"argument_clauses": [
|
|
30
31
|
{
|
|
31
|
-
"
|
|
32
|
-
"lowerBound": 0,
|
|
33
|
-
"upperBound": 1,
|
|
34
|
-
"lowerBoundType": "INCLUSIVE",
|
|
35
|
-
"upperBoundType": "INCLUSIVE",
|
|
36
|
-
"allowNaN": false,
|
|
37
|
-
"isOutputColumn": false,
|
|
38
|
-
"matchLengthOfArgument": "",
|
|
39
|
-
"allowPadding": false,
|
|
40
|
-
"name": "Alpha",
|
|
32
|
+
"name": "FirstSampleColumn",
|
|
41
33
|
"alternateNames": [],
|
|
42
34
|
"isRequired": false,
|
|
43
|
-
"
|
|
44
|
-
|
|
45
|
-
|
|
35
|
+
"targetTable": [
|
|
36
|
+
"InputTable"
|
|
37
|
+
],
|
|
38
|
+
"checkDuplicate": true,
|
|
39
|
+
"allowedTypes": [],
|
|
40
|
+
"allowedTypeGroups": [
|
|
41
|
+
"NUMERIC"
|
|
42
|
+
],
|
|
43
|
+
"rOrderNum": 2,
|
|
44
|
+
"description": "Specifies the first sample column in z test",
|
|
45
|
+
"rDescription": "Specifies the first sample column in z test",
|
|
46
|
+
"datatype": "COLUMNS",
|
|
46
47
|
"allowsLists": false,
|
|
47
|
-
"rName": "
|
|
48
|
+
"rName": "first.sample.column",
|
|
48
49
|
"useInR": true,
|
|
49
|
-
"
|
|
50
|
+
"rFormulaUsage" : false
|
|
50
51
|
},
|
|
51
52
|
{
|
|
53
|
+
"name": "SecondSampleColumn",
|
|
54
|
+
"alternateNames": [],
|
|
55
|
+
"isRequired": false,
|
|
52
56
|
"targetTable": [
|
|
53
57
|
"InputTable"
|
|
54
58
|
],
|
|
@@ -57,22 +61,39 @@
|
|
|
57
61
|
"allowedTypeGroups": [
|
|
58
62
|
"NUMERIC"
|
|
59
63
|
],
|
|
60
|
-
"
|
|
61
|
-
"
|
|
62
|
-
"
|
|
63
|
-
"
|
|
64
|
-
"name": "FirstSampleColumn",
|
|
65
|
-
"alternateNames": [],
|
|
66
|
-
"isRequired": true,
|
|
67
|
-
"rDescription": "Specifies the first sample column in f test",
|
|
68
|
-
"description": "Specifies the first sample column in f test",
|
|
69
|
-
"datatype": "COLUMN",
|
|
64
|
+
"rOrderNum": 3,
|
|
65
|
+
"description": "Specifies the second sample column in z test",
|
|
66
|
+
"rDescription": "Specifies the second sample column in z test",
|
|
67
|
+
"datatype": "COLUMNS",
|
|
70
68
|
"allowsLists": false,
|
|
71
|
-
"rName": "
|
|
69
|
+
"rName": "second.sample.column",
|
|
72
70
|
"useInR": true,
|
|
73
|
-
"
|
|
71
|
+
"rFormulaUsage" : false
|
|
74
72
|
},
|
|
75
73
|
{
|
|
74
|
+
"name": "SampleNameColumn",
|
|
75
|
+
"alternateNames": [],
|
|
76
|
+
"isRequired": false,
|
|
77
|
+
"targetTable": [
|
|
78
|
+
"InputTable"
|
|
79
|
+
],
|
|
80
|
+
"checkDuplicate": true,
|
|
81
|
+
"allowedTypes": [],
|
|
82
|
+
"allowedTypeGroups": [
|
|
83
|
+
"STRING"
|
|
84
|
+
],
|
|
85
|
+
"rOrderNum": 4,
|
|
86
|
+
"description": "Specifies the input table column containing the names of the samples included in the z test. This argument is used when Input is in sample-value format.",
|
|
87
|
+
"rDescription": "Specifies the input table column containing the names of the samples included in the z test. This argument is used when Input is in sample-value format.",
|
|
88
|
+
"datatype": "COLUMNS",
|
|
89
|
+
"allowsLists": false,
|
|
90
|
+
"rName": "sample.name.column",
|
|
91
|
+
"useInR": true
|
|
92
|
+
},
|
|
93
|
+
{
|
|
94
|
+
"name": "SampleValueColumn",
|
|
95
|
+
"alternateNames": [],
|
|
96
|
+
"isRequired": false,
|
|
76
97
|
"targetTable": [
|
|
77
98
|
"InputTable"
|
|
78
99
|
],
|
|
@@ -81,90 +102,146 @@
|
|
|
81
102
|
"allowedTypeGroups": [
|
|
82
103
|
"NUMERIC"
|
|
83
104
|
],
|
|
84
|
-
"
|
|
85
|
-
"
|
|
86
|
-
"
|
|
87
|
-
"
|
|
88
|
-
"
|
|
105
|
+
"rOrderNum": 5,
|
|
106
|
+
"description": "Specifies the input table column containing the values for each sample member. This argument is used when Input is in sample-value format.",
|
|
107
|
+
"rDescription": "Specifies the input table column containing the values for each sample member. This argument is used when Input is in sample-value format.",
|
|
108
|
+
"datatype": "COLUMNS",
|
|
109
|
+
"allowsLists": false,
|
|
110
|
+
"rName": "sample.value.column",
|
|
111
|
+
"useInR": true
|
|
112
|
+
},
|
|
113
|
+
{
|
|
114
|
+
"name": "FirstSampleName",
|
|
89
115
|
"alternateNames": [],
|
|
90
116
|
"isRequired": false,
|
|
91
|
-
"
|
|
92
|
-
"
|
|
93
|
-
"
|
|
117
|
+
"defaultValue" : "",
|
|
118
|
+
"rOrderNum": 6,
|
|
119
|
+
"description": "Specifies the name of the first sample included in the z test. This argument is used when Input is in sample-value format.",
|
|
120
|
+
"rDescription": "Specifies the name of the first sample included in the z test. This argument is used when Input is in sample-value format.",
|
|
121
|
+
"datatype": "STRING",
|
|
94
122
|
"allowsLists": false,
|
|
95
|
-
"
|
|
123
|
+
"allowPadding": false,
|
|
124
|
+
"rName": "first.sample.name",
|
|
96
125
|
"useInR": true,
|
|
97
|
-
"
|
|
126
|
+
"rDefaultValue" : ""
|
|
98
127
|
},
|
|
99
128
|
{
|
|
100
|
-
|
|
101
|
-
"lower-tailed",
|
|
102
|
-
"two-tailed",
|
|
103
|
-
"upper-tailed"
|
|
104
|
-
],
|
|
105
|
-
"defaultValue": "two-tailed",
|
|
106
|
-
"allowNaN": false,
|
|
107
|
-
"isOutputColumn": false,
|
|
108
|
-
"matchLengthOfArgument": "",
|
|
109
|
-
"allowPadding": false,
|
|
110
|
-
"name": "Alternatehypothesis",
|
|
129
|
+
"name": "SecondSampleName",
|
|
111
130
|
"alternateNames": [],
|
|
112
131
|
"isRequired": false,
|
|
113
|
-
"
|
|
114
|
-
"
|
|
132
|
+
"defaultValue" : "",
|
|
133
|
+
"rOrderNum": 7,
|
|
134
|
+
"description": "Specifies the name of the second sample included in the z test. This argument is used when Input is in sample-value format.",
|
|
135
|
+
"rDescription": "Specifies the name of the second sample included in the z test. This argument is used when Input is in sample-value format.",
|
|
115
136
|
"datatype": "STRING",
|
|
116
137
|
"allowsLists": false,
|
|
117
|
-
"
|
|
138
|
+
"allowPadding": false,
|
|
139
|
+
"rName": "second.sample.name",
|
|
118
140
|
"useInR": true,
|
|
119
|
-
"
|
|
141
|
+
"rDefaultValue" : ""
|
|
120
142
|
},
|
|
121
143
|
{
|
|
144
|
+
"name": "FirstSampleVariance",
|
|
145
|
+
"alternateNames": [],
|
|
146
|
+
"isRequired": false,
|
|
147
|
+
"rOrderNum": 8,
|
|
148
|
+
"lowerBound" : 0,
|
|
149
|
+
"lowerBoundType" : "EXCLUSIVE",
|
|
150
|
+
"upperBound" : 1e10,
|
|
151
|
+
"upperBoundType" : "EXCLUSIVE",
|
|
122
152
|
"allowNaN": false,
|
|
123
153
|
"isOutputColumn": false,
|
|
124
154
|
"matchLengthOfArgument": "",
|
|
125
|
-
"allowPadding": false,
|
|
126
|
-
"name": "FirstSampleVariance",
|
|
127
|
-
"alternateNames": [],
|
|
128
|
-
"isRequired": true,
|
|
129
|
-
"rDescription": "Specifies the first sample variance",
|
|
130
155
|
"description": "Specifies the first sample variance",
|
|
131
|
-
"
|
|
156
|
+
"rDescription": "Specifies the first sample variance",
|
|
157
|
+
"datatype": "DOUBLE",
|
|
132
158
|
"allowsLists": false,
|
|
159
|
+
"allowPadding": false,
|
|
133
160
|
"rName": "first.sample.variance",
|
|
134
161
|
"useInR": true,
|
|
135
|
-
"
|
|
162
|
+
"rDefaultValue" : ""
|
|
136
163
|
},
|
|
137
164
|
{
|
|
138
|
-
"allowNaN": false,
|
|
139
|
-
"isOutputColumn": false,
|
|
140
|
-
"matchLengthOfArgument": "",
|
|
141
|
-
"allowPadding": false,
|
|
142
165
|
"name": "SecondSampleVariance",
|
|
143
166
|
"alternateNames": [],
|
|
144
167
|
"isRequired": false,
|
|
145
|
-
"
|
|
168
|
+
"rOrderNum": 9,
|
|
169
|
+
"lowerBound" : 0,
|
|
170
|
+
"lowerBoundType" : "EXCLUSIVE",
|
|
171
|
+
"upperBound" : 1e10,
|
|
172
|
+
"upperBoundType" : "EXCLUSIVE",
|
|
173
|
+
"allowNaN": false,
|
|
174
|
+
"isOutputColumn": false,
|
|
175
|
+
"matchLengthOfArgument": "",
|
|
146
176
|
"description": "Specifies the second sample variance",
|
|
147
|
-
"
|
|
177
|
+
"rDescription": "Specifies the second sample variance",
|
|
178
|
+
"datatype": "DOUBLE",
|
|
148
179
|
"allowsLists": false,
|
|
180
|
+
"allowPadding": false,
|
|
149
181
|
"rName": "second.sample.variance",
|
|
150
182
|
"useInR": true,
|
|
151
|
-
"
|
|
183
|
+
"rDefaultValue" : ""
|
|
152
184
|
},
|
|
153
185
|
{
|
|
154
|
-
"
|
|
155
|
-
"
|
|
186
|
+
"name": "AlternativeHypothesis",
|
|
187
|
+
"alternateNames": [],
|
|
188
|
+
"isRequired": false,
|
|
189
|
+
"defaultValue": "two-tailed",
|
|
190
|
+
"rOrderNum": 10,
|
|
191
|
+
"permittedValues": [
|
|
192
|
+
"lower-tailed",
|
|
193
|
+
"two-tailed",
|
|
194
|
+
"upper-tailed"
|
|
195
|
+
],
|
|
156
196
|
"matchLengthOfArgument": "",
|
|
197
|
+
"description": "Specifies the alternative hypothesis",
|
|
198
|
+
"rDescription": "Specifies the alternative hypothesis",
|
|
199
|
+
"datatype": "STRING",
|
|
200
|
+
"allowsLists": false,
|
|
157
201
|
"allowPadding": false,
|
|
202
|
+
"rName": "alternate.hypothesis",
|
|
203
|
+
"useInR": true,
|
|
204
|
+
"rDefaultValue" : ""
|
|
205
|
+
},
|
|
206
|
+
{
|
|
158
207
|
"name": "MeanUnderH0",
|
|
159
208
|
"alternateNames": [],
|
|
160
209
|
"isRequired": false,
|
|
161
|
-
"
|
|
210
|
+
"defaultValue": 0,
|
|
211
|
+
"rOrderNum": 11,
|
|
212
|
+
"lowerBound": 0,
|
|
213
|
+
"lowerBoundType": "INCLUSIVE",
|
|
214
|
+
"upperBound": 1,
|
|
215
|
+
"upperBoundType": "INCLUSIVE",
|
|
216
|
+
"allowNaN": false,
|
|
162
217
|
"description": "Specifies the mean under the null hypothesis",
|
|
218
|
+
"rDescription": "Specifies the mean under the null hypothesis",
|
|
163
219
|
"datatype": "NUMERIC",
|
|
164
220
|
"allowsLists": false,
|
|
221
|
+
"allowPadding": false,
|
|
165
222
|
"rName": "mean.under.h0",
|
|
166
223
|
"useInR": true,
|
|
167
|
-
"
|
|
224
|
+
"rDefaultValue" : ""
|
|
225
|
+
},
|
|
226
|
+
{
|
|
227
|
+
"name": "Alpha",
|
|
228
|
+
"alternateNames": [],
|
|
229
|
+
"isRequired": false,
|
|
230
|
+
"defaultValue": 0.05,
|
|
231
|
+
"rOrderNum": 12,
|
|
232
|
+
"lowerBound": 0,
|
|
233
|
+
"lowerBoundType": "INCLUSIVE",
|
|
234
|
+
"upperBound": 1,
|
|
235
|
+
"upperBoundType": "INCLUSIVE",
|
|
236
|
+
"allowNaN": false,
|
|
237
|
+
"description": "Specifies the value of alpha in hypothesis test function",
|
|
238
|
+
"rDescription": "Specifies the value of alpha in hypothesis test function",
|
|
239
|
+
"datatype": "DOUBLE",
|
|
240
|
+
"allowsLists": false,
|
|
241
|
+
"allowPadding": false,
|
|
242
|
+
"rName": "alpha",
|
|
243
|
+
"useInR": true,
|
|
244
|
+
"rDefaultValue" : ""
|
|
168
245
|
}
|
|
169
246
|
]
|
|
170
|
-
}
|
|
247
|
+
}
|
|
@@ -59,5 +59,34 @@
|
|
|
59
59
|
"group_column" : "integer",
|
|
60
60
|
"partition_column_1" : "integer",
|
|
61
61
|
"partition_column_2" : "integer"
|
|
62
|
+
},
|
|
63
|
+
"onehot_encoder_train": {
|
|
64
|
+
"gender" : "varchar(20)",
|
|
65
|
+
"numb" : "integer"
|
|
66
|
+
},
|
|
67
|
+
"customer_segmentation_train": {
|
|
68
|
+
"ID" : "integer",
|
|
69
|
+
"Gender" : "varchar(10)",
|
|
70
|
+
"Ever_Married" : "varchar(10)",
|
|
71
|
+
"Age" : "integer",
|
|
72
|
+
"Graduated" : "varchar(10)",
|
|
73
|
+
"Profession" : "varchar(30)",
|
|
74
|
+
"Work_Experience" : "integer",
|
|
75
|
+
"Spending_Score" : "varchar(10)",
|
|
76
|
+
"Family_Size": "integer",
|
|
77
|
+
"Var_1": "varchar(10)",
|
|
78
|
+
"Segmentation": "varchar(2)"
|
|
79
|
+
},
|
|
80
|
+
"customer_segmentation_test": {
|
|
81
|
+
"ID" : "integer",
|
|
82
|
+
"Gender" : "varchar(10)",
|
|
83
|
+
"Ever_Married" : "varchar(10)",
|
|
84
|
+
"Age" : "integer",
|
|
85
|
+
"Graduated" : "varchar(10)",
|
|
86
|
+
"Profession" : "varchar(30)",
|
|
87
|
+
"Work_Experience" : "integer",
|
|
88
|
+
"Spending_Score" : "varchar(10)",
|
|
89
|
+
"Family_Size": "integer",
|
|
90
|
+
"Var_1": "varchar(10)"
|
|
62
91
|
}
|
|
63
92
|
}
|
|
@@ -19,5 +19,56 @@
|
|
|
19
19
|
"bathrms" : "real",
|
|
20
20
|
"stories" : "real"
|
|
21
21
|
|
|
22
|
-
}
|
|
22
|
+
},
|
|
23
|
+
"scale_attributes":{
|
|
24
|
+
|
|
25
|
+
"pid" : "integer",
|
|
26
|
+
"attribute_column" : "varchar(150)"
|
|
27
|
+
|
|
28
|
+
},
|
|
29
|
+
"scale_parameters":{
|
|
30
|
+
|
|
31
|
+
"pid" : "integer",
|
|
32
|
+
"parameter_column" : "varchar(150)",
|
|
33
|
+
"value_column" : "varchar(150)"
|
|
34
|
+
|
|
35
|
+
},
|
|
36
|
+
"scale_input_partitioned":{
|
|
37
|
+
"passenger" : "integer",
|
|
38
|
+
"pid" : "integer",
|
|
39
|
+
"survived" : "integer",
|
|
40
|
+
"pclass" : "integer",
|
|
41
|
+
"name" : "varchar(90)",
|
|
42
|
+
"gender" : "varchar(10)",
|
|
43
|
+
"age" : "integer",
|
|
44
|
+
"sibsp" : "integer",
|
|
45
|
+
"parch" : "integer",
|
|
46
|
+
"ticket" : "varchar(20)",
|
|
47
|
+
"fare" : "integer",
|
|
48
|
+
"cabin" : "varchar(20)",
|
|
49
|
+
"embarked" : "varchar(10)"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
},
|
|
53
|
+
|
|
54
|
+
"scale_input_sparse":
|
|
55
|
+
{
|
|
56
|
+
|
|
57
|
+
"passenger" : "integer",
|
|
58
|
+
"attribute_column" : "varchar(20)",
|
|
59
|
+
"attribute_value" : "real"
|
|
60
|
+
|
|
61
|
+
},
|
|
62
|
+
|
|
63
|
+
"scale_input_part_sparse":
|
|
64
|
+
{
|
|
65
|
+
|
|
66
|
+
"pid" : "integer",
|
|
67
|
+
"passenger" : "integer",
|
|
68
|
+
"attribute_column" : "varchar(20)",
|
|
69
|
+
"attribute_value" : "real"
|
|
70
|
+
|
|
23
71
|
}
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
pid,passenger,attribute_column,attribute_value
|
|
2
|
+
3,56,age,
|
|
3
|
+
3,56,fare,35.5
|
|
4
|
+
3,63,age,45.0
|
|
5
|
+
3,63,fare,83.475
|
|
6
|
+
3,67,age,29.0
|
|
7
|
+
3,67,fare,10.5
|
|
8
|
+
3,76,age,25.0
|
|
9
|
+
3,76,fare,7.65
|
|
10
|
+
3,93,age,46.0
|
|
11
|
+
3,93,fare,61.175
|
|
12
|
+
1,2,age,38.0
|
|
13
|
+
1,2,fare,71.2833
|
|
14
|
+
1,4,age,35.0
|
|
15
|
+
1,4,fare,53.1
|
|
16
|
+
1,7,age,54.0
|
|
17
|
+
1,7,fare,51.8625
|
|
18
|
+
1,11,age,4.0
|
|
19
|
+
1,11,fare,16.7
|
|
20
|
+
1,12,age,58.0
|
|
21
|
+
1,12,fare,26.55
|
|
22
|
+
2,22,age,34.0
|
|
23
|
+
2,22,fare,13.0
|
|
24
|
+
2,24,age,28.0
|
|
25
|
+
2,24,fare,35.5
|
|
26
|
+
2,32,age,
|
|
27
|
+
2,32,fare,146.5208
|
|
28
|
+
2,53,age,49.0
|
|
29
|
+
2,53,fare,76.7292
|
|
30
|
+
2,55,age,65.0
|
|
31
|
+
2,55,fare,61.9792
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
passenger,pid,survived,pclass,name,gender,age,sibsp,parch,ticket,fare,cabin,embarked
|
|
2
|
+
76,3,0,3,Moen; Mr. Sigurd Hansen,male,25.0,0,0,348123,7.65,F G73,S
|
|
3
|
+
32,2,1,1,Spencer; Mrs. William Augustus (Marie Eugenie),female,,1,0,PC 17569,146.5208,B78,C
|
|
4
|
+
55,2,0,1,Ostby; Mr. Engelhart Cornelius,male,65.0,0,1,113509,61.9792,B30,C
|
|
5
|
+
53,2,1,1,Harper; Mrs. Henry Sleeper (Myna Haxtun),female,49.0,1,0,PC 17572,76.7292,D33,C
|
|
6
|
+
93,3,0,1,Chaffee; Mr. Herbert Fuller,male,46.0,1,0,W.E.P. 5734,61.175,E31,S
|
|
7
|
+
11,1,1,3,Sandstrom; Miss. Marguerite Rut,female,4.0,1,1,PP 9549,16.7,G6,S
|
|
8
|
+
7,1,0,1,McCarthy; Mr. Timothy J,male,54.0,0,0,17463,51.8625,E46,S
|
|
9
|
+
24,2,1,1,Sloper; Mr. William Thompson,male,28.0,0,0,113788,35.5,A6,S
|
|
10
|
+
63,3,0,1,Harris; Mr. Henry Birkhardt,male,45.0,1,0,36973,83.475,C83,S
|
|
11
|
+
22,2,1,2,Beesley; Mr. Lawrence,male,34.0,0,0,248698,13.0,D56,S
|
|
12
|
+
56,3,1,1,Woolner; Mr. Hugh,male,,0,0,19947,35.5,C52,S
|
|
13
|
+
12,1,1,1,Bonnell; Miss. Elizabeth,female,58.0,0,0,113783,26.55,C103,S
|
|
14
|
+
2,1,1,1,Cumings; Mrs. John Bradley (Florence Briggs Thayer),female,38.0,1,0,PC 17599,71.2833,C85,C
|
|
15
|
+
67,3,1,2,Nye; Mrs. (Elizabeth Ramell),female,29.0,0,0,C.A. 29395,10.5,F33,S
|
|
16
|
+
4,1,1,1,Futrelle; Mrs. Jacques Heath (Lily May Peel),female,35.0,1,0,113803,53.1,C123,S
|
|
@@ -27,6 +27,11 @@ def get_values_list(values, ignore_none=True):
|
|
|
27
27
|
|
|
28
28
|
return ret_vals
|
|
29
29
|
|
|
30
|
+
if len(sys.argv) != 2:
|
|
31
|
+
sys.exit("Script command format: python deploy_script.py <enterprise/lake>")
|
|
32
|
+
|
|
33
|
+
vantage_type = sys.argv[1]
|
|
34
|
+
|
|
30
35
|
data_partition_column_values = []
|
|
31
36
|
data_partition_column_indices = [5, 6]
|
|
32
37
|
|
|
@@ -60,6 +65,20 @@ y = np.array(labels)
|
|
|
60
65
|
clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
|
|
61
66
|
clf.fit(X, y)
|
|
62
67
|
|
|
63
|
-
|
|
68
|
+
model_str = pickle.dumps(clf)
|
|
69
|
+
|
|
70
|
+
# Prepare the corresponding model file name and extract model.
|
|
71
|
+
partition_join = "_".join([str(x) for x in data_partition_column_values])
|
|
72
|
+
# Replace '-' with '_' as '-' because partition_columns can be negative.
|
|
73
|
+
partition_join = partition_join.replace("-", "_")
|
|
74
|
+
|
|
75
|
+
if vantage_type == "lake":
|
|
76
|
+
model = f"/tmp/sklearn_model_{partition_join}.pickle"
|
|
77
|
+
with open(model, "wb") as fp:
|
|
78
|
+
fp.write(model_str)
|
|
79
|
+
elif vantage_type == "enterprise":
|
|
80
|
+
model = base64.b64encode(model_str)
|
|
81
|
+
else:
|
|
82
|
+
sys.exit("Invalid vantage type. Use either 'lake' or 'enterprise'.")
|
|
64
83
|
|
|
65
84
|
print(*(data_partition_column_values + [model]), sep=DELIMITER)
|