PyPI - teradataml - Versions diffs - 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl - Mend

teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of teradataml might be problematic. Click here for more details.

Files changed (240) hide show

teradataml/LICENSE-3RD-PARTY.pdf +0 -0
teradataml/LICENSE.pdf +0 -0
teradataml/README.md +306 -0
teradataml/__init__.py +10 -3
teradataml/_version.py +1 -1
teradataml/analytics/__init__.py +3 -2
teradataml/analytics/analytic_function_executor.py +299 -16
teradataml/analytics/analytic_query_generator.py +92 -0
teradataml/analytics/byom/__init__.py +3 -2
teradataml/analytics/json_parser/metadata.py +13 -3
teradataml/analytics/json_parser/utils.py +13 -6
teradataml/analytics/meta_class.py +40 -1
teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
teradataml/analytics/sqle/__init__.py +11 -2
teradataml/analytics/table_operator/__init__.py +4 -3
teradataml/analytics/uaf/__init__.py +21 -2
teradataml/analytics/utils.py +66 -1
teradataml/analytics/valib.py +1 -1
teradataml/automl/__init__.py +1502 -323
teradataml/automl/custom_json_utils.py +139 -61
teradataml/automl/data_preparation.py +247 -307
teradataml/automl/data_transformation.py +32 -12
teradataml/automl/feature_engineering.py +325 -86
teradataml/automl/model_evaluation.py +44 -35
teradataml/automl/model_training.py +122 -153
teradataml/catalog/byom.py +8 -8
teradataml/clients/pkce_client.py +1 -1
teradataml/common/__init__.py +2 -1
teradataml/common/constants.py +72 -0
teradataml/common/deprecations.py +13 -7
teradataml/common/garbagecollector.py +152 -120
teradataml/common/messagecodes.py +11 -2
teradataml/common/messages.py +4 -1
teradataml/common/sqlbundle.py +26 -4
teradataml/common/utils.py +225 -14
teradataml/common/wrapper_utils.py +1 -1
teradataml/context/context.py +82 -2
teradataml/data/SQL_Fundamentals.pdf +0 -0
teradataml/data/complaints_test_tokenized.csv +353 -0
teradataml/data/complaints_tokens_model.csv +348 -0
teradataml/data/covid_confirm_sd.csv +83 -0
teradataml/data/dataframe_example.json +27 -1
teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
teradataml/data/dwt2d_dataTable.csv +65 -0
teradataml/data/dwt_dataTable.csv +8 -0
teradataml/data/dwt_filterTable.csv +3 -0
teradataml/data/finance_data4.csv +13 -0
teradataml/data/grocery_transaction.csv +19 -0
teradataml/data/idwt2d_dataTable.csv +5 -0
teradataml/data/idwt_dataTable.csv +8 -0
teradataml/data/idwt_filterTable.csv +3 -0
teradataml/data/interval_data.csv +5 -0
teradataml/data/jsons/paired_functions.json +14 -0
teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
teradataml/data/load_example_data.py +8 -2
teradataml/data/medical_readings.csv +101 -0
teradataml/data/naivebayestextclassifier_example.json +1 -1
teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
teradataml/data/patient_profile.csv +101 -0
teradataml/data/peppers.png +0 -0
teradataml/data/real_values.csv +14 -0
teradataml/data/sax_example.json +8 -0
teradataml/data/scripts/deploy_script.py +1 -1
teradataml/data/scripts/lightgbm/dataset.template +157 -0
teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
teradataml/data/star_pivot.csv +8 -0
teradataml/data/target_udt_data.csv +8 -0
teradataml/data/templates/open_source_ml.json +3 -1
teradataml/data/teradataml_example.json +20 -1
teradataml/data/timestamp_data.csv +4 -0
teradataml/data/titanic_dataset_unpivoted.csv +19 -0
teradataml/data/uaf_example.json +55 -1
teradataml/data/unpivot_example.json +15 -0
teradataml/data/url_data.csv +9 -0
teradataml/data/vectordistance_example.json +4 -0
teradataml/data/windowdfft.csv +16 -0
teradataml/dataframe/copy_to.py +1 -1
teradataml/dataframe/data_transfer.py +5 -3
teradataml/dataframe/dataframe.py +1002 -201
teradataml/dataframe/fastload.py +3 -3
teradataml/dataframe/functions.py +867 -0
teradataml/dataframe/row.py +160 -0
teradataml/dataframe/setop.py +2 -2
teradataml/dataframe/sql.py +840 -33
teradataml/dataframe/window.py +1 -1
teradataml/dbutils/dbutils.py +878 -34
teradataml/dbutils/filemgr.py +48 -1
teradataml/geospatial/geodataframe.py +1 -1
teradataml/geospatial/geodataframecolumn.py +1 -1
teradataml/hyperparameter_tuner/optimizer.py +13 -13
teradataml/lib/aed_0_1.dll +0 -0
teradataml/opensource/__init__.py +1 -1
teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
teradataml/opensource/_lightgbm.py +950 -0
teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
teradataml/opensource/sklearn/__init__.py +0 -1
teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
teradataml/options/__init__.py +9 -23
teradataml/options/configure.py +42 -4
teradataml/options/display.py +2 -2
teradataml/plot/axis.py +4 -4
teradataml/scriptmgmt/UserEnv.py +13 -9
teradataml/scriptmgmt/lls_utils.py +77 -23
teradataml/store/__init__.py +13 -0
teradataml/store/feature_store/__init__.py +0 -0
teradataml/store/feature_store/constants.py +291 -0
teradataml/store/feature_store/feature_store.py +2223 -0
teradataml/store/feature_store/models.py +1505 -0
teradataml/store/vector_store/__init__.py +1586 -0
teradataml/table_operators/Script.py +2 -2
teradataml/table_operators/TableOperator.py +106 -20
teradataml/table_operators/query_generator.py +3 -0
teradataml/table_operators/table_operator_query_generator.py +3 -1
teradataml/table_operators/table_operator_util.py +102 -56
teradataml/table_operators/templates/dataframe_register.template +69 -0
teradataml/table_operators/templates/dataframe_udf.template +63 -0
teradataml/telemetry_utils/__init__.py +0 -0
teradataml/telemetry_utils/queryband.py +52 -0
teradataml/utils/dtypes.py +4 -2
teradataml/utils/validators.py +34 -2
{teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
{teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
{teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
{teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
{teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0

teradataml/data/scripts/sklearn/sklearn_transform.py CHANGED Viewed

@@ -31,39 +31,83 @@ def splitter(strr, delim=",", convert_to="str"):
         return []
     return [convert_to_type(i, convert_to) for i in strr.split(delim)]
+def should_convert(t_val, py_type):
+    """
+    Function to check type of value and whether value is nan and infinity.
+    """
+    return not isinstance(t_val, eval(py_type)) and not math.isinf(t_val) and not math.isnan(t_val)
+def convert_value(t_val, py_type):
+    """
+    Function to convert value to specified python type.
+    """
+    return convert_to_type(t_val, py_type) if should_convert(t_val, py_type) else t_val
 # Process output returned by sklearn function.
-def get_output_data(trans_values, func_name, model_obj, n_c_labels):
-    # Converting sparse matrix to dense array as sparse matrices are NOT
+def get_output_data(trans_values, func_name, model_obj, n_c_labels, n_out_columns):
+    # Converting    sparse matrix to dense array as sparse matrices are NOT
     # supported in Vantage.
     module_name = model_obj.__module__.split("._")[0]
-    if type(trans_values).__name__ in ["csr_matrix", "csc_matrix"]:
-        trans_values = trans_values.toarray()
+    # Converting the translated values into corresponding the return column's
+    # python type.
+    if (func_name == "decision_path" or return_columns_python_types is None \
+            or not isinstance(trans_values, np.ndarray)):
+        trans_values_list = trans_values
+    else:
+        # Conversion.....
+        trans_values_list = []
+        for trans_value in trans_values.tolist():
+            if not isinstance(trans_value, list):
+                trans_value = [trans_value]
+            converted_list = []
+            if len(return_columns_python_types) == len(trans_value):
+                for t_val, py_type in zip(trans_value, return_columns_python_types):
+                    converted_list.append(convert_value(t_val, py_type))
+            ## transform() is having only 1 python return type, But it actually returns more than 1 column
+            else:
+                for t_val in trans_value:
+                    converted_list.append(convert_value(t_val, "".join(return_columns_python_types)))
+            trans_values_list.append(converted_list)
+    if type(trans_values_list).__name__ in ["csr_matrix", "csc_matrix"]:
+        trans_values_list = trans_values_list.toarray()
     if module_name == "sklearn.cross_decomposition" and n_c_labels > 0 and func_name == "transform":
         # For cross_decomposition, output is a tuple of arrays when label columns are provided
         # along with feature columns for transform function. In this case, concatenate the
         # arrays and return the combined values.
-        if isinstance(trans_values, tuple):
-            return np.concatenate(trans_values, axis=1).tolist()[0]
+        if isinstance(trans_values_list, tuple):
+            return np.concatenate(trans_values_list, axis=1).tolist()[0]
-    if isinstance(trans_values[0], np.ndarray) \
-            or isinstance(trans_values[0], list) \
-            or isinstance(trans_values[0], tuple):
+    if isinstance(trans_values_list[0], np.ndarray) \
+            or isinstance(trans_values_list[0], list) \
+            or isinstance(trans_values_list[0], tuple):
         # Here, the value returned by sklearn function is list type.
-        opt_list = list(trans_values[0])
+        opt_list = list(trans_values_list[0])
+        if len(opt_list) < n_out_columns:
+            # If the output list is less than the required number of columns, append
+            # empty strings to the list.
+            opt_list += [""] * (n_out_columns - len(opt_list))
         if func_name == "inverse_transform" and type(model_obj).__name__ == "MultiLabelBinarizer":
             # output array "trans_values[0]" may not be of same size. It should be of
             # maximum size of `model.classes_`
             # Append None to last elements.
             if len(opt_list) < len(model_obj.classes_):
                 opt_list += [""] * (len(model_obj.classes_) - len(opt_list))
         return opt_list
-    return [trans_values[0]]
+    # Only one element is returned by the function.
+    return [trans_values_list[0]]
 # Arguments to the Script
-if len(sys.argv) != 8:
-    # 8 arguments command line arguments should be passed to this file.
+if len(sys.argv) != 10:
+    # 10 arguments command line arguments should be passed to this file.
     # 1: file to be run
     # 2. function name (Eg. predict, fit etc)
     # 3. No of feature columns.
@@ -71,13 +115,17 @@ if len(sys.argv) != 8:
     # 5. Comma separated indices of partition columns.
     # 6. Comma separated types of all the data columns.
     # 7. Model file prefix to generated model file using partition columns.
-    # 8. Flag to check the system type. True, means Lake, Enterprise otherwise.
-    sys.exit("8 arguments should be passed to this file - file to be run, function name, "\
-             "no of feature columns, no of class labels, comma separated indices of partition "\
-             "columns, comma separated types of all columns, model file prefix to generate model "\
-             "file using partition columns and flag to check lake or enterprise.")
+    # 8. Number of columns to be returned by the sklearn's transform function.
+    # 9. Flag to check the system type. True, means Lake, Enterprise otherwise.
+    # 10. Python types of returned/transfromed columns.
+    sys.exit("10 arguments should be passed to this file - file to be run, function name, "\
+                 "no of feature columns, no of class labels, comma separated indices of partition "\
+                 "columns, comma separated types of all columns, model file prefix to generate model "\
+                 "file using partition columns, number of columns to be returnd by sklearn's "\
+                 "transform function, flag to check lake or enterprise and Python types of "\
+                 "returned/transfromed columns.")
-is_lake_system = eval(sys.argv[7])
+is_lake_system = eval(sys.argv[8])
 if not is_lake_system:
     db = sys.argv[0].split("/")[1]
 func_name = sys.argv[1]
@@ -86,17 +134,29 @@ n_c_labels = int(sys.argv[3])
 data_column_types = splitter(sys.argv[5], delim="--")
 data_partition_column_indices = splitter(sys.argv[4], convert_to="int") # indices are integers.
 model_file_prefix = sys.argv[6]
+# sys.argv[9] will contain a string of python datatypes with '--'
+# separator OR a single datatype OR None in string format.
+ret_col_argv = sys.argv[9]
+if ret_col_argv == "None":
+    return_columns_python_types = eval(ret_col_argv)
+else:
+    return_columns_python_types = splitter(ret_col_argv, delim="--")
+no_of_output_columns = int(sys.argv[7])
 data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
 model = None
 data_partition_column_values = []
+all_rows_input = []
 # Data Format:
 # feature1, feature2, ..., featuren, label1, label2, ... labelk, data_partition_column1, ...,
 # data_partition_columnn.
 # label is optional (it is present when label_exists is not "None")
+model_name = ""
 while 1:
     try:
         line = input()
@@ -128,9 +188,48 @@ while 1:
                     sys.exit("Model file is not installed in Vantage.")
             f_ = values[:n_f_cols]
+            model_name = model.__class__.__name__
+            np_func_list = ["ClassifierChain", "EllipticEnvelope", "MinCovDet",
+                            "FeatureAgglomeration", "LabelBinarizer", "MultiLabelBinarizer",
+                            "BernoulliRBM"]
+            # MissingIndicator's transform() and SimpleImputer's inverse_transform() requires processing
+            # the entire dataset simultaneously, rather than on a row-by-row basis.
+            # Error getting during row-by-row processing of MissingIndicator -
+            # "ValueError: MissingIndicator does not support data with dtype <U13.
+            # Please provide either a numeric array (with a floating point or
+            # integer dtype) or categorical data represented ei
+            # Error getting during row-by-row processing of SimpleImputer -
+            # "IndexError: index 3 is out of bounds for axis 1 with size 3".
+            if ((model_name == "MissingIndicator" and func_name == "transform") or \
+                (model_name == "SimpleImputer" and func_name == "inverse_transform") or \
+                    (model_name in ["EllipticEnvelope", "MinCovDet"]
+                        and func_name == "correct_covariance")):
+                all_rows_input.append(f_)
+                continue
+            f__ = np.array([f_]) if model_name in np_func_list else [f_]
+            # transform() function in these functions generate different number of output columns and
+            # NULLS/NaNs are appended to the end of the output.
+            # If we run inverse_transform() on these models, it will take same number of input columns
+            # with NULLs/NaNs but those NULLs/NaNs should be ignored while reading the input to
+            # inverse_transform() function.
+            models_with_all_null_in_last_cols = ["SelectFpr", "SelectFdr", "SelectFwe", "SelectFromModel", "RFECV"]
+            if model_name in models_with_all_null_in_last_cols and func_name == "inverse_transform":
+                # Remove NULLs/NaNs from the end of one input row.
+                _f  = np.array([f_])
+                _f = _f[~np.isnan(_f)]
+                f__ = [_f.tolist()]
             if n_c_labels > 0:
                 # Labels are present in last column.
                 l_ = values[n_f_cols:n_f_cols+n_c_labels]
+                l__ = np.array([l_]) if model_name in np_func_list else [l_]
                 # predict() now takes 'y' also for it to return the labels from script. Skipping 'y'
                 # in function call. Generally, 'y' is passed to return y along with actual output.
                 try:
@@ -138,23 +237,24 @@ while 1:
                     # used 'in' in if constion, as model.__module__ is giving
                     # 'sklearn.cross_decomposition._pls'.
                     if "cross_decomposition" in model.__module__:
-                        trans_values = getattr(model, func_name)(X=np.array([f_]), Y=np.array([l_]))
+                        trans_values = getattr(model, func_name)(X=f__, Y=l__)
                     else:
-                        trans_values = getattr(model, func_name)(X=np.array([f_]), y=np.array([l_]))
+                        trans_values = getattr(model, func_name)(X=f__, y=l__)
                 except TypeError as ex:
                     # Function which does not accept 'y' like predict_proba() raises error like
                     # "TypeError: predict_proba() takes 2 positional arguments but 3 were given".
-                    trans_values = getattr(model, func_name)(np.array([f_]))
+                    trans_values = getattr(model, func_name)(f__)
             else:
                 # If class labels do not exist in data, don't read labels, read just features.
-                trans_values = getattr(model, func_name)(np.array([f_]))
+                trans_values = getattr(model, func_name)(f__)
             result_list = f_
             if n_c_labels > 0 and func_name in ["predict", "decision_function"]:
                 result_list += l_
             result_list += get_output_data(trans_values=trans_values, func_name=func_name,
-                                           model_obj=model, n_c_labels=n_c_labels)
+                                           model_obj=model, n_c_labels=n_c_labels,
+                                           n_out_columns=no_of_output_columns)
             for i, val in enumerate(result_list):
                 if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))):
@@ -169,3 +269,41 @@ while 1:
     except EOFError:  # Exit if reached EOF or CTRL-D
         break
+# MissingIndicator and SimpleImputer needs processing of all the dataset at the same time, instead of row by row.
+# Hence, handling it outside of the while loop
+if model_name == "MissingIndicator" and func_name == "transform" or \
+    (model_name == "SimpleImputer" and func_name == "inverse_transform"):
+    if model_name == "SimpleImputer":
+        all_rows_input = np.array(all_rows_input)
+    m_out = getattr(model, func_name)(all_rows_input)
+    if type(m_out).__name__ in ["csr_matrix", "csc_matrix"]:
+        m_out = m_out.toarray()
+    for j in range(len(all_rows_input)):
+        m_out_list = get_output_data(trans_values=[m_out[j]], func_name=func_name,
+                                     model_obj=model, n_c_labels=n_c_labels,
+                                     n_out_columns=no_of_output_columns)
+        result_list = list(all_rows_input[j]) + list(m_out_list)
+        for i, val in enumerate(result_list):
+            if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))):
+                result_list[i] = ""
+            # MissingIndicator returns boolean values. Convert them to 0/1.
+            elif val == False:
+                result_list[i] = 0
+            elif val == True:
+                result_list[i] = 1
+        print(*(data_partition_column_values + result_list), sep=DELIMITER)
+## correct_covariance() requires processing of all the input rows at the same time.
+## It returns the output dataset  in (n_features, n_features) shape, i.e., based on
+## no. of columns.
+if model_name in ["EllipticEnvelope", "MinCovDet"] and func_name == "correct_covariance":
+    result_list = model.correct_covariance(np.array(all_rows_input))
+    for l, vals in enumerate(result_list):
+        print(*(data_partition_column_values + vals.tolist()), sep=DELIMITER)

teradataml/data/star_pivot.csv ADDED Viewed

@@ -0,0 +1,8 @@
+country,state,yr,qtr,sales,cogs,rating
+USA,CA,2001,Q1,30,15,A
+USA,NY,2001,Q1,45,25,D
+USA,CA,2001,Q2,50,20,A
+USA,CA,2001,Q2,5,5,B
+Canada,ON,2001,Q2,10,0,B
+Canada,BC,2001,Q3,15,0,A
+Canada,BC,2001,Q3,10,0,A

teradataml/data/target_udt_data.csv ADDED Viewed

@@ -0,0 +1,8 @@
+id,array_col
+1,"3.33e-05,0.2,0.1"
+2,"0.5,0.4,0.42"
+3,"1,0.8,0.9"
+4,"0.01,0.4,0.2"
+5,"0.93,0.4,0.7"
+6,"0.83,0.3,0.6"
+7,"0.73,0.5,0.7"

teradataml/data/templates/open_source_ml.json CHANGED Viewed

@@ -2,7 +2,9 @@
   "env_specs": [
     {
       "env_name": "openml_env",
-      "libs": "scikit-learn",
+      "libs": ["scikit-learn==1.5.1", "joblib==1.4.2", "numpy==1.23.5",
+               "scipy==1.14.0", "threadpoolctl==3.5.0", "lightgbm==3.3.3",
+               "pandas==2.2.3"],
       "desc": "DONT DELETE: OpenML environment"
     }
   ]

teradataml/data/teradataml_example.json CHANGED Viewed

@@ -1348,6 +1348,25 @@
     "radio":"FLOAT",
     "newspaper":"FLOAT",
     "sales":"FLOAT"
+  },
+  "timestamp_data":{
+    "id": "INTEGER",
+    "timestamp_col": "VARCHAR(50)",
+    "timestamp_col1": "BIGINT",
+    "format_col": "VARCHAR(50)",
+    "timezone_col": "VARCHAR(50)"
+  },
+  "interval_data":{
+    "id": "INTEGER",
+    "int_col": "BIGINT",
+    "value_col": "VARCHAR(30)",
+    "value_col1": "VARCHAR(30)",
+    "str_col1": "VARCHAR(30)",
+    "str_col2": "VARCHAR(30)"
+  },
+  "url_data": {
+    "id": "INTEGER",
+    "urls": "VARCHAR(60)",
+    "part": "VARCHAR(20)"
   }
 }

teradataml/data/timestamp_data.csv ADDED Viewed

@@ -0,0 +1,4 @@
+id,timestamp_col,timestamp_col1,format_col,timezone_col
+0,"2015-01-08 00:00:12.2",123456,"YYYY-MM-DD HH24:MI:SS.FF6","GMT"
+1,"2015-01-08 13:00",878986,"YYYY-MM-DD HH24:MI","America Pacific"
+2,"2015-01-08 00:00:12.2+10:00",45678910234,"YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM","GMT+10"

teradataml/data/titanic_dataset_unpivoted.csv ADDED Viewed

@@ -0,0 +1,19 @@
+passenger,AttributeName,AttributeValue,survived
+61,pclass,3,0
+1000,pclass,3,1
+40,pclass,3,1
+21,pclass,2,0
+61,gender,male,0
+1000,gender,,1
+40,gender,female,1
+21,gender,male,0
+2,pclass,1,1
+16,pclass,2,1
+7,pclass,1,0
+2,gender,female,1
+16,gender,female,1
+7,gender,male,0
+10,pclass,2,1
+4,pclass,1,1
+10,gender,female,1
+4,gender,female,1

teradataml/data/uaf_example.json CHANGED Viewed

@@ -471,5 +471,59 @@
     "CONF_OFF_v": "FLOAT",
     "CONF_LOW_v": "FLOAT",
     "CONF_HI_v": "FLOAT"
+  },
+  "dwt_dataTable":{
+	  "id": "INTEGER",
+	  "rowi": "INTEGER",
+	  "v": "FLOAT"
+  },
+  "dwt_filterTable":{
+	  "id": "INTEGER",
+	  "seq": "INTEGER",
+	  "lo": "FLOAT",
+	  "hi": "FLOAT"
+  },
+  "idwt_dataTable":{
+	  "id": "INTEGER",
+	  "rowi": "INTEGER",
+	  "approx": "FLOAT",
+	  "detail": "FLOAT"
+  },
+  "idwt_filterTable":{
+	  "id": "INTEGER",
+	  "seq": "INTEGER",
+	  "lo": "FLOAT",
+	  "hi": "FLOAT"
+  },
+   "dwt2d_dataTable":{
+	  "id": "INTEGER",
+	  "x": "INTEGER",
+	  "y": "INTEGER",
+	  "v": "FLOAT"
+  },
+  "idwt2d_dataTable":{
+	  "id": "INTEGER",
+	  "x": "INTEGER",
+	  "y": "INTEGER",
+	  "v": "FLOAT"
+  },
+  "covid_confirm_sd":{
+	  "city": "VARCHAR(15)",
+      "row_axis": "INTEGER",
+      "cnumber": "INTEGER"
+  },
+  "real_values":{
+	  "TD_TIMECODE": "TIMESTAMP(0)",
+      "id": "INTEGER",
+      "val": "FLOAT",
+      "<PTI_CLAUSE>": "(TIMESTAMP(0), DATE '2020-01-01', HOURS(1), COLUMNS(id), nonsequenced)"
+  },
+  "windowdfft":{
+    "id": "INTEGER",
+    "row_i": "INTEGER",
+    "v1": "FLOAT",
+    "v2": "FLOAT",
+    "v3": "FLOAT",
+    "v4": "FLOAT"
   }
-}
+}

teradataml/data/unpivot_example.json CHANGED Viewed

@@ -6,5 +6,20 @@
     "temp": "integer",
     "pressure": "real",
     "dewpoint": "varchar(30)"
+  },
+  "titanic_dataset_unpivoted":{
+    "passenger": "integer",
+    "AttributeName": "varchar(30)",
+    "AttributeValue": "varchar(30)",
+    "survived": "integer"
+  },
+  "star_pivot":{
+    "country": "varchar(30)",
+    "state": "varchar(30)",
+    "yr": "integer",
+    "qtr": "varchar(30)",
+    "sales": "integer",
+    "cogs": "integer",
+    "rating": "varchar(30)"
   }
 }

teradataml/data/url_data.csv ADDED Viewed

@@ -0,0 +1,9 @@
+"id","urls","part"
+0,"http://example.com:8080/path","FILE"
+1,"ftp://example.net:21/path","PATH"
+2,"https://example.net/path4/path5/path6?query4=value4#fragment3","REF"
+3,"https://www.facebook.com","HOST"
+4,"https://teracloud-pod-services-pod-account-service.dummyvalue.production.pods.teracloud.ninja/v1/accounts/acc-dummyvalue/user-environment-service/api/v1/","QUERY"
+5,"http://pg.example.ml/path150#fragment90","AUTHORITY"
+6,"smtp://user:password@smtp.example.com:21/file.txt","USERINFO"
+7,"https://www.google.com","PROTOCOL"

teradataml/data/vectordistance_example.json CHANGED Viewed

@@ -22,5 +22,9 @@
     "CallDuration": "REAL",
     "DataCounter": "REAL",
     "SMS": "REAL"
+  },
+  "target_udt_data":{
+    "id": "INTEGER",
+    "array_col":"AIVector"
   }
 }

teradataml/data/windowdfft.csv ADDED Viewed

@@ -0,0 +1,16 @@
+id,row_i,v1,v2,v3,v4
+3,1,0.0,1.4,1.0,1.0
+3,2,1.0,2.4,2.0,2.0
+3,3,2.0,3.4,3.0,3.0
+3,4,3.0,4.6,4.0,4.0
+3,5,0.0,5.9,5.0,5.0
+3,6,1.0,6.7,6.0,6.0
+3,7,2.0,7.7,7.0,7.0
+3,8,3.0,8.7,8.0,8.0
+3,9,0.0,9.9,9.0,9.0
+3,10,1.0,10.2,10.0,10.0
+3,11,2.0,11.2,11.0,11.0
+3,12,3.0,12.2,12.0,12.0
+3,13,1.0,10.2,13.0,13.0
+3,14,2.0,11.2,14.0,14.0
+3,15,3.0,12.2,15.0,15.0

teradataml/dataframe/copy_to.py CHANGED Viewed

@@ -30,7 +30,7 @@ from teradatasql import OperationalError
 from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
 from teradataml.utils.utils import execute_sql
 from teradataml.utils.validators import _Validators
-from teradatasqlalchemy.telemetry.queryband import collect_queryband
+from teradataml.telemetry_utils.queryband import collect_queryband
 @collect_queryband(queryband="CpToSql")

teradataml/dataframe/data_transfer.py CHANGED Viewed

@@ -27,7 +27,7 @@ from teradataml.dataframe.copy_to import copy_to_sql, _create_table_object, \
 from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
 from teradataml.dbutils.dbutils import _create_table, _execute_query_and_generate_pandas_df
 from teradataml.utils.validators import _Validators
-from teradatasqlalchemy.telemetry.queryband import collect_queryband
+from teradataml.telemetry_utils.queryband import collect_queryband
 @collect_queryband(queryband="fstExprt")
@@ -1958,7 +1958,8 @@ class _DataTransferUtils():
             dt_obj = _DataTransferUtils(df)
             ins_query = dt_obj._table_exists()
         """
-        return con.dialect.has_table(get_connection(), self.table_name, self.schema_name)
+        return con.dialect.has_table(get_connection(), self.table_name, self.schema_name,
+                                     table_only=True)
     def _get_fully_qualified_table_name(self, table_name=None, schema_name=None):
         """
@@ -2144,7 +2145,8 @@ class _DataTransferUtils():
             # drop the tables created by FastloadCSV.
             if not self.save_errors:
                 for table in fastloadcsv_err_tables:
-                    if conn.dialect.has_table(conn, table_name=table, schema=self.schema_name):
+                    if conn.dialect.has_table(conn, table_name=table, schema=self.schema_name,
+                                              table_only=True):
                         UtilFuncs._drop_table(self._get_fully_qualified_table_name(table))
                     err_warn_dict.update({"fastloadcsv_error_tables": []})
                 return err_warn_dict

teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

Potentially problematic release.

teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl