teradataml-20.0.0.2-py3-none-any.whl → teradataml-20.0.0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +196 -2
- teradataml/__init__.py +4 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +79 -4
- teradataml/analytics/json_parser/metadata.py +12 -3
- teradataml/analytics/json_parser/utils.py +7 -2
- teradataml/analytics/sqle/__init__.py +1 -0
- teradataml/analytics/table_operator/__init__.py +1 -1
- teradataml/analytics/uaf/__init__.py +1 -1
- teradataml/analytics/utils.py +4 -0
- teradataml/automl/data_preparation.py +3 -2
- teradataml/automl/feature_engineering.py +15 -7
- teradataml/automl/model_training.py +39 -33
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +35 -0
- teradataml/common/garbagecollector.py +2 -1
- teradataml/common/messagecodes.py +8 -2
- teradataml/common/messages.py +3 -1
- teradataml/common/sqlbundle.py +25 -3
- teradataml/common/utils.py +134 -9
- teradataml/context/context.py +20 -10
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/dataframe_example.json +18 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -2
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/dataframe/dataframe.py +543 -175
- teradataml/dataframe/functions.py +553 -25
- teradataml/dataframe/sql.py +184 -15
- teradataml/dbutils/dbutils.py +556 -18
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
- teradataml/options/__init__.py +7 -23
- teradataml/options/configure.py +29 -3
- teradataml/scriptmgmt/UserEnv.py +3 -3
- teradataml/scriptmgmt/lls_utils.py +74 -21
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +37 -38
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +33 -1
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
teradataml/dataframe/sql.py
CHANGED
|
@@ -222,6 +222,10 @@ class _MetaExpression(object):
|
|
|
222
222
|
def __repr__(self):
|
|
223
223
|
return repr(self.__t)
|
|
224
224
|
|
|
225
|
+
def _get_table_expr(self):
|
|
226
|
+
return self.__t
|
|
227
|
+
|
|
228
|
+
|
|
225
229
|
class _PandasTableExpression(TableExpression):
|
|
226
230
|
|
|
227
231
|
def _assign(self, drop_columns, **kw):
|
|
@@ -484,6 +488,7 @@ class _SQLTableExpression(_PandasTableExpression):
|
|
|
484
488
|
columns = []
|
|
485
489
|
for c in kw['column_order']:
|
|
486
490
|
name = c.strip()
|
|
491
|
+
# Get case-insensitive column names from Table object.
|
|
487
492
|
col = table.c.get(name, table.c.get(name.lower(), table.c.get(name.upper())))
|
|
488
493
|
|
|
489
494
|
if col is None:
|
|
@@ -5473,7 +5478,8 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5473
5478
|
self._env_name = kw.get("env_name", None)
|
|
5474
5479
|
self._delimiter = kw.get("delimiter", None)
|
|
5475
5480
|
self._quotechar = kw.get("quotechar", None)
|
|
5476
|
-
self.alias_name = self.compile() if self._udf is None else None
|
|
5481
|
+
self._udf_script = kw.get("udf_script", None)
|
|
5482
|
+
self.alias_name = self.compile() if (self._udf or self._udf_script) is None else None
|
|
5477
5483
|
|
|
5478
5484
|
@property
|
|
5479
5485
|
def expression(self):
|
|
@@ -5653,23 +5659,23 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5653
5659
|
"""
|
|
5654
5660
|
Calls the compile method of the underlying sqlalchemy.Column
|
|
5655
5661
|
"""
|
|
5656
|
-
|
|
5657
|
-
|
|
5658
|
-
|
|
5659
|
-
|
|
5660
|
-
|
|
5661
|
-
|
|
5662
|
-
|
|
5663
|
-
|
|
5664
|
-
|
|
5665
|
-
return str(self.expression.compile(*args, **kw))
|
|
5662
|
+
kw_new = dict({'dialect': td_dialect(),
|
|
5663
|
+
'compile_kwargs':
|
|
5664
|
+
{
|
|
5665
|
+
'include_table': False,
|
|
5666
|
+
'literal_binds': True
|
|
5667
|
+
}
|
|
5668
|
+
})
|
|
5669
|
+
if len(kw) != 0:
|
|
5670
|
+
kw_new.update(kw)
|
|
5671
|
+
return str(self.expression.compile(*args, **kw_new))
|
|
5666
5672
|
|
|
5667
5673
|
def compile_label(self, label):
|
|
5668
5674
|
"""
|
|
5669
5675
|
DESCRIPTION:
|
|
5670
5676
|
Compiles expression with label, by calling underlying sqlalchemy methods.
|
|
5671
5677
|
|
|
5672
|
-
|
|
5678
|
+
PARAMETERS:
|
|
5673
5679
|
label:
|
|
5674
5680
|
Required Argument.
|
|
5675
5681
|
Specifies the label to be used to alias the compiled expression.
|
|
@@ -5699,7 +5705,7 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5699
5705
|
with the "value". Use this function either to replace or remove
|
|
5700
5706
|
NA from Column.
|
|
5701
5707
|
|
|
5702
|
-
|
|
5708
|
+
PARAMETERS:
|
|
5703
5709
|
value:
|
|
5704
5710
|
Required Argument.
|
|
5705
5711
|
Specifies the replacement value for null values in the column.
|
|
@@ -6186,12 +6192,19 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
6186
6192
|
# If user has not passed any type, then set it to
|
|
6187
6193
|
# NullType().
|
|
6188
6194
|
type = sqlalc.sql.sqltypes.NullType()
|
|
6189
|
-
|
|
6195
|
+
# Boolean flag to treat function as an instance method.
|
|
6196
|
+
function_has_col_caller = column_function
|
|
6190
6197
|
# Generate the function syntax based on whether the
|
|
6191
6198
|
# function is column function or not.
|
|
6192
6199
|
if column_function:
|
|
6193
6200
|
name = quoted_name("{}.{}".format(col_name, func_name),
|
|
6194
6201
|
False)
|
|
6202
|
+
# Dynamic function gets called on teradataml._SQLColumnExpression type object.
|
|
6203
|
+
# 'expression' attribute of _SQLColumnExpression object holds
|
|
6204
|
+
# corresponding SQLAlchemy.Expression type object.
|
|
6205
|
+
# SQLAlchemy.Expression type object should be available from FunctionElement.
|
|
6206
|
+
# This 'func_caller' attribute points to that Expression object.
|
|
6207
|
+
func_caller = self.expression
|
|
6195
6208
|
else:
|
|
6196
6209
|
name = quoted_name(func_name, False)
|
|
6197
6210
|
|
|
@@ -10809,4 +10822,160 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
10809
10822
|
whens = case([((self != 0) & (base != 0) & (base.ln() != 0),
|
|
10810
10823
|
(self.ln() / base.ln()).cast(FLOAT))])
|
|
10811
10824
|
|
|
10812
|
-
return whens
|
|
10825
|
+
return whens
|
|
10826
|
+
|
|
10827
|
+
def isnan(self):
|
|
10828
|
+
"""
|
|
10829
|
+
DESCRIPTION:
|
|
10830
|
+
Function evaluates a variable or expression to determine if the
|
|
10831
|
+
floating-point argument is a NaN (Not-a-Number) value. When a database
|
|
10832
|
+
table contains a NaN value, the data is undefined and unrepresentable
|
|
10833
|
+
in floating-point arithmetic. For example, division by 0, or the square root
|
|
10834
|
+
of a negative number would return a NaN result.
|
|
10835
|
+
|
|
10836
|
+
RETURNS:
|
|
10837
|
+
ColumnExpression.
|
|
10838
|
+
|
|
10839
|
+
EXAMPLES:
|
|
10840
|
+
# Load the data to run the example.
|
|
10841
|
+
>>> load_example_data("teradataml","titanic")
|
|
10842
|
+
|
|
10843
|
+
# Create a DataFrame on 'titanic' table.
|
|
10844
|
+
>>> titanic = DataFrame.from_table('titanic')
|
|
10845
|
+
>>> df = titanic.select(["passenger", "age", "fare"])
|
|
10846
|
+
>>> print(df)
|
|
10847
|
+
age fare
|
|
10848
|
+
passenger
|
|
10849
|
+
326 36.0 135.6333
|
|
10850
|
+
183 9.0 31.3875
|
|
10851
|
+
652 18.0 23.0000
|
|
10852
|
+
40 14.0 11.2417
|
|
10853
|
+
774 NaN 7.2250
|
|
10854
|
+
366 30.0 7.2500
|
|
10855
|
+
509 28.0 22.5250
|
|
10856
|
+
795 25.0 7.8958
|
|
10857
|
+
61 22.0 7.2292
|
|
10858
|
+
469 NaN 7.7250
|
|
10859
|
+
>>>
|
|
10860
|
+
|
|
10861
|
+
# Example 1: Find whether 'fare' column contains NaN values or not.
|
|
10862
|
+
>>> nan_df = df.assign(nanornot = df.fare.isnan())
|
|
10863
|
+
>>> print(nan_df)
|
|
10864
|
+
age fare nanornot
|
|
10865
|
+
passenger
|
|
10866
|
+
326 36.0 135.6333 0
|
|
10867
|
+
183 9.0 31.3875 0
|
|
10868
|
+
652 18.0 23.0000 0
|
|
10869
|
+
40 14.0 11.2417 0
|
|
10870
|
+
774 NaN 7.2250 0
|
|
10871
|
+
366 30.0 7.2500 0
|
|
10872
|
+
509 28.0 22.5250 0
|
|
10873
|
+
795 25.0 7.8958 0
|
|
10874
|
+
61 22.0 7.2292 0
|
|
10875
|
+
469 NaN 7.7250 0
|
|
10876
|
+
>>>
|
|
10877
|
+
"""
|
|
10878
|
+
return _SQLColumnExpression(literal_column(f"TD_ISNAN({self.compile()})"), type=INTEGER)
|
|
10879
|
+
|
|
10880
|
+
def isinf(self):
|
|
10881
|
+
"""
|
|
10882
|
+
DESCRIPTION:
|
|
10883
|
+
Function evaluates a variable or expression to determine if the
|
|
10884
|
+
floating-point argument is an infinite number. This function determines
|
|
10885
|
+
if a database table contains positive or negative infinite values.
|
|
10886
|
+
|
|
10887
|
+
RETURNS:
|
|
10888
|
+
ColumnExpression.
|
|
10889
|
+
|
|
10890
|
+
EXAMPLES:
|
|
10891
|
+
# Load the data to run the example.
|
|
10892
|
+
>>> load_example_data("teradataml","titanic")
|
|
10893
|
+
|
|
10894
|
+
# Create a DataFrame on 'titanic' table.
|
|
10895
|
+
>>> titanic = DataFrame.from_table('titanic')
|
|
10896
|
+
>>> df = titanic.select(["passenger", "age", "fare"])
|
|
10897
|
+
>>> print(df)
|
|
10898
|
+
age fare
|
|
10899
|
+
passenger
|
|
10900
|
+
326 36.0 135.6333
|
|
10901
|
+
183 9.0 31.3875
|
|
10902
|
+
652 18.0 23.0000
|
|
10903
|
+
40 14.0 11.2417
|
|
10904
|
+
774 NaN 7.2250
|
|
10905
|
+
366 30.0 7.2500
|
|
10906
|
+
509 28.0 22.5250
|
|
10907
|
+
795 25.0 7.8958
|
|
10908
|
+
61 22.0 7.2292
|
|
10909
|
+
469 NaN 7.7250
|
|
10910
|
+
>>>
|
|
10911
|
+
|
|
10912
|
+
# Example 1: Find whether 'fare' column contains infinity values or not.
|
|
10913
|
+
>>> inf_df = df.assign(infornot = df.fare.isinf())
|
|
10914
|
+
>>> print(inf_df)
|
|
10915
|
+
age fare infornot
|
|
10916
|
+
passenger
|
|
10917
|
+
326 36.0 135.6333 0
|
|
10918
|
+
183 9.0 31.3875 0
|
|
10919
|
+
652 18.0 23.0000 0
|
|
10920
|
+
40 14.0 11.2417 0
|
|
10921
|
+
774 NaN 7.2250 0
|
|
10922
|
+
366 30.0 7.2500 0
|
|
10923
|
+
509 28.0 22.5250 0
|
|
10924
|
+
795 25.0 7.8958 0
|
|
10925
|
+
61 22.0 7.2292 0
|
|
10926
|
+
469 NaN 7.7250 0
|
|
10927
|
+
>>>
|
|
10928
|
+
"""
|
|
10929
|
+
return _SQLColumnExpression(literal_column(f"TD_ISINF({self.compile()})"), type=INTEGER)
|
|
10930
|
+
|
|
10931
|
+
def isfinite(self):
|
|
10932
|
+
"""
|
|
10933
|
+
DESCRIPTION:
|
|
10934
|
+
Function evaluates a variable or expression to determine if
|
|
10935
|
+
it is a finite floating value. A finite floating value is not
|
|
10936
|
+
a NaN (Not a Number) value and is not an infinity value.
|
|
10937
|
+
|
|
10938
|
+
RETURNS:
|
|
10939
|
+
ColumnExpression.
|
|
10940
|
+
|
|
10941
|
+
EXAMPLES:
|
|
10942
|
+
# Load the data to run the example.
|
|
10943
|
+
>>> load_example_data("teradataml","titanic")
|
|
10944
|
+
|
|
10945
|
+
# Create a DataFrame on 'titanic' table.
|
|
10946
|
+
>>> titanic = DataFrame.from_table('titanic')
|
|
10947
|
+
>>> df = titanic.select(["passenger", "age", "fare"])
|
|
10948
|
+
>>> print(df)
|
|
10949
|
+
age fare
|
|
10950
|
+
passenger
|
|
10951
|
+
326 36.0 135.6333
|
|
10952
|
+
183 9.0 31.3875
|
|
10953
|
+
652 18.0 23.0000
|
|
10954
|
+
40 14.0 11.2417
|
|
10955
|
+
774 NaN 7.2250
|
|
10956
|
+
366 30.0 7.2500
|
|
10957
|
+
509 28.0 22.5250
|
|
10958
|
+
795 25.0 7.8958
|
|
10959
|
+
61 22.0 7.2292
|
|
10960
|
+
469 NaN 7.7250
|
|
10961
|
+
>>>
|
|
10962
|
+
|
|
10963
|
+
# Example 1: Find whether 'fare' column contains finite values or not.
|
|
10964
|
+
>>> finite_df = df.assign(finiteornot = df.fare.isfinite())
|
|
10965
|
+
>>> print(finite_df)
|
|
10966
|
+
age fare finiteornot
|
|
10967
|
+
passenger
|
|
10968
|
+
530 23.0 11.500 1
|
|
10969
|
+
591 35.0 7.125 1
|
|
10970
|
+
387 1.0 46.900 1
|
|
10971
|
+
856 18.0 9.350 1
|
|
10972
|
+
244 22.0 7.125 1
|
|
10973
|
+
713 48.0 52.000 1
|
|
10974
|
+
448 34.0 26.550 1
|
|
10975
|
+
122 NaN 8.050 1
|
|
10976
|
+
734 23.0 13.000 1
|
|
10977
|
+
265 NaN 7.750 1
|
|
10978
|
+
>>>
|
|
10979
|
+
|
|
10980
|
+
"""
|
|
10981
|
+
return _SQLColumnExpression(literal_column(f"TD_ISFINITE({self.compile()})"), type=INTEGER)
|