tdfs4ds 0.2.4.46__py3-none-any.whl → 0.2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,152 +0,0 @@
1
- import teradataml as tdml
2
-
3
def plot_graph(tddf, root_name='ml__'):
    """
    Visualizes the source-target relationships behind a teradataml DataFrame using a Sankey diagram.

    The DataFrame's SQL (``tddf.show_query()``) is handed to ``analyze_sql_query``, which
    returns an edge list with 'source' and 'target' columns plus per-node details
    ('target', 'columns', 'query') used to build the hover text of each node.

    :param tddf: teradataml.DataFrame
        The dataframe whose query lineage is plotted.
    :param root_name: str
        Forwarded unchanged to ``analyze_sql_query``.

    :Note: This function makes use of Plotly's Sankey diagram representation for visualization.

    :return: DataFrame
        The edge list that was plotted (one row per source -> target link), after a
        leading self-referencing row has been dropped. The diagram itself is displayed
        with ``fig.show()``.
    """
    # NOTE(review): `pd`, `go`, `sqlparse` and `analyze_sql_query` are used below but are
    # not imported in the visible part of this module - confirm where they come from.

    # Create the _table_name attribute for the teradataml DataFrame if it doesn't exist
    tddf._DataFrame__execute_node_and_set_table_name(tddf._nodeid, tddf._metaexpr)

    df, node_info = analyze_sql_query(
        tddf.show_query(), df=None, target=tddf._table_name, root_name=root_name
    )

    # Drop the first edge when it links a node to itself (compared case-insensitively).
    # The length guard avoids an IndexError when the edge list is empty.
    if len(df) > 0 and df['source'].values[0].lower() == df['target'].values[0].lower():
        df = df.iloc[1:, :]

    # Unique node labels, sources first, in order of first appearance
    labels = list(pd.concat([df['source'], df['target']]).unique())

    # Position of every label, built once so the index lookups below are O(1)
    label_position = {label: position for position, label in enumerate(labels)}

    # Mapping of node label -> {'columns': ..., 'query': ...}
    node_info_dict = pd.DataFrame(node_info).set_index('target').T.to_dict()

    # Textual substitutions that turn the formatted SQL into HTML line breaks.
    # They are applied in this exact order because later rules undo parts of earlier ones
    # (e.g. ')<br>AS' is folded back into ') AS').
    html_replacements = (
        ('\n', '<br>'),
        ('PARTITION BY', '<br>PARTITION BY'),
        ('USING', '<br>USING'),
        (' ON', '<br>ON'),
        (') ', ')<br>'),
        (')<br>AS', ') AS'),
        (', ', ',<br>'),
    )

    # Hover text for each label; labels without node details get an empty string
    hovertexts = []
    for label in labels:
        if label not in node_info_dict:
            hovertexts.append('')
            continue
        info = node_info_dict[label]
        text = (
            f"Columns:<br> {','.join(info['columns'])}<br> Query: "
            f"{sqlparse.format(info['query'], reindent=True, keyword_case='upper')}"
        )
        for needle, replacement in html_replacements:
            text = text.replace(needle, replacement)
        hovertexts.append(text)

    # Link thickness = number of columns of the source node (1 when the node is unknown)
    values = df['source'].apply(
        lambda x: len(node_info_dict[x]['columns']) if x in node_info_dict else 1
    )

    # Convert source and target names to their positions in the labels list
    source_indices = df['source'].apply(lambda x: label_position[x])
    target_indices = df['target'].apply(lambda x: label_position[x])

    # Construct the Sankey diagram with nodes (sources & targets) and links (relationships)
    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,  # Space between the nodes
            thickness=20,  # Node thickness
            line=dict(color="black", width=0.5),  # Node border properties
            label=labels,  # Labels for nodes
            color="blue",  # Node color
            customdata=hovertexts,  # Per-node hover text
            hovertemplate=' %{customdata}<extra></extra>',
        ),
        link=dict(
            source=source_indices,  # Link sources
            target=target_indices,  # Link targets
            value=values,  # Link thickness
        )
    )])

    # Customize the layout, such as setting the title and font size
    fig.update_layout(title_text="Hierarchical Data Visualization", font_size=10)

    # Display the Sankey diagram
    fig.show()

    return df
71
def materialize_view(tddf, view_name, schema_name):
    """
    Materializes a given teradataml DataFrame as a view in the database with sub-views, if needed.

    Every view listed in the 'target' column of the dependency graph returned by
    ``analyze_sql_query`` is re-created as ``<schema_name>.<view_name>_sub_<i>``, with all
    references between those views rewritten to the new names. The definition of the
    last view processed (the first target of the graph) is then re-issued once more as
    ``<schema_name>.<view_name>``.

    Parameters:
    :param tddf: teradataml.DataFrame
        The teradataml dataframe whose view needs to be materialized.
    :param view_name: str
        The name of the main view to be created.
    :param schema_name: str
        The schema in which the view and its sub-views are created.

    Returns:
    :return: teradataml.DataFrame
        A teradataml DataFrame representation of the created view.

    Raises:
    :raises ValueError:
        If the dependency graph contains no target view, so there is nothing to re-create.

    Notes:
        This function is specific to the teradataml library, and assumes the existence of certain attributes in the input DataFrame.
    """
    import re  # local import: the visible module header does not import `re`

    # NOTE(review): `analyze_sql_query` is not imported in the visible part of this
    # module - confirm where it comes from.

    def rename_views(sql, renames):
        # Apply every old -> new rename to the DDL text. Matching is case-insensitive so
        # the DDL does not have to be lower-cased (which would also alter string literals
        # in the view body), and a name is never matched as the prefix of a longer identifier.
        for old, new in renames.items():
            pattern = re.escape(old) + r'(?![A-Za-z0-9_])'
            sql = re.sub(pattern, lambda _match, new=new: new, sql, flags=re.IGNORECASE)
        return sql

    # Only the statement keyword of "CREATE [RECURSIVE] VIEW" is turned into REPLACE;
    # a blanket text replace would also corrupt identifiers such as "created_at".
    create_keyword = re.compile(r'\bcreate(?=\s+(?:recursive\s+)?view\b)', re.IGNORECASE)

    # Create the _table_name attribute for the teradataml DataFrame if it doesn't exist
    tddf._DataFrame__execute_node_and_set_table_name(tddf._nodeid, tddf._metaexpr)

    # Generate the dependency graph for the input DataFrame's SQL representation
    tddf_graph, _ = analyze_sql_query(tddf.show_query(), target=tddf._table_name)

    # Generate new names for sub-views based on the main view's name
    mapping = {
        old_name: f"{schema_name}.{view_name}_sub_{i}"
        for i, old_name in enumerate(tddf_graph['target'].values)
    }
    if not mapping:
        raise ValueError(
            "The dependency graph of the DataFrame contains no view to materialize."
        )

    # Re-create the sub-views under their new names, walking the graph in reverse order
    ddl = None
    root_sub_name = None
    for old_name, new_name in reversed(list(mapping.items())):
        # First column of the first row returned by SHOW VIEW holds the view definition
        ddl = tdml.execute_sql(f"SHOW VIEW {old_name}").fetchall()[0][0].replace('\r', '\n')
        ddl = create_keyword.sub('replace', ddl, count=1)
        ddl = rename_views(ddl, mapping)
        print('REPLACE VIEW ', new_name)
        tdml.execute_sql(ddl)
        root_sub_name = new_name

    # The last definition processed becomes the main view. The target name is qualified
    # with the schema so the view is created where the returned DataFrame reads it from.
    final_ddl = rename_views(ddl, {root_sub_name: f"{schema_name}.{view_name}"})

    # Execute the final query to create the main view
    print('REPLACE VIEW ', view_name)
    tdml.execute_sql(final_ddl)

    # Return a teradataml DataFrame representation of the created view
    return tdml.DataFrame(tdml.in_schema(schema_name, view_name))
127
-
128
-
129
def crystallize_view(tddf, view_name, schema_name):
    """
    Crystallizes a teradataml DataFrame as a database view; alias of ``materialize_view``.

    All three arguments are passed through unchanged and the result of
    ``materialize_view`` is returned as-is.

    Parameters:
    :param tddf: teradataml.DataFrame
        The teradataml dataframe whose view needs to be crystallized.
    :param view_name: str
        The name of the main view to be created.
    :param schema_name: str
        The schema in which the view should be created.

    Returns:
    :return: teradataml.DataFrame
        Whatever ``materialize_view`` returns for the same arguments.
    """
    # Pure delegation: no additional logic lives in this entry point.
    crystallized = materialize_view(tddf, view_name, schema_name)
    return crystallized