topologicpy 0.8.98__py3-none-any.whl → 0.8.99__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
topologicpy/Graph.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2025
1
+ # Copyright (C) 2026
2
2
  # Wassim Jabi <wassim.jabi@gmail.com>
3
3
  #
4
4
  # This program is free software: you can redistribute it and/or modify it under
@@ -3102,8 +3102,349 @@ class Graph:
3102
3102
  zMax = zMax,
3103
3103
  tolerance = tolerance
3104
3104
  )
3105
+
3105
3106
  @staticmethod
3106
3107
  def ByCSVPath(path,
3108
+ graphIDHeader="graph_id", graphLabelHeader="label", graphFeaturesHeader="feat", graphFeaturesKeys=None,
3109
+ edgeSRCHeader="src_id", edgeDSTHeader="dst_id", edgeLabelHeader="label",
3110
+ edgeTrainMaskHeader="train_mask", edgeValidateMaskHeader="val_mask", edgeTestMaskHeader="test_mask",
3111
+ edgeFeaturesHeader="feat", edgeFeaturesKeys=None,
3112
+ nodeIDHeader="node_id", nodeLabelHeader="label",
3113
+ nodeTrainMaskHeader="train_mask", nodeValidateMaskHeader="val_mask", nodeTestMaskHeader="test_mask",
3114
+ nodeFeaturesHeader="feat", nodeXHeader="X", nodeYHeader="Y", nodeZHeader="Z",
3115
+ nodeFeaturesKeys=None,
3116
+ tolerance=0.0001, silent=False):
3117
+ """
3118
+ Imports TopologicPy graphs from a folder containing CSV files (graphs.csv, nodes.csv, edges.csv)
3119
+ exported using the *new* TopologicPy CSV format for PyTorch Geometric / DGL-style datasets.
3120
+
3121
+ New format changes handled
3122
+ --------------------------
3123
+ - Graph, node, and edge features are stored in *separate numeric columns*:
3124
+ <featuresHeader>_0, <featuresHeader>_1, <featuresHeader>_2, ...
3125
+ (instead of a single comma-separated string column).
3126
+
3127
+ - Graph label and graph features are embedded in the returned graph's dictionary.
3128
+
3129
+ Parameters
3130
+ ----------
3131
+ path : str
3132
+ The path to the folder containing graphs.csv, nodes.csv, edges.csv.
3133
+ graphIDHeader : str , optional
3134
+ The graph id column header. Default is "graph_id".
3135
+ graphLabelHeader : str , optional
3136
+ The graph label column header. Default is "label".
3137
+ graphFeaturesHeader : str , optional
3138
+ The graph features prefix. Feature columns start with "<graphFeaturesHeader>_". Default is "feat".
3139
+ graphFeaturesKeys : list , optional
3140
+ If provided, these keys are used to store graph features in the graph dictionary.
3141
+ Length must match number of graph feature columns found. If None, keys will be
3142
+ "<graphFeaturesHeader>_0", "<graphFeaturesHeader>_1", ...
3143
+ edgeSRCHeader : str , optional
3144
+ Edge source node id header. Default is "src_id".
3145
+ edgeDSTHeader : str , optional
3146
+ Edge destination node id header. Default is "dst_id".
3147
+ edgeLabelHeader : str , optional
3148
+ Edge label header. Default is "label".
3149
+ edgeTrainMaskHeader, edgeValidateMaskHeader, edgeTestMaskHeader : str , optional
3150
+ Edge mask headers. Defaults are "train_mask", "val_mask", "test_mask".
3151
+ edgeFeaturesHeader : str , optional
3152
+ Edge features prefix. Feature columns start with "<edgeFeaturesHeader>_". Default is "feat".
3153
+ edgeFeaturesKeys : list , optional
3154
+ If provided, these keys are used to store edge features in the edge dictionary.
3155
+ Length must match number of edge feature columns found. If None, keys will be
3156
+ "<edgeFeaturesHeader>_0", "<edgeFeaturesHeader>_1", ...
3157
+ nodeIDHeader : str , optional
3158
+ Node id header. Default is "node_id".
3159
+ nodeLabelHeader : str , optional
3160
+ Node label header. Default is "label".
3161
+ nodeTrainMaskHeader, nodeValidateMaskHeader, nodeTestMaskHeader : str , optional
3162
+ Node mask headers. Defaults are "train_mask", "val_mask", "test_mask".
3163
+ nodeFeaturesHeader : str , optional
3164
+ Node features prefix. Feature columns start with "<nodeFeaturesHeader>_". Default is "feat".
3165
+ nodeXHeader, nodeYHeader, nodeZHeader : str , optional
3166
+ Node coordinate headers. Defaults are "X", "Y", "Z".
3167
+ If missing, coordinates will be generated deterministically from node_id.
3168
+ nodeFeaturesKeys : list , optional
3169
+ If provided, these keys are used to store node features in the node dictionary.
3170
+ Length must match number of node feature columns found. If None, keys will be
3171
+ "<nodeFeaturesHeader>_0", "<nodeFeaturesHeader>_1", ...
3172
+ tolerance : float , optional
3173
+ Desired tolerance. Default is 0.0001.
3174
+ silent : bool , optional
3175
+ If True, warnings/errors are suppressed. Default is False.
3176
+
3177
+ Returns
3178
+ -------
3179
+ list
3180
+ A list of imported TopologicPy graphs.
3181
+ """
3182
+ from topologicpy.Vertex import Vertex
3183
+ from topologicpy.Edge import Edge
3184
+ from topologicpy.Graph import Graph
3185
+ from topologicpy.Topology import Topology
3186
+ from topologicpy.Dictionary import Dictionary
3187
+
3188
+ import os
3189
+ from os.path import exists, isdir
3190
+ import pandas as pd
3191
+ import numbers
3192
+
3193
+ def _warn(msg):
3194
+ if not silent:
3195
+ print(msg)
3196
+
3197
+ def _feature_columns(df, prefix):
3198
+ # e.g. feat_0, feat_1, ...
3199
+ cols = [c for c in df.columns if isinstance(c, str) and c.startswith(prefix + "_")]
3200
+ def _key(c):
3201
+ parts = c.rsplit("_", 1)
3202
+ if len(parts) == 2 and parts[1].isdigit():
3203
+ return int(parts[1])
3204
+ return 10**9
3205
+ return sorted(cols, key=_key)
3206
+
3207
+ def _mask_to_int(train_mask, val_mask, test_mask):
3208
+ try:
3209
+ t = bool(train_mask)
3210
+ v = bool(val_mask)
3211
+ te = bool(test_mask)
3212
+ except:
3213
+ return 0
3214
+ if [t, v, te] == [True, False, False]:
3215
+ return 0
3216
+ if [t, v, te] == [False, True, False]:
3217
+ return 1
3218
+ if [t, v, te] == [False, False, True]:
3219
+ return 2
3220
+ return 0
3221
+
3222
+ if not exists(path):
3223
+ _warn("Graph.ByCSVPath - Error: the input path parameter does not exist. Returning None.")
3224
+ return None
3225
+ if not isdir(path):
3226
+ _warn("Graph.ByCSVPath - Error: the input path parameter is not a folder. Returning None.")
3227
+ return None
3228
+
3229
+ graphs_csv = os.path.join(path, "graphs.csv")
3230
+ edges_csv = os.path.join(path, "edges.csv")
3231
+ nodes_csv = os.path.join(path, "nodes.csv")
3232
+
3233
+ if not exists(edges_csv):
3234
+ _warn("Graph.ByCSVPath - Error: edges.csv not found. Returning None.")
3235
+ return None
3236
+ if not exists(nodes_csv):
3237
+ _warn("Graph.ByCSVPath - Error: nodes.csv not found. Returning None.")
3238
+ return None
3239
+
3240
+ edges_df = pd.read_csv(edges_csv)
3241
+ nodes_df = pd.read_csv(nodes_csv)
3242
+
3243
+ if exists(graphs_csv):
3244
+ graphs_df = pd.read_csv(graphs_csv)
3245
+ else:
3246
+ _warn("Graph.ByCSVPath - Warning: graphs.csv not found. Assuming a single graph with graph_id=0.")
3247
+ graphs_df = pd.DataFrame([{graphIDHeader: 0, graphLabelHeader: 0}])
3248
+
3249
+ # Feature columns (new format)
3250
+ graph_feat_cols = _feature_columns(graphs_df, graphFeaturesHeader)
3251
+ node_feat_cols = _feature_columns(nodes_df, nodeFeaturesHeader)
3252
+ edge_feat_cols = _feature_columns(edges_df, edgeFeaturesHeader)
3253
+
3254
+ # Feature keys mapping
3255
+ if graphFeaturesKeys is None:
3256
+ graph_feat_keys = graph_feat_cols[:] # store using column names
3257
+ else:
3258
+ graph_feat_keys = list(graphFeaturesKeys)
3259
+ if len(graph_feat_keys) != len(graph_feat_cols):
3260
+ _warn("Graph.ByCSVPath - Error: graphFeaturesKeys length does not match number of graph feature columns.")
3261
+ return None
3262
+
3263
+ if nodeFeaturesKeys is None:
3264
+ node_feat_keys = node_feat_cols[:]
3265
+ else:
3266
+ node_feat_keys = list(nodeFeaturesKeys)
3267
+ if len(node_feat_keys) != len(node_feat_cols):
3268
+ _warn("Graph.ByCSVPath - Error: nodeFeaturesKeys length does not match number of node feature columns.")
3269
+ return None
3270
+
3271
+ if edgeFeaturesKeys is None:
3272
+ edge_feat_keys = edge_feat_cols[:]
3273
+ else:
3274
+ edge_feat_keys = list(edgeFeaturesKeys)
3275
+ if len(edge_feat_keys) != len(edge_feat_cols):
3276
+ _warn("Graph.ByCSVPath - Error: edgeFeaturesKeys length does not match number of edge feature columns.")
3277
+ return None
3278
+
3279
+ # Group by graph_id
3280
+ if graphIDHeader not in nodes_df.columns or graphIDHeader not in edges_df.columns:
3281
+ _warn("Graph.ByCSVPath - Error: graph_id header not found in nodes.csv or edges.csv. Returning None.")
3282
+ return None
3283
+
3284
+ grouped_nodes = nodes_df.groupby(graphIDHeader)
3285
+ grouped_edges = edges_df.groupby(graphIDHeader)
3286
+
3287
+ # Build per-graph vertices and a node_id->vertex mapping
3288
+ vertices_by_gid = {}
3289
+ node_map_by_gid = {}
3290
+
3291
+ for gid, g_nodes in grouped_nodes:
3292
+ # sort by node_id for consistent indexing, but keep mapping by id
3293
+ if nodeIDHeader in g_nodes.columns:
3294
+ g_nodes = g_nodes.sort_values(nodeIDHeader)
3295
+ vertices = []
3296
+ node_id_to_vertex = {}
3297
+
3298
+ for _, row in g_nodes.iterrows():
3299
+ node_id = int(row[nodeIDHeader]) if nodeIDHeader in row else len(vertices)
3300
+
3301
+ # coordinates (optional)
3302
+ x = row[nodeXHeader] if nodeXHeader in g_nodes.columns else None
3303
+ y = row[nodeYHeader] if nodeYHeader in g_nodes.columns else None
3304
+ z = row[nodeZHeader] if nodeZHeader in g_nodes.columns else None
3305
+
3306
+ # fallback deterministic coordinates if missing/non-numeric
3307
+ if not isinstance(x, numbers.Number): x = float(node_id)
3308
+ if not isinstance(y, numbers.Number): y = 0.0
3309
+ if not isinstance(z, numbers.Number): z = 0.0
3310
+
3311
+ v = Vertex.ByCoordinates(float(x), float(y), float(z))
3312
+ if not Topology.IsInstance(v, "Vertex"):
3313
+ _warn("Graph.ByCSVPath - Warning: Failed to create a vertex.")
3314
+ continue
3315
+
3316
+ # label + mask (optional)
3317
+ v_label = row[nodeLabelHeader] if nodeLabelHeader in g_nodes.columns else 0
3318
+ if nodeTrainMaskHeader in g_nodes.columns and nodeValidateMaskHeader in g_nodes.columns and nodeTestMaskHeader in g_nodes.columns:
3319
+ mask = _mask_to_int(row[nodeTrainMaskHeader], row[nodeValidateMaskHeader], row[nodeTestMaskHeader])
3320
+ else:
3321
+ mask = 0
3322
+
3323
+ # features (new format)
3324
+ feat_vals = []
3325
+ for c in node_feat_cols:
3326
+ try:
3327
+ feat_vals.append(float(row[c]))
3328
+ except:
3329
+ feat_vals.append(0.0)
3330
+
3331
+ node_keys = [nodeIDHeader, nodeLabelHeader, "mask"] + node_feat_keys
3332
+ node_vals = [node_id, v_label, mask] + feat_vals
3333
+ d = Dictionary.ByKeysValues(node_keys, node_vals)
3334
+ if Topology.IsInstance(d, "Dictionary"):
3335
+ v = Topology.SetDictionary(v, d)
3336
+
3337
+ vertices.append(v)
3338
+ node_id_to_vertex[node_id] = v
3339
+
3340
+ vertices_by_gid[gid] = vertices
3341
+ node_map_by_gid[gid] = node_id_to_vertex
3342
+
3343
+ # Build per-graph edges
3344
+ edges_by_gid = {}
3345
+
3346
+ for gid, g_edges in grouped_edges:
3347
+ node_id_to_vertex = node_map_by_gid.get(gid, {})
3348
+ edges = []
3349
+
3350
+ for _, row in g_edges.iterrows():
3351
+ try:
3352
+ src_id = int(row[edgeSRCHeader])
3353
+ dst_id = int(row[edgeDSTHeader])
3354
+ except:
3355
+ continue
3356
+
3357
+ if src_id == dst_id:
3358
+ continue
3359
+
3360
+ v_src = node_id_to_vertex.get(src_id, None)
3361
+ v_dst = node_id_to_vertex.get(dst_id, None)
3362
+ if v_src is None or v_dst is None:
3363
+ continue
3364
+
3365
+ try:
3366
+ e = Edge.ByVertices([v_src, v_dst], tolerance=tolerance)
3367
+ except:
3368
+ e = None
3369
+
3370
+ if not Topology.IsInstance(e, "Edge"):
3371
+ continue
3372
+
3373
+ e_label = row[edgeLabelHeader] if edgeLabelHeader in g_edges.columns else 0
3374
+
3375
+ if edgeTrainMaskHeader in g_edges.columns and edgeValidateMaskHeader in g_edges.columns and edgeTestMaskHeader in g_edges.columns:
3376
+ mask = _mask_to_int(row[edgeTrainMaskHeader], row[edgeValidateMaskHeader], row[edgeTestMaskHeader])
3377
+ else:
3378
+ mask = 0
3379
+
3380
+ feat_vals = []
3381
+ for c in edge_feat_cols:
3382
+ try:
3383
+ feat_vals.append(float(row[c]))
3384
+ except:
3385
+ feat_vals.append(0.0)
3386
+
3387
+ edge_keys = [edgeSRCHeader, edgeDSTHeader, edgeLabelHeader, "mask"] + edge_feat_keys
3388
+ edge_vals = [src_id, dst_id, e_label, mask] + feat_vals
3389
+ d = Dictionary.ByKeysValues(edge_keys, edge_vals)
3390
+ if Topology.IsInstance(d, "Dictionary"):
3391
+ e = Topology.SetDictionary(e, d)
3392
+
3393
+ edges.append(e)
3394
+
3395
+ edges_by_gid[gid] = edges
3396
+
3397
+ # Build graphs and embed graph label + features in graph dictionary
3398
+ graphs = []
3399
+
3400
+ # iterate graphs_df rows (authoritative list of graph_ids)
3401
+ if graphIDHeader not in graphs_df.columns:
3402
+ _warn("Graph.ByCSVPath - Error: graph_id header not found in graphs.csv. Returning None.")
3403
+ return None
3404
+
3405
+ for _, grow in graphs_df.iterrows():
3406
+ gid = int(grow[graphIDHeader])
3407
+
3408
+ verts = vertices_by_gid.get(gid, [])
3409
+ eds = edges_by_gid.get(gid, [])
3410
+
3411
+ if len(verts) < 1:
3412
+ _warn(f"Graph.ByCSVPath - Warning: Graph id {gid} has no vertices. Skipping.")
3413
+ continue
3414
+
3415
+ g = Graph.ByVerticesEdges(verts, eds)
3416
+ if not Topology.IsInstance(g, "Graph"):
3417
+ _warn(f"Graph.ByCSVPath - Warning: Failed to create graph id {gid}. Skipping.")
3418
+ continue
3419
+
3420
+ # graph label
3421
+ g_label = grow[graphLabelHeader] if graphLabelHeader in graphs_df.columns else 0
3422
+
3423
+ # graph features (new format)
3424
+ g_feat_vals = []
3425
+ for c in graph_feat_cols:
3426
+ try:
3427
+ g_feat_vals.append(float(grow[c]))
3428
+ except:
3429
+ g_feat_vals.append(0.0)
3430
+
3431
+ graph_keys = [graphIDHeader, graphLabelHeader] + graph_feat_keys
3432
+ graph_vals = [gid, g_label] + g_feat_vals
3433
+
3434
+ d = Dictionary.ByKeysValues(graph_keys, graph_vals)
3435
+ if Topology.IsInstance(d, "Dictionary"):
3436
+ try:
3437
+ g = Graph.SetDictionary(g, d)
3438
+ except:
3439
+ # fallback if Graph.SetDictionary not available in your build
3440
+ g = Topology.SetDictionary(g, d)
3441
+
3442
+ graphs.append(g)
3443
+
3444
+ return graphs
3445
+
3446
+ @staticmethod
3447
+ def ByCSVPath_old(path,
3107
3448
  graphIDHeader="graph_id", graphLabelHeader="label", graphFeaturesHeader="feat", graphFeaturesKeys=[],
3108
3449
  edgeSRCHeader="src_id", edgeDSTHeader="dst_id", edgeLabelHeader="label", edgeTrainMaskHeader="train_mask",
3109
3450
  edgeValidateMaskHeader="val_mask", edgeTestMaskHeader="test_mask", edgeFeaturesHeader="feat", edgeFeaturesKeys=[],
@@ -9828,7 +10169,207 @@ class Graph:
9828
10169
  return status
9829
10170
 
9830
10171
  @staticmethod
9831
- def ExportToCSV(graph, path, graphLabel, graphFeatures="",
10172
def ExportToCSV(graph,
                path,

                graphLabelKey="label",
                defaultGraphLabel=0,
                graphFeaturesKeys=None,
                graphIDHeader="graph_id",
                graphLabelHeader="label",
                graphFeaturesHeader="feat",

                edgeLabelKey="label",
                defaultEdgeLabel=0,
                edgeFeaturesKeys=None,
                edgeSRCHeader="src_id",
                edgeDSTHeader="dst_id",
                edgeLabelHeader="label",
                edgeFeaturesHeader="feat",
                edgeTrainMaskHeader="train_mask",
                edgeValidateMaskHeader="val_mask",
                edgeTestMaskHeader="test_mask",
                edgeMaskKey="mask",
                edgeTrainRatio=0.8,
                edgeValidateRatio=0.1,
                edgeTestRatio=0.1,
                bidirectional=True,

                nodeLabelKey="label",
                defaultNodeLabel=0,
                nodeFeaturesKeys=None,
                nodeIDHeader="node_id",
                nodeLabelHeader="label",
                nodeFeaturesHeader="feat",
                nodeTrainMaskHeader="train_mask",
                nodeValidateMaskHeader="val_mask",
                nodeTestMaskHeader="test_mask",
                nodeMaskKey="mask",
                nodeTrainRatio=0.8,
                nodeValidateRatio=0.1,
                nodeTestRatio=0.1,

                mantissa=6,
                tolerance=0.0001,
                overwrite=False,
                silent=False):
    """
    Exports the input graph (or list of graphs) into a set of CSV files compatible with DGL.

    This is a convenience wrapper: the input is normalised to a list, anything that is
    not a graph is dropped, and the remaining graphs are forwarded unchanged, together
    with every other argument, to Graph.ExportGraphsToCSV.

    Parameters
    ----------
    graph : topologic_core.Graph or list
        The input graph or list of graphs.
    path : str
        The desired path to the output folder where the graphs, edges, and nodes CSV files will be saved.
    graphLabelKey : str , optional
        The graph label dictionary key saved in each graph. Default is "label".
    defaultGraphLabel : int , optional
        The default graph label to use if no graph label is found. Default is 0.
    graphFeaturesKeys : list , optional
        The list of feature dictionary keys saved in the dictionaries of graphs. Default is None.
    graphIDHeader : str , optional
        The desired graph ID column header. Default is "graph_id".
    graphLabelHeader : str , optional
        The desired graph label column header. Default is "label".
    graphFeaturesHeader : str , optional
        The desired graph features column header. Default is "feat".
    edgeLabelKey : str , optional
        The edge label dictionary key saved in each graph edge. Default is "label".
    defaultEdgeLabel : int , optional
        The default edge label to use if no edge label is found. Default is 0.
    edgeFeaturesKeys : list , optional
        The list of feature dictionary keys saved in the dictionaries of edges. Default is None.
    edgeSRCHeader : str , optional
        The desired edge source column header. Default is "src_id".
    edgeDSTHeader : str , optional
        The desired edge destination column header. Default is "dst_id".
    edgeLabelHeader : str , optional
        The desired edge label column header. Default is "label".
    edgeFeaturesHeader : str , optional
        The desired edge features column header. Default is "feat".
    edgeTrainMaskHeader : str , optional
        The desired edge train mask column header. Default is "train_mask".
    edgeValidateMaskHeader : str , optional
        The desired edge validate mask column header. Default is "val_mask".
    edgeTestMaskHeader : str , optional
        The desired edge test mask column header. Default is "test_mask".
    edgeMaskKey : str , optional
        The dictionary key where the edge train, validate, test category is to be found. The value should be
        0 for train, 1 for validate, and 2 for test. If no key is found, the ratio of train/validate/test
        will be used. Default is "mask".
    edgeTrainRatio : float , optional
        The desired ratio of the edge data to use for training. The number must be between 0 and 1.
        Default is 0.8 which means 80% of the data will be used for training.
        This value is ignored if an edgeMaskKey is found.
    edgeValidateRatio : float , optional
        The desired ratio of the edge data to use for validation. The number must be between 0 and 1.
        Default is 0.1 which means 10% of the data will be used for validation.
        This value is ignored if an edgeMaskKey is found.
    edgeTestRatio : float , optional
        The desired ratio of the edge data to use for testing. The number must be between 0 and 1.
        Default is 0.1 which means 10% of the data will be used for testing.
        This value is ignored if an edgeMaskKey is found.
    bidirectional : bool , optional
        If set to True, a reversed edge will also be saved for each edge in the graph. Otherwise, it will not. Default is True.
    nodeLabelKey : str , optional
        The node label dictionary key saved in each graph vertex. Default is "label".
    defaultNodeLabel : int , optional
        The default node label to use if no node label is found. Default is 0.
    nodeFeaturesKeys : list , optional
        The list of features keys saved in the dictionaries of nodes. Default is None.
    nodeIDHeader : str , optional
        The desired node ID column header. Default is "node_id".
    nodeLabelHeader : str , optional
        The desired node label column header. Default is "label".
    nodeFeaturesHeader : str , optional
        The desired node features column header. Default is "feat".
    nodeTrainMaskHeader : str , optional
        The desired node train mask column header. Default is "train_mask".
    nodeValidateMaskHeader : str , optional
        The desired node validate mask column header. Default is "val_mask".
    nodeTestMaskHeader : str , optional
        The desired node test mask column header. Default is "test_mask".
    nodeMaskKey : str , optional
        The dictionary key where the node train, validate, test category is to be found. The value should be
        0 for train, 1 for validate, and 2 for test. If no key is found, the ratio of train/validate/test
        will be used. Default is "mask".
    nodeTrainRatio : float , optional
        The desired ratio of the node data to use for training. The number must be between 0 and 1.
        Default is 0.8 which means 80% of the data will be used for training.
        This value is ignored if a nodeMaskKey is found.
    nodeValidateRatio : float , optional
        The desired ratio of the node data to use for validation. The number must be between 0 and 1.
        Default is 0.1 which means 10% of the data will be used for validation.
        This value is ignored if a nodeMaskKey is found.
    nodeTestRatio : float , optional
        The desired ratio of the node data to use for testing. The number must be between 0 and 1.
        Default is 0.1 which means 10% of the data will be used for testing.
        This value is ignored if a nodeMaskKey is found.
    mantissa : int , optional
        The number of decimal places to round the result to. Default is 6.
    tolerance : float , optional
        The desired tolerance. Default is 0.0001.
    overwrite : bool , optional
        If set to True, any existing files are overwritten. Otherwise, the input list of graphs is appended to the end of each file. Default is False.
    silent : bool , optional
        If set to True no warnings or errors are printed. Default is False.

    Returns
    -------
    bool
        The result of Graph.ExportGraphsToCSV (documented there as True on success).
        None is returned if the input contains no valid graphs.

    """
    from topologicpy.Topology import Topology

    # Accept a single graph as well as a list, then keep only genuine graphs.
    candidates = graph if isinstance(graph, list) else [graph]
    valid_graphs = [g for g in candidates if Topology.IsInstance(g, "graph")]

    if not valid_graphs:
        if not silent:
            print("Graph.ExportToCSV - Error: The input graph parameter does not contain any valid graphs. Returning None")
        return None

    # Pass-through options, grouped by the CSV file they describe.
    graph_options = {
        "graphLabelKey": graphLabelKey,
        "defaultGraphLabel": defaultGraphLabel,
        "graphFeaturesKeys": graphFeaturesKeys,
        "graphIDHeader": graphIDHeader,
        "graphLabelHeader": graphLabelHeader,
        "graphFeaturesHeader": graphFeaturesHeader,
    }
    edge_options = {
        "edgeLabelKey": edgeLabelKey,
        "defaultEdgeLabel": defaultEdgeLabel,
        "edgeFeaturesKeys": edgeFeaturesKeys,
        "edgeSRCHeader": edgeSRCHeader,
        "edgeDSTHeader": edgeDSTHeader,
        "edgeLabelHeader": edgeLabelHeader,
        "edgeFeaturesHeader": edgeFeaturesHeader,
        "edgeTrainMaskHeader": edgeTrainMaskHeader,
        "edgeValidateMaskHeader": edgeValidateMaskHeader,
        "edgeTestMaskHeader": edgeTestMaskHeader,
        "edgeMaskKey": edgeMaskKey,
        "edgeTrainRatio": edgeTrainRatio,
        "edgeValidateRatio": edgeValidateRatio,
        "edgeTestRatio": edgeTestRatio,
        "bidirectional": bidirectional,
    }
    node_options = {
        "nodeLabelKey": nodeLabelKey,
        "defaultNodeLabel": defaultNodeLabel,
        "nodeFeaturesKeys": nodeFeaturesKeys,
        "nodeIDHeader": nodeIDHeader,
        "nodeLabelHeader": nodeLabelHeader,
        "nodeFeaturesHeader": nodeFeaturesHeader,
        "nodeTrainMaskHeader": nodeTrainMaskHeader,
        "nodeValidateMaskHeader": nodeValidateMaskHeader,
        "nodeTestMaskHeader": nodeTestMaskHeader,
        "nodeMaskKey": nodeMaskKey,
        "nodeTrainRatio": nodeTrainRatio,
        "nodeValidateRatio": nodeValidateRatio,
        "nodeTestRatio": nodeTestRatio,
    }

    return Graph.ExportGraphsToCSV(graphs=valid_graphs,
                                   path=path,
                                   **graph_options,
                                   **edge_options,
                                   **node_options,
                                   mantissa=mantissa,
                                   tolerance=tolerance,
                                   overwrite=overwrite,
                                   silent=silent)
10370
+
10371
+ @staticmethod
10372
+ def ExportGraphToCSV(graph, path, graphLabel, graphFeatures="",
9832
10373
  graphIDHeader="graph_id", graphLabelHeader="label", graphFeaturesHeader="feat",
9833
10374
 
9834
10375
  edgeLabelKey="label", defaultEdgeLabel=0, edgeFeaturesKeys=[],
@@ -10182,7 +10723,7 @@ class Graph:
10182
10723
  edge_data.append(single_edge_data)
10183
10724
 
10184
10725
  if bidirectional == True:
10185
- single_edge_data = [graph_id, src, dst, edge_label, train_mask, validate_mask, test_mask, edge_features]
10726
+ single_edge_data = [graph_id, dst, src, edge_label, train_mask, validate_mask, test_mask, edge_features]
10186
10727
  edge_data.append(single_edge_data)
10187
10728
  df = pd.DataFrame(edge_data, columns=edge_columns)
10188
10729
 
@@ -10199,7 +10740,349 @@ class Graph:
10199
10740
  yaml_file.write('dataset_name: topologic_dataset\nedge_data:\n- file_name: edges.csv\nnode_data:\n- file_name: nodes.csv')
10200
10741
  yaml_file.close()
10201
10742
  return True
10202
-
10743
+
10744
+
10745
+
10746
+ @staticmethod
10747
+ def ExportGraphsToCSV(graphs,
10748
+ path,
10749
+ graphLabelKey="label",
10750
+ defaultGraphLabel=0,
10751
+ graphFeaturesKeys=None,
10752
+ graphIDHeader="graph_id", graphLabelHeader="label", graphFeaturesHeader="feat",
10753
+
10754
+ edgeLabelKey="label", defaultEdgeLabel=0, edgeFeaturesKeys=None,
10755
+ edgeSRCHeader="src_id", edgeDSTHeader="dst_id",
10756
+ edgeLabelHeader="label", edgeFeaturesHeader="feat",
10757
+ edgeTrainMaskHeader="train_mask", edgeValidateMaskHeader="val_mask", edgeTestMaskHeader="test_mask",
10758
+ edgeMaskKey="mask",
10759
+ edgeTrainRatio=0.8, edgeValidateRatio=0.1, edgeTestRatio=0.1,
10760
+ bidirectional=True,
10761
+
10762
+ nodeLabelKey="label", defaultNodeLabel=0, nodeFeaturesKeys=None,
10763
+ nodeIDHeader="node_id", nodeLabelHeader="label", nodeFeaturesHeader="feat",
10764
+ nodeTrainMaskHeader="train_mask", nodeValidateMaskHeader="val_mask", nodeTestMaskHeader="test_mask",
10765
+ nodeMaskKey="mask",
10766
+ nodeTrainRatio=0.8, nodeValidateRatio=0.1, nodeTestRatio=0.1,
10767
+ mantissa=6, tolerance=0.0001, overwrite=False, silent=False):
10768
+ """
10769
+ Batch-export a list of TopologicPy graphs to CSV files (graphs.csv, nodes.csv, edges.csv)
10770
+ in a format suitable for graph ML pipelines.
10771
+
10772
+ Modifications vs. ExportToCSV
10773
+ -----------------------------
10774
+ 1. Graph labels are retrieved from the graph dictionary using `graphLabelKey`
10775
+ (fallback to `defaultGraphLabel`).
10776
+ 2. Graph features are retrieved from the graph dictionary using `graphFeaturesKeys`.
10777
+ 3. Graph/edge/node features are written as separate numeric columns:
10778
+ - Graph: graphFeaturesHeader_0 ... graphFeaturesHeader_{k-1}
10779
+ - Node : nodeFeaturesHeader_0 ... nodeFeaturesHeader_{k-1}
10780
+ - Edge : edgeFeaturesHeader_0 ... edgeFeaturesHeader_{k-1}
10781
+
10782
+ Returns
10783
+ -------
10784
+ bool
10785
+ True if export succeeded, False otherwise.
10786
+ """
10787
+
10788
+ from topologicpy.Graph import Graph
10789
+ from topologicpy.Vertex import Vertex
10790
+ from topologicpy.Edge import Edge
10791
+ from topologicpy.Helper import Helper
10792
+ from topologicpy.Dictionary import Dictionary
10793
+ from topologicpy.Topology import Topology
10794
+
10795
+ import os
10796
+ import csv
10797
+ import math
10798
+ import random
10799
+
10800
+ # ----------------------------
10801
+ # Helpers
10802
+ # ----------------------------
10803
+ def _err(msg):
10804
+ if not silent:
10805
+ print(msg)
10806
+ return None
10807
+
10808
+ def _ensure_dir(p):
10809
+ if not os.path.exists(p):
10810
+ try:
10811
+ os.makedirs(p)
10812
+ except Exception:
10813
+ return False
10814
+ return True
10815
+
10816
+ def _last_graph_id_from_csv(csv_path):
10817
+ """
10818
+ Return last integer in first column of last non-empty row.
10819
+ Reads only the tail of the file (fast).
10820
+ """
10821
+ if not os.path.exists(csv_path):
10822
+ return -1
10823
+ try:
10824
+ with open(csv_path, "rb") as f:
10825
+ f.seek(0, os.SEEK_END)
10826
+ size = f.tell()
10827
+ if size == 0:
10828
+ return -1
10829
+ chunk = 4096
10830
+ data = b""
10831
+ pos = size
10832
+ while pos > 0:
10833
+ step = chunk if pos >= chunk else pos
10834
+ pos -= step
10835
+ f.seek(pos, os.SEEK_SET)
10836
+ data = f.read(step) + data
10837
+ lines = data.splitlines()
10838
+ if len(lines) >= 2:
10839
+ break
10840
+ for line in reversed(lines):
10841
+ if line.strip():
10842
+ s = line.decode("utf-8", errors="ignore")
10843
+ first = s.split(",", 1)[0].strip()
10844
+ try:
10845
+ return int(first)
10846
+ except Exception:
10847
+ return -1
10848
+ return -1
10849
+ except Exception:
10850
+ return -1
10851
+
10852
+ def _quant_key_xyz(v):
10853
+ return (round(float(Vertex.X(v, mantissa=mantissa)), mantissa),
10854
+ round(float(Vertex.Y(v, mantissa=mantissa)), mantissa),
10855
+ round(float(Vertex.Z(v, mantissa=mantissa)), mantissa))
10856
+
10857
+ def _mask_from_dict_or_ratio(d, mask_key, train_max, val_max, counts):
10858
+ """
10859
+ counts: dict with keys train, val, test (mutated).
10860
+ """
10861
+ if mask_key is not None and d is not None:
10862
+ try:
10863
+ keys = Dictionary.Keys(d)
10864
+ except Exception:
10865
+ keys = []
10866
+ if mask_key in keys:
10867
+ mv = Dictionary.ValueAtKey(d, mask_key)
10868
+ if mv in [0, 1, 2]:
10869
+ if mv == 0:
10870
+ counts["train"] += 1
10871
+ return True, False, False
10872
+ if mv == 1:
10873
+ counts["val"] += 1
10874
+ return False, True, False
10875
+ counts["test"] += 1
10876
+ return False, False, True
10877
+
10878
+ if counts["train"] < train_max:
10879
+ counts["train"] += 1
10880
+ return True, False, False
10881
+ if counts["val"] < val_max:
10882
+ counts["val"] += 1
10883
+ return False, True, False
10884
+ counts["test"] += 1
10885
+ return False, False, True
10886
+
10887
+ def _feat_list_from_keys(d, keys_flat):
10888
+ """
10889
+ Returns list[float] with length = len(keys_flat).
10890
+ Missing/invalid values -> 0.0 (keeps vector length stable).
10891
+ """
10892
+ if not keys_flat:
10893
+ return []
10894
+ out = []
10895
+ for k in keys_flat:
10896
+ try:
10897
+ val = Dictionary.ValueAtKey(d, k) if d is not None else None
10898
+ if val is None:
10899
+ out.append(0.0)
10900
+ else:
10901
+ out.append(round(float(val), mantissa))
10902
+ except Exception:
10903
+ out.append(0.0)
10904
+ return out
10905
+
10906
+ def _label_from_dict(d, key, default_val):
10907
+ try:
10908
+ if d is None:
10909
+ return default_val
10910
+ val = Dictionary.ValueAtKey(d, key)
10911
+ return default_val if val is None else val
10912
+ except Exception:
10913
+ return default_val
10914
+
10915
+ def _feature_headers(prefix, k):
10916
+ return [f"{prefix}_{i}" for i in range(int(k))]
10917
+
10918
+ # ----------------------------
10919
+ # Validate inputs
10920
+ # ----------------------------
10921
+ if graphs is None or (not isinstance(graphs, list)) or len(graphs) == 0:
10922
+ return _err("Graph.ExportGraphsToCSV - Error: 'graphs' must be a non-empty list. Returning None.")
10923
+
10924
+ if abs(nodeTrainRatio + nodeValidateRatio + nodeTestRatio - 1) > 0.001:
10925
+ return _err("Graph.ExportGraphsToCSV - Error: node train/val/test ratios must add up to 1. Returning None.")
10926
+ if abs(edgeTrainRatio + edgeValidateRatio + edgeTestRatio - 1) > 0.001:
10927
+ return _err("Graph.ExportGraphsToCSV - Error: edge train/val/test ratios must add up to 1. Returning None.")
10928
+
10929
+ if not _ensure_dir(path):
10930
+ return _err("Graph.ExportGraphsToCSV - Error: Could not create output folder. Returning None.")
10931
+
10932
+ graphs_csv = os.path.join(path, "graphs.csv")
10933
+ nodes_csv = os.path.join(path, "nodes.csv")
10934
+ edges_csv = os.path.join(path, "edges.csv")
10935
+
10936
+ if overwrite is False:
10937
+ if not os.path.exists(graphs_csv):
10938
+ return _err("Graph.ExportGraphsToCSV - Error: overwrite=False but graphs.csv not found. Returning None.")
10939
+ if not os.path.exists(nodes_csv):
10940
+ return _err("Graph.ExportGraphsToCSV - Error: overwrite=False but nodes.csv not found. Returning None.")
10941
+ if not os.path.exists(edges_csv):
10942
+ return _err("Graph.ExportGraphsToCSV - Error: overwrite=False but edges.csv not found. Returning None.")
10943
+
10944
+ # Determine starting graph_id once
10945
+ start_graph_id = 0 if overwrite else (_last_graph_id_from_csv(graphs_csv) + 1)
10946
+
10947
+ # Flatten feature keys once
10948
+ graphFeaturesKeys = [] if graphFeaturesKeys is None else graphFeaturesKeys
10949
+ nodeFeaturesKeys = [] if nodeFeaturesKeys is None else nodeFeaturesKeys
10950
+ edgeFeaturesKeys = [] if edgeFeaturesKeys is None else edgeFeaturesKeys
10951
+
10952
+ graph_feature_keys = Helper.Flatten(graphFeaturesKeys)
10953
+ node_feature_keys = Helper.Flatten(nodeFeaturesKeys)
10954
+ edge_feature_keys = Helper.Flatten(edgeFeaturesKeys)
10955
+
10956
+ # Build feature headers
10957
+ graph_feat_headers = _feature_headers(graphFeaturesHeader, len(graph_feature_keys))
10958
+ node_feat_headers = _feature_headers(nodeFeaturesHeader, len(node_feature_keys))
10959
+ edge_feat_headers = _feature_headers(edgeFeaturesHeader, len(edge_feature_keys))
10960
+
10961
+ # CSV modes
10962
+ graphs_mode = "w" if overwrite else "a"
10963
+ nodes_mode = "w" if overwrite else "a"
10964
+ edges_mode = "w" if overwrite else "a"
10965
+
10966
+ # Column headers
10967
+ graphs_header = [graphIDHeader, graphLabelHeader] + graph_feat_headers
10968
+ nodes_header = [graphIDHeader, nodeIDHeader, nodeLabelHeader,
10969
+ nodeTrainMaskHeader, nodeValidateMaskHeader, nodeTestMaskHeader] + node_feat_headers + ["X", "Y", "Z"]
10970
+ edges_header = [graphIDHeader, edgeSRCHeader, edgeDSTHeader, edgeLabelHeader,
10971
+ edgeTrainMaskHeader, edgeValidateMaskHeader, edgeTestMaskHeader] + edge_feat_headers
10972
+
10973
+ try:
10974
+ with open(graphs_csv, graphs_mode, newline="", encoding="utf-8") as f_g, \
10975
+ open(nodes_csv, nodes_mode, newline="", encoding="utf-8") as f_n, \
10976
+ open(edges_csv, edges_mode, newline="", encoding="utf-8") as f_e:
10977
+
10978
+ w_g = csv.writer(f_g)
10979
+ w_n = csv.writer(f_n)
10980
+ w_e = csv.writer(f_e)
10981
+
10982
+ # write headers if overwriting
10983
+ if overwrite:
10984
+ w_g.writerow(graphs_header)
10985
+ w_n.writerow(nodes_header)
10986
+ w_e.writerow(edges_header)
10987
+
10988
+ # ----------------------------
10989
+ # Main loop: per graph
10990
+ # ----------------------------
10991
+ for idx, graph in enumerate(graphs):
10992
+ if not Topology.IsInstance(graph, "Graph"):
10993
+ return _err(f"Graph.ExportGraphsToCSV - Error: item {idx} is not a valid topologic graph. Returning None.")
10994
+
10995
+ graph_id = start_graph_id + idx
10996
+
10997
+ # ---- Graph label/features from graph dictionary
10998
+ gd = Topology.Dictionary(graph)
10999
+ g_label = _label_from_dict(gd, graphLabelKey, defaultGraphLabel)
11000
+ g_feat = _feat_list_from_keys(gd, graph_feature_keys)
11001
+
11002
+ w_g.writerow([graph_id, g_label] + g_feat)
11003
+
11004
+ # ---- Nodes
11005
+ vertices = Graph.Vertices(graph)
11006
+ if vertices is None or len(vertices) < 3:
11007
+ return _err(f"Graph.ExportGraphsToCSV - Error: graph {graph_id} is too small (<3 vertices). Returning None.")
11008
+
11009
+ vertices = random.sample(vertices, len(vertices))
11010
+
11011
+ n = len(vertices)
11012
+ node_train_max = max(1, math.floor(n * float(nodeTrainRatio)))
11013
+ node_val_max = max(1, math.floor(n * float(nodeValidateRatio)))
11014
+ node_counts = {"train": 0, "val": 0, "test": 0}
11015
+
11016
+ # Vertex->index map for O(1) edge endpoint lookup
11017
+ v_index = { _quant_key_xyz(v): i for i, v in enumerate(vertices) }
11018
+
11019
+ for i, v in enumerate(vertices):
11020
+ nd = Topology.Dictionary(v)
11021
+
11022
+ v_label = _label_from_dict(nd, nodeLabelKey, defaultNodeLabel)
11023
+ tmask, vmask, smask = _mask_from_dict_or_ratio(nd, nodeMaskKey, node_train_max, node_val_max, node_counts)
11024
+ v_feat = _feat_list_from_keys(nd, node_feature_keys)
11025
+
11026
+ x = float(Vertex.X(v, mantissa=mantissa))
11027
+ y = float(Vertex.Y(v, mantissa=mantissa))
11028
+ z = float(Vertex.Z(v, mantissa=mantissa))
11029
+
11030
+ w_n.writerow([graph_id, i, v_label, tmask, vmask, smask] + v_feat + [x, y, z])
11031
+
11032
+ # ---- Edges
11033
+ edges = Graph.Edges(graph) or []
11034
+ m = len(edges)
11035
+ edge_train_max = math.floor(m * float(edgeTrainRatio))
11036
+ edge_val_max = math.floor(m * float(edgeValidateRatio))
11037
+ edge_counts = {"train": 0, "val": 0, "test": 0}
11038
+
11039
+ for e in edges:
11040
+ ed = Topology.Dictionary(e)
11041
+
11042
+ e_label = _label_from_dict(ed, edgeLabelKey, defaultEdgeLabel)
11043
+ tmask, vmask, smask = _mask_from_dict_or_ratio(ed, edgeMaskKey, edge_train_max, edge_val_max, edge_counts)
11044
+ e_feat = _feat_list_from_keys(ed, edge_feature_keys)
11045
+
11046
+ sv = Edge.StartVertex(e)
11047
+ tv = Edge.EndVertex(e)
11048
+
11049
+ src = v_index.get(_quant_key_xyz(sv), None)
11050
+ dst = v_index.get(_quant_key_xyz(tv), None)
11051
+
11052
+ # rare fallback
11053
+ if src is None or dst is None:
11054
+ try:
11055
+ src = Vertex.Index(sv, vertices, tolerance=tolerance)
11056
+ dst = Vertex.Index(tv, vertices, tolerance=tolerance)
11057
+ except Exception:
11058
+ src = None
11059
+ dst = None
11060
+
11061
+ if src is None or dst is None:
11062
+ continue
11063
+
11064
+ # forward
11065
+ w_e.writerow([graph_id, src, dst, e_label, tmask, vmask, smask] + e_feat)
11066
+
11067
+ if bidirectional:
11068
+ # reverse (correct)
11069
+ w_e.writerow([graph_id, dst, src, e_label, tmask, vmask, smask] + e_feat)
11070
+
11071
+ # meta.yaml
11072
+ with open(os.path.join(path, "meta.yaml"), "w", encoding="utf-8") as yaml_file:
11073
+ yaml_file.write(
11074
+ "dataset_name: topologic_dataset\n"
11075
+ "edge_data:\n- file_name: edges.csv\n"
11076
+ "node_data:\n- file_name: nodes.csv\n"
11077
+ "graph_data:\n file_name: graphs.csv\n"
11078
+ )
11079
+
11080
+ return True
11081
+
11082
+ except Exception as ex:
11083
+ return _err(f"Graph.ExportGraphsToCSV - Error: {ex}. Returning None.")
11084
+
11085
+
10203
11086
  @staticmethod
10204
11087
  def ExportToGEXF(graph, path: str = None, graphWidth: float = 20, graphLength: float = 20, graphHeight: float = 20,
10205
11088
  defaultVertexColor: str = "black", defaultVertexSize: float = 3,