topologicpy 0.8.98__py3-none-any.whl → 0.8.99__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- topologicpy/ANN.py +1 -1
- topologicpy/Aperture.py +1 -1
- topologicpy/BVH.py +1 -1
- topologicpy/CSG.py +1 -1
- topologicpy/Cell.py +1 -1
- topologicpy/CellComplex.py +1 -1
- topologicpy/Cluster.py +1 -1
- topologicpy/Color.py +1 -1
- topologicpy/Context.py +1 -1
- topologicpy/DGL.py +1 -1
- topologicpy/Dictionary.py +92 -1
- topologicpy/Edge.py +1 -1
- topologicpy/EnergyModel.py +1 -1
- topologicpy/Face.py +1 -1
- topologicpy/Graph.py +887 -4
- topologicpy/Grid.py +1 -1
- topologicpy/Helper.py +1 -1
- topologicpy/Honeybee.py +1 -1
- topologicpy/Matrix.py +1 -1
- topologicpy/Neo4j.py +1 -1
- topologicpy/Plotly.py +1 -1
- topologicpy/Polyskel.py +1 -1
- topologicpy/PyG.py +1287 -2308
- topologicpy/ShapeGrammar.py +1 -1
- topologicpy/Shell.py +1 -1
- topologicpy/Speckle.py +1 -1
- topologicpy/Sun.py +1 -1
- topologicpy/Topology.py +1 -1
- topologicpy/Vector.py +1 -1
- topologicpy/Vertex.py +1 -1
- topologicpy/Wire.py +1 -1
- topologicpy/__init__.py +1 -1
- topologicpy/version.py +1 -1
- {topologicpy-0.8.98.dist-info → topologicpy-0.8.99.dist-info}/METADATA +1 -1
- topologicpy-0.8.99.dist-info/RECORD +39 -0
- topologicpy-0.8.98.dist-info/RECORD +0 -39
- {topologicpy-0.8.98.dist-info → topologicpy-0.8.99.dist-info}/WHEEL +0 -0
- {topologicpy-0.8.98.dist-info → topologicpy-0.8.99.dist-info}/licenses/LICENSE +0 -0
- {topologicpy-0.8.98.dist-info → topologicpy-0.8.99.dist-info}/top_level.txt +0 -0
topologicpy/Graph.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C)
|
|
1
|
+
# Copyright (C) 2026
|
|
2
2
|
# Wassim Jabi <wassim.jabi@gmail.com>
|
|
3
3
|
#
|
|
4
4
|
# This program is free software: you can redistribute it and/or modify it under
|
|
@@ -3102,8 +3102,349 @@ class Graph:
|
|
|
3102
3102
|
zMax = zMax,
|
|
3103
3103
|
tolerance = tolerance
|
|
3104
3104
|
)
|
|
3105
|
+
|
|
3105
3106
|
@staticmethod
|
|
3106
3107
|
def ByCSVPath(path,
              graphIDHeader="graph_id", graphLabelHeader="label", graphFeaturesHeader="feat", graphFeaturesKeys=None,
              edgeSRCHeader="src_id", edgeDSTHeader="dst_id", edgeLabelHeader="label",
              edgeTrainMaskHeader="train_mask", edgeValidateMaskHeader="val_mask", edgeTestMaskHeader="test_mask",
              edgeFeaturesHeader="feat", edgeFeaturesKeys=None,
              nodeIDHeader="node_id", nodeLabelHeader="label",
              nodeTrainMaskHeader="train_mask", nodeValidateMaskHeader="val_mask", nodeTestMaskHeader="test_mask",
              nodeFeaturesHeader="feat", nodeXHeader="X", nodeYHeader="Y", nodeZHeader="Z",
              nodeFeaturesKeys=None,
              tolerance=0.0001, silent=False):
    """
    Imports TopologicPy graphs from a folder containing CSV files (graphs.csv, nodes.csv, edges.csv)
    exported using the *new* TopologicPy CSV format for PyTorch Geometric / DGL-style datasets.

    New format changes handled
    --------------------------
    - Graph, node, and edge features are stored in *separate numeric columns*:
        <featuresHeader>_0, <featuresHeader>_1, <featuresHeader>_2, ...
      (instead of a single comma-separated string column).

    - Graph label and graph features are embedded in the returned graph's dictionary.

    Parameters
    ----------
    path : str
        The path to the folder containing graphs.csv, nodes.csv, edges.csv.
        nodes.csv and edges.csv are required. If graphs.csv is missing, a single graph
        with id 0 and label 0 is assumed.
    graphIDHeader : str , optional
        The graph id column header. Default is "graph_id".
    graphLabelHeader : str , optional
        The graph label column header. Default is "label".
    graphFeaturesHeader : str , optional
        The graph features prefix. Feature columns start with "<graphFeaturesHeader>_". Default is "feat".
    graphFeaturesKeys : list , optional
        If provided, these keys are used to store graph features in the graph dictionary.
        Length must match number of graph feature columns found. If None, keys will be
        "<graphFeaturesHeader>_0", "<graphFeaturesHeader>_1", ...
    edgeSRCHeader : str , optional
        Edge source node id header. Default is "src_id".
    edgeDSTHeader : str , optional
        Edge destination node id header. Default is "dst_id".
    edgeLabelHeader : str , optional
        Edge label header. Default is "label".
    edgeTrainMaskHeader, edgeValidateMaskHeader, edgeTestMaskHeader : str , optional
        Edge mask headers. Defaults are "train_mask", "val_mask", "test_mask".
    edgeFeaturesHeader : str , optional
        Edge features prefix. Feature columns start with "<edgeFeaturesHeader>_". Default is "feat".
    edgeFeaturesKeys : list , optional
        If provided, these keys are used to store edge features in the edge dictionary.
        Length must match number of edge feature columns found. If None, keys will be
        "<edgeFeaturesHeader>_0", "<edgeFeaturesHeader>_1", ...
    nodeIDHeader : str , optional
        Node id header. Default is "node_id".
    nodeLabelHeader : str , optional
        Node label header. Default is "label".
    nodeTrainMaskHeader, nodeValidateMaskHeader, nodeTestMaskHeader : str , optional
        Node mask headers. Defaults are "train_mask", "val_mask", "test_mask".
    nodeFeaturesHeader : str , optional
        Node features prefix. Feature columns start with "<nodeFeaturesHeader>_". Default is "feat".
    nodeXHeader, nodeYHeader, nodeZHeader : str , optional
        Node coordinate headers. Defaults are "X", "Y", "Z".
        If a column is missing, or a cell is blank/non-numeric/non-finite, the coordinate
        is generated deterministically: X falls back to the node id, Y and Z fall back to 0.
    nodeFeaturesKeys : list , optional
        If provided, these keys are used to store node features in the node dictionary.
        Length must match number of node feature columns found. If None, keys will be
        "<nodeFeaturesHeader>_0", "<nodeFeaturesHeader>_1", ...
    tolerance : float , optional
        Desired tolerance. Default is 0.0001.
    silent : bool , optional
        If True, warnings/errors are suppressed. Default is False.

    Returns
    -------
    list
        A list of imported TopologicPy graphs, or None if the folder, the required files,
        the required headers, or the feature-key lengths are invalid.
    """
    import math
    import numbers
    import os

    def _warn(msg):
        # Single choke point so that `silent` is honoured everywhere.
        if not silent:
            print(msg)

    def _feature_columns(df, prefix):
        # Columns named "<prefix>_<n>", ordered by <n>; non-numeric suffixes sort last.
        marker = prefix + "_"
        cols = [c for c in df.columns if isinstance(c, str) and c.startswith(marker)]

        def _key(c):
            suffix = c.rsplit("_", 1)[-1]
            return int(suffix) if suffix.isdigit() else 10**9

        return sorted(cols, key=_key)

    def _resolve_feature_keys(keys, columns, kind):
        # Dictionary keys default to the column names; explicit keys must match in count.
        if keys is None:
            return list(columns)
        resolved = list(keys)
        if len(resolved) != len(columns):
            _warn(f"Graph.ByCSVPath - Error: {kind}FeaturesKeys length does not match number of {kind} feature columns.")
            return None
        return resolved

    def _is_missing(value):
        # pandas represents blank cells as NaN; treat None the same way.
        if value is None:
            return True
        try:
            return math.isnan(value)
        except (TypeError, ValueError, OverflowError):
            return False

    def _as_flag(value):
        # bool("False") and bool(nan) are both True, so parse text and blanks explicitly.
        if _is_missing(value):
            return False
        if isinstance(value, str):
            return value.strip().lower() in ("true", "t", "yes", "y", "1")
        try:
            return bool(value)
        except (TypeError, ValueError):
            return False

    def _mask_to_int(train_mask, val_mask, test_mask):
        # 0 = train, 1 = validate, 2 = test; any ambiguous combination falls back to train.
        flags = (_as_flag(train_mask), _as_flag(val_mask), _as_flag(test_mask))
        if flags == (False, True, False):
            return 1
        if flags == (False, False, True):
            return 2
        return 0

    def _coordinate(value, fallback):
        # NaN is a numbers.Number, so a finiteness check is needed on top of the type check.
        if not isinstance(value, numbers.Real):
            return fallback
        number = float(value)
        return number if math.isfinite(number) else fallback

    def _feature_values(row, columns):
        values = []
        for c in columns:
            try:
                values.append(float(row[c]))
            except (KeyError, TypeError, ValueError):
                values.append(0.0)
        return values

    def _graph_key(value):
        # Normalise graph ids identically for nodes.csv, edges.csv and graphs.csv so that
        # the per-graph lookups below agree; non-integer ids are kept as they are.
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return value

    # Validate the folder first: these checks only need the standard library.
    if not isinstance(path, (str, bytes, os.PathLike)) or not os.path.exists(path):
        _warn("Graph.ByCSVPath - Error: the input path parameter does not exist. Returning None.")
        return None
    if not os.path.isdir(path):
        _warn("Graph.ByCSVPath - Error: the input path parameter is not a folder. Returning None.")
        return None

    graphs_csv = os.path.join(path, "graphs.csv")
    edges_csv = os.path.join(path, "edges.csv")
    nodes_csv = os.path.join(path, "nodes.csv")

    if not os.path.exists(edges_csv):
        _warn("Graph.ByCSVPath - Error: edges.csv not found. Returning None.")
        return None
    if not os.path.exists(nodes_csv):
        _warn("Graph.ByCSVPath - Error: nodes.csv not found. Returning None.")
        return None

    # Heavier imports are deferred until the inputs are known to be usable.
    import pandas as pd

    from topologicpy.Vertex import Vertex
    from topologicpy.Edge import Edge
    from topologicpy.Graph import Graph
    from topologicpy.Topology import Topology
    from topologicpy.Dictionary import Dictionary

    has_graphs_csv = os.path.exists(graphs_csv)
    try:
        edges_df = pd.read_csv(edges_csv)
        nodes_df = pd.read_csv(nodes_csv)
        graphs_df = pd.read_csv(graphs_csv) if has_graphs_csv else None
    except (OSError, ValueError) as error:
        # pandas' EmptyDataError and ParserError derive from ValueError.
        _warn(f"Graph.ByCSVPath - Error: Could not read the CSV files ({error}). Returning None.")
        return None

    if graphs_df is None:
        _warn("Graph.ByCSVPath - Warning: graphs.csv not found. Assuming a single graph with graph_id=0.")
        graphs_df = pd.DataFrame([{graphIDHeader: 0, graphLabelHeader: 0}])

    # Feature columns (new format)
    graph_feat_cols = _feature_columns(graphs_df, graphFeaturesHeader)
    node_feat_cols = _feature_columns(nodes_df, nodeFeaturesHeader)
    edge_feat_cols = _feature_columns(edges_df, edgeFeaturesHeader)

    # Feature keys mapping
    graph_feat_keys = _resolve_feature_keys(graphFeaturesKeys, graph_feat_cols, "graph")
    if graph_feat_keys is None:
        return None
    node_feat_keys = _resolve_feature_keys(nodeFeaturesKeys, node_feat_cols, "node")
    if node_feat_keys is None:
        return None
    edge_feat_keys = _resolve_feature_keys(edgeFeaturesKeys, edge_feat_cols, "edge")
    if edge_feat_keys is None:
        return None

    # Every table must carry the graph id so rows can be grouped per graph.
    if graphIDHeader not in nodes_df.columns or graphIDHeader not in edges_df.columns:
        _warn("Graph.ByCSVPath - Error: graph_id header not found in nodes.csv or edges.csv. Returning None.")
        return None
    if graphIDHeader not in graphs_df.columns:
        _warn("Graph.ByCSVPath - Error: graph_id header not found in graphs.csv. Returning None.")
        return None

    # Column presence is constant per table, so test it once rather than per row.
    node_columns = nodes_df.columns
    has_node_id = nodeIDHeader in node_columns
    has_node_x = nodeXHeader in node_columns
    has_node_y = nodeYHeader in node_columns
    has_node_z = nodeZHeader in node_columns
    has_node_label = nodeLabelHeader in node_columns
    has_node_masks = all(h in node_columns for h in (nodeTrainMaskHeader, nodeValidateMaskHeader, nodeTestMaskHeader))

    edge_columns = edges_df.columns
    has_edge_label = edgeLabelHeader in edge_columns
    has_edge_masks = all(h in edge_columns for h in (edgeTrainMaskHeader, edgeValidateMaskHeader, edgeTestMaskHeader))

    # Build per-graph vertices and a node_id->vertex mapping
    vertices_by_gid = {}
    node_map_by_gid = {}

    for raw_gid, g_nodes in nodes_df.groupby(graphIDHeader):
        gid = _graph_key(raw_gid)
        # sort by node_id for consistent indexing, but keep mapping by id
        if has_node_id:
            g_nodes = g_nodes.sort_values(nodeIDHeader)
        vertices = []
        node_id_to_vertex = {}

        for _, row in g_nodes.iterrows():
            if has_node_id:
                try:
                    node_id = int(row[nodeIDHeader])
                except (TypeError, ValueError, OverflowError):
                    _warn("Graph.ByCSVPath - Warning: Invalid node id. Skipping node.")
                    continue
            else:
                node_id = len(vertices)

            # coordinates (optional) with deterministic fallbacks
            x = _coordinate(row[nodeXHeader] if has_node_x else None, float(node_id))
            y = _coordinate(row[nodeYHeader] if has_node_y else None, 0.0)
            z = _coordinate(row[nodeZHeader] if has_node_z else None, 0.0)

            v = Vertex.ByCoordinates(x, y, z)
            if not Topology.IsInstance(v, "Vertex"):
                _warn("Graph.ByCSVPath - Warning: Failed to create a vertex.")
                continue

            # label + mask (optional)
            v_label = row[nodeLabelHeader] if has_node_label else 0
            if has_node_masks:
                mask = _mask_to_int(row[nodeTrainMaskHeader], row[nodeValidateMaskHeader], row[nodeTestMaskHeader])
            else:
                mask = 0

            # features (new format)
            node_keys = [nodeIDHeader, nodeLabelHeader, "mask"] + node_feat_keys
            node_vals = [node_id, v_label, mask] + _feature_values(row, node_feat_cols)
            d = Dictionary.ByKeysValues(node_keys, node_vals)
            if Topology.IsInstance(d, "Dictionary"):
                v = Topology.SetDictionary(v, d)

            vertices.append(v)
            node_id_to_vertex[node_id] = v

        vertices_by_gid[gid] = vertices
        node_map_by_gid[gid] = node_id_to_vertex

    # Build per-graph edges
    edges_by_gid = {}

    for raw_gid, g_edges in edges_df.groupby(graphIDHeader):
        gid = _graph_key(raw_gid)
        node_id_to_vertex = node_map_by_gid.get(gid, {})
        edges = []

        for _, row in g_edges.iterrows():
            try:
                src_id = int(row[edgeSRCHeader])
                dst_id = int(row[edgeDSTHeader])
            except (KeyError, TypeError, ValueError, OverflowError):
                continue

            # Self-loops and edges that reference unknown nodes are dropped.
            if src_id == dst_id:
                continue
            v_src = node_id_to_vertex.get(src_id)
            v_dst = node_id_to_vertex.get(dst_id)
            if v_src is None or v_dst is None:
                continue

            try:
                e = Edge.ByVertices([v_src, v_dst], tolerance=tolerance)
            except Exception:
                e = None
            if not Topology.IsInstance(e, "Edge"):
                continue

            e_label = row[edgeLabelHeader] if has_edge_label else 0
            if has_edge_masks:
                mask = _mask_to_int(row[edgeTrainMaskHeader], row[edgeValidateMaskHeader], row[edgeTestMaskHeader])
            else:
                mask = 0

            edge_keys = [edgeSRCHeader, edgeDSTHeader, edgeLabelHeader, "mask"] + edge_feat_keys
            edge_vals = [src_id, dst_id, e_label, mask] + _feature_values(row, edge_feat_cols)
            d = Dictionary.ByKeysValues(edge_keys, edge_vals)
            if Topology.IsInstance(d, "Dictionary"):
                e = Topology.SetDictionary(e, d)

            edges.append(e)

        edges_by_gid[gid] = edges

    # Build graphs and embed graph label + features in graph dictionary.
    # graphs_df is the authoritative list of graph ids.
    graphs = []
    has_graph_label = graphLabelHeader in graphs_df.columns

    for _, grow in graphs_df.iterrows():
        gid = _graph_key(grow[graphIDHeader])

        verts = vertices_by_gid.get(gid, [])
        eds = edges_by_gid.get(gid, [])

        if len(verts) < 1:
            _warn(f"Graph.ByCSVPath - Warning: Graph id {gid} has no vertices. Skipping.")
            continue

        g = Graph.ByVerticesEdges(verts, eds)
        if not Topology.IsInstance(g, "Graph"):
            _warn(f"Graph.ByCSVPath - Warning: Failed to create graph id {gid}. Skipping.")
            continue

        g_label = grow[graphLabelHeader] if has_graph_label else 0

        graph_keys = [graphIDHeader, graphLabelHeader] + graph_feat_keys
        graph_vals = [gid, g_label] + _feature_values(grow, graph_feat_cols)

        d = Dictionary.ByKeysValues(graph_keys, graph_vals)
        if Topology.IsInstance(d, "Dictionary"):
            try:
                g = Graph.SetDictionary(g, d)
            except Exception:
                # fallback if Graph.SetDictionary not available in your build
                g = Topology.SetDictionary(g, d)

        graphs.append(g)

    return graphs
|
|
3445
|
+
|
|
3446
|
+
@staticmethod
|
|
3447
|
+
def ByCSVPath_old(path,
|
|
3107
3448
|
graphIDHeader="graph_id", graphLabelHeader="label", graphFeaturesHeader="feat", graphFeaturesKeys=[],
|
|
3108
3449
|
edgeSRCHeader="src_id", edgeDSTHeader="dst_id", edgeLabelHeader="label", edgeTrainMaskHeader="train_mask",
|
|
3109
3450
|
edgeValidateMaskHeader="val_mask", edgeTestMaskHeader="test_mask", edgeFeaturesHeader="feat", edgeFeaturesKeys=[],
|
|
@@ -9828,7 +10169,207 @@ class Graph:
|
|
|
9828
10169
|
return status
|
|
9829
10170
|
|
|
9830
10171
|
@staticmethod
|
|
9831
|
-
def ExportToCSV(graph,
|
|
10172
|
+
def ExportToCSV(graph,
                path,

                graphLabelKey="label",
                defaultGraphLabel=0,
                graphFeaturesKeys=None,
                graphIDHeader="graph_id",
                graphLabelHeader="label",
                graphFeaturesHeader="feat",

                edgeLabelKey="label",
                defaultEdgeLabel=0,
                edgeFeaturesKeys=None,
                edgeSRCHeader="src_id",
                edgeDSTHeader="dst_id",
                edgeLabelHeader="label",
                edgeFeaturesHeader="feat",
                edgeTrainMaskHeader="train_mask",
                edgeValidateMaskHeader="val_mask",
                edgeTestMaskHeader="test_mask",
                edgeMaskKey="mask",
                edgeTrainRatio=0.8,
                edgeValidateRatio=0.1,
                edgeTestRatio=0.1,
                bidirectional=True,

                nodeLabelKey="label",
                defaultNodeLabel=0,
                nodeFeaturesKeys=None,
                nodeIDHeader="node_id",
                nodeLabelHeader="label",
                nodeFeaturesHeader="feat",
                nodeTrainMaskHeader="train_mask",
                nodeValidateMaskHeader="val_mask",
                nodeTestMaskHeader="test_mask",
                nodeMaskKey="mask",
                nodeTrainRatio=0.8,
                nodeValidateRatio=0.1,
                nodeTestRatio=0.1,

                mantissa=6,
                tolerance=0.0001,
                overwrite=False,
                silent=False):
    """
    Exports the input graph or list of graphs into a set of CSV files compatible with DGL.

    The input is wrapped in a list if needed, entries that are not graphs are discarded,
    and the remaining graphs are forwarded unchanged, together with every option below,
    to Graph.ExportGraphsToCSV.

    Parameters
    ----------
    graph : topologic_core.Graph or list of graphs.
        The input graph or graphs.
    path : str
        The desired path to the output folder where the graphs, edges, and nodes CSV files will be saved.
    graphLabelKey : str , optional
        The graph label dictionary key saved in each graph. Default is "label".
    defaultGraphLabel : int , optional
        The default graph label to use if no graph label is found. Default is 0.
    graphFeaturesKeys : list , optional
        The list of feature dictionary keys saved in the dictionaries of graphs. Default is None.
    graphIDHeader : str , optional
        The desired graph ID column header. Default is "graph_id".
    graphLabelHeader : str , optional
        The desired graph label column header. Default is "label".
    graphFeaturesHeader : str , optional
        The desired graph features column header. Default is "feat".
    edgeLabelKey : str , optional
        The edge label dictionary key saved in each graph edge. Default is "label".
    defaultEdgeLabel : int , optional
        The default edge label to use if no edge label is found. Default is 0.
    edgeFeaturesKeys : list , optional
        The list of feature dictionary keys saved in the dictionaries of edges. Default is None.
    edgeSRCHeader : str , optional
        The desired edge source column header. Default is "src_id".
    edgeDSTHeader : str , optional
        The desired edge destination column header. Default is "dst_id".
    edgeLabelHeader : str , optional
        The desired edge label column header. Default is "label".
    edgeFeaturesHeader : str , optional
        The desired edge features column header. Default is "feat".
    edgeTrainMaskHeader : str , optional
        The desired edge train mask column header. Default is "train_mask".
    edgeValidateMaskHeader : str , optional
        The desired edge validate mask column header. Default is "val_mask".
    edgeTestMaskHeader : str , optional
        The desired edge test mask column header. Default is "test_mask".
    edgeMaskKey : str , optional
        The dictionary key where the edge train, validate, test category is to be found. The value should be 0 for train,
        1 for validate, and 2 for test. If no key is found, the ratio of train/validate/test will be used. Default is "mask".
    edgeTrainRatio : float , optional
        The desired ratio of the edge data to use for training. The number must be between 0 and 1. Default is 0.8 which means 80% of the data will be used for training.
        This value is ignored if an edgeMaskKey is found.
    edgeValidateRatio : float , optional
        The desired ratio of the edge data to use for validation. The number must be between 0 and 1. Default is 0.1 which means 10% of the data will be used for validation.
        This value is ignored if an edgeMaskKey is found.
    edgeTestRatio : float , optional
        The desired ratio of the edge data to use for testing. The number must be between 0 and 1. Default is 0.1 which means 10% of the data will be used for testing.
        This value is ignored if an edgeMaskKey is found.
    bidirectional : bool , optional
        If set to True, a reversed edge will also be saved for each edge in the graph. Otherwise, it will not. Default is True.
    nodeLabelKey : str , optional
        The node label dictionary key saved in each graph vertex. Default is "label".
    defaultNodeLabel : int , optional
        The default node label to use if no node label is found. Default is 0.
    nodeFeaturesKeys : list , optional
        The list of features keys saved in the dictionaries of nodes. Default is None.
    nodeIDHeader : str , optional
        The desired node ID column header. Default is "node_id".
    nodeLabelHeader : str , optional
        The desired node label column header. Default is "label".
    nodeFeaturesHeader : str , optional
        The desired node features column header. Default is "feat".
    nodeTrainMaskHeader : str , optional
        The desired node train mask column header. Default is "train_mask".
    nodeValidateMaskHeader : str , optional
        The desired node validate mask column header. Default is "val_mask".
    nodeTestMaskHeader : str , optional
        The desired node test mask column header. Default is "test_mask".
    nodeMaskKey : str , optional
        The dictionary key where the node train, validate, test category is to be found. The value should be 0 for train,
        1 for validate, and 2 for test. If no key is found, the ratio of train/validate/test will be used. Default is "mask".
    nodeTrainRatio : float , optional
        The desired ratio of the node data to use for training. The number must be between 0 and 1. Default is 0.8 which means 80% of the data will be used for training.
        This value is ignored if a nodeMaskKey is found.
    nodeValidateRatio : float , optional
        The desired ratio of the node data to use for validation. The number must be between 0 and 1. Default is 0.1 which means 10% of the data will be used for validation.
        This value is ignored if a nodeMaskKey is found.
    nodeTestRatio : float , optional
        The desired ratio of the node data to use for testing. The number must be between 0 and 1. Default is 0.1 which means 10% of the data will be used for testing.
        This value is ignored if a nodeMaskKey is found.
    mantissa : int , optional
        The number of decimal places to round the result to. Default is 6.
    tolerance : float , optional
        The desired tolerance. Default is 0.0001.
    overwrite : bool , optional
        If set to True, any existing files are overwritten. Otherwise, the input list of graphs is appended to the end of each file. Default is False.
    silent : bool , optional
        If set to True no warnings or errors are printed. Default is False.

    Returns
    -------
    bool
        The result of Graph.ExportGraphsToCSV (True if the export succeeded, False otherwise).
        None is returned if the input does not contain any valid graphs.

    """
    from topologicpy.Topology import Topology

    # Accept either a single graph or a list, then keep only genuine graphs.
    candidates = graph if isinstance(graph, list) else [graph]
    valid_graphs = [item for item in candidates if Topology.IsInstance(item, "graph")]

    if not valid_graphs:
        if not silent:
            print("Graph.ExportToCSV - Error: The input graph parameter does not contain any valid graphs. Returning None")
        return None

    # Options are grouped by the CSV table they affect and forwarded verbatim.
    graph_options = dict(
        graphLabelKey=graphLabelKey,
        defaultGraphLabel=defaultGraphLabel,
        graphFeaturesKeys=graphFeaturesKeys,
        graphIDHeader=graphIDHeader,
        graphLabelHeader=graphLabelHeader,
        graphFeaturesHeader=graphFeaturesHeader,
    )
    edge_options = dict(
        edgeLabelKey=edgeLabelKey,
        defaultEdgeLabel=defaultEdgeLabel,
        edgeFeaturesKeys=edgeFeaturesKeys,
        edgeSRCHeader=edgeSRCHeader,
        edgeDSTHeader=edgeDSTHeader,
        edgeLabelHeader=edgeLabelHeader,
        edgeFeaturesHeader=edgeFeaturesHeader,
        edgeTrainMaskHeader=edgeTrainMaskHeader,
        edgeValidateMaskHeader=edgeValidateMaskHeader,
        edgeTestMaskHeader=edgeTestMaskHeader,
        edgeMaskKey=edgeMaskKey,
        edgeTrainRatio=edgeTrainRatio,
        edgeValidateRatio=edgeValidateRatio,
        edgeTestRatio=edgeTestRatio,
        bidirectional=bidirectional,
    )
    node_options = dict(
        nodeLabelKey=nodeLabelKey,
        defaultNodeLabel=defaultNodeLabel,
        nodeFeaturesKeys=nodeFeaturesKeys,
        nodeIDHeader=nodeIDHeader,
        nodeLabelHeader=nodeLabelHeader,
        nodeFeaturesHeader=nodeFeaturesHeader,
        nodeTrainMaskHeader=nodeTrainMaskHeader,
        nodeValidateMaskHeader=nodeValidateMaskHeader,
        nodeTestMaskHeader=nodeTestMaskHeader,
        nodeMaskKey=nodeMaskKey,
        nodeTrainRatio=nodeTrainRatio,
        nodeValidateRatio=nodeValidateRatio,
        nodeTestRatio=nodeTestRatio,
    )

    return Graph.ExportGraphsToCSV(graphs=valid_graphs,
                                   path=path,
                                   **graph_options,
                                   **edge_options,
                                   **node_options,
                                   mantissa=mantissa,
                                   tolerance=tolerance,
                                   overwrite=overwrite,
                                   silent=silent)
|
|
10370
|
+
|
|
10371
|
+
@staticmethod
|
|
10372
|
+
def ExportGraphToCSV(graph, path, graphLabel, graphFeatures="",
|
|
9832
10373
|
graphIDHeader="graph_id", graphLabelHeader="label", graphFeaturesHeader="feat",
|
|
9833
10374
|
|
|
9834
10375
|
edgeLabelKey="label", defaultEdgeLabel=0, edgeFeaturesKeys=[],
|
|
@@ -10182,7 +10723,7 @@ class Graph:
|
|
|
10182
10723
|
edge_data.append(single_edge_data)
|
|
10183
10724
|
|
|
10184
10725
|
if bidirectional == True:
|
|
10185
|
-
single_edge_data = [graph_id,
|
|
10726
|
+
single_edge_data = [graph_id, dst, src, edge_label, train_mask, validate_mask, test_mask, edge_features]
|
|
10186
10727
|
edge_data.append(single_edge_data)
|
|
10187
10728
|
df = pd.DataFrame(edge_data, columns=edge_columns)
|
|
10188
10729
|
|
|
@@ -10199,7 +10740,349 @@ class Graph:
|
|
|
10199
10740
|
yaml_file.write('dataset_name: topologic_dataset\nedge_data:\n- file_name: edges.csv\nnode_data:\n- file_name: nodes.csv')
|
|
10200
10741
|
yaml_file.close()
|
|
10201
10742
|
return True
|
|
10202
|
-
|
|
10743
|
+
|
|
10744
|
+
|
|
10745
|
+
|
|
10746
|
+
@staticmethod
|
|
10747
|
+
def ExportGraphsToCSV(graphs,
|
|
10748
|
+
path,
|
|
10749
|
+
graphLabelKey="label",
|
|
10750
|
+
defaultGraphLabel=0,
|
|
10751
|
+
graphFeaturesKeys=None,
|
|
10752
|
+
graphIDHeader="graph_id", graphLabelHeader="label", graphFeaturesHeader="feat",
|
|
10753
|
+
|
|
10754
|
+
edgeLabelKey="label", defaultEdgeLabel=0, edgeFeaturesKeys=None,
|
|
10755
|
+
edgeSRCHeader="src_id", edgeDSTHeader="dst_id",
|
|
10756
|
+
edgeLabelHeader="label", edgeFeaturesHeader="feat",
|
|
10757
|
+
edgeTrainMaskHeader="train_mask", edgeValidateMaskHeader="val_mask", edgeTestMaskHeader="test_mask",
|
|
10758
|
+
edgeMaskKey="mask",
|
|
10759
|
+
edgeTrainRatio=0.8, edgeValidateRatio=0.1, edgeTestRatio=0.1,
|
|
10760
|
+
bidirectional=True,
|
|
10761
|
+
|
|
10762
|
+
nodeLabelKey="label", defaultNodeLabel=0, nodeFeaturesKeys=None,
|
|
10763
|
+
nodeIDHeader="node_id", nodeLabelHeader="label", nodeFeaturesHeader="feat",
|
|
10764
|
+
nodeTrainMaskHeader="train_mask", nodeValidateMaskHeader="val_mask", nodeTestMaskHeader="test_mask",
|
|
10765
|
+
nodeMaskKey="mask",
|
|
10766
|
+
nodeTrainRatio=0.8, nodeValidateRatio=0.1, nodeTestRatio=0.1,
|
|
10767
|
+
mantissa=6, tolerance=0.0001, overwrite=False, silent=False):
|
|
10768
|
+
"""
|
|
10769
|
+
Batch-export a list of TopologicPy graphs to CSV files (graphs.csv, nodes.csv, edges.csv)
|
|
10770
|
+
in a format suitable for graph ML pipelines.
|
|
10771
|
+
|
|
10772
|
+
Modifications vs. ExportToCSV
|
|
10773
|
+
-----------------------------
|
|
10774
|
+
1. Graph labels are retrieved from the graph dictionary using `graphLabelKey`
|
|
10775
|
+
(fallback to `defaultGraphLabel`).
|
|
10776
|
+
2. Graph features are retrieved from the graph dictionary using `graphFeaturesKeys`.
|
|
10777
|
+
3. Graph/edge/node features are written as separate numeric columns:
|
|
10778
|
+
- Graph: graphFeaturesHeader_0 ... graphFeaturesHeader_{k-1}
|
|
10779
|
+
- Node : nodeFeaturesHeader_0 ... nodeFeaturesHeader_{k-1}
|
|
10780
|
+
- Edge : edgeFeaturesHeader_0 ... edgeFeaturesHeader_{k-1}
|
|
10781
|
+
|
|
10782
|
+
Returns
|
|
10783
|
+
-------
|
|
10784
|
+
bool
|
|
10785
|
+
True if export succeeded, False otherwise.
|
|
10786
|
+
"""
|
|
10787
|
+
|
|
10788
|
+
from topologicpy.Graph import Graph
|
|
10789
|
+
from topologicpy.Vertex import Vertex
|
|
10790
|
+
from topologicpy.Edge import Edge
|
|
10791
|
+
from topologicpy.Helper import Helper
|
|
10792
|
+
from topologicpy.Dictionary import Dictionary
|
|
10793
|
+
from topologicpy.Topology import Topology
|
|
10794
|
+
|
|
10795
|
+
import os
|
|
10796
|
+
import csv
|
|
10797
|
+
import math
|
|
10798
|
+
import random
|
|
10799
|
+
|
|
10800
|
+
# ----------------------------
|
|
10801
|
+
# Helpers
|
|
10802
|
+
# ----------------------------
|
|
10803
|
+
def _err(msg):
    """
    Report an error and hand back the failure value.

    Prints `msg` unless the enclosing call was made with `silent` set to a
    truthy value. Always returns None so callers can write `return _err(...)`.
    """
    if silent:
        return None
    print(msg)
    return None
|
|
10807
|
+
|
|
10808
|
+
def _ensure_dir(p):
|
|
10809
|
+
if not os.path.exists(p):
|
|
10810
|
+
try:
|
|
10811
|
+
os.makedirs(p)
|
|
10812
|
+
except Exception:
|
|
10813
|
+
return False
|
|
10814
|
+
return True
|
|
10815
|
+
|
|
10816
|
+
def _last_graph_id_from_csv(csv_path):
|
|
10817
|
+
"""
|
|
10818
|
+
Return last integer in first column of last non-empty row.
|
|
10819
|
+
Reads only the tail of the file (fast).
|
|
10820
|
+
"""
|
|
10821
|
+
if not os.path.exists(csv_path):
|
|
10822
|
+
return -1
|
|
10823
|
+
try:
|
|
10824
|
+
with open(csv_path, "rb") as f:
|
|
10825
|
+
f.seek(0, os.SEEK_END)
|
|
10826
|
+
size = f.tell()
|
|
10827
|
+
if size == 0:
|
|
10828
|
+
return -1
|
|
10829
|
+
chunk = 4096
|
|
10830
|
+
data = b""
|
|
10831
|
+
pos = size
|
|
10832
|
+
while pos > 0:
|
|
10833
|
+
step = chunk if pos >= chunk else pos
|
|
10834
|
+
pos -= step
|
|
10835
|
+
f.seek(pos, os.SEEK_SET)
|
|
10836
|
+
data = f.read(step) + data
|
|
10837
|
+
lines = data.splitlines()
|
|
10838
|
+
if len(lines) >= 2:
|
|
10839
|
+
break
|
|
10840
|
+
for line in reversed(lines):
|
|
10841
|
+
if line.strip():
|
|
10842
|
+
s = line.decode("utf-8", errors="ignore")
|
|
10843
|
+
first = s.split(",", 1)[0].strip()
|
|
10844
|
+
try:
|
|
10845
|
+
return int(first)
|
|
10846
|
+
except Exception:
|
|
10847
|
+
return -1
|
|
10848
|
+
return -1
|
|
10849
|
+
except Exception:
|
|
10850
|
+
return -1
|
|
10851
|
+
|
|
10852
|
+
def _quant_key_xyz(v):
    """
    Build a hashable (x, y, z) key for vertex `v`.

    Each coordinate is requested with the enclosing call's `mantissa` and
    rounded to that many decimals, so the tuple can be used as a dictionary key.
    """
    coords = (
        Vertex.X(v, mantissa=mantissa),
        Vertex.Y(v, mantissa=mantissa),
        Vertex.Z(v, mantissa=mantissa),
    )
    return tuple(round(float(c), mantissa) for c in coords)
|
|
10856
|
+
|
|
10857
|
+
def _mask_from_dict_or_ratio(d, mask_key, train_max, val_max, counts):
|
|
10858
|
+
"""
|
|
10859
|
+
counts: dict with keys train, val, test (mutated).
|
|
10860
|
+
"""
|
|
10861
|
+
if mask_key is not None and d is not None:
|
|
10862
|
+
try:
|
|
10863
|
+
keys = Dictionary.Keys(d)
|
|
10864
|
+
except Exception:
|
|
10865
|
+
keys = []
|
|
10866
|
+
if mask_key in keys:
|
|
10867
|
+
mv = Dictionary.ValueAtKey(d, mask_key)
|
|
10868
|
+
if mv in [0, 1, 2]:
|
|
10869
|
+
if mv == 0:
|
|
10870
|
+
counts["train"] += 1
|
|
10871
|
+
return True, False, False
|
|
10872
|
+
if mv == 1:
|
|
10873
|
+
counts["val"] += 1
|
|
10874
|
+
return False, True, False
|
|
10875
|
+
counts["test"] += 1
|
|
10876
|
+
return False, False, True
|
|
10877
|
+
|
|
10878
|
+
if counts["train"] < train_max:
|
|
10879
|
+
counts["train"] += 1
|
|
10880
|
+
return True, False, False
|
|
10881
|
+
if counts["val"] < val_max:
|
|
10882
|
+
counts["val"] += 1
|
|
10883
|
+
return False, True, False
|
|
10884
|
+
counts["test"] += 1
|
|
10885
|
+
return False, False, True
|
|
10886
|
+
|
|
10887
|
+
def _feat_list_from_keys(d, keys_flat):
|
|
10888
|
+
"""
|
|
10889
|
+
Returns list[float] with length = len(keys_flat).
|
|
10890
|
+
Missing/invalid values -> 0.0 (keeps vector length stable).
|
|
10891
|
+
"""
|
|
10892
|
+
if not keys_flat:
|
|
10893
|
+
return []
|
|
10894
|
+
out = []
|
|
10895
|
+
for k in keys_flat:
|
|
10896
|
+
try:
|
|
10897
|
+
val = Dictionary.ValueAtKey(d, k) if d is not None else None
|
|
10898
|
+
if val is None:
|
|
10899
|
+
out.append(0.0)
|
|
10900
|
+
else:
|
|
10901
|
+
out.append(round(float(val), mantissa))
|
|
10902
|
+
except Exception:
|
|
10903
|
+
out.append(0.0)
|
|
10904
|
+
return out
|
|
10905
|
+
|
|
10906
|
+
def _label_from_dict(d, key, default_val):
|
|
10907
|
+
try:
|
|
10908
|
+
if d is None:
|
|
10909
|
+
return default_val
|
|
10910
|
+
val = Dictionary.ValueAtKey(d, key)
|
|
10911
|
+
return default_val if val is None else val
|
|
10912
|
+
except Exception:
|
|
10913
|
+
return default_val
|
|
10914
|
+
|
|
10915
|
+
def _feature_headers(prefix, k):
|
|
10916
|
+
return [f"{prefix}_{i}" for i in range(int(k))]
|
|
10917
|
+
|
|
10918
|
+
# ----------------------------
|
|
10919
|
+
# Validate inputs
|
|
10920
|
+
# ----------------------------
|
|
10921
|
+
if graphs is None or (not isinstance(graphs, list)) or len(graphs) == 0:
|
|
10922
|
+
return _err("Graph.ExportGraphsToCSV - Error: 'graphs' must be a non-empty list. Returning None.")
|
|
10923
|
+
|
|
10924
|
+
if abs(nodeTrainRatio + nodeValidateRatio + nodeTestRatio - 1) > 0.001:
|
|
10925
|
+
return _err("Graph.ExportGraphsToCSV - Error: node train/val/test ratios must add up to 1. Returning None.")
|
|
10926
|
+
if abs(edgeTrainRatio + edgeValidateRatio + edgeTestRatio - 1) > 0.001:
|
|
10927
|
+
return _err("Graph.ExportGraphsToCSV - Error: edge train/val/test ratios must add up to 1. Returning None.")
|
|
10928
|
+
|
|
10929
|
+
if not _ensure_dir(path):
|
|
10930
|
+
return _err("Graph.ExportGraphsToCSV - Error: Could not create output folder. Returning None.")
|
|
10931
|
+
|
|
10932
|
+
graphs_csv = os.path.join(path, "graphs.csv")
|
|
10933
|
+
nodes_csv = os.path.join(path, "nodes.csv")
|
|
10934
|
+
edges_csv = os.path.join(path, "edges.csv")
|
|
10935
|
+
|
|
10936
|
+
if overwrite is False:
|
|
10937
|
+
if not os.path.exists(graphs_csv):
|
|
10938
|
+
return _err("Graph.ExportGraphsToCSV - Error: overwrite=False but graphs.csv not found. Returning None.")
|
|
10939
|
+
if not os.path.exists(nodes_csv):
|
|
10940
|
+
return _err("Graph.ExportGraphsToCSV - Error: overwrite=False but nodes.csv not found. Returning None.")
|
|
10941
|
+
if not os.path.exists(edges_csv):
|
|
10942
|
+
return _err("Graph.ExportGraphsToCSV - Error: overwrite=False but edges.csv not found. Returning None.")
|
|
10943
|
+
|
|
10944
|
+
# Determine starting graph_id once
|
|
10945
|
+
start_graph_id = 0 if overwrite else (_last_graph_id_from_csv(graphs_csv) + 1)
|
|
10946
|
+
|
|
10947
|
+
# Flatten feature keys once
|
|
10948
|
+
graphFeaturesKeys = [] if graphFeaturesKeys is None else graphFeaturesKeys
|
|
10949
|
+
nodeFeaturesKeys = [] if nodeFeaturesKeys is None else nodeFeaturesKeys
|
|
10950
|
+
edgeFeaturesKeys = [] if edgeFeaturesKeys is None else edgeFeaturesKeys
|
|
10951
|
+
|
|
10952
|
+
graph_feature_keys = Helper.Flatten(graphFeaturesKeys)
|
|
10953
|
+
node_feature_keys = Helper.Flatten(nodeFeaturesKeys)
|
|
10954
|
+
edge_feature_keys = Helper.Flatten(edgeFeaturesKeys)
|
|
10955
|
+
|
|
10956
|
+
# Build feature headers
|
|
10957
|
+
graph_feat_headers = _feature_headers(graphFeaturesHeader, len(graph_feature_keys))
|
|
10958
|
+
node_feat_headers = _feature_headers(nodeFeaturesHeader, len(node_feature_keys))
|
|
10959
|
+
edge_feat_headers = _feature_headers(edgeFeaturesHeader, len(edge_feature_keys))
|
|
10960
|
+
|
|
10961
|
+
# CSV modes
|
|
10962
|
+
graphs_mode = "w" if overwrite else "a"
|
|
10963
|
+
nodes_mode = "w" if overwrite else "a"
|
|
10964
|
+
edges_mode = "w" if overwrite else "a"
|
|
10965
|
+
|
|
10966
|
+
# Column headers
|
|
10967
|
+
graphs_header = [graphIDHeader, graphLabelHeader] + graph_feat_headers
|
|
10968
|
+
nodes_header = [graphIDHeader, nodeIDHeader, nodeLabelHeader,
|
|
10969
|
+
nodeTrainMaskHeader, nodeValidateMaskHeader, nodeTestMaskHeader] + node_feat_headers + ["X", "Y", "Z"]
|
|
10970
|
+
edges_header = [graphIDHeader, edgeSRCHeader, edgeDSTHeader, edgeLabelHeader,
|
|
10971
|
+
edgeTrainMaskHeader, edgeValidateMaskHeader, edgeTestMaskHeader] + edge_feat_headers
|
|
10972
|
+
|
|
10973
|
+
try:
|
|
10974
|
+
with open(graphs_csv, graphs_mode, newline="", encoding="utf-8") as f_g, \
|
|
10975
|
+
open(nodes_csv, nodes_mode, newline="", encoding="utf-8") as f_n, \
|
|
10976
|
+
open(edges_csv, edges_mode, newline="", encoding="utf-8") as f_e:
|
|
10977
|
+
|
|
10978
|
+
w_g = csv.writer(f_g)
|
|
10979
|
+
w_n = csv.writer(f_n)
|
|
10980
|
+
w_e = csv.writer(f_e)
|
|
10981
|
+
|
|
10982
|
+
# write headers if overwriting
|
|
10983
|
+
if overwrite:
|
|
10984
|
+
w_g.writerow(graphs_header)
|
|
10985
|
+
w_n.writerow(nodes_header)
|
|
10986
|
+
w_e.writerow(edges_header)
|
|
10987
|
+
|
|
10988
|
+
# ----------------------------
|
|
10989
|
+
# Main loop: per graph
|
|
10990
|
+
# ----------------------------
|
|
10991
|
+
for idx, graph in enumerate(graphs):
|
|
10992
|
+
if not Topology.IsInstance(graph, "Graph"):
|
|
10993
|
+
return _err(f"Graph.ExportGraphsToCSV - Error: item {idx} is not a valid topologic graph. Returning None.")
|
|
10994
|
+
|
|
10995
|
+
graph_id = start_graph_id + idx
|
|
10996
|
+
|
|
10997
|
+
# ---- Graph label/features from graph dictionary
|
|
10998
|
+
gd = Topology.Dictionary(graph)
|
|
10999
|
+
g_label = _label_from_dict(gd, graphLabelKey, defaultGraphLabel)
|
|
11000
|
+
g_feat = _feat_list_from_keys(gd, graph_feature_keys)
|
|
11001
|
+
|
|
11002
|
+
w_g.writerow([graph_id, g_label] + g_feat)
|
|
11003
|
+
|
|
11004
|
+
# ---- Nodes
|
|
11005
|
+
vertices = Graph.Vertices(graph)
|
|
11006
|
+
if vertices is None or len(vertices) < 3:
|
|
11007
|
+
return _err(f"Graph.ExportGraphsToCSV - Error: graph {graph_id} is too small (<3 vertices). Returning None.")
|
|
11008
|
+
|
|
11009
|
+
vertices = random.sample(vertices, len(vertices))
|
|
11010
|
+
|
|
11011
|
+
n = len(vertices)
|
|
11012
|
+
node_train_max = max(1, math.floor(n * float(nodeTrainRatio)))
|
|
11013
|
+
node_val_max = max(1, math.floor(n * float(nodeValidateRatio)))
|
|
11014
|
+
node_counts = {"train": 0, "val": 0, "test": 0}
|
|
11015
|
+
|
|
11016
|
+
# Vertex->index map for O(1) edge endpoint lookup
|
|
11017
|
+
v_index = { _quant_key_xyz(v): i for i, v in enumerate(vertices) }
|
|
11018
|
+
|
|
11019
|
+
for i, v in enumerate(vertices):
|
|
11020
|
+
nd = Topology.Dictionary(v)
|
|
11021
|
+
|
|
11022
|
+
v_label = _label_from_dict(nd, nodeLabelKey, defaultNodeLabel)
|
|
11023
|
+
tmask, vmask, smask = _mask_from_dict_or_ratio(nd, nodeMaskKey, node_train_max, node_val_max, node_counts)
|
|
11024
|
+
v_feat = _feat_list_from_keys(nd, node_feature_keys)
|
|
11025
|
+
|
|
11026
|
+
x = float(Vertex.X(v, mantissa=mantissa))
|
|
11027
|
+
y = float(Vertex.Y(v, mantissa=mantissa))
|
|
11028
|
+
z = float(Vertex.Z(v, mantissa=mantissa))
|
|
11029
|
+
|
|
11030
|
+
w_n.writerow([graph_id, i, v_label, tmask, vmask, smask] + v_feat + [x, y, z])
|
|
11031
|
+
|
|
11032
|
+
# ---- Edges
|
|
11033
|
+
edges = Graph.Edges(graph) or []
|
|
11034
|
+
m = len(edges)
|
|
11035
|
+
edge_train_max = math.floor(m * float(edgeTrainRatio))
|
|
11036
|
+
edge_val_max = math.floor(m * float(edgeValidateRatio))
|
|
11037
|
+
edge_counts = {"train": 0, "val": 0, "test": 0}
|
|
11038
|
+
|
|
11039
|
+
for e in edges:
|
|
11040
|
+
ed = Topology.Dictionary(e)
|
|
11041
|
+
|
|
11042
|
+
e_label = _label_from_dict(ed, edgeLabelKey, defaultEdgeLabel)
|
|
11043
|
+
tmask, vmask, smask = _mask_from_dict_or_ratio(ed, edgeMaskKey, edge_train_max, edge_val_max, edge_counts)
|
|
11044
|
+
e_feat = _feat_list_from_keys(ed, edge_feature_keys)
|
|
11045
|
+
|
|
11046
|
+
sv = Edge.StartVertex(e)
|
|
11047
|
+
tv = Edge.EndVertex(e)
|
|
11048
|
+
|
|
11049
|
+
src = v_index.get(_quant_key_xyz(sv), None)
|
|
11050
|
+
dst = v_index.get(_quant_key_xyz(tv), None)
|
|
11051
|
+
|
|
11052
|
+
# rare fallback
|
|
11053
|
+
if src is None or dst is None:
|
|
11054
|
+
try:
|
|
11055
|
+
src = Vertex.Index(sv, vertices, tolerance=tolerance)
|
|
11056
|
+
dst = Vertex.Index(tv, vertices, tolerance=tolerance)
|
|
11057
|
+
except Exception:
|
|
11058
|
+
src = None
|
|
11059
|
+
dst = None
|
|
11060
|
+
|
|
11061
|
+
if src is None or dst is None:
|
|
11062
|
+
continue
|
|
11063
|
+
|
|
11064
|
+
# forward
|
|
11065
|
+
w_e.writerow([graph_id, src, dst, e_label, tmask, vmask, smask] + e_feat)
|
|
11066
|
+
|
|
11067
|
+
if bidirectional:
|
|
11068
|
+
# reverse (correct)
|
|
11069
|
+
w_e.writerow([graph_id, dst, src, e_label, tmask, vmask, smask] + e_feat)
|
|
11070
|
+
|
|
11071
|
+
# meta.yaml
|
|
11072
|
+
with open(os.path.join(path, "meta.yaml"), "w", encoding="utf-8") as yaml_file:
|
|
11073
|
+
yaml_file.write(
|
|
11074
|
+
"dataset_name: topologic_dataset\n"
|
|
11075
|
+
"edge_data:\n- file_name: edges.csv\n"
|
|
11076
|
+
"node_data:\n- file_name: nodes.csv\n"
|
|
11077
|
+
"graph_data:\n file_name: graphs.csv\n"
|
|
11078
|
+
)
|
|
11079
|
+
|
|
11080
|
+
return True
|
|
11081
|
+
|
|
11082
|
+
except Exception as ex:
|
|
11083
|
+
return _err(f"Graph.ExportGraphsToCSV - Error: {ex}. Returning None.")
|
|
11084
|
+
|
|
11085
|
+
|
|
10203
11086
|
@staticmethod
|
|
10204
11087
|
def ExportToGEXF(graph, path: str = None, graphWidth: float = 20, graphLength: float = 20, graphHeight: float = 20,
|
|
10205
11088
|
defaultVertexColor: str = "black", defaultVertexSize: float = 3,
|