weave-python 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- weave/datamanagement/db/models.py +131 -0
- weave/datamanagement/db/queries.py +140 -0
- weave/datamanagement/generate/v1/configuration_pb2.py +51 -0
- weave/datamanagement/generate/v1/configuration_pb2.pyi +283 -0
- weave/datamanagement/generate/v1/configuration_pb2_grpc.py +4 -0
- weave/datamanagement/generate/v1/configuration_pb2_grpc.pyi +17 -0
- weave/datamanagement/generate/v1/service_pb2.py +63 -0
- weave/datamanagement/generate/v1/service_pb2.pyi +159 -0
- weave/datamanagement/generate/v1/service_pb2_grpc.py +206 -0
- weave/datamanagement/generate/v1/service_pb2_grpc.pyi +92 -0
- weave/datamanagement/storage/v1/auth_pb2.py +37 -0
- weave/datamanagement/storage/v1/auth_pb2.pyi +32 -0
- weave/datamanagement/storage/v1/auth_pb2_grpc.py +4 -0
- weave/datamanagement/storage/v1/auth_pb2_grpc.pyi +17 -0
- weave/datamanagement/storage/v1/nosql_database_pb2.py +54 -0
- weave/datamanagement/storage/v1/nosql_database_pb2.pyi +263 -0
- weave/datamanagement/storage/v1/nosql_database_pb2_grpc.py +4 -0
- weave/datamanagement/storage/v1/nosql_database_pb2_grpc.pyi +17 -0
- weave/datamanagement/storage/v1/object_store_pb2.py +44 -0
- weave/datamanagement/storage/v1/object_store_pb2.pyi +117 -0
- weave/datamanagement/storage/v1/object_store_pb2_grpc.py +4 -0
- weave/datamanagement/storage/v1/object_store_pb2_grpc.pyi +17 -0
- weave/datamanagement/storage/v1/service_pb2.py +68 -0
- weave/datamanagement/storage/v1/service_pb2.pyi +386 -0
- weave/datamanagement/storage/v1/service_pb2_grpc.py +301 -0
- weave/datamanagement/storage/v1/service_pb2_grpc.pyi +150 -0
- weave/datamanagement/storage/v1/sql_database_pb2.py +53 -0
- weave/datamanagement/storage/v1/sql_database_pb2.pyi +284 -0
- weave/datamanagement/storage/v1/sql_database_pb2_grpc.py +4 -0
- weave/datamanagement/storage/v1/sql_database_pb2_grpc.pyi +17 -0
- weave/datamanagement/storage/v1/storage_pb2.py +40 -0
- weave/datamanagement/storage/v1/storage_pb2.pyi +49 -0
- weave/datamanagement/storage/v1/storage_pb2_grpc.py +4 -0
- weave/datamanagement/storage/v1/storage_pb2_grpc.pyi +17 -0
- weave/datamanagement/synthesize/v1/dataset_pb2.py +43 -0
- weave/datamanagement/synthesize/v1/dataset_pb2.pyi +143 -0
- weave/datamanagement/synthesize/v1/dataset_pb2_grpc.py +4 -0
- weave/datamanagement/synthesize/v1/dataset_pb2_grpc.pyi +17 -0
- weave/datamanagement/synthesize/v1/inline_data_pb2.py +39 -0
- weave/datamanagement/synthesize/v1/inline_data_pb2.pyi +67 -0
- weave/datamanagement/synthesize/v1/inline_data_pb2_grpc.py +4 -0
- weave/datamanagement/synthesize/v1/inline_data_pb2_grpc.pyi +17 -0
- weave/datamanagement/synthesize/v1/relationship_pb2.py +41 -0
- weave/datamanagement/synthesize/v1/relationship_pb2.pyi +109 -0
- weave/datamanagement/synthesize/v1/relationship_pb2_grpc.py +4 -0
- weave/datamanagement/synthesize/v1/relationship_pb2_grpc.pyi +17 -0
- weave/datamanagement/synthesize/v1/service_pb2.py +45 -0
- weave/datamanagement/synthesize/v1/service_pb2.pyi +52 -0
- weave/datamanagement/synthesize/v1/service_pb2_grpc.py +77 -0
- weave/datamanagement/synthesize/v1/service_pb2_grpc.pyi +41 -0
- weave/datamanagement/synthesize/v1/training_pb2.py +44 -0
- weave/datamanagement/synthesize/v1/training_pb2.pyi +120 -0
- weave/datamanagement/synthesize/v1/training_pb2_grpc.py +4 -0
- weave/datamanagement/synthesize/v1/training_pb2_grpc.pyi +17 -0
- weave_python-0.10.0.dist-info/METADATA +6 -0
- weave_python-0.10.0.dist-info/RECORD +57 -0
- weave_python-0.10.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# Code generated by sqlc. DO NOT EDIT.
|
|
2
|
+
# versions:
|
|
3
|
+
# sqlc v1.28.0
|
|
4
|
+
import dataclasses
|
|
5
|
+
import datetime
|
|
6
|
+
import enum
|
|
7
|
+
from typing import Any, Optional
|
|
8
|
+
import uuid
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class StorageCategory(enum.StrEnum):
    """String enum with the three storage categories used by StorageConnection.category.

    Members are str subclasses, so they compare equal to their literal value.
    Presumably mirrors a database enum type -- confirm against the schema.
    """

    OBJECT_STORE = "object_store"
    SQL_DATABASE = "sql_database"
    NOSQL_DATABASE = "nosql_database"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class StorageVariant(enum.StrEnum):
    """String enum naming the concrete storage product of a StorageConnection.

    Every value is the lowercase form of its member name.
    NOTE(review): nothing in this module ties a variant to a StorageCategory;
    any such pairing must be enforced elsewhere -- confirm.
    """

    AWS_S3 = "aws_s3"
    AZURE_BLOB_STORAGE = "azure_blob_storage"
    GOOGLE_CLOUD_STORAGE = "google_cloud_storage"
    MYSQL = "mysql"
    POSTGRES = "postgres"
    SQL_SERVER = "sql_server"
    ORACLE = "oracle"
    MARIADB = "mariadb"
    SNOWFLAKE = "snowflake"
    BIGQUERY = "bigquery"
    DATABRICKS = "databricks"
    CASSANDRA = "cassandra"
    MONGODB = "mongodb"
    DYNAMODB = "dynamodb"
    COSMOSDB = "cosmosdb"
    REDIS = "redis"
    VALKEY = "valkey"
    ELASTICSEARCH = "elasticsearch"
    OPENSEARCH = "opensearch"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class SynthesizeV1ColumnDataType(enum.StrEnum):
    """String enum of column data types.

    Presumably the set of values stored in SynthesizeV1DataColumn.data_type
    (that field is annotated ``Any``) -- TODO confirm against the schema.
    """

    STRING = "string"
    INTEGER = "integer"
    FLOAT = "float"
    BOOLEAN = "boolean"
    TIMESTAMP = "timestamp"
    DATE = "date"
    BINARY = "binary"
    CATEGORICAL = "categorical"
    ORDINAL = "ordinal"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class SynthesizeV1ColumnRelationshipType(enum.StrEnum):
    """String enum of relationship kinds between two columns.

    Presumably the set of values stored in
    SynthesizeV1ColumnRelationship.relationship_type (that field is annotated
    ``Any``) -- TODO confirm against the schema.
    """

    CONTEXT_FOREIGN_KEY = "context_foreign_key"
    NON_CONTEXT_FOREIGN_KEY = "non_context_foreign_key"
    ONE_TO_MANY = "one_to_many"
    MANY_TO_ONE = "many_to_one"
    MANY_TO_MANY = "many_to_many"
    SELF_REFERENCE = "self_reference"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclasses.dataclass()
class Organization:
    """Plain data record for an organization: identifier, name, optional description.

    Presumably mirrors one row of an organization table -- confirm against the schema.
    """

    id: uuid.UUID
    name: str
    description: Optional[str]  # None is an allowed value
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclasses.dataclass()
class StorageConnection:
    """Plain data record describing a storage connection.

    The field order matches the column order selected by the storage
    connection queries in the sibling ``queries`` module of this package.
    """

    id: uuid.UUID
    organization_id: uuid.UUID
    name: str
    description: Optional[str]  # None is an allowed value
    category: StorageCategory
    variant: StorageVariant
    # Untyped payload; presumably variant-specific connection settings
    # (e.g. a JSON document) -- TODO confirm against the schema.
    details: Any
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclasses.dataclass()
class SynthesizeV1ColumnRelationship:
    """Plain data record linking a source column/dataset to a target column/dataset."""

    id: uuid.UUID
    source_column_id: uuid.UUID
    source_dataset_id: uuid.UUID
    target_column_id: uuid.UUID
    target_dataset_id: uuid.UUID
    # Annotated Any by the generator; presumably holds one of the
    # SynthesizeV1ColumnRelationshipType values -- TODO confirm.
    relationship_type: Any
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclasses.dataclass()
class SynthesizeV1DataColumn:
    """Plain data record for one column belonging to a data schema (``schema_id``)."""

    id: uuid.UUID
    schema_id: uuid.UUID
    name: str
    # Annotated Any by the generator; presumably holds one of the
    # SynthesizeV1ColumnDataType values -- TODO confirm.
    data_type: Any
    is_primary_key: Optional[bool]  # tri-state: None is distinct from False
    description: Optional[str]  # None is an allowed value
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@dataclasses.dataclass()
class SynthesizeV1DataSchema:
    """Plain data record for a named data schema owned by an organization."""

    id: uuid.UUID
    organization_id: uuid.UUID
    name: str
    description: Optional[str]  # None is an allowed value
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@dataclasses.dataclass()
class SynthesizeV1Dataset:
    """Plain data record for a dataset.

    References its owning organization, a storage connection and a schema;
    the model reference is optional.
    """

    id: uuid.UUID
    organization_id: uuid.UUID
    name: str
    description: Optional[str]  # None is an allowed value
    storage_connection_id: uuid.UUID
    schema_id: uuid.UUID
    model_id: Optional[uuid.UUID]  # None when no model is referenced
    is_synthetic: Optional[bool]  # tri-state: None is distinct from False
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@dataclasses.dataclass()
class SynthesizeV1Model:
    """Plain data record for a model and the storage location it refers to."""

    id: uuid.UUID
    organization_id: uuid.UUID
    name: str
    description: Optional[str]  # None is an allowed value
    # NOTE(review): nothing here shows whether this datetime is
    # timezone-aware -- confirm before comparing with other timestamps.
    created_at: Optional[datetime.datetime]
    configuration: Optional[Any]  # untyped payload; structure not visible here
    storage_connection_id: uuid.UUID
    # presumably the bucket and object key of the stored model artifact
    # within the referenced storage connection -- TODO confirm
    storage_bucket: str
    storage_key: str
    model_type: str  # free-form string here, not an enum
    model_version: Optional[str]
    metadata: Optional[Any]  # untyped payload; structure not visible here
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Code generated by sqlc. DO NOT EDIT.
|
|
2
|
+
# versions:
|
|
3
|
+
# sqlc v1.28.0
|
|
4
|
+
# source: queries.sql
|
|
5
|
+
import dataclasses
|
|
6
|
+
from typing import AsyncIterator, Iterator, List, Optional
|
|
7
|
+
import uuid
|
|
8
|
+
|
|
9
|
+
import sqlalchemy
|
|
10
|
+
import sqlalchemy.ext.asyncio
|
|
11
|
+
|
|
12
|
+
from weave.datamanagement.db import models
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
GET_FOREIGN_KEY_RELATIONSHIPS = """-- name: get_foreign_key_relationships \\:many
|
|
16
|
+
SELECT
|
|
17
|
+
cr.source_dataset_id,
|
|
18
|
+
cr.target_dataset_id,
|
|
19
|
+
sc.name AS source_column_name,
|
|
20
|
+
tc.name AS target_column_name
|
|
21
|
+
FROM
|
|
22
|
+
synthesize_v1.column_relationship cr
|
|
23
|
+
JOIN
|
|
24
|
+
synthesize_v1.data_column sc ON cr.source_column_id = sc.id
|
|
25
|
+
JOIN
|
|
26
|
+
synthesize_v1.data_column tc ON cr.target_column_id = tc.id
|
|
27
|
+
WHERE
|
|
28
|
+
cr.relationship_type IN ('context_foreign_key', 'non_context_foreign_key')
|
|
29
|
+
AND cr.source_dataset_id = ANY(:p1\\:\\:UUID[])
|
|
30
|
+
AND cr.target_dataset_id = ANY(:p1\\:\\:UUID[])
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclasses.dataclass()
class GetForeignKeyRelationshipsRow:
    """One result row of the GET_FOREIGN_KEY_RELATIONSHIPS query.

    Field order matches the query's SELECT column order; the querier classes
    fill it from row positions 0-3.
    """

    source_dataset_id: uuid.UUID
    target_dataset_id: uuid.UUID
    source_column_name: str
    target_column_name: str
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
GET_STORAGE_CONNECTION = """-- name: get_storage_connection \\:one
|
|
43
|
+
SELECT id, organization_id, name, description, category, variant, details
|
|
44
|
+
FROM storage_connection
|
|
45
|
+
WHERE id = :p1
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
GET_STORAGE_CONNECTION_BY_DATASET_ID = """-- name: get_storage_connection_by_dataset_id \\:one
|
|
50
|
+
SELECT sc.id, sc.organization_id, sc.name, sc.description, sc.category, sc.variant, sc.details
|
|
51
|
+
FROM storage_connection sc
|
|
52
|
+
JOIN synthesize_v1.dataset d ON d.storage_connection_id = sc.id
|
|
53
|
+
WHERE d.id = :p1
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class Querier:
    """Blocking query runner bound to one SQLAlchemy connection.

    Each public method executes one of this module's SQL constants with a
    single bind parameter named ``p1`` and converts result rows, by column
    position, into dataclass instances.
    """

    def __init__(self, conn: sqlalchemy.engine.Connection):
        # The connection is stored as given; this class never closes it.
        self._conn = conn

    @staticmethod
    def _as_storage_connection(record) -> models.StorageConnection:
        """Map a result row (columns 0-6) onto ``models.StorageConnection``."""
        return models.StorageConnection(
            id=record[0],
            organization_id=record[1],
            name=record[2],
            description=record[3],
            category=record[4],
            variant=record[5],
            details=record[6],
        )

    def get_foreign_key_relationships(self, *, dataset_ids: List[uuid.UUID]) -> Iterator[GetForeignKeyRelationshipsRow]:
        """Yield foreign-key relationships whose source and target datasets are both in ``dataset_ids``.

        This is a generator: the statement is only executed once iteration
        starts, and rows are converted one at a time.
        """
        statement = sqlalchemy.text(GET_FOREIGN_KEY_RELATIONSHIPS)
        bind_params = {"p1": dataset_ids}
        for record in self._conn.execute(statement, bind_params):
            yield GetForeignKeyRelationshipsRow(
                source_dataset_id=record[0],
                target_dataset_id=record[1],
                source_column_name=record[2],
                target_column_name=record[3],
            )

    def get_storage_connection(self, *, id: uuid.UUID) -> Optional[models.StorageConnection]:
        """Return the storage connection with the given ``id``, or None when no row matches."""
        statement = sqlalchemy.text(GET_STORAGE_CONNECTION)
        record = self._conn.execute(statement, {"p1": id}).first()
        if record is not None:
            return self._as_storage_connection(record)
        return None

    def get_storage_connection_by_dataset_id(self, *, dataset_id: uuid.UUID) -> Optional[models.StorageConnection]:
        """Return the storage connection used by dataset ``dataset_id``, or None when no row matches."""
        statement = sqlalchemy.text(GET_STORAGE_CONNECTION_BY_DATASET_ID)
        record = self._conn.execute(statement, {"p1": dataset_id}).first()
        if record is not None:
            return self._as_storage_connection(record)
        return None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class AsyncQuerier:
    """Asyncio query runner bound to one SQLAlchemy ``AsyncConnection``.

    Offers the same three queries as the blocking querier in this module;
    each executes a module-level SQL constant with a single bind parameter
    named ``p1`` and maps result rows by column position.
    """

    def __init__(self, conn: sqlalchemy.ext.asyncio.AsyncConnection):
        # The connection is stored as given; this class never closes it.
        self._conn = conn

    @staticmethod
    def _as_storage_connection(record) -> models.StorageConnection:
        """Map a result row (columns 0-6) onto ``models.StorageConnection``."""
        return models.StorageConnection(
            id=record[0],
            organization_id=record[1],
            name=record[2],
            description=record[3],
            category=record[4],
            variant=record[5],
            details=record[6],
        )

    async def get_foreign_key_relationships(self, *, dataset_ids: List[uuid.UUID]) -> AsyncIterator[GetForeignKeyRelationshipsRow]:
        """Asynchronously yield foreign-key relationships whose source and target datasets are both in ``dataset_ids``.

        This is an async generator: the statement is only sent once iteration
        starts, and rows are consumed from the connection's ``stream()`` result.
        """
        statement = sqlalchemy.text(GET_FOREIGN_KEY_RELATIONSHIPS)
        bind_params = {"p1": dataset_ids}
        streamed = await self._conn.stream(statement, bind_params)
        async for record in streamed:
            yield GetForeignKeyRelationshipsRow(
                source_dataset_id=record[0],
                target_dataset_id=record[1],
                source_column_name=record[2],
                target_column_name=record[3],
            )

    async def get_storage_connection(self, *, id: uuid.UUID) -> Optional[models.StorageConnection]:
        """Return the storage connection with the given ``id``, or None when no row matches."""
        statement = sqlalchemy.text(GET_STORAGE_CONNECTION)
        outcome = await self._conn.execute(statement, {"p1": id})
        record = outcome.first()
        if record is not None:
            return self._as_storage_connection(record)
        return None

    async def get_storage_connection_by_dataset_id(self, *, dataset_id: uuid.UUID) -> Optional[models.StorageConnection]:
        """Return the storage connection used by dataset ``dataset_id``, or None when no row matches."""
        statement = sqlalchemy.text(GET_STORAGE_CONNECTION_BY_DATASET_ID)
        outcome = await self._conn.execute(statement, {"p1": dataset_id})
        record = outcome.first()
        if record is not None:
            return self._as_storage_connection(record)
        return None
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
+
# NO CHECKED-IN PROTOBUF GENCODE
|
|
4
|
+
# source: weave/datamanagement/generate/v1/configuration.proto
|
|
5
|
+
# Protobuf Python Version: 6.30.2
|
|
6
|
+
"""Generated protocol buffer code."""
|
|
7
|
+
from google.protobuf import descriptor as _descriptor
|
|
8
|
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
|
9
|
+
from google.protobuf import runtime_version as _runtime_version
|
|
10
|
+
from google.protobuf import symbol_database as _symbol_database
|
|
11
|
+
from google.protobuf.internal import builder as _builder
|
|
12
|
+
# Import-time check of the installed protobuf runtime against the version this
# file was generated with (6.30.2, no suffix). Presumably raises when the
# runtime is incompatible -- see the protobuf runtime_version module to confirm.
_runtime_version.ValidateProtobufRuntimeVersion(
    _runtime_version.Domain.PUBLIC,
    6,
    30,
    2,
    '',
    'weave/datamanagement/generate/v1/configuration.proto'
)
# @@protoc_insertion_point(imports)

# Handle to the default symbol database; not referenced again in the visible
# part of this module.
_sym_db = _symbol_database.Default()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n4weave/datamanagement/generate/v1/configuration.proto\x12 weave.datamanagement.generate.v1\"\xbb\x02\n\rConfiguration\x12\x1f\n\x0bnum_records\x18\x01 \x01(\x03R\nnumRecords\x12!\n\x0c\x64\x61taset_name\x18\x02 \x01(\tR\x0b\x64\x61tasetName\x12V\n\x0c\x64\x61ta_quality\x18\x03 \x01(\x0b\x32\x33.weave.datamanagement.generate.v1.DataQualityConfigR\x0b\x64\x61taQuality\x12I\n\x07privacy\x18\x04 \x01(\x0b\x32/.weave.datamanagement.generate.v1.PrivacyConfigR\x07privacy\x12\x43\n\x05model\x18\x05 \x01(\x0b\x32-.weave.datamanagement.generate.v1.ModelConfigR\x05model\"\xb1\x02\n\x11\x44\x61taQualityConfig\x12\x31\n\x14\x63orrelation_accuracy\x18\x01 \x01(\x02R\x13\x63orrelationAccuracy\x12\x35\n\x16statistical_similarity\x18\x02 \x01(\x02R\x15statisticalSimilarity\x12+\n\x11preserve_outliers\x18\x03 \x01(\x08R\x10preserveOutliers\x12/\n\x13\x65nforce_constraints\x18\x04 \x01(\x08R\x12\x65nforceConstraints\x12T\n\x0b\x63onstraints\x18\x05 \x03(\x0b\x32\x32.weave.datamanagement.generate.v1.ColumnConstraintR\x0b\x63onstraints\"\xe2\x01\n\x10\x43olumnConstraint\x12\x1f\n\x0b\x63olumn_name\x18\x01 \x01(\tR\ncolumnName\x12\'\n\x0f\x63onstraint_type\x18\x02 \x01(\tR\x0e\x63onstraintType\x12\x1b\n\tmin_value\x18\x03 \x01(\tR\x08minValue\x12\x1b\n\tmax_value\x18\x04 \x01(\tR\x08maxValue\x12#\n\rregex_pattern\x18\x05 \x01(\tR\x0cregexPattern\x12%\n\x0e\x61llowed_values\x18\x06 \x03(\tR\rallowedValues\"\xd1\x01\n\rPrivacyConfig\x12\x18\n\x07\x65psilon\x18\x01 \x01(\x02R\x07\x65psilon\x12*\n\x11k_anonymity_check\x18\x02 \x01(\x08R\x0fkAnonymityCheck\x12\x17\n\x07k_value\x18\x03 \x01(\x05R\x06kValue\x12+\n\x11sensitive_columns\x18\x04 \x03(\tR\x10sensitiveColumns\x12\x34\n\x16\x65nable_privacy_metrics\x18\x05 \x01(\x08R\x14\x65nablePrivacyMetrics\"\xec\x02\n\x0bModelConfig\x12J\n\nmodel_type\x18\x01 \x01(\x0e\x32+.weave.datamanagement.generate.v1.ModelTypeR\tmodelType\x12\x16\n\x06\x65pochs\x18\x02 
\x01(\x05R\x06\x65pochs\x12#\n\rlearning_rate\x18\x03 \x01(\x02R\x0clearningRate\x12\x1d\n\nbatch_size\x18\x04 \x01(\x05R\tbatchSize\x12p\n\x11\x63ustom_parameters\x18\x05 \x03(\x0b\x32\x43.weave.datamanagement.generate.v1.ModelConfig.CustomParametersEntryR\x10\x63ustomParameters\x1a\x43\n\x15\x43ustomParametersEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01*\xea\x01\n\tModelType\x12\x1a\n\x16MODEL_TYPE_UNSPECIFIED\x10\x00\x12\x14\n\x10MODEL_TYPE_CTGAN\x10\x01\x12\x13\n\x0fMODEL_TYPE_TVAE\x10\x02\x12\x15\n\x11MODEL_TYPE_COPULA\x10\x03\x12\x13\n\x0fMODEL_TYPE_FAST\x10\x04\x12\x12\n\x0eMODEL_TYPE_GAN\x10\x05\x12\x13\n\x0fMODEL_TYPE_WGAN\x10\x06\x12\x14\n\x10MODEL_TYPE_SMOTE\x10\x07\x12\x17\n\x13MODEL_TYPE_BAYESIAN\x10\x08\x12\x12\n\x0eMODEL_TYPE_HMA\x10\tB\xa7\x02\n$com.weave.datamanagement.generate.v1B\x12\x43onfigurationProtoP\x01ZHgithub.com/weave-labs/weave-go/weave/datamanagement/generate/v1;generate\xa2\x02\x03WDG\xaa\x02 Weave.Datamanagement.Generate.V1\xca\x02 Weave\\Datamanagement\\Generate\\V1\xe2\x02,Weave\\Datamanagement\\Generate\\V1\\GPBMetadata\xea\x02#Weave::Datamanagement::Generate::V1b\x06proto3')
|
|
28
|
+
|
|
29
|
+
# The builder helpers receive this module's namespace; presumably they inject
# the generated message/enum classes and the "_NAME" descriptor entries that
# are looked up below -- confirm in google.protobuf.internal.builder.
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'weave.datamanagement.generate.v1.configuration_pb2', _globals)
# Runs only when _USE_C_DESCRIPTORS is false: options and serialized offsets
# are then assigned by hand on each descriptor.
if not _descriptor._USE_C_DESCRIPTORS:
  _globals['DESCRIPTOR']._loaded_options = None
  _globals['DESCRIPTOR']._serialized_options = b'\n$com.weave.datamanagement.generate.v1B\022ConfigurationProtoP\001ZHgithub.com/weave-labs/weave-go/weave/datamanagement/generate/v1;generate\242\002\003WDG\252\002 Weave.Datamanagement.Generate.V1\312\002 Weave\\Datamanagement\\Generate\\V1\342\002,Weave\\Datamanagement\\Generate\\V1\\GPBMetadata\352\002#Weave::Datamanagement::Generate::V1'
  _globals['_MODELCONFIG_CUSTOMPARAMETERSENTRY']._loaded_options = None
  _globals['_MODELCONFIG_CUSTOMPARAMETERSENTRY']._serialized_options = b'8\001'
  # Start/end values below are presumably byte offsets of each descriptor
  # inside the serialized file passed to AddSerializedFile -- TODO confirm.
  _globals['_MODELTYPE']._serialized_start=1525
  _globals['_MODELTYPE']._serialized_end=1759
  _globals['_CONFIGURATION']._serialized_start=91
  _globals['_CONFIGURATION']._serialized_end=406
  _globals['_DATAQUALITYCONFIG']._serialized_start=409
  _globals['_DATAQUALITYCONFIG']._serialized_end=714
  _globals['_COLUMNCONSTRAINT']._serialized_start=717
  _globals['_COLUMNCONSTRAINT']._serialized_end=943
  _globals['_PRIVACYCONFIG']._serialized_start=946
  _globals['_PRIVACYCONFIG']._serialized_end=1155
  _globals['_MODELCONFIG']._serialized_start=1158
  _globals['_MODELCONFIG']._serialized_end=1522
  _globals['_MODELCONFIG_CUSTOMPARAMETERSENTRY']._serialized_start=1455
  _globals['_MODELCONFIG_CUSTOMPARAMETERSENTRY']._serialized_end=1522
|
|
51
|
+
# @@protoc_insertion_point(module_scope)
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@generated by mypy-protobuf. Do not edit manually!
|
|
3
|
+
isort:skip_file
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import builtins
|
|
7
|
+
import collections.abc
|
|
8
|
+
import google.protobuf.descriptor
|
|
9
|
+
import google.protobuf.internal.containers
|
|
10
|
+
import google.protobuf.internal.enum_type_wrapper
|
|
11
|
+
import google.protobuf.message
|
|
12
|
+
import sys
|
|
13
|
+
import typing
|
|
14
|
+
|
|
15
|
+
if sys.version_info >= (3, 10):
|
|
16
|
+
import typing as typing_extensions
|
|
17
|
+
else:
|
|
18
|
+
import typing_extensions
|
|
19
|
+
|
|
20
|
+
# File-level descriptor exported by the generated configuration_pb2 module.
DESCRIPTOR: google.protobuf.descriptor.FileDescriptor
|
|
21
|
+
|
|
22
|
+
class _ModelType:
    # Typing-only holder for the enum's value type: ValueType is a distinct
    # int NewType, and V is an alias for it.
    ValueType = typing.NewType("ValueType", builtins.int)
    V: typing_extensions.TypeAlias = ValueType
|
|
25
|
+
|
|
26
|
+
class _ModelTypeEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[_ModelType.ValueType], builtins.type):
    # Typing-only metaclass used by ModelType; declares the enum descriptor
    # and one attribute per enum value. The trailing "# N" on each line is the
    # value's wire number.
    DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor
    MODEL_TYPE_UNSPECIFIED: _ModelType.ValueType  # 0
    MODEL_TYPE_CTGAN: _ModelType.ValueType  # 1
    """Conditional Tabular GAN"""
    MODEL_TYPE_TVAE: _ModelType.ValueType  # 2
    """Tabular Variational Autoencoder"""
    MODEL_TYPE_COPULA: _ModelType.ValueType  # 3
    """Gaussian Copula"""
    MODEL_TYPE_FAST: _ModelType.ValueType  # 4
    """Fast mode (less accurate but quicker)"""
    MODEL_TYPE_GAN: _ModelType.ValueType  # 5
    """Generic GAN implementation"""
    MODEL_TYPE_WGAN: _ModelType.ValueType  # 6
    """Wasserstein GAN"""
    MODEL_TYPE_SMOTE: _ModelType.ValueType  # 7
    """Synthetic Minority Over-sampling Technique"""
    MODEL_TYPE_BAYESIAN: _ModelType.ValueType  # 8
    """Bayesian Network"""
    MODEL_TYPE_HMA: _ModelType.ValueType  # 9
    """Hierarchical Modeling Algorithm"""
|
|
47
|
+
|
|
48
|
+
class ModelType(_ModelType, metaclass=_ModelTypeEnumTypeWrapper):
    """ModelType defines the available synthetic data generation models.

    The enum values are declared on the metaclass and repeated as
    module-level names in this stub.
    """
|
|
50
|
+
|
|
51
|
+
# Module-level declarations of the ModelType enum values; the trailing "# N"
# on each line is the value's wire number.
MODEL_TYPE_UNSPECIFIED: ModelType.ValueType  # 0
MODEL_TYPE_CTGAN: ModelType.ValueType  # 1
"""Conditional Tabular GAN"""
MODEL_TYPE_TVAE: ModelType.ValueType  # 2
"""Tabular Variational Autoencoder"""
MODEL_TYPE_COPULA: ModelType.ValueType  # 3
"""Gaussian Copula"""
MODEL_TYPE_FAST: ModelType.ValueType  # 4
"""Fast mode (less accurate but quicker)"""
MODEL_TYPE_GAN: ModelType.ValueType  # 5
"""Generic GAN implementation"""
MODEL_TYPE_WGAN: ModelType.ValueType  # 6
"""Wasserstein GAN"""
MODEL_TYPE_SMOTE: ModelType.ValueType  # 7
"""Synthetic Minority Over-sampling Technique"""
MODEL_TYPE_BAYESIAN: ModelType.ValueType  # 8
"""Bayesian Network"""
MODEL_TYPE_HMA: ModelType.ValueType  # 9
"""Hierarchical Modeling Algorithm"""
# Alias that stays resolvable inside classes which declare a same-named member.
global___ModelType = ModelType
|
|
71
|
+
|
|
72
|
+
@typing.final
class Configuration(google.protobuf.message.Message):
    """Configuration defines customization options for synthetic data generation.

    ``num_records`` and ``dataset_name`` are plain scalar attributes. The
    nested messages ``data_quality``, ``privacy`` and ``model`` are declared
    as properties and are the only fields accepted by ``HasField``.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    NUM_RECORDS_FIELD_NUMBER: builtins.int
    DATASET_NAME_FIELD_NUMBER: builtins.int
    DATA_QUALITY_FIELD_NUMBER: builtins.int
    PRIVACY_FIELD_NUMBER: builtins.int
    MODEL_FIELD_NUMBER: builtins.int
    num_records: builtins.int
    """Basic configuration
    Number of synthetic records to generate
    """
    dataset_name: builtins.str
    """Name for the generated dataset"""
    @property
    def data_quality(self) -> global___DataQualityConfig:
        """Data quality settings
        Controls data quality aspects
        """

    @property
    def privacy(self) -> global___PrivacyConfig:
        """Privacy settings
        Privacy and anonymization settings
        """

    @property
    def model(self) -> global___ModelConfig:
        """Advanced model configuration
        ML model configuration options
        """

    def __init__(
        self,
        *,
        num_records: builtins.int = ...,
        dataset_name: builtins.str = ...,
        data_quality: global___DataQualityConfig | None = ...,
        privacy: global___PrivacyConfig | None = ...,
        model: global___ModelConfig | None = ...,
    ) -> None: ...
    def HasField(self, field_name: typing.Literal["data_quality", b"data_quality", "model", b"model", "privacy", b"privacy"]) -> builtins.bool: ...
    def ClearField(self, field_name: typing.Literal["data_quality", b"data_quality", "dataset_name", b"dataset_name", "model", b"model", "num_records", b"num_records", "privacy", b"privacy"]) -> None: ...

# Module-level alias used by annotations in this stub.
global___Configuration = Configuration
|
|
120
|
+
|
|
121
|
+
@typing.final
class DataQualityConfig(google.protobuf.message.Message):
    """DataQualityConfig controls the quality of generated data.

    All fields are scalars except ``constraints``, a repeated
    ``ColumnConstraint`` exposed as a property. No ``HasField`` is declared
    for this message.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    CORRELATION_ACCURACY_FIELD_NUMBER: builtins.int
    STATISTICAL_SIMILARITY_FIELD_NUMBER: builtins.int
    PRESERVE_OUTLIERS_FIELD_NUMBER: builtins.int
    ENFORCE_CONSTRAINTS_FIELD_NUMBER: builtins.int
    CONSTRAINTS_FIELD_NUMBER: builtins.int
    correlation_accuracy: builtins.float
    """How closely to maintain correlations (0.0-1.0)"""
    statistical_similarity: builtins.float
    """Statistical similarity to original data (0.0-1.0)"""
    preserve_outliers: builtins.bool
    """Whether to preserve outlier patterns"""
    enforce_constraints: builtins.bool
    """Whether to enforce data constraints"""
    @property
    def constraints(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___ColumnConstraint]:
        """Column-specific constraints"""

    def __init__(
        self,
        *,
        correlation_accuracy: builtins.float = ...,
        statistical_similarity: builtins.float = ...,
        preserve_outliers: builtins.bool = ...,
        enforce_constraints: builtins.bool = ...,
        constraints: collections.abc.Iterable[global___ColumnConstraint] | None = ...,
    ) -> None: ...
    def ClearField(self, field_name: typing.Literal["constraints", b"constraints", "correlation_accuracy", b"correlation_accuracy", "enforce_constraints", b"enforce_constraints", "preserve_outliers", b"preserve_outliers", "statistical_similarity", b"statistical_similarity"]) -> None: ...

# Module-level alias used by annotations in this stub.
global___DataQualityConfig = DataQualityConfig
|
|
156
|
+
|
|
157
|
+
@typing.final
class ColumnConstraint(google.protobuf.message.Message):
    """ColumnConstraint defines constraints for specific columns.

    Note that ``min_value`` and ``max_value`` are typed as strings, even
    though their descriptions refer to numeric constraints.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    COLUMN_NAME_FIELD_NUMBER: builtins.int
    CONSTRAINT_TYPE_FIELD_NUMBER: builtins.int
    MIN_VALUE_FIELD_NUMBER: builtins.int
    MAX_VALUE_FIELD_NUMBER: builtins.int
    REGEX_PATTERN_FIELD_NUMBER: builtins.int
    ALLOWED_VALUES_FIELD_NUMBER: builtins.int
    column_name: builtins.str
    """Name of the column"""
    constraint_type: builtins.str
    """Type of constraint (e.g., "range", "regex", "categorical")"""
    min_value: builtins.str
    """Minimum value (for numeric constraints)"""
    max_value: builtins.str
    """Maximum value (for numeric constraints)"""
    regex_pattern: builtins.str
    """Regex pattern (for string constraints)"""
    @property
    def allowed_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """List of allowed values (for categorical constraints)"""

    def __init__(
        self,
        *,
        column_name: builtins.str = ...,
        constraint_type: builtins.str = ...,
        min_value: builtins.str = ...,
        max_value: builtins.str = ...,
        regex_pattern: builtins.str = ...,
        allowed_values: collections.abc.Iterable[builtins.str] | None = ...,
    ) -> None: ...
    def ClearField(self, field_name: typing.Literal["allowed_values", b"allowed_values", "column_name", b"column_name", "constraint_type", b"constraint_type", "max_value", b"max_value", "min_value", b"min_value", "regex_pattern", b"regex_pattern"]) -> None: ...

# Module-level alias used by annotations in this stub.
global___ColumnConstraint = ColumnConstraint
|
|
196
|
+
|
|
197
|
+
@typing.final
class PrivacyConfig(google.protobuf.message.Message):
    """PrivacyConfig controls privacy aspects of synthetic data.

    All fields are scalars except ``sensitive_columns``, a repeated string
    exposed as a property. No ``HasField`` is declared for this message.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    EPSILON_FIELD_NUMBER: builtins.int
    K_ANONYMITY_CHECK_FIELD_NUMBER: builtins.int
    K_VALUE_FIELD_NUMBER: builtins.int
    SENSITIVE_COLUMNS_FIELD_NUMBER: builtins.int
    ENABLE_PRIVACY_METRICS_FIELD_NUMBER: builtins.int
    epsilon: builtins.float
    """Differential privacy epsilon parameter"""
    k_anonymity_check: builtins.bool
    """Whether to enforce k-anonymity"""
    k_value: builtins.int
    """k value for k-anonymity (if enabled)"""
    enable_privacy_metrics: builtins.bool
    """Whether to compute privacy metrics"""
    @property
    def sensitive_columns(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """Columns that contain sensitive information"""

    def __init__(
        self,
        *,
        epsilon: builtins.float = ...,
        k_anonymity_check: builtins.bool = ...,
        k_value: builtins.int = ...,
        sensitive_columns: collections.abc.Iterable[builtins.str] | None = ...,
        enable_privacy_metrics: builtins.bool = ...,
    ) -> None: ...
    def ClearField(self, field_name: typing.Literal["enable_privacy_metrics", b"enable_privacy_metrics", "epsilon", b"epsilon", "k_anonymity_check", b"k_anonymity_check", "k_value", b"k_value", "sensitive_columns", b"sensitive_columns"]) -> None: ...

# Module-level alias used by annotations in this stub.
global___PrivacyConfig = PrivacyConfig
|
|
232
|
+
|
|
233
|
+
@typing.final
class ModelConfig(google.protobuf.message.Message):
    """ModelConfig controls the ML model used for synthesis.

    ``custom_parameters`` is a string-to-string map exposed as a property;
    ``CustomParametersEntry`` is the nested key/value message for that map.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    @typing.final
    class CustomParametersEntry(google.protobuf.message.Message):
        # Key/value entry message for the custom_parameters map.
        DESCRIPTOR: google.protobuf.descriptor.Descriptor

        KEY_FIELD_NUMBER: builtins.int
        VALUE_FIELD_NUMBER: builtins.int
        key: builtins.str
        value: builtins.str
        def __init__(
            self,
            *,
            key: builtins.str = ...,
            value: builtins.str = ...,
        ) -> None: ...
        def ClearField(self, field_name: typing.Literal["key", b"key", "value", b"value"]) -> None: ...

    MODEL_TYPE_FIELD_NUMBER: builtins.int
    EPOCHS_FIELD_NUMBER: builtins.int
    LEARNING_RATE_FIELD_NUMBER: builtins.int
    BATCH_SIZE_FIELD_NUMBER: builtins.int
    CUSTOM_PARAMETERS_FIELD_NUMBER: builtins.int
    model_type: global___ModelType.ValueType
    """Type of model to use for synthesis"""
    epochs: builtins.int
    """Training epochs"""
    learning_rate: builtins.float
    """Learning rate for training"""
    batch_size: builtins.int
    """Batch size for training"""
    @property
    def custom_parameters(self) -> google.protobuf.internal.containers.ScalarMap[builtins.str, builtins.str]:
        """Additional model-specific parameters"""

    def __init__(
        self,
        *,
        model_type: global___ModelType.ValueType = ...,
        epochs: builtins.int = ...,
        learning_rate: builtins.float = ...,
        batch_size: builtins.int = ...,
        custom_parameters: collections.abc.Mapping[builtins.str, builtins.str] | None = ...,
    ) -> None: ...
    def ClearField(self, field_name: typing.Literal["batch_size", b"batch_size", "custom_parameters", b"custom_parameters", "epochs", b"epochs", "learning_rate", b"learning_rate", "model_type", b"model_type"]) -> None: ...

# Module-level alias used by annotations in this stub.
global___ModelConfig = ModelConfig
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@generated by mypy-protobuf. Do not edit manually!
|
|
3
|
+
isort:skip_file
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import abc
|
|
7
|
+
import collections.abc
|
|
8
|
+
import grpc
|
|
9
|
+
import grpc.aio
|
|
10
|
+
import typing
|
|
11
|
+
|
|
12
|
+
# Element type parameter for _MaybeAsyncIterator.
_T = typing.TypeVar("_T")

# Typing-only helper: an iterator type that is both sync and async. Not
# referenced elsewhere in the visible part of this stub.
class _MaybeAsyncIterator(collections.abc.AsyncIterator[_T], collections.abc.Iterator[_T], metaclass=abc.ABCMeta): ...
|
|
15
|
+
|
|
16
|
+
# Typing-only helper combining the sync and asyncio gRPC servicer context
# types. Not referenced elsewhere in the visible part of this stub.
class _ServicerContext(grpc.ServicerContext, grpc.aio.ServicerContext):  # type: ignore[misc, type-arg]
    ...
|