snowflake-code-unit-registry 0.5.12__tar.gz → 0.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/Cargo.lock +9 -9
  2. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/Cargo.toml +1 -1
  3. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/PKG-INFO +1 -1
  4. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/examples/code-unit.example.json +6 -2
  5. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/schemas/code-unit.schema.json +13 -0
  6. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/generated/query_reference.rs +3 -0
  7. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/registry/tests.rs +349 -0
  8. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/registry.rs +127 -1
  9. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-python/tests/test_crud.py +6 -0
  10. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-python/tests/test_serialization.py +4 -0
  11. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/pyproject.toml +1 -1
  12. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/python/snowflake_code_unit_registry/__init__.py +1 -1
  13. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/python/snowflake_code_unit_registry/types.py +17 -7
  14. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/README.md +0 -0
  15. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-error-derive/Cargo.toml +0 -0
  16. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-error-derive/src/lib.rs +0 -0
  17. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-error-derive/tests/derive_integration.rs +0 -0
  18. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/Cargo.toml +0 -0
  19. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/build.rs +0 -0
  20. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/schemas/history/README.md +0 -0
  21. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/schemas/query-reference.rs.tmpl +0 -0
  22. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/schemas/query-reference.tmpl +0 -0
  23. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/checksum.rs +0 -0
  24. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/error.rs +0 -0
  25. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/error_trace.rs +0 -0
  26. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/filter.rs +0 -0
  27. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/generated/file_path_fields.rs +0 -0
  28. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/generated/mod.rs +0 -0
  29. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/generated/updated_at_paths.rs +0 -0
  30. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/identity.rs +0 -0
  31. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/lib.rs +0 -0
  32. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/migration/mod.rs +0 -0
  33. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/migration/versions/mod.rs +0 -0
  34. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/registry/README.md +0 -0
  35. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/registry/graph.rs +0 -0
  36. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/registry/in_memory.rs +0 -0
  37. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/registry/loader.rs +0 -0
  38. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/registry/paths.rs +0 -0
  39. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/registry/query.rs +0 -0
  40. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/registry/test_helpers.rs +0 -0
  41. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-core/src/validation.rs +0 -0
  42. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-python/Cargo.toml +0 -0
  43. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-python/README.md +0 -0
  44. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-python/src/lib.rs +0 -0
  45. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-python/tests/conftest.py +0 -0
  46. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-python/tests/test_batch.py +0 -0
  47. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-python/tests/test_checksum.py +0 -0
  48. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-python/tests/test_errors.py +0 -0
  49. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-python/tests/test_migration.py +0 -0
  50. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-python/tests/test_query.py +0 -0
  51. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-python/tests/test_refresh.py +0 -0
  52. {snowflake_code_unit_registry-0.5.12 → snowflake_code_unit_registry-0.7.2}/crates/scai-state-python/tests/test_validation.py +0 -0
@@ -499,9 +499,9 @@ dependencies = [
499
499
 
500
500
  [[package]]
501
501
  name = "fastrand"
502
- version = "2.3.0"
502
+ version = "2.4.1"
503
503
  source = "registry+https://github.com/rust-lang/crates.io-index"
504
- checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
504
+ checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6"
505
505
 
506
506
  [[package]]
507
507
  name = "find-msvc-tools"
@@ -2095,7 +2095,7 @@ dependencies = [
2095
2095
 
2096
2096
  [[package]]
2097
2097
  name = "scai-error-derive"
2098
- version = "0.5.12"
2098
+ version = "0.7.2"
2099
2099
  dependencies = [
2100
2100
  "err_code",
2101
2101
  "heck",
@@ -2109,7 +2109,7 @@ dependencies = [
2109
2109
 
2110
2110
  [[package]]
2111
2111
  name = "scai-state-core"
2112
- version = "0.5.12"
2112
+ version = "0.7.2"
2113
2113
  dependencies = [
2114
2114
  "chrono",
2115
2115
  "err_code",
@@ -2134,7 +2134,7 @@ dependencies = [
2134
2134
 
2135
2135
  [[package]]
2136
2136
  name = "scai-state-csharp"
2137
- version = "0.5.12"
2137
+ version = "0.7.2"
2138
2138
  dependencies = [
2139
2139
  "interoptopus",
2140
2140
  "interoptopus_backend_csharp",
@@ -2145,7 +2145,7 @@ dependencies = [
2145
2145
 
2146
2146
  [[package]]
2147
2147
  name = "scai-state-node"
2148
- version = "0.5.12"
2148
+ version = "0.7.2"
2149
2149
  dependencies = [
2150
2150
  "napi",
2151
2151
  "napi-build",
@@ -2157,7 +2157,7 @@ dependencies = [
2157
2157
 
2158
2158
  [[package]]
2159
2159
  name = "scai-state-python"
2160
- version = "0.5.12"
2160
+ version = "0.7.2"
2161
2161
  dependencies = [
2162
2162
  "pyo3",
2163
2163
  "pythonize",
@@ -2578,9 +2578,9 @@ dependencies = [
2578
2578
 
2579
2579
  [[package]]
2580
2580
  name = "tokio"
2581
- version = "1.51.0"
2581
+ version = "1.51.1"
2582
2582
  source = "registry+https://github.com/rust-lang/crates.io-index"
2583
- checksum = "2bd1c4c0fc4a7ab90fc15ef6daaa3ec3b893f004f915f2392557ed23237820cd"
2583
+ checksum = "f66bf9585cda4b724d3e78ab34b73fb2bbaba9011b9bfdf69dc836382ea13b8c"
2584
2584
  dependencies = [
2585
2585
  "bytes",
2586
2586
  "libc",
@@ -3,7 +3,7 @@ resolver = "2"
3
3
  members = ["crates/scai-error-derive", "crates/scai-state-core", "crates/scai-state-python"]
4
4
 
5
5
  [workspace.package]
6
- version = "0.5.12"
6
+ version = "0.7.2"
7
7
  edition = "2021"
8
8
  license = "SEE LICENSE IN LICENSE"
9
9
  repository = "https://github.com/snowflake-eng/scai-state"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: snowflake-code-unit-registry
3
- Version: 0.5.12
3
+ Version: 0.7.2
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: License :: OSI Approved :: Apache Software License
@@ -2,6 +2,7 @@
2
2
  "schemaVersion": 1,
3
3
  "isMissing": false,
4
4
  "inScope": true,
5
+ "containsCommit": true,
5
6
  "kind": "databaseObject",
6
7
  "id": "a3f1b2c4-5d6e-7f89-0a1b-2c3d4e5f6a7b",
7
8
  "updatedAt": "2025-01-15T10:30:00Z",
@@ -9,13 +10,15 @@
9
10
  "objectType": "procedure",
10
11
  "database": "AdventureWorks",
11
12
  "schema": "Sales",
12
- "name": "usp_GetMonthlyRevenue"
13
+ "name": "usp_GetMonthlyRevenue",
14
+ "canonicalName": "AdventureWorks.Sales.usp_GetMonthlyRevenue(DATETIME,DATETIME,NVARCHAR)"
13
15
  },
14
16
  "target": {
15
17
  "objectType": "procedure",
16
18
  "database": "ADVENTUREWORKS",
17
19
  "schema": "SALES",
18
- "name": "USP_GET_MONTHLY_REVENUE"
20
+ "name": "USP_GET_MONTHLY_REVENUE",
21
+ "canonicalName": "ADVENTUREWORKS.SALES.USP_GET_MONTHLY_REVENUE(TIMESTAMP_NTZ,TIMESTAMP_NTZ,VARCHAR)"
19
22
  },
20
23
  "files": {
21
24
  "source": {
@@ -130,6 +133,7 @@
130
133
  "name": "MonthYear",
131
134
  "type": "VARCHAR(7)",
132
135
  "nullable": false,
136
+ "isPrimaryKey": true,
133
137
  "targetName": "MONTH_YEAR",
134
138
  "targetType": "VARCHAR(7)"
135
139
  }
@@ -58,6 +58,10 @@
58
58
  "description": "true if CodeUnit is in the migration scope",
59
59
  "default": true
60
60
  },
61
+ "containsCommit": {
62
+ "type": "boolean",
63
+ "description": "True if the stored procedure contains a commit transaction"
64
+ },
61
65
  "kind": {
62
66
  "type": "string",
63
67
  "title": "Kind",
@@ -93,6 +97,10 @@
93
97
  "name": {
94
98
  "type": "string",
95
99
  "description": "Original object name preserving casing"
100
+ },
101
+ "canonicalName": {
102
+ "type": "string",
103
+ "description": "Computed canonical identifier: database.schema.name or database.schema.name(paramTypes) for procedures/functions"
96
104
  }
97
105
  }
98
106
  },
@@ -115,6 +123,10 @@
115
123
  "name": {
116
124
  "type": "string",
117
125
  "description": "Object name preserving casing"
126
+ },
127
+ "canonicalName": {
128
+ "type": "string",
129
+ "description": "Computed canonical identifier: database.schema.name or database.schema.name(paramTypes) for procedures/functions"
118
130
  }
119
131
  }
120
132
  },
@@ -366,6 +378,7 @@
366
378
  "name": { "type": "string", "description": "Column name" },
367
379
  "type": { "type": "string", "description": "Data type" },
368
380
  "nullable": { "type": "boolean", "default": true },
381
+ "isPrimaryKey": { "type": "boolean", "description": "True if this column is part of the table's primary key" },
369
382
  "targetName": { "type": "string" },
370
383
  "targetType": { "type": "string" }
371
384
  }
@@ -55,6 +55,7 @@ QUERYABLE FIELDS
55
55
  codeStatus.registration.sourceId string Identifier for the source system
56
56
  codeStatus.registration.status enum pending, inProgress, completed, failed, excluded (Latest status of this operation)
57
57
  codeStatus.registration.updatedAt string Timestamp of last registration status update
58
+ containsCommit boolean True if the stored procedure contains a commit transaction
58
59
  dependencies object Dependency relationships for a code unit
59
60
  dependencies.dependsOn array Code units this unit depends on; items: {id, isMissing, relationTypes}
60
61
  dependencies.hasTransitiveMissingDependencies boolean True when this unit depends (directly or indirectly) on at least one missing dependency
@@ -69,11 +70,13 @@ QUERYABLE FIELDS
69
70
  planning.topologicalRank integer Dependency rank hint used for sorting with dependencies. Valid ranks are >= 0. A value of -1 indicates a cycle or downstream dependency on a cycle.
70
71
  planning.wave integer 1-based wave assignment
71
72
  source object Source database object metadata
73
+ source.canonicalName string Computed canonical identifier: database.schema.name or database.schema.name(paramTypes) for procedures/functions
72
74
  source.database string Source database name
73
75
  source.name string Original object name preserving casing
74
76
  source.objectType enum database, schema, table, view, materializedView, function, procedure, stage, udfHelper, sequence, tableType, trigger, other (Type of database object)
75
77
  source.schema string Source schema name
76
78
  target object Target database object metadata
79
+ target.canonicalName string Computed canonical identifier: database.schema.name or database.schema.name(paramTypes) for procedures/functions
77
80
  target.database string Database name
78
81
  target.name string Object name preserving casing
79
82
  target.objectType enum database, schema, table, view, materializedView, function, procedure, stage, udfHelper, sequence, tableType, trigger, other (Type of database object)
@@ -30,12 +30,14 @@ fn make_code_unit(id: &str, name: &str, object_type: CodeUnitObjectType) -> Code
30
30
  database: Some("DB".to_string()),
31
31
  schema: Some("dbo".to_string()),
32
32
  name: Some(name.to_string()),
33
+ ..Default::default()
33
34
  }),
34
35
  target: Some(TargetMetadata {
35
36
  object_type: Some(object_type),
36
37
  database: Some("DB".to_string()),
37
38
  schema: Some("DBO".to_string()),
38
39
  name: Some(name.to_uppercase()),
40
+ ..Default::default()
39
41
  }),
40
42
  ..Default::default()
41
43
  }
@@ -351,6 +353,7 @@ fn test_create_generates_id_when_missing() {
351
353
  database: None,
352
354
  schema: None,
353
355
  name: None,
356
+ ..Default::default()
354
357
  }),
355
358
  ..Default::default()
356
359
  };
@@ -571,6 +574,7 @@ fn test_find_by_object_match_nested_fields() {
571
574
  database: Some("DB".to_string()),
572
575
  schema: Some("sales".to_string()),
573
576
  name: Some("Table2".to_string()),
577
+ ..Default::default()
574
578
  });
575
579
 
576
580
  registry.create(&mut cu1, None).unwrap();
@@ -604,6 +608,7 @@ fn test_find_by_object_multiple_fields() {
604
608
  database: Some("DB".to_string()),
605
609
  schema: Some("sales".to_string()),
606
610
  name: Some("Table2".to_string()),
611
+ ..Default::default()
607
612
  });
608
613
 
609
614
  registry.create(&mut cu1, None).unwrap();
@@ -743,6 +748,7 @@ fn test_find_by_object_match_by_source_database_and_name() {
743
748
  database: Some("OTHER_DB".to_string()),
744
749
  schema: Some("dbo".to_string()),
745
750
  name: Some("Table2".to_string()),
751
+ ..Default::default()
746
752
  });
747
753
 
748
754
  registry.create(&mut cu1, None).unwrap();
@@ -963,6 +969,7 @@ fn make_code_unit_with_files(
963
969
  database: None,
964
970
  schema: None,
965
971
  name: None,
972
+ ..Default::default()
966
973
  }),
967
974
  target: Some(TargetMetadata {
968
975
  object_type: Some(crate::generated::types::ObjectType::Table),
@@ -3743,3 +3750,345 @@ fn test_empty_vec_fields_not_skipped_during_serialization() {
3743
3750
  "empty requiredBy must be serialized"
3744
3751
  );
3745
3752
  }
3753
+
3754
+ // ── CanonicalNameHook tests ──────────────────────────────────────────
3755
+
3756
+ #[test]
3757
+ fn canonical_name_set_on_create_table() {
3758
+ let dir = temp_dir();
3759
+ let registry = CodeUnitRegistry::init(dir.path()).unwrap();
3760
+
3761
+ let mut cu = make_code_unit("cn1", "MyTable", CodeUnitObjectType::Table);
3762
+ registry.create(&mut cu, None).unwrap();
3763
+
3764
+ let stored = registry.get_by_id("cn1", None).unwrap();
3765
+ assert_eq!(
3766
+ stored.source.as_ref().unwrap().canonical_name.as_deref(),
3767
+ Some("DB.dbo.MyTable"),
3768
+ );
3769
+ assert_eq!(
3770
+ stored.target.as_ref().unwrap().canonical_name.as_deref(),
3771
+ Some("DB.DBO.MYTABLE"),
3772
+ );
3773
+ }
3774
+
3775
+ #[test]
3776
+ fn canonical_name_includes_params_for_procedure() {
3777
+ use crate::generated::types::{ParameterDef, Parameters, Signature};
3778
+
3779
+ let dir = temp_dir();
3780
+ let registry = CodeUnitRegistry::init(dir.path()).unwrap();
3781
+
3782
+ let mut cu = make_code_unit("cn2", "MyProc", CodeUnitObjectType::Procedure);
3783
+ cu.signature = Some(Signature {
3784
+ parameters: Some(Parameters {
3785
+ arguments: vec![
3786
+ ParameterDef {
3787
+ name: Some("@StartDate".to_string()),
3788
+ type_: Some("DATETIME".to_string()),
3789
+ target_type: Some("TIMESTAMP_NTZ".to_string()),
3790
+ ..Default::default()
3791
+ },
3792
+ ParameterDef {
3793
+ name: Some("@Region".to_string()),
3794
+ type_: Some("NVARCHAR(50)".to_string()),
3795
+ target_type: Some("VARCHAR(50)".to_string()),
3796
+ ..Default::default()
3797
+ },
3798
+ ],
3799
+ ..Default::default()
3800
+ }),
3801
+ ..Default::default()
3802
+ });
3803
+ registry.create(&mut cu, None).unwrap();
3804
+
3805
+ let stored = registry.get_by_id("cn2", None).unwrap();
3806
+ assert_eq!(
3807
+ stored.source.as_ref().unwrap().canonical_name.as_deref(),
3808
+ Some("DB.dbo.MyProc(DATETIME,NVARCHAR)"),
3809
+ );
3810
+ assert_eq!(
3811
+ stored.target.as_ref().unwrap().canonical_name.as_deref(),
3812
+ Some("DB.DBO.MYPROC(TIMESTAMP_NTZ,VARCHAR)"),
3813
+ );
3814
+ }
3815
+
3816
+ #[test]
3817
+ fn canonical_name_includes_params_for_function() {
3818
+ use crate::generated::types::{ParameterDef, Parameters, Signature};
3819
+
3820
+ let dir = temp_dir();
3821
+ let registry = CodeUnitRegistry::init(dir.path()).unwrap();
3822
+
3823
+ let mut cu = make_code_unit("cn3", "MyFunc", CodeUnitObjectType::Function);
3824
+ cu.signature = Some(Signature {
3825
+ parameters: Some(Parameters {
3826
+ arguments: vec![ParameterDef {
3827
+ name: Some("@X".to_string()),
3828
+ type_: Some("INT".to_string()),
3829
+ target_type: Some("NUMBER".to_string()),
3830
+ ..Default::default()
3831
+ }],
3832
+ ..Default::default()
3833
+ }),
3834
+ ..Default::default()
3835
+ });
3836
+ registry.create(&mut cu, None).unwrap();
3837
+
3838
+ let stored = registry.get_by_id("cn3", None).unwrap();
3839
+ assert_eq!(
3840
+ stored.source.as_ref().unwrap().canonical_name.as_deref(),
3841
+ Some("DB.dbo.MyFunc(INT)"),
3842
+ );
3843
+ assert_eq!(
3844
+ stored.target.as_ref().unwrap().canonical_name.as_deref(),
3845
+ Some("DB.DBO.MYFUNC(NUMBER)"),
3846
+ );
3847
+ }
3848
+
3849
+ #[test]
3850
+ fn canonical_name_no_params_for_view() {
3851
+ use crate::generated::types::{ParameterDef, Parameters, Signature};
3852
+
3853
+ let dir = temp_dir();
3854
+ let registry = CodeUnitRegistry::init(dir.path()).unwrap();
3855
+
3856
+ let mut cu = make_code_unit("cn4", "MyView", CodeUnitObjectType::View);
3857
+ cu.signature = Some(Signature {
3858
+ parameters: Some(Parameters {
3859
+ arguments: vec![ParameterDef {
3860
+ type_: Some("INT".to_string()),
3861
+ ..Default::default()
3862
+ }],
3863
+ ..Default::default()
3864
+ }),
3865
+ ..Default::default()
3866
+ });
3867
+ registry.create(&mut cu, None).unwrap();
3868
+
3869
+ let stored = registry.get_by_id("cn4", None).unwrap();
3870
+ assert_eq!(
3871
+ stored.source.as_ref().unwrap().canonical_name.as_deref(),
3872
+ Some("DB.dbo.MyView"),
3873
+ "views should not include parameter signature"
3874
+ );
3875
+ }
3876
+
3877
+ #[test]
3878
+ fn canonical_name_schema_and_name_only() {
3879
+ let dir = temp_dir();
3880
+ let registry = CodeUnitRegistry::init(dir.path()).unwrap();
3881
+
3882
+ let mut cu = CodeUnit {
3883
+ id: Some("cn5".to_string()),
3884
+ kind: Some(CodeUnitKind::DatabaseObject),
3885
+ source: Some(SourceMetadata {
3886
+ object_type: Some(CodeUnitObjectType::Table),
3887
+ database: None,
3888
+ schema: Some("dbo".to_string()),
3889
+ name: Some("Orphan".to_string()),
3890
+ ..Default::default()
3891
+ }),
3892
+ ..Default::default()
3893
+ };
3894
+ registry.create(&mut cu, None).unwrap();
3895
+
3896
+ let stored = registry.get_by_id("cn5", None).unwrap();
3897
+ assert_eq!(
3898
+ stored.source.as_ref().unwrap().canonical_name.as_deref(),
3899
+ Some("dbo.Orphan"),
3900
+ "canonicalName should be schema.name when database is missing"
3901
+ );
3902
+ }
3903
+
3904
+ #[test]
3905
+ fn canonical_name_name_only() {
3906
+ let dir = temp_dir();
3907
+ let registry = CodeUnitRegistry::init(dir.path()).unwrap();
3908
+
3909
+ let mut cu = CodeUnit {
3910
+ id: Some("cn5b".to_string()),
3911
+ kind: Some(CodeUnitKind::DatabaseObject),
3912
+ source: Some(SourceMetadata {
3913
+ object_type: Some(CodeUnitObjectType::Table),
3914
+ database: None,
3915
+ schema: None,
3916
+ name: Some("Lonely".to_string()),
3917
+ ..Default::default()
3918
+ }),
3919
+ ..Default::default()
3920
+ };
3921
+ registry.create(&mut cu, None).unwrap();
3922
+
3923
+ let stored = registry.get_by_id("cn5b", None).unwrap();
3924
+ assert_eq!(
3925
+ stored.source.as_ref().unwrap().canonical_name.as_deref(),
3926
+ Some("Lonely"),
3927
+ "canonicalName should be just name when database and schema are missing"
3928
+ );
3929
+ }
3930
+
3931
+ #[test]
3932
+ fn canonical_name_unknown_schema_fallback() {
3933
+ let dir = temp_dir();
3934
+ let registry = CodeUnitRegistry::init(dir.path()).unwrap();
3935
+
3936
+ let mut cu = CodeUnit {
3937
+ id: Some("cn5c".to_string()),
3938
+ kind: Some(CodeUnitKind::DatabaseObject),
3939
+ source: Some(SourceMetadata {
3940
+ object_type: Some(CodeUnitObjectType::Table),
3941
+ database: Some("MyDB".to_string()),
3942
+ schema: None,
3943
+ name: Some("Widget".to_string()),
3944
+ ..Default::default()
3945
+ }),
3946
+ ..Default::default()
3947
+ };
3948
+ registry.create(&mut cu, None).unwrap();
3949
+
3950
+ let stored = registry.get_by_id("cn5c", None).unwrap();
3951
+ assert_eq!(
3952
+ stored.source.as_ref().unwrap().canonical_name.as_deref(),
3953
+ Some("MyDB.UNKNOWN_SCHEMA.Widget"),
3954
+ "canonicalName should use UNKNOWN_SCHEMA when database is present but schema is missing"
3955
+ );
3956
+ }
3957
+
3958
+ #[test]
3959
+ fn canonical_name_none_when_name_missing() {
3960
+ let dir = temp_dir();
3961
+ let registry = CodeUnitRegistry::init(dir.path()).unwrap();
3962
+
3963
+ let mut cu = CodeUnit {
3964
+ id: Some("cn5d".to_string()),
3965
+ kind: Some(CodeUnitKind::DatabaseObject),
3966
+ source: Some(SourceMetadata {
3967
+ object_type: Some(CodeUnitObjectType::Table),
3968
+ database: Some("DB".to_string()),
3969
+ schema: Some("dbo".to_string()),
3970
+ name: None,
3971
+ ..Default::default()
3972
+ }),
3973
+ ..Default::default()
3974
+ };
3975
+ registry.create(&mut cu, None).unwrap();
3976
+
3977
+ let stored = registry.get_by_id("cn5d", None).unwrap();
3978
+ assert_eq!(
3979
+ stored.source.as_ref().unwrap().canonical_name,
3980
+ None,
3981
+ "canonicalName should be None when name is missing for non-container types"
3982
+ );
3983
+ }
3984
+
3985
+ #[test]
3986
+ fn canonical_name_database_object_type() {
3987
+ let dir = temp_dir();
3988
+ let registry = CodeUnitRegistry::init(dir.path()).unwrap();
3989
+
3990
+ let mut cu = CodeUnit {
3991
+ id: Some("cn5e".to_string()),
3992
+ kind: Some(CodeUnitKind::DatabaseObject),
3993
+ source: Some(SourceMetadata {
3994
+ object_type: Some(CodeUnitObjectType::Database),
3995
+ database: Some("MyDB".to_string()),
3996
+ schema: None,
3997
+ name: None,
3998
+ ..Default::default()
3999
+ }),
4000
+ ..Default::default()
4001
+ };
4002
+ registry.create(&mut cu, None).unwrap();
4003
+
4004
+ let stored = registry.get_by_id("cn5e", None).unwrap();
4005
+ assert_eq!(
4006
+ stored.source.as_ref().unwrap().canonical_name.as_deref(),
4007
+ Some("MyDB"),
4008
+ "database objectType should use just the database name, no schema"
4009
+ );
4010
+ }
4011
+
4012
+ #[test]
4013
+ fn canonical_name_schema_object_type_with_database() {
4014
+ let dir = temp_dir();
4015
+ let registry = CodeUnitRegistry::init(dir.path()).unwrap();
4016
+
4017
+ let mut cu = CodeUnit {
4018
+ id: Some("cn5f".to_string()),
4019
+ kind: Some(CodeUnitKind::DatabaseObject),
4020
+ source: Some(SourceMetadata {
4021
+ object_type: Some(CodeUnitObjectType::Schema),
4022
+ database: Some("MyDB".to_string()),
4023
+ schema: Some("Sales".to_string()),
4024
+ name: None,
4025
+ ..Default::default()
4026
+ }),
4027
+ ..Default::default()
4028
+ };
4029
+ registry.create(&mut cu, None).unwrap();
4030
+
4031
+ let stored = registry.get_by_id("cn5f", None).unwrap();
4032
+ assert_eq!(
4033
+ stored.source.as_ref().unwrap().canonical_name.as_deref(),
4034
+ Some("MyDB.Sales"),
4035
+ "schema objectType with database should be database.schema"
4036
+ );
4037
+ }
4038
+
4039
+ #[test]
4040
+ fn canonical_name_schema_object_type_without_database() {
4041
+ let dir = temp_dir();
4042
+ let registry = CodeUnitRegistry::init(dir.path()).unwrap();
4043
+
4044
+ let mut cu = CodeUnit {
4045
+ id: Some("cn5g".to_string()),
4046
+ kind: Some(CodeUnitKind::DatabaseObject),
4047
+ source: Some(SourceMetadata {
4048
+ object_type: Some(CodeUnitObjectType::Schema),
4049
+ database: None,
4050
+ schema: Some("Sales".to_string()),
4051
+ name: None,
4052
+ ..Default::default()
4053
+ }),
4054
+ ..Default::default()
4055
+ };
4056
+ registry.create(&mut cu, None).unwrap();
4057
+
4058
+ let stored = registry.get_by_id("cn5g", None).unwrap();
4059
+ assert_eq!(
4060
+ stored.source.as_ref().unwrap().canonical_name.as_deref(),
4061
+ Some("Sales"),
4062
+ "schema objectType without database should be just the schema name"
4063
+ );
4064
+ }
4065
+
4066
+ #[test]
4067
+ fn canonical_name_updated_on_update() {
4068
+ let dir = temp_dir();
4069
+ let registry = CodeUnitRegistry::init(dir.path()).unwrap();
4070
+
4071
+ let mut cu = make_code_unit("cn6", "OldName", CodeUnitObjectType::Table);
4072
+ registry.create(&mut cu, None).unwrap();
4073
+
4074
+ let stored = registry.get_by_id("cn6", None).unwrap();
4075
+ assert_eq!(
4076
+ stored.source.as_ref().unwrap().canonical_name.as_deref(),
4077
+ Some("DB.dbo.OldName"),
4078
+ );
4079
+
4080
+ registry
4081
+ .update(
4082
+ "cn6",
4083
+ &[("source.name", serde_json::json!("NewName"))],
4084
+ None,
4085
+ )
4086
+ .unwrap();
4087
+
4088
+ let updated = registry.get_by_id("cn6", None).unwrap();
4089
+ assert_eq!(
4090
+ updated.source.as_ref().unwrap().canonical_name.as_deref(),
4091
+ Some("DB.dbo.NewName"),
4092
+ "canonicalName should reflect the updated name"
4093
+ );
4094
+ }
@@ -504,16 +504,142 @@ impl Drop for HookGuard<'_> {
504
504
  }
505
505
  }
506
506
 
507
+ /// Built-in hook that computes `canonicalName` on both `source` and `target`
508
+ /// metadata before persisting. Format:
509
+ /// - `database.schema.name` for most object types
510
+ /// - `database.schema.name(TYPE,TYPE,...)` for procedures and functions,
511
+ /// using base parameter types with length/precision stripped
512
+ pub(crate) struct CanonicalNameHook;
513
+
514
+ impl CanonicalNameHook {
515
+ /// Strip length/precision from a SQL type: `NVARCHAR(50)` -> `NVARCHAR`.
516
+ fn base_type(ty: &str) -> &str {
517
+ match ty.find('(') {
518
+ Some(pos) => &ty[..pos],
519
+ None => ty,
520
+ }
521
+ }
522
+
523
+ /// Build the canonical name from metadata fields and optional signature.
524
+ ///
525
+ /// Rules:
526
+ /// - `name` is always required unless objectType is `database` or `schema`
527
+ /// - `database` and `schema` are optional: result can be `name`, `schema.name`,
528
+ /// or `database.schema.name`
529
+ /// - If `database` is present but `schema` is missing, substitute `UNKNOWN_SCHEMA`
530
+ /// (except for objectType `database`, which has no schema)
531
+ fn compute(metadata: &Value, signature: Option<&Value>, type_field: &str) -> Option<String> {
532
+ let obj = metadata.as_object()?;
533
+ let database = obj.get("database").and_then(|v| v.as_str());
534
+ let schema = obj.get("schema").and_then(|v| v.as_str());
535
+ let name = obj.get("name").and_then(|v| v.as_str());
536
+ let object_type = obj.get("objectType").and_then(|v| v.as_str());
537
+
538
+ let is_container = matches!(object_type, Some("database" | "schema"));
539
+
540
+ if name.is_none() && !is_container {
541
+ return None;
542
+ }
543
+
544
+ let mut parts: Vec<&str> = Vec::new();
545
+
546
+ if let Some(db) = database {
547
+ parts.push(db);
548
+ if !matches!(object_type, Some("database")) {
549
+ parts.push(schema.unwrap_or("UNKNOWN_SCHEMA"));
550
+ }
551
+ } else if let Some(sch) = schema {
552
+ parts.push(sch);
553
+ }
554
+
555
+ if let Some(n) = name {
556
+ parts.push(n);
557
+ }
558
+
559
+ let mut canonical = parts.join(".");
560
+
561
+ let needs_signature = matches!(object_type, Some("procedure" | "function"));
562
+ if needs_signature {
563
+ if let Some(args) = signature
564
+ .and_then(|s| s.get("parameters"))
565
+ .and_then(|p| p.get("arguments"))
566
+ .and_then(|a| a.as_array())
567
+ {
568
+ let types: Vec<&str> = args
569
+ .iter()
570
+ .filter_map(|arg| arg.get(type_field).and_then(|v| v.as_str()))
571
+ .map(Self::base_type)
572
+ .collect();
573
+ if !types.is_empty() {
574
+ canonical.push('(');
575
+ canonical.push_str(&types.join(","));
576
+ canonical.push(')');
577
+ }
578
+ }
579
+ }
580
+
581
+ Some(canonical)
582
+ }
583
+ }
584
+
585
+ impl RegistryHook for CanonicalNameHook {
586
+ fn before_persist(
587
+ &self,
588
+ _kind: ChangeType,
589
+ changes: Vec<CodeUnitChange>,
590
+ _registry: &CodeUnitRegistry,
591
+ _options: &WriteOptions,
592
+ ) -> Result<Vec<CodeUnitChange>> {
593
+ let mut result = changes;
594
+ for change in &mut result {
595
+ let Some(unit) = &mut change.after else {
596
+ continue;
597
+ };
598
+ let mut json = serde_json::to_value(&*unit)?;
599
+ let signature = json.get("signature").cloned();
600
+
601
+ if let Some(source) = json.get("source") {
602
+ if let Some(cn) = Self::compute(source, signature.as_ref(), "type") {
603
+ json.as_object_mut()
604
+ .unwrap()
605
+ .get_mut("source")
606
+ .unwrap()
607
+ .as_object_mut()
608
+ .unwrap()
609
+ .insert("canonicalName".to_string(), Value::String(cn));
610
+ }
611
+ }
612
+
613
+ if let Some(target) = json.get("target") {
614
+ if let Some(cn) = Self::compute(target, signature.as_ref(), "targetType") {
615
+ json.as_object_mut()
616
+ .unwrap()
617
+ .get_mut("target")
618
+ .unwrap()
619
+ .as_object_mut()
620
+ .unwrap()
621
+ .insert("canonicalName".to_string(), Value::String(cn));
622
+ }
623
+ }
624
+
625
+ *unit = serde_json::from_value(json)?;
626
+ }
627
+ Ok(result)
628
+ }
629
+ }
630
+
507
631
  /// The ordered set of built-in hooks that run on every write operation.
508
632
  ///
509
633
  /// Order matters: PathNormalizationHook first (so downstream hooks see
510
634
  /// canonical paths), ChecksumHook second (so checksums are based on
511
- /// resolved paths), then DependencyRefreshHook, then UpdatedAtHook last.
635
+ /// resolved paths), then DependencyRefreshHook, CanonicalNameHook,
636
+ /// then UpdatedAtHook last.
512
637
  fn builtin_hooks() -> Vec<Box<dyn RegistryHook + Send + Sync>> {
513
638
  vec![
514
639
  Box::new(PathNormalizationHook),
515
640
  Box::new(ChecksumHook),
516
641
  Box::new(DependencyRefreshHook),
642
+ Box::new(CanonicalNameHook),
517
643
  Box::new(UpdatedAtHook),
518
644
  ]
519
645
  }
@@ -58,6 +58,11 @@ def test_crud_lifecycle(registry_dir: str):
58
58
  created_id = registry.create(cu)
59
59
  assert created_id == "lifecycle-001"
60
60
 
61
+ # Verify canonicalName computed by hook
62
+ created = registry.get_by_id("lifecycle-001")
63
+ assert created.source.canonicalName == "DB.dbo.Orders"
64
+ assert created.target.canonicalName == "DB.DBO.ORDERS"
65
+
61
66
  # Find with filter
62
67
  results = registry.find_all(
63
68
  FindOptions(filter="source.objectType = 'table'")
@@ -71,6 +76,7 @@ def test_crud_lifecycle(registry_dir: str):
71
76
  # Verify update
72
77
  loaded = registry.get_by_id("lifecycle-001")
73
78
  assert loaded.source.name == "UpdatedOrders"
79
+ assert loaded.source.canonicalName == "DB.dbo.UpdatedOrders"
74
80
 
75
81
  # Delete
76
82
  registry.delete("lifecycle-001")
@@ -50,6 +50,7 @@ def test_roundtrip_full_document(registry_dir: str):
50
50
  original = CodeUnit(
51
51
  id="roundtrip-001",
52
52
  kind=Kind.databaseObject,
53
+ containsCommit=True,
53
54
  source=SourceMetadata.model_validate(
54
55
  {
55
56
  "objectType": ObjectType.procedure,
@@ -111,6 +112,7 @@ def test_roundtrip_full_document(registry_dir: str):
111
112
  name="Revenue",
112
113
  type="DECIMAL(18,2)",
113
114
  nullable=False,
115
+ isPrimaryKey=True,
114
116
  targetName="REVENUE",
115
117
  targetType="NUMBER(18,2)",
116
118
  )
@@ -150,6 +152,7 @@ def test_roundtrip_full_document(registry_dir: str):
150
152
  assert loaded.schemaVersion == 1
151
153
  assert loaded.isMissing is False
152
154
  assert loaded.inScope is True
155
+ assert loaded.containsCommit is True
153
156
 
154
157
  # Source / Target
155
158
  assert loaded.source.database == "AdventureWorks"
@@ -201,6 +204,7 @@ def test_roundtrip_full_document(registry_dir: str):
201
204
  # Signature
202
205
  assert len(loaded.signature.columns) == 1
203
206
  assert loaded.signature.columns[0].name == "Revenue"
207
+ assert loaded.signature.columns[0].isPrimaryKey is True
204
208
  assert len(loaded.signature.parameters.arguments) == 1
205
209
  assert loaded.signature.parameters.arguments[0].name == "@StartDate"
206
210
  assert len(loaded.signature.parameters.returns) == 1
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "snowflake-code-unit-registry"
7
- version = "0.5.12"
7
+ version = "0.7.2"
8
8
  description = "SCAI state management library for database migrations"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -41,7 +41,7 @@ from snowflake_code_unit_registry.types import CodeUnit
41
41
  if TYPE_CHECKING:
42
42
  from collections.abc import Sequence
43
43
 
44
- __version__ = "0.5.12"
44
+ __version__ = "0.7.2"
45
45
  __all__ = [
46
46
  "CodeUnitRegistry",
47
47
  "generate_id",
@@ -1,6 +1,6 @@
1
1
  # generated by datamodel-codegen:
2
2
  # filename: code-unit.schema.json
3
- # timestamp: 2026-03-27T15:48:24+00:00
3
+ # timestamp: 2026-04-08T04:47:25+00:00
4
4
 
5
5
  from __future__ import annotations
6
6
 
@@ -58,9 +58,10 @@ class Issue(BaseModel):
58
58
 
59
59
  class Issues(RootModel[list[Issue]]):
60
60
  root: list[Issue] = Field(
61
- default_factory=list,
61
+ [],
62
62
  description='Derived issues from converted code scanning',
63
63
  title='Issues',
64
+ validate_default=True,
64
65
  )
65
66
 
66
67
 
@@ -109,8 +110,8 @@ class Parameters(BaseModel):
109
110
  model_config = ConfigDict(
110
111
  extra='allow',
111
112
  )
112
- arguments: list[ParameterDef] | None = Field(default_factory=list)
113
- returns: list[ReturnDef] | None = Field(default_factory=list)
113
+ arguments: list[ParameterDef] | None = Field([], validate_default=True)
114
+ returns: list[ReturnDef] | None = Field([], validate_default=True)
114
115
 
115
116
 
116
117
  class Signature(BaseModel):
@@ -122,7 +123,7 @@ class Signature(BaseModel):
122
123
  extra='allow',
123
124
  )
124
125
  columns: list[ColumnDef] | None = Field(
125
- default_factory=list, description='Column definitions'
126
+ [], description='Column definitions', validate_default=True
126
127
  )
127
128
  parameters: Parameters | None = Field(None, title='Parameters')
128
129
 
@@ -194,6 +195,10 @@ class SourceMetadata(BaseModel):
194
195
  database: str | None = Field(None, description='Source database name')
195
196
  schema_: str | None = Field(None, alias='schema', description='Source schema name')
196
197
  name: str | None = Field(None, description='Original object name preserving casing')
198
+ canonicalName: str | None = Field(
199
+ None,
200
+ description='Computed canonical identifier: database.schema.name or database.schema.name(paramTypes) for procedures/functions',
201
+ )
197
202
 
198
203
 
199
204
  class TargetMetadata(BaseModel):
@@ -208,6 +213,10 @@ class TargetMetadata(BaseModel):
208
213
  database: str | None = Field(None, description='Database name')
209
214
  schema_: str | None = Field(None, alias='schema', description='Schema name')
210
215
  name: str | None = Field(None, description='Object name preserving casing')
216
+ canonicalName: str | None = Field(
217
+ None,
218
+ description='Computed canonical identifier: database.schema.name or database.schema.name(paramTypes) for procedures/functions',
219
+ )
211
220
 
212
221
 
213
222
  class Files(BaseModel):
@@ -363,7 +372,7 @@ class Dependencies(BaseModel):
363
372
  extra='allow',
364
373
  )
365
374
  dependsOn: list[Dependency] | None = Field(
366
- default_factory=list, description='Code units this unit depends on'
375
+ [], description='Code units this unit depends on', validate_default=True
367
376
  )
368
377
  requiredBy: list[str] | None = Field(
369
378
  [], description='Code units that depend on this unit'
@@ -424,9 +433,10 @@ class CodeUnit(BaseModel):
424
433
  None, description='Wave assignment and dependency rank hints', title='Planning'
425
434
  )
426
435
  issues: Issues | None = Field(
427
- default_factory=lambda: Issues([]),
436
+ [],
428
437
  description='Derived issues from converted code scanning',
429
438
  title='Issues',
439
+ validate_default=True,
430
440
  )
431
441
  signature: Signature | None = Field(
432
442
  None,