snowpark-connect 0.25.0__py3-none-any.whl → 0.27.0__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (41)
  1. snowflake/snowpark_connect/config.py +10 -3
  2. snowflake/snowpark_connect/dataframe_container.py +16 -0
  3. snowflake/snowpark_connect/expression/map_expression.py +15 -0
  4. snowflake/snowpark_connect/expression/map_udf.py +68 -27
  5. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +18 -0
  6. snowflake/snowpark_connect/expression/map_unresolved_function.py +38 -28
  7. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  8. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  9. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  10. snowflake/snowpark_connect/relation/map_extension.py +9 -7
  11. snowflake/snowpark_connect/relation/map_map_partitions.py +36 -72
  12. snowflake/snowpark_connect/relation/map_relation.py +15 -2
  13. snowflake/snowpark_connect/relation/map_row_ops.py +8 -1
  14. snowflake/snowpark_connect/relation/map_show_string.py +2 -0
  15. snowflake/snowpark_connect/relation/map_sql.py +63 -2
  16. snowflake/snowpark_connect/relation/map_udtf.py +96 -44
  17. snowflake/snowpark_connect/relation/utils.py +44 -0
  18. snowflake/snowpark_connect/relation/write/map_write.py +135 -24
  19. snowflake/snowpark_connect/resources_initializer.py +18 -5
  20. snowflake/snowpark_connect/server.py +12 -2
  21. snowflake/snowpark_connect/utils/artifacts.py +4 -5
  22. snowflake/snowpark_connect/utils/concurrent.py +4 -0
  23. snowflake/snowpark_connect/utils/context.py +41 -1
  24. snowflake/snowpark_connect/utils/external_udxf_cache.py +36 -0
  25. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +86 -2
  26. snowflake/snowpark_connect/utils/scala_udf_utils.py +250 -242
  27. snowflake/snowpark_connect/utils/session.py +4 -0
  28. snowflake/snowpark_connect/utils/udf_utils.py +71 -118
  29. snowflake/snowpark_connect/utils/udtf_helper.py +17 -7
  30. snowflake/snowpark_connect/utils/udtf_utils.py +3 -16
  31. snowflake/snowpark_connect/version.py +2 -3
  32. {snowpark_connect-0.25.0.dist-info → snowpark_connect-0.27.0.dist-info}/METADATA +2 -2
  33. {snowpark_connect-0.25.0.dist-info → snowpark_connect-0.27.0.dist-info}/RECORD +41 -37
  34. {snowpark_connect-0.25.0.data → snowpark_connect-0.27.0.data}/scripts/snowpark-connect +0 -0
  35. {snowpark_connect-0.25.0.data → snowpark_connect-0.27.0.data}/scripts/snowpark-session +0 -0
  36. {snowpark_connect-0.25.0.data → snowpark_connect-0.27.0.data}/scripts/snowpark-submit +0 -0
  37. {snowpark_connect-0.25.0.dist-info → snowpark_connect-0.27.0.dist-info}/WHEEL +0 -0
  38. {snowpark_connect-0.25.0.dist-info → snowpark_connect-0.27.0.dist-info}/licenses/LICENSE-binary +0 -0
  39. {snowpark_connect-0.25.0.dist-info → snowpark_connect-0.27.0.dist-info}/licenses/LICENSE.txt +0 -0
  40. {snowpark_connect-0.25.0.dist-info → snowpark_connect-0.27.0.dist-info}/licenses/NOTICE-binary +0 -0
  41. {snowpark_connect-0.25.0.dist-info → snowpark_connect-0.27.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/map_relation.py
@@ -15,6 +15,7 @@ from snowflake.snowpark_connect.utils.cache import (
 from snowflake.snowpark_connect.utils.context import (
     get_plan_id_map,
     get_session_id,
+    push_map_partitions,
     push_operation_scope,
     set_is_aggregate_function,
     set_plan_id_map,
@@ -90,6 +91,7 @@ def map_relation(
             table_name=copy.deepcopy(cached_container.table_name),
             alias=cached_container.alias,
             cached_schema_getter=lambda: cached_df.schema,
+            partition_hint=cached_container.partition_hint,
         )
         # If we don't make a copy of the df._output, the expression IDs for attributes in Snowpark DataFrames will differ from those stored in the cache,
         # leading to errors during query execution.
@@ -179,7 +181,8 @@ def map_relation(
             )
             return cached_df
         case "map_partitions":
-            result = map_map_partitions.map_map_partitions(rel)
+            with push_map_partitions():
+                result = map_map_partitions.map_map_partitions(rel)
         case "offset":
             result = map_row_ops.map_offset(rel)
         case "project":
@@ -189,13 +192,23 @@ def map_relation(
         case "read":
             result = read.map_read(rel)
         case "repartition":
-            # TODO: Snowpark df identity transform with annotation
+            # Preserve partition hint for file output control
+            # This handles both repartition(n) with shuffle=True and coalesce(n) with shuffle=False
             result = map_relation(rel.repartition.input)
+            if rel.repartition.num_partitions > 0:
+                result.partition_hint = rel.repartition.num_partitions
         case "repartition_by_expression":
             # This is a no-op operation in SAS as Snowpark doesn't have the concept of partitions.
             # All the data in the dataframe will be treated as a single partition, and this will not
             # have any side effects.
             result = map_relation(rel.repartition_by_expression.input)
+            # Only preserve partition hint if num_partitions is explicitly specified and > 0
+            # Column-based repartitioning without count should clear any existing partition hints
+            if rel.repartition_by_expression.num_partitions > 0:
+                result.partition_hint = rel.repartition_by_expression.num_partitions
+            else:
+                # Column-based repartitioning clears partition hint (resets to default behavior)
+                result.partition_hint = None
         case "replace":
             result = map_row_ops.map_replace(rel)
         case "sample":
snowflake/snowpark_connect/relation/map_row_ops.py
@@ -553,7 +553,14 @@ def map_filter(
         rel.filter.condition, input_container.column_map, typer
     )
 
-    result = input_df.filter(condition.col)
+    if rel.filter.input.WhichOneof("rel_type") == "subquery_alias":
+        # map_subquery_alias does not actually wrap the DataFrame in an alias or subquery.
+        # Apparently, there are cases (e.g., TpcdsQ53) where this is required, without it, we get
+        # SQL compilation error.
+        # To mitigate it, we are doing .select("*"), .alias() introduces additional describe queries
+        result = input_df.select("*").filter(condition.col)
+    else:
+        result = input_df.filter(condition.col)
 
     return DataFrameContainer(
         result,
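
A hedged illustration of the DataFrame shape that exercises the new branch (`orders` is a hypothetical table); the server-side effect is sketched in the trailing comment:

```python
# A filter whose input arrives as a subquery_alias (e.g., via .alias()) now gets
# an extra projection on the server.
aliased = spark.table("orders").alias("o")
result = aliased.filter(aliased.amount > 100)

# Server-side this is now translated roughly as
#     input_df.select("*").filter(<condition>)
# instead of input_df.filter(<condition>), avoiding the SQL compilation error
# seen in queries like TPC-DS Q53 without the extra DESCRIBE queries that
# .alias() would introduce.
```
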
snowflake/snowpark_connect/relation/map_show_string.py
@@ -12,6 +12,7 @@ from snowflake.snowpark._internal.analyzer import analyzer_utils
 from snowflake.snowpark.functions import col
 from snowflake.snowpark.types import DateType, StringType, StructField, StructType
 from snowflake.snowpark_connect.column_name_handler import set_schema_getter
+from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 
@@ -33,6 +34,7 @@ def map_show_string(rel: relation_proto.Relation) -> pandas.DataFrame:
         truncate=rel.show_string.truncate,
         vertical=rel.show_string.vertical,
         _spark_column_names=input_df_container.column_map.get_spark_columns(),
+        _spark_session_tz=global_config.spark_sql_session_timeZone,
     )
     return pandas.DataFrame({"show_string": [show_string]})
 
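
The show-string rendering now receives the Spark session time zone. A hedged example of the client-side configuration involved (standard PySpark settings; presumably the value controls how timestamps are formatted in `show()` output):

```python
from pyspark.sql import functions as F

spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
spark.range(1).select(F.current_timestamp().alias("ts")).show(truncate=False)
```
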
snowflake/snowpark_connect/relation/map_sql.py
@@ -56,6 +56,7 @@ from snowflake.snowpark_connect.utils.context import (
     _accessing_temp_object,
     gen_sql_plan_id,
     get_session_id,
+    get_sql_plan,
     push_evaluating_sql_scope,
     push_sql_scope,
     set_sql_args,
@@ -542,6 +543,7 @@ def map_sql_to_pandas_df(
             rows = session.sql(f"DESCRIBE TABLE {name}").collect()
         case "DescribeNamespace":
             name = get_relation_identifier_name(logical_plan.namespace(), True)
+            name = change_default_to_public(name)
             rows = session.sql(f"DESCRIBE SCHEMA {name}").collect()
             if not rows:
                 rows = None
@@ -793,6 +795,7 @@ def map_sql_to_pandas_df(
         case "SetCatalogAndNamespace":
             # TODO: add catalog setting here
             name = get_relation_identifier_name(logical_plan.child(), True)
+            name = change_default_to_public(name)
             session.sql(f"USE SCHEMA {name}").collect()
         case "SetCommand":
             kv_result_tuple = logical_plan.kv().get()
@@ -801,6 +804,7 @@ def map_sql_to_pandas_df(
             set_config_param(get_session_id(), key, val, session)
         case "SetNamespaceCommand":
             name = _spark_to_snowflake(logical_plan.namespace())
+            name = change_default_to_public(name)
             session.sql(f"USE SCHEMA {name}").collect()
         case "SetNamespaceLocation" | "SetNamespaceProperties":
             raise SnowparkConnectNotImplementedError(
@@ -997,6 +1001,20 @@ def get_sql_passthrough() -> bool:
     return get_boolean_session_config_param("snowpark.connect.sql.passthrough")
 
 
+def change_default_to_public(name: str) -> str:
+    """
+    Change the namespace to PUBLIC when given name is DEFAULT
+    :param name: Given namespace
+    :return: if name is DEFAULT return PUBLIC otherwise name
+    """
+    if name.startswith('"'):
+        if name.upper() == '"DEFAULT"':
+            return name.replace("DEFAULT", "PUBLIC")
+    elif name.upper() == "DEFAULT":
+        return "PUBLIC"
+    return name
+
+
 def map_sql(
     rel: relation_proto.Relation,
 ) -> DataFrameContainer:
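
The new helper maps a `DEFAULT` namespace (Spark's default database) to Snowflake's `PUBLIC` schema before the `DESCRIBE SCHEMA` / `USE SCHEMA` statements above are issued. Its expected behavior, as a quick illustration:

```python
change_default_to_public("DEFAULT")      # -> "PUBLIC"
change_default_to_public("default")      # -> "PUBLIC"  (comparison is case-insensitive)
change_default_to_public('"DEFAULT"')    # -> '"PUBLIC"' (quoted identifiers keep their quotes)
change_default_to_public("ANALYTICS")    # -> "ANALYTICS" (anything else is returned unchanged)
```
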
@@ -1008,7 +1026,6 @@ def map_sql(
     In passthough mode as True, SAS calls session.sql() and not calling Spark Parser.
     This is to mitigate any issue not covered by spark logical plan to protobuf conversion.
     """
-
     snowpark_connect_sql_passthrough = get_sql_passthrough()
 
     if not snowpark_connect_sql_passthrough:
@@ -1353,6 +1370,7 @@ def map_logical_plan_relation(
                 left_input=map_logical_plan_relation(children[0]),
                 right_input=map_logical_plan_relation(children[1]),
                 set_op_type=relation_proto.SetOperation.SET_OP_TYPE_UNION,
+                is_all=True,
                 by_name=rel.byName(),
                 allow_missing_columns=rel.allowMissingCol(),
             )
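
Passing `is_all=True` matches Spark's `union`/`unionByName` semantics, which keep duplicates (UNION ALL) rather than deduplicating. A brief illustration (assumes a Spark Connect session `spark`):

```python
a = spark.createDataFrame([(1,), (1,)], ["x"])
b = spark.createDataFrame([(1,)], ["x"])

a.union(b).count()             # 3 -- duplicates preserved (UNION ALL semantics)
a.union(b).distinct().count()  # 1 -- explicit dedup, matching SQL UNION
```
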
@@ -1701,7 +1719,50 @@ def map_logical_plan_relation(
             _window_specs.get()[key] = window_spec
             proto = map_logical_plan_relation(rel.child())
         case "Generate":
-            input_relation = map_logical_plan_relation(rel.child())
+            # Generate creates a nested Project relation (see lines 1785-1790) without
+            # setting its plan_id field. When this Project is later processed by map_project
+            # (map_column_ops.py), it uses rel.common.plan_id which defaults to 0 for unset
+            # protobuf fields. This means all columns from the Generate operation (both exploded
+            # columns and passthrough columns) will have plan_id=0 in their names.
+            #
+            # If Generate's child is a SubqueryAlias whose inner relation was processed
+            # with a non-zero plan_id, there will be a mismatch between:
+            # - The columns referenced in the Project (expecting plan_id from SubqueryAlias's child)
+            # - The actual column names created by Generate's Project (using plan_id=0)
+
+            # Therefore, when Generate has a SubqueryAlias child, we explicitly process the inner
+            # relation with plan_id=0 to match what Generate's Project will use. This only applies when
+            # the immediate child of Generate is a SubqueryAlias and preserves existing registrations (like CTEs),
+            # so it won't affect other patterns.
+
+            child_class = str(rel.child().getClass().getSimpleName())
+
+            if child_class == "SubqueryAlias":
+                alias = str(rel.child().alias())
+
+                # Check if this alias was already registered during initial SQL parsing
+                existing_plan_id = get_sql_plan(alias)
+
+                if existing_plan_id is not None:
+                    # Use the existing plan_id to maintain consistency with prior registration
+                    used_plan_id = existing_plan_id
+                else:
+                    # Use plan_id=0 to match what the nested Project will use (protobuf default)
+                    used_plan_id = 0
+                    set_sql_plan_name(alias, used_plan_id)
+
+                # Process the inner child with the determined plan_id
+                inner_child = map_logical_plan_relation(
+                    rel.child().child(), plan_id=used_plan_id
+                )
+                input_relation = relation_proto.Relation(
+                    subquery_alias=relation_proto.SubqueryAlias(
+                        input=inner_child,
+                        alias=alias,
+                    )
+                )
+            else:
+                input_relation = map_logical_plan_relation(rel.child())
             generator_output_list = as_java_list(rel.generatorOutput())
             generator_output_list_expressions = [
                 map_logical_plan_expression(e) for e in generator_output_list
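
A hedged example of a query shape that reaches the Generate-over-SubqueryAlias path (table and column names are hypothetical): the parenthesized subquery becomes a SubqueryAlias child of the Generate node produced by `LATERAL VIEW explode`:

```python
spark.sql("""
    SELECT t.id, tag
    FROM (SELECT id, tags FROM events) t
    LATERAL VIEW explode(t.tags) x AS tag
""").show()
```
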
snowflake/snowpark_connect/relation/map_udtf.py
@@ -31,6 +31,10 @@ from snowflake.snowpark_connect.type_mapping import (
     proto_to_snowpark_type,
 )
 from snowflake.snowpark_connect.utils.context import push_udtf_context
+from snowflake.snowpark_connect.utils.external_udxf_cache import (
+    cache_external_udtf,
+    get_external_udtf_from_cache,
+)
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 from snowflake.snowpark_connect.utils.udtf_helper import (
     SnowparkUDTF,
@@ -44,6 +48,34 @@ from snowflake.snowpark_connect.utils.udxf_import_utils import (
 )
 
 
+def cache_external_udtf_wrapper(from_register_udtf: bool):
+    def outer_wrapper(wrapper_func):
+        def wrapper(
+            udtf_proto: relation_proto.CommonInlineUserDefinedTableFunction,
+            spark_column_names,
+        ) -> SnowparkUDTF | None:
+            udf_hash = hash(str(udtf_proto))
+            cached_udtf = get_external_udtf_from_cache(udf_hash)
+
+            if cached_udtf:
+                if from_register_udtf:
+                    session = get_or_create_snowpark_session()
+                    session._udtfs[udtf_proto.function_name.lower()] = (
+                        cached_udtf,
+                        spark_column_names,
+                    )
+
+                return cached_udtf
+
+            snowpark_udf = wrapper_func(udtf_proto, spark_column_names)
+            cache_external_udtf(udf_hash, snowpark_udf)
+            return snowpark_udf
+
+        return wrapper
+
+    return outer_wrapper
+
+
 def build_expected_types_from_parsed(
     parsed_return: types_proto.DataType,
 ) -> List[Tuple[str, Any]]:
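
The decorator above memoizes UDTF creation on `hash(str(udtf_proto))`, so a structurally identical proto reuses the already-created function instead of re-registering it. A minimal standalone analogy of the pattern (plain Python, not the package code):

```python
_cache: dict[int, object] = {}

def cached(build):
    def wrapper(request):
        key = hash(str(request))      # key on a stable string rendering of the request
        if key in _cache:
            return _cache[key]        # cache hit: skip the expensive build
        result = build(request)
        _cache[key] = result
        return result
    return wrapper

@cached
def build_udtf(request):              # stands in for create_udtf / create_udtf_in_sproc
    return object()                   # expensive registration happens only once per key

req = {"name": "my_udtf", "return_type": "x: int"}
assert build_udtf(req) is build_udtf(req)
```
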
@@ -165,26 +197,37 @@ def register_udtf(
     ) = process_return_type(python_udft.return_type)
     function_name = udtf_proto.function_name
 
-    kwargs = {
-        "session": session,
-        "udtf_proto": udtf_proto,
-        "expected_types": expected_types,
-        "output_schema": output_schema,
-        "packages": global_config.get("snowpark.connect.udf.packages", ""),
-        "imports": get_python_udxf_import_files(session),
-        "called_from": "register_udtf",
-        "is_arrow_enabled": is_arrow_enabled_in_udtf(),
-        "is_spark_compatible_udtf_mode_enabled": is_spark_compatible_udtf_mode_enabled(),
-    }
-
-    if require_creating_udtf_in_sproc(udtf_proto):
-        snowpark_udtf = create_udtf_in_sproc(**kwargs)
-    else:
-        udtf = create_udtf(**kwargs)
-        snowpark_udtf = SnowparkUDTF(
-            name=udtf.name, input_types=udtf._input_types, output_schema=output_schema
-        )
+    @cache_external_udtf_wrapper(from_register_udtf=True)
+    def _register_udtf(
+        udtf_proto: relation_proto.CommonInlineUserDefinedTableFunction,
+        spark_column_names,
+    ):
+        kwargs = {
+            "session": session,
+            "udtf_proto": udtf_proto,
+            "expected_types": expected_types,
+            "output_schema": output_schema,
+            "packages": global_config.get("snowpark.connect.udf.packages", ""),
+            "imports": get_python_udxf_import_files(session),
+            "called_from": "register_udtf",
+            "is_arrow_enabled": is_arrow_enabled_in_udtf(),
+            "is_spark_compatible_udtf_mode_enabled": is_spark_compatible_udtf_mode_enabled(),
+        }
+
+        if require_creating_udtf_in_sproc(udtf_proto):
+            snowpark_udtf = create_udtf_in_sproc(**kwargs)
+        else:
+            udtf = create_udtf(**kwargs)
+            snowpark_udtf = SnowparkUDTF(
+                name=udtf.name,
+                input_types=udtf._input_types,
+                output_schema=output_schema,
+            )
+
+        return snowpark_udtf
 
+    snowpark_udtf = _register_udtf(udtf_proto, spark_column_names)
+    # We have to update cached _udtfs here, because function could have been cached in map_common_inline_user_defined_table_function
     session._udtfs[function_name.lower()] = (snowpark_udtf, spark_column_names)
     return snowpark_udtf
 
@@ -213,32 +256,41 @@ def map_common_inline_user_defined_table_function(
         spark_column_names,
     ) = process_return_type(python_udft.return_type)
 
-    kwargs = {
-        "session": session,
-        "udtf_proto": rel,
-        "expected_types": expected_types,
-        "output_schema": output_schema,
-        "packages": global_config.get("snowpark.connect.udf.packages", ""),
-        "imports": get_python_udxf_import_files(session),
-        "called_from": "map_common_inline_user_defined_table_function",
-        "is_arrow_enabled": is_arrow_enabled_in_udtf(),
-        "is_spark_compatible_udtf_mode_enabled": is_spark_compatible_udtf_mode_enabled(),
-    }
-
-    if require_creating_udtf_in_sproc(rel):
-        snowpark_udtf_or_error = create_udtf_in_sproc(**kwargs)
-        if isinstance(snowpark_udtf_or_error, str):
-            raise PythonException(snowpark_udtf_or_error)
-        snowpark_udtf = snowpark_udtf_or_error
-    else:
-        udtf_or_error = create_udtf(**kwargs)
-        if isinstance(udtf_or_error, str):
-            raise PythonException(udtf_or_error)
-        udtf = udtf_or_error
-        snowpark_udtf = SnowparkUDTF(
-            name=udtf.name, input_types=udtf._input_types, output_schema=output_schema
-        )
+    @cache_external_udtf_wrapper(from_register_udtf=False)
+    def _get_udtf(
+        udtf_proto: relation_proto.CommonInlineUserDefinedTableFunction,
+        spark_column_names,
+    ):
+        kwargs = {
+            "session": session,
+            "udtf_proto": udtf_proto,
+            "expected_types": expected_types,
+            "output_schema": output_schema,
+            "packages": global_config.get("snowpark.connect.udf.packages", ""),
+            "imports": get_python_udxf_import_files(session),
+            "called_from": "map_common_inline_user_defined_table_function",
+            "is_arrow_enabled": is_arrow_enabled_in_udtf(),
+            "is_spark_compatible_udtf_mode_enabled": is_spark_compatible_udtf_mode_enabled(),
+        }
+
+        if require_creating_udtf_in_sproc(udtf_proto):
+            snowpark_udtf_or_error = create_udtf_in_sproc(**kwargs)
+            if isinstance(snowpark_udtf_or_error, str):
+                raise PythonException(snowpark_udtf_or_error)
+            snowpark_udtf = snowpark_udtf_or_error
+        else:
+            udtf_or_error = create_udtf(**kwargs)
+            if isinstance(udtf_or_error, str):
+                raise PythonException(udtf_or_error)
+            udtf = udtf_or_error
+            snowpark_udtf = SnowparkUDTF(
+                name=udtf.name,
+                input_types=udtf._input_types,
+                output_schema=output_schema,
+            )
+        return snowpark_udtf
 
+    snowpark_udtf = _get_udtf(rel, spark_column_names)
     column_map = ColumnNameMap([], [])
     snowpark_udtf_args = []
 
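
A hedged client-side illustration of the two paths above (PySpark 3.5 Python UDTF API); whether a given reuse is an actual cache hit depends on the serialized UDTF proto hashing identically:

```python
from pyspark.sql.functions import lit, udtf

@udtf(returnType="num: int, doubled: int")
class DoubleIt:
    def eval(self, n: int):
        yield n, n * 2

# Inline use goes through map_common_inline_user_defined_table_function.
DoubleIt(lit(3)).show()

# Registration + SQL use goes through register_udtf.
spark.udtf.register("double_it", DoubleIt)
spark.sql("SELECT * FROM double_it(5)").show()
```
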
snowflake/snowpark_connect/relation/utils.py
@@ -6,6 +6,7 @@ import random
 import re
 import string
 import time
+import uuid
 from typing import Sequence
 
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
@@ -153,6 +154,49 @@ def random_string(
     return "".join([prefix, random_part, suffix])
 
 
+def generate_spark_compatible_filename(
+    task_id: int = 0,
+    attempt_number: int = 0,
+    compression: str = None,
+    format_ext: str = "parquet",
+) -> str:
+    """Generate a Spark-compatible filename following the convention:
+    part-<task-id>-<uuid>-c<attempt-number>.<compression>.<format>
+
+    Args:
+        task_id: Task ID (usually 0 for single partition)
+        attempt_number: Attempt number (usually 0)
+        compression: Compression type (e.g., 'snappy', 'gzip', 'none')
+        format_ext: File format extension (e.g., 'parquet', 'csv', 'json')
+
+    Returns:
+        A filename string following Spark's naming convention
+    """
+    # Generate a UUID for uniqueness
+    file_uuid = str(uuid.uuid4())
+
+    # Format task ID with leading zeros (5 digits)
+    formatted_task_id = f"{task_id:05d}"
+
+    # Format attempt number with leading zeros (3 digits)
+    formatted_attempt = f"{attempt_number:03d}"
+
+    # Build the base filename
+    base_name = f"part-{formatted_task_id}-{file_uuid}-c{formatted_attempt}"
+
+    # Add compression if specified and not 'none'
+    if compression and compression.lower() not in ("none", "uncompressed"):
+        compression_part = f".{compression.lower()}"
+    else:
+        compression_part = ""
+
+    # Add format extension if specified
+    if format_ext:
+        return f"{base_name}{compression_part}.{format_ext}"
+    else:
+        return f"{base_name}{compression_part}"
+
+
 def _normalize_query_for_semantic_hash(query_str: str) -> str:
     """
     Normalize a query string for semantic comparison by extracting original names from
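
Examples of the naming convention the helper produces (UUIDs are placeholders):

```python
generate_spark_compatible_filename(compression="snappy", format_ext="parquet")
# -> 'part-00000-3f9b2c5e-...-c000.snappy.parquet'

generate_spark_compatible_filename(compression="none", format_ext="csv")
# -> 'part-00000-7d41aa10-...-c000.csv'
```
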