subquadratic-ops-torch-cu13 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,343 @@
1
+ Metadata-Version: 2.2
2
+ Name: subquadratic-ops-torch-cu13
3
+ Version: 0.2.0
4
+ Summary: subquadratic-ops-torch-cu13 - GPU Accelerated Torch Extensions for Subquadratic Operations
5
+ Author-Email: Alireza Moradzadeh <amoradzadeh@nvidia.com>
6
+ License: =?utf-8?q?Other/Proprietary_License_=28NVIDIA_Proprietary_Software=29?=
7
+ =?utf-8?q?_?=
8
+ =?utf-8?q?_=23_Software_License_Agreement?=
9
+ =?utf-8?q?_?=
10
+ =?utf-8?q?_LICENSE_AGREEMENT_FOR_NVIDIA_MATH_LIBRARIES_SOFTWARE_DEVELOPMENT_KITS?=
11
+ =?utf-8?q?_?=
12
+ =?utf-8?q?_This_license_agreement=28=E2=80=9CAgreement=E2=80=9D=29_is_a_legal_agreement_between_you_and_NVIDIA_Corporation_=28=E2=80=9CNVIDIA=E2=80=9D=29_and_governs_your_use_of_the_NVIDIA_math_libraries_software_development_kit_as_available_at_NVIDIA=E2=80=99s_discretion_=28each=2C_a_=E2=80=9CSDK=E2=80=9D=29=2E?=
13
+ =?utf-8?q?_?=
14
+ =?utf-8?q?_Each_SDK_has_its_own_set_of_software_and_materials=2C_but_here_is_a_description_of_the_types_of_items_that_may_be_included_in_a_SDK=3A_source_code=2C_header_files=2C_APIs=2C_data_sets_and_assets_=28examples_include_images=2C_textures=2C_models=2C_scenes=2C_videos=2C_native_API_input/output_files=29=2C_binary_software=2C_sample_code=2C_libraries=2C_utility_programs=2C_programming_code_and_documentation=2E?=
15
+ =?utf-8?q?_?=
16
+ =?utf-8?q?_This_Agreement_can_be_accepted_only_by_an_adult_of_legal_age_of_majority_in_the_country_in_which_the_SDK_is_used=2E?=
17
+ =?utf-8?q?_?=
18
+ =?utf-8?q?_If_you_are_entering_into_this_Agreement_on_behalf_of_a_company_or_other_legal_entity=2C_you_represent_that_you_have_the_legal_authority_to_bind_the_entity_to_this_Agreement=2C_in_which_case_=22you=22_will_mean_the_entity_you_represent=2E?=
19
+ =?utf-8?q?_?=
20
+ =?utf-8?q?_If_you_don=27t_have_the_required_age_or_authority_to_accept_this_Agreement=2C_or_if_you_don=27t_accept_all_the_terms_and_conditions_of_this_Agreement=2C_do_not_download=2C_install_or_use_the_SDK=2E?=
21
+ =?utf-8?q?_?=
22
+ =?utf-8?q?_You_agree_to_use_the_SDK_only_for_purposes_that_are_permitted_by_=28a=29_this_Agreement=2C_and_=28b=29_any_applicable_law=2C_regulation_or_generally_accepted_practices_or_guidelines_in_the_relevant_jurisdictions=2E?=
23
+ =?utf-8?q?_?=
24
+ =?utf-8?q?_License=2E?=
25
+ =?utf-8?q?_?=
26
+ =?utf-8?q?_1=2E1_Grant?=
27
+ =?utf-8?q?_?=
28
+ =?utf-8?q?_Subject_to_the_terms_of_this_Agreement=2C_NVIDIA_hereby_grants_you_a_non-exclusive=2C_non-transferable_license=2C_without_the_right_to_sublicense_=28except_as_expressly_provided_in_this_Agreement=29_to=3A?=
29
+ =?utf-8?q?_?=
30
+ =?utf-8?q?_Install_and_use_the_SDK=2C_and?=
31
+ =?utf-8?q?_?=
32
+ =?utf-8?q?_Distribute_the_binary_files=2C_files_identified_as_samples=2C_and_headers_as_incorporated_into_a_software_application_that_meets_the_distribution_requirements_indicated_in_this_Agreement=2E?=
33
+ =?utf-8?q?_?=
34
+ =?utf-8?q?_1=2E2_Distribution_Requirements?=
35
+ =?utf-8?q?_?=
36
+ =?utf-8?q?_These_are_the_distribution_requirements_for_you_to_exercise_the_distribution_grant=3A?=
37
+ =?utf-8?q?_?=
38
+ =?utf-8?q?_Your_application_must_have_material_additional_functionality=2C_beyond_the_included_portions_of_the_SDK=2E?=
39
+ =?utf-8?q?_?=
40
+ =?utf-8?q?_The_distributable_portions_of_the_SDK_shall_only_be_accessed_by_your_application=2E?=
41
+ =?utf-8?q?_?=
42
+ =?utf-8?q?_The_following_notice_shall_be_included_in_modifications_and_derivative_works_of_sample_source_code_distributed=3A_=22This_software_contains_source_code_provided_by_NVIDIA_Corporation=2E=22?=
43
+ =?utf-8?q?_?=
44
+ =?utf-8?q?_Unless_a_developer_tool_is_identified_in_this_Agreement_as_distributable=2C_it_is_delivered_for_your_internal_use_only=2E?=
45
+ =?utf-8?q?_?=
46
+ =?utf-8?q?_The_terms_under_which_you_distribute_your_application_must_be_consistent_with_the_terms_of_this_Agreement=2C_including_=28without_limitation=29_terms_relating_to_the_license_grant_and_license_restrictions_and_protection_of_NVIDIA=27s_intellectual_property_rights=2E_Additionally=2C_you_agree_that_you_will_protect_the_privacy=2C_security_and_legal_rights_of_your_application_users=2E?=
47
+ =?utf-8?q?_?=
48
+ =?utf-8?q?_You_agree_to_notify_NVIDIA_in_writing_of_any_known_or_suspected_distribution_or_use_of_the_SDK_not_in_compliance_with_the_requirements_of_this_Agreement=2C_and_to_enforce_the_terms_of_your_agreements_with_respect_to_distributed_SDK=2E?=
49
+ =?utf-8?q?_?=
50
+ =?utf-8?q?_1=2E3_Authorized_Users?=
51
+ =?utf-8?q?_?=
52
+ =?utf-8?q?_You_may_allow_employees_and_contractors_of_your_entity_or_of_your_subsidiary=28ies=29_to_access_and_use_the_SDK_from_your_secure_network_to_perform_work_on_your_behalf=2E?=
53
+ =?utf-8?q?_?=
54
+ =?utf-8?q?_If_you_are_an_academic_institution_you_may_allow_users_enrolled_or_employed_by_the_academic_institution_to_access_and_use_the_SDK_from_your_secure_network=2E?=
55
+ =?utf-8?q?_?=
56
+ =?utf-8?q?_You_are_responsible_for_the_compliance_with_the_terms_of_this_Agreement_by_your_authorized_users=2E_If_you_become_aware_that_your_authorized_users_didn=27t_follow_the_terms_of_this_Agreement=2C_you_agree_to_take_reasonable_steps_to_resolve_the_non-compliance_and_prevent_new_occurrences=2E?=
57
+ =?utf-8?q?_?=
58
+ =?utf-8?q?_1=2E4_Pre-Release_SDK?=
59
+ =?utf-8?q?_?=
60
+ =?utf-8?q?_The_SDK_versions_identified_as_alpha=2C_beta=2C_preview_or_otherwise_as_pre-release=2C_may_not_be_fully_functional=2C_may_contain_errors_or_design_flaws=2C_and_may_have_reduced_or_different_security=2C_privacy=2C_accessibility=2C_availability=2C_and_reliability_standards_relative_to_commercial_versions_of_NVIDIA_software_and_materials=2E_Use_of_a_pre-release_SDK_may_result_in_unexpected_results=2C_loss_of_data=2C_project_delays_or_other_unpredictable_damage_or_loss=2E_You_may_use_a_pre-release_SDK_at_your_own_risk=2C_understanding_that_pre-release_SDKs_are_not_intended_for_use_in_production_or_business-critical_systems=2E_NVIDIA_may_choose_not_to_make_available_a_commercial_version_of_any_pre-release_SDK=2E_NVIDIA_may_also_choose_to_abandon_development_and_terminate_the_availability_of_a_pre-release_SDK_at_any_time_without_liability=2E_1=2E5_Updates?=
61
+ =?utf-8?q?_?=
62
+ =?utf-8?q?_NVIDIA_may=2C_at_its_option=2C_make_available_patches=2C_workarounds_or_other_updates_to_this_SDK=2E_Unless_the_updates_are_provided_with_their_separate_governing_terms=2C_they_are_deemed_part_of_the_SDK_licensed_to_you_as_provided_in_this_Agreement=2E?=
63
+ =?utf-8?q?_?=
64
+ =?utf-8?q?_You_agree_that_the_form_and_content_of_the_SDK_that_NVIDIA_provides_may_change_without_prior_notice_to_you=2E_While_NVIDIA_generally_maintains_compatibility_between_versions=2C_NVIDIA_may_in_some_cases_make_changes_that_introduce_incompatibilities_in_future_versions_of_the_SDK=2E?=
65
+ =?utf-8?q?_?=
66
+ =?utf-8?q?_1=2E6_Components_Under_Other_Licenses=2E?=
67
+ =?utf-8?q?_?=
68
+ =?utf-8?q?_The_SDK_may_include_NVIDIA_or_third-party_components_with_separate_legal_notices_or_terms_as_may_be_described_in_proprietary_notices_accompanying_the_SDK=2C_such_as_components_governed_by_open_source_software_licenses=2E_If_and_to_the_extent_there_is_a_conflict_between_the_terms_in_this_license_and_the_license_terms_associated_with_a_component=2C_the_license_terms_associated_with_the_components_control_only_to_the_extent_necessary_to_resolve_the_conflict=2E?=
69
+ =?utf-8?q?_?=
70
+ =?utf-8?q?_1=2E7_Reservation_of_Rights?=
71
+ =?utf-8?q?_?=
72
+ =?utf-8?q?_NVIDIA_reserves_all_rights=2C_title_and_interest_in_and_to_the_SDK_not_expressly_granted_to_you_under_this_Agreement=2E?=
73
+ =?utf-8?q?_?=
74
+ =?utf-8?q?_Limitations=2E?=
75
+ =?utf-8?q?_?=
76
+ =?utf-8?q?_The_following_license_limitations_apply_to_your_use_of_the_SDK=3A?=
77
+ =?utf-8?q?_?=
78
+ =?utf-8?q?_2=2E1_The_SDK_is_licensed_for_you_to_develop_applications_only_for_use_in_systems_with_NVIDIA_GPUs=2E?=
79
+ =?utf-8?q?_?=
80
+ =?utf-8?q?_2=2E2_You_may_not_reverse_engineer=2C_decompile_or_disassemble=2C_or_remove_copyright_or_other_proprietary_notices_from_any_portion_of_the_SDK_or_copies_of_the_SDK=2E?=
81
+ =?utf-8?q?_?=
82
+ =?utf-8?q?_2=2E3_Except_as_expressly_provided_in_this_Agreement=2C_you_may_not_copy=2C_sell=2C_rent=2C_sublicense=2C_transfer=2C_distribute=2C_modify=2C_or_create_derivative_works_of_any_portion_of_the_SDK=2E?=
83
+ =?utf-8?q?_?=
84
+ =?utf-8?q?_2=2E4_Unless_you_have_an_agreement_with_NVIDIA_for_this_purpose=2C_you_may_not_indicate_that_an_application_created_with_the_SDK_is_sponsored_or_endorsed_by_NVIDIA=2E?=
85
+ =?utf-8?q?_?=
86
+ =?utf-8?q?_2=2E5_You_may_not_bypass=2C_disable=2C_or_circumvent_any_encryption=2C_security=2C_digital_rights_management_or_authentication_mechanism_in_the_SDK=2E?=
87
+ =?utf-8?q?_?=
88
+ =?utf-8?q?_2=2E6_You_may_not_use_the_SDK_in_any_manner_that_would_cause_it_to_become_subject_to_an_open_source_software_license=2E_As_examples=2C_licenses_that_require_as_a_condition_of_use=2C_modification=2C_and/or_distribution_that_the_SDK_be_=28i=29_disclosed_or_distributed_in_source_code_form=3B_=28ii=29_licensed_for_the_purpose_of_making_derivative_works=3B_or_=28iii=29_redistributable_at_no_charge=2E?=
89
+ =?utf-8?q?_?=
90
+ =?utf-8?q?_2=2E7_You_acknowledge_that_the_SDK_as_delivered_is_not_tested_or_certified_by_NVIDIA_for_use_in_connection_with_the_design=2C_construction=2C_maintenance=2C_and/or_operation_of_any_system_where_the_use_or_failure_of_such_system_could_result_in_a_situation_that_threatens_the_safety_of_human_life_or_results_in_catastrophic_damages_=28each=2C_a_=22Critical_Application=22=29=2E_Examples_of_Critical_Applications_include_use_in_avionics=2C_navigation=2C_autonomous_vehicle_applications=2C_ai_solutions_for_automotive_products=2C_military=2C_medical=2C_life_support_or_other_life_critical_applications=2E_NVIDIA_shall_not_be_liable_to_you_or_any_third_party=2C_in_whole_or_in_part=2C_for_any_claims_or_damages_arising_from_such_uses=2E_You_are_solely_responsible_for_ensuring_that_any_product_or_service_developed_with_the_SDK_as_a_whole_includes_sufficient_features_to_comply_with_all_applicable_legal_and_regulatory_standards_and_requirements=2E?=
91
+ =?utf-8?q?_?=
92
+ =?utf-8?q?_2=2E8_You_agree_to_defend=2C_indemnify_and_hold_harmless_NVIDIA_and_its_affiliates=2C_and_their_respective_employees=2C_contractors=2C_agents=2C_officers_and_directors=2C_from_and_against_any_and_all_claims=2C_damages=2C_obligations=2C_losses=2C_liabilities=2C_costs_or_debt=2C_fines=2C_restitutions_and_expenses_=28including_but_not_limited_to_attorney=27s_fees_and_costs_incident_to_establishing_the_right_of_indemnification=29_arising_out_of_or_related_to_products_or_services_that_use_the_SDK_in_or_for_Critical_Applications=2C_and_for_use_of_the_SDK_outside_of_the_scope_of_this_Agreement=2C_or_not_in_compliance_with_its_terms=2E?=
93
+ =?utf-8?q?_?=
94
+ =?utf-8?q?_Ownership=2E?=
95
+ =?utf-8?q?_?=
96
+ =?utf-8?q?_3=2E1_NVIDIA_or_its_licensors_hold_all_rights=2C_title_and_interest_in_and_to_the_SDK_and_its_modifications=2C_including_their_respective_intellectual_property_rights=2E_This_SDK_may_include_software_and_materials_from_NVIDIA=27s_licensors=2C_and_these_licensors_are_intended_third_party_beneficiaries_that_may_enforce_this_Agreement_with_respect_to_their_intellectual_property_rights=2E?=
97
+ =?utf-8?q?_?=
98
+ =?utf-8?q?_3=2E2_You_may=2C_but_don=27t_have_to=2C_provide_to_NVIDIA_suggestions=2C_feature_requests_or_other_feedback_regarding_the_SDK=2C_including_possible_enhancements_or_modifications_to_the_SDK=2E_For_any_feedback_that_you_voluntarily_provide=2C_you_hereby_grant_NVIDIA_and_its_affiliates_a_perpetual=2C_non-exclusive=2C_worldwide=2C_irrevocable_license_to_use=2C_reproduce=2C_modify=2C_license=2C_sublicense_=28through_multiple_tiers_of_sublicensees=29=2C_and_distribute_=28through_multiple_tiers_of_distributors=29_it_without_the_payment_of_any_royalties_or_fees_to_you=2E_NVIDIA_will_use_feedback_at_its_choice=2E?=
99
+ =?utf-8?q?_?=
100
+ =?utf-8?q?_No_Warranties=2E?=
101
+ =?utf-8?q?_?=
102
+ =?utf-8?q?_THE_SDK_IS_PROVIDED_BY_NVIDIA_=22AS_IS=22_AND_=22WITH_ALL_FAULTS=2E=22_TO_THE_MAXIMUM_EXTENT_PERMITTED_BY_LAW=2C_NVIDIA_AND_ITS_AFFILIATES_EXPRESSLY_DISCLAIM_ALL_WARRANTIES_OF_ANY_KIND_OR_NATURE=2C_WHETHER_EXPRESS=2C_IMPLIED_OR_STATUTORY=2C_INCLUDING=2C_BUT_NOT_LIMITED_TO=2C_ANY_WARRANTIES_OF_MERCHANTABILITY=2C_FITNESS_FOR_A_PARTICULAR_PURPOSE=2C_TITLE=2C_NON-INFRINGEMENT=2C_OR_THE_ABSENCE_OF_ANY_DEFECTS_THEREIN=2C_WHETHER_LATENT_OR_PATENT=2E_NO_WARRANTY_IS_MADE_ON_THE_BASIS_OF_TRADE_USAGE=2C_COURSE_OF_DEALING_OR_COURSE_OF_TRADE=2E?=
103
+ =?utf-8?q?_?=
104
+ =?utf-8?q?_Limitations_of_Liability=2E?=
105
+ =?utf-8?q?_?=
106
+ =?utf-8?q?_TO_THE_MAXIMUM_EXTENT_PERMITTED_BY_LAW=2C_NVIDIA_AND_ITS_AFFILIATES_SHALL_NOT_BE_LIABLE_FOR_ANY_SPECIAL=2C_INCIDENTAL=2C_PUNITIVE_OR_CONSEQUENTIAL_DAMAGES=2C_OR_ANY_LOST_PROFITS=2C_LOSS_OF_USE=2C_LOSS_OF_DATA_OR_LOSS_OF_GOODWILL=2C_OR_THE_COSTS_OF_PROCURING_SUBSTITUTE_PRODUCTS=2C_ARISING_OUT_OF_OR_IN_CONNECTION_WITH_THIS_AGREEMENT_OR_THE_USE_OR_PERFORMANCE_OF_THE_SDK=2C_WHETHER_SUCH_LIABILITY_ARISES_FROM_ANY_CLAIM_BASED_UPON_BREACH_OF_CONTRACT=2C_BREACH_OF_WARRANTY=2C_TORT_=28INCLUDING_NEGLIGENCE=29=2C_PRODUCT_LIABILITY_OR_ANY_OTHER_CAUSE_OF_ACTION_OR_THEORY_OF_LIABILITY=2E_IN_NO_EVENT_WILL_NVIDIA=27S_AND_ITS_AFFILIATES_TOTAL_CUMULATIVE_LIABILITY_UNDER_OR_ARISING_OUT_OF_THIS_AGREEMENT_EXCEED_US=2410=2E00=2E_THE_NATURE_OF_THE_LIABILITY_OR_THE_NUMBER_OF_CLAIMS_OR_SUITS_SHALL_NOT_ENLARGE_OR_EXTEND_THIS_LIMIT=2E?=
107
+ =?utf-8?q?_?=
108
+ =?utf-8?q?_These_exclusions_and_limitations_of_liability_shall_apply_regardless_if_NVIDIA_or_its_affiliates_have_been_advised_of_the_possibility_of_such_damages=2C_and_regardless_of_whether_a_remedy_fails_its_essential_purpose=2E_These_exclusions_and_limitations_of_liability_form_an_essential_basis_of_the_bargain_between_the_parties=2C_and=2C_absent_any_of_these_exclusions_or_limitations_of_liability=2C_the_provisions_of_this_Agreement=2C_including=2C_without_limitation=2C_the_economic_terms=2C_would_be_substantially_different=2E?=
109
+ =?utf-8?q?_?=
110
+ =?utf-8?q?_Termination=2E?=
111
+ =?utf-8?q?_?=
112
+ =?utf-8?q?_6=2E1_This_Agreement_will_continue_to_apply_until_terminated_by_either_you_or_NVIDIA_as_described_below=2E?=
113
+ =?utf-8?q?_?=
114
+ =?utf-8?q?_6=2E2_If_you_want_to_terminate_this_Agreement=2C_you_may_do_so_by_stopping_to_use_the_SDK=2E?=
115
+ =?utf-8?q?_?=
116
+ =?utf-8?q?_6=2E3_NVIDIA_may=2C_at_any_time=2C_terminate_this_Agreement_if=3A_=28i=29_you_fail_to_comply_with_any_term_of_this_Agreement_and_the_non-compliance_is_not_fixed_within_thirty_=2830=29_days_following_notice_from_NVIDIA_=28or_immediately_if_you_violate_NVIDIA=27s_intellectual_property_rights=29=3B_=28ii=29_you_commence_or_participate_in_any_legal_proceeding_against_NVIDIA_with_respect_to_the_SDK=3B_or_=28iii=29_NVIDIA_decides_to_no_longer_provide_the_SDK_in_a_country_or=2C_in_NVIDIA=27s_sole_discretion=2C_the_continued_use_of_it_is_no_longer_commercially_viable=2E?=
117
+ =?utf-8?q?_?=
118
+ =?utf-8?q?_6=2E4_Upon_any_termination_of_this_Agreement=2C_you_agree_to_promptly_discontinue_use_of_the_SDK_and_destroy_all_copies_in_your_possession_or_control=2E_Your_prior_distributions_in_accordance_with_this_Agreement_are_not_affected_by_the_termination_of_this_Agreement=2E_Upon_written_request=2C_you_will_certify_in_writing_that_you_have_complied_with_your_commitments_under_this_section=2E_Upon_any_termination_of_this_Agreement_all_provisions_survive_except_for_the_licenses_granted_to_you=2E?=
119
+ =?utf-8?q?_?=
120
+ =?utf-8?q?_General=2E?=
121
+ =?utf-8?q?_?=
122
+ =?utf-8?q?_If_you_wish_to_assign_this_Agreement_or_your_rights_and_obligations=2C_including_by_merger=2C_consolidation=2C_dissolution_or_operation_of_law=2C_contact_NVIDIA_to_ask_for_permission=2E_Any_attempted_assignment_not_approved_by_NVIDIA_in_writing_shall_be_void_and_of_no_effect=2E_NVIDIA_may_assign=2C_delegate_or_transfer_this_Agreement_and_its_rights_and_obligations=2C_and_if_to_a_non-affiliate_you_will_be_notified=2E?=
123
+ =?utf-8?q?_?=
124
+ =?utf-8?q?_You_agree_to_cooperate_with_NVIDIA_and_provide_reasonably_requested_information_to_verify_your_compliance_with_this_Agreement=2E?=
125
+ =?utf-8?q?_?=
126
+ =?utf-8?q?_This_Agreement_will_be_governed_in_all_respects_by_the_laws_of_the_United_States_and_of_the_State_of_Delaware_as_those_laws_are_applied_to_contracts_entered_into_and_performed_entirely_within_Delaware_by_Delaware_residents=2C_without_regard_to_the_conflicts_of_laws_principles=2E_The_United_Nations_Convention_on_Contracts_for_the_International_Sale_of_Goods_is_specifically_disclaimed=2E_You_agree_to_all_terms_of_this_Agreement_in_the_English_language=2E?=
127
+ =?utf-8?q?_?=
128
+ =?utf-8?q?_The_state_or_federal_courts_residing_in_Santa_Clara_County=2C_California_shall_have_exclusive_jurisdiction_over_any_dispute_or_claim_arising_out_of_this_Agreement=2E_Notwithstanding_this=2C_you_agree_that_NVIDIA_shall_still_be_allowed_to_apply_for_injunctive_remedies_or_an_equivalent_type_of_urgent_legal_relief_in_any_jurisdiction=2E?=
129
+ =?utf-8?q?_?=
130
+ =?utf-8?q?_If_any_court_of_competent_jurisdiction_determines_that_any_provision_of_this_Agreement_is_illegal=2C_invalid_or_unenforceable=2C_such_provision_will_be_construed_as_limited_to_the_extent_necessary_to_be_consistent_with_and_fully_enforceable_under_the_law_and_the_remaining_provisions_will_remain_in_full_force_and_effect=2E_Unless_otherwise_specified=2C_remedies_are_cumulative=2E?=
131
+ =?utf-8?q?_?=
132
+ =?utf-8?q?_Each_party_acknowledges_and_agrees_that_the_other_is_an_independent_contractor_in_the_performance_of_this_Agreement=2E?=
133
+ =?utf-8?q?_?=
134
+ =?utf-8?q?_The_SDK_has_been_developed_entirely_at_private_expense_and_is_=22commercial_items=22_consisting_of_=22commercial_computer_software=22_and_=22commercial_computer_software_documentation=22_provided_with_RESTRICTED_RIGHTS=2E_Use=2C_duplication_or_disclosure_by_the_U=2ES=2E_Government_or_a_U=2ES=2E_Government_subcontractor_is_subject_to_the_restrictions_in_this_Agreement_pursuant_to_DFARS_227=2E7202-3=28a=29_or_as_set_forth_in_subparagraphs_=28b=29=281=29_and_=282=29_of_the_Commercial_Computer_Software_-_Restricted_Rights_clause_at_FAR_52=2E227-19=2C_as_applicable=2E_Contractor/manufacturer_is_NVIDIA=2C_2788_San_Tomas_Expressway=2C_Santa_Clara=2C_CA_95051=2E?=
135
+ =?utf-8?q?_?=
136
+ =?utf-8?q?_The_SDK_is_subject_to_United_States_export_laws_and_regulations=2E_You_agree_that_you_will_not_ship=2C_transfer_or_export_the_SDK_into_any_country=2C_or_use_the_SDK_in_any_manner=2C_prohibited_by_the_United_States_Bureau_of_Industry_and_Security_or_economic_sanctions_regulations_administered_by_the_U=2ES=2E_Department_of_Treasury=27s_Office_of_Foreign_Assets_Control_=28OFAC=29=2C_or_any_applicable_export_laws=2C_restrictions_or_regulations=2E_These_laws_include_restrictions_on_destinations=2C_end_users_and_end_use=2E_By_accepting_this_Agreement=2C_you_confirm_that_you_are_not_a_resident_or_citizen_of_any_country_currently_embargoed_by_the_U=2ES=2E_and_that_you_are_not_otherwise_prohibited_from_receiving_the_SDK=2E?=
137
+ =?utf-8?q?_?=
138
+ =?utf-8?q?_Any_notice_delivered_by_NVIDIA_to_you_under_this_Agreement_will_be_delivered_via_mail=2C_email_or_fax=2E_You_agree_that_any_notices_that_NVIDIA_sends_you_electronically_will_satisfy_any_legal_communication_requirements=2E_Please_direct_your_legal_notices_or_other_correspondence_to_NVIDIA_Corporation=2C_2788_San_Tomas_Expressway=2C_Santa_Clara=2C_California_95051=2C_United_States_of_America=2C_Attention=3A_Legal_Department=2E?=
139
+ =?utf-8?q?_?=
140
+ =?utf-8?q?_This_Agreement_constitutes_the_entire_agreement_of_the_parties_with_respect_to_the_subject_matter_of_this_Agreement_and_supersedes_all_prior_negotiations_or_documentation_exchanged_between_the_parties_relating_to_this_subject_matter=2E_Any_additional_and/or_conflicting_terms_on_documents_issued_by_you_are_null=2C_void=2C_and_invalid=2E_Any_amendment_or_waiver_under_this_Agreement_shall_be_in_writing_and_signed_by_representatives_of_both_parties=2E?=
141
+ =?utf-8?q?_?=
142
+ =?utf-8?q?_If_the_distribution_terms_in_this_Agreement_are_not_suitable_for_your_organization=2C_or_for_any_questions_regarding_this_Agreement=2C_please_contact_NVIDIA_at_nvidia-compute-license-questions=40nvidia=2Ecom=2E?=
143
+ =?utf-8?q?_?=
144
+ =?utf-8?q?_=28v=2E_February_10=2C_2022=29?=
145
+ =?utf-8?q?_?=
146
+ =?utf-8?q?_---?=
147
+ =?utf-8?q?_?=
148
+ =?utf-8?q?_**Note=3A**_This_package_also_includes_third-party_components_with_their_respective_licenses_and_attributions=2E_Please_see_the_accompanying_=60Third=5Fparty=5Fattr=2Etxt=60_file_for_complete_third-party_license_information=2E?=
149
+ =?utf-8?q?_?=
150
+ Project-URL: Homepage, https://github.com/nvidia-digital-bio/subquadraticOps-docs
151
+ Project-URL: Documentation, https://nvidia-digital-bio.github.io/subquadraticOps-docs/index.html
152
+ Project-URL: Repository, https://github.com/nvidia-digital-bio/subquadraticOps-docs
153
+ Requires-Dist: scikit-build-core>=0.10
154
+ Requires-Dist: nanobind>=1.5.0
155
+ Requires-Dist: warp-lang>=1.8.0
156
+ Requires-Dist: nvidia-ml-py
157
+ Description-Content-Type: text/markdown
158
+
159
+ # subquadratic_ops_torch
160
+
161
+ ## Introduction
162
+
163
+ `subquadratic_ops_torch` provides CUDA kernels for subquadratic operations, e.g. long and short convolution.
164
+ It contains PyTorch bindings to optimized kernels.
165
+
166
+ ## Installation
167
+
168
+ Please install the package matching your CUDA major version: `pip install subquadratic-ops-torch-cu12` or `pip install subquadratic-ops-torch-cu13`.
169
+
170
+ ## Documentation
171
+
172
+ For detailed usage information on the kernels, please refer to the docstrings
173
+ of their respective functions.
174
+
175
+ ## Usage
176
+
177
+ You can import the library from python:
178
+
179
+ ```python
180
+ import subquadratic_ops_torch as subq
181
+ ```
182
+
183
+ Kernels are primarily exposed as function calls backed by `torch.ops`, which also provide a lower-level
184
+ interface as `torch.library` operators.
185
+ This allows you to export models using these operations via `torch.export`
186
+ and run inference on them using TensorRT.
187
+
188
+
189
+ ## Support and Feedback
190
+
191
+ Please contact the developers for any issues you might encounter.
192
+
193
+
194
+ ## Requirements
195
+
196
+ - CUDA-compatible NVIDIA GPU (Ampere+)
197
+ - CUDA Toolkit 12.0 or higher
198
+ - Python 3.11-3.13
199
+
200
+ ## Modules
201
+
202
+ ### B2B CausalConv1d
203
+
204
+ #### Operation
205
+
206
+ Back-to-back causal conv1d for the Striped Hyena 2 architecture used in the Evo2 model.
207
+ The operation is performed in a causal manner, meaning each position only attends to previous positions in the sequence.
208
+ In code terms,
209
+ ```python
210
+ in_dim = 8192
211
+
212
+ width_proj = 8
213
+ width_mixer = 128
214
+
215
+ dtype = torch.float32
216
+
217
+ class Conv1DModel(nn.Module):
218
+ def __init__(self, in_dim, width, dtype, skip_bias=False):
219
+ super(Conv1DModel, self).__init__()
220
+
221
+ self.conv = nn.Conv1d(
222
+ in_dim,
223
+ in_dim,
224
+ width,
225
+ padding=width - 1,
226
+ groups=in_dim,
227
+ bias=False,
228
+ dtype=dtype,
229
+ device="cuda:0",
230
+ )
231
+ self.width = width
232
+ self.weight = self.conv.weight.reshape(-1, width)
233
+ if skip_bias:
234
+ self.skip_bias = nn.Parameter(torch.zeros(in_dim, dtype=dtype, device="cuda:0").reshape(1, -1, 1))
235
+ else:
236
+ self.skip_bias = None
237
+
238
+ def forward(self, x):
239
+ seqlen = x.shape[-1]
240
+ out = self.conv(x)
241
+ return out[..., :seqlen]
242
+
243
+ def model(x, conv1d_proj, conv1d_mixer):
244
+ xv = conv1d_proj(x)
245
+ z = xv[:,1::3, :] * xv[:, 2::3, :]
246
+ y = conv1d_mixer(z) + conv1d_mixer.skip_bias * z
247
+ return y * xv[:, ::3, :]
248
+ x = torch.randn(batch_size, 3*in_dim, seq_dim)
249
+ conv1d_proj = Conv1DModel(3*in_dim, width_proj, dtype)
250
+ conv1d_mixer = Conv1DModel(in_dim, width_mixer, dtype, skip_bias=True)
251
+
252
+ y = model(x, conv1d_proj, conv1d_mixer)
253
+ ```
254
+ is equivalent to,
255
+ ```python
256
+ weight_proj = torch.randn(3*in_dim, width_proj).to(dtype)
257
+ weight_mixer = torch.randn(in_dim, width_mixer).to(dtype)
258
+ skip_bias = torch.randn(in_dim).to(dtype)
259
+
260
+ b2b_causal_conv1d(x, weight_proj, weight_mixer, skip_bias)
261
+
262
+ ```
263
+
264
+ #### Supported Kernel Sizes for B2B Causal Conv1d
265
+
266
+ | Kernel Type | Supported Sizes |
267
+ |------------|-----------------|
268
+ | Projection | 2, 3, 4, 8, 16, 32 |
269
+ | Mixer | 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256 |
270
+
271
+ ### CausalConv1d
272
+
273
+ Causal conv1d: the convolution operation is performed in a causal manner, meaning each position only attends to previous positions in the sequence.
274
+ In code terms,
275
+ ```python
276
+ in_dim = 8192
277
+ width = 8
278
+ dtype = torch.float32
279
+ model = nn.Conv1d(
280
+ in_dim,
281
+ in_dim,
282
+ width,
283
+ padding=width - 1,
284
+ groups=in_dim,
285
+ bias=False,
286
+ dtype=dtype,
287
+ device="cuda:0",
288
+ )
289
+ y = model(x)
290
+ ```
291
+ is equivalent to,
292
+ ```python
293
+ weight = torch.randn((in_dim, width))
294
+ causal_conv1d(x, weight)
295
+ ```
296
+
297
+ #### Supported Kernel Sizes for Causal Conv1d
298
+
299
+ | Kernel Type | Supported Sizes | Channel Last |
300
+ |------------|-----------------|-----------|
301
+ | CausalConv1d | <= 256 | False |
302
+ | CausalConv1d | <= 128 (64 fp64) | True |
303
+
304
+
305
+
306
+ ### FFT Conv1d
307
+
308
+ Non-causal 1D convolution using real FFT. Supports sequences up to FFT size 8192.
309
+
310
+ ```python
311
+ from subquadratic_ops_torch.fft_conv1d import fft_conv1d
312
+
313
+ batch_size, dim, seq_len, filter_dim = 64, 128, 512, 1024
314
+ x = torch.randn(batch_size, dim, seq_len, device="cuda")
315
+ weight = torch.randn(dim, filter_dim, device="cuda")
316
+ y = fft_conv1d(x, weight) # shape: (64, 128, 512)
317
+ ```
318
+
319
+ ### FFT CausalConv1d
320
+
321
+
322
+ FFT Causal Conv1d: the convolution operation is performed in a causal manner, meaning each position only attends to previous positions in the sequence. It uses real FFT and IFFT instead of direct summation for convolution.
323
+ In code terms,
324
+ ```python
325
+ in_dim = 8192
326
+ width = 1024
327
+ dtype = torch.float32
328
+ weight = torch.randn(1, in_dim, width)
329
+ def model(x, w):
330
+ fft_size = x.shape[-1] * 2
331
+ xf = torch.fft.rfft(x, n=fft_size, dim=-1)
332
+ wf = torch.fft.rfft(w, n=fft_size, dim=-1)
333
+ return torch.fft.irfft(xf*wf, n=fft_size, dim=-1)[..., :x.shape[-1]]
334
+
335
+ y = model(x, weight)
336
+
337
+ ```
338
+ is equivalent to,
339
+ ```python
340
+ weight = torch.randn((in_dim, width))
341
+ y = fft_causal_conv1d(x, weight)
342
+
343
+ ```
@@ -0,0 +1,10 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ [build-system]
5
+ requires = ["wheel-stub"]
6
+ build-backend = "wheel_stub.buildapi"
7
+
8
+ [tool.wheel_stub]
9
+ index_url = "https://pypi.nvidia.com/"
10
+ include_cuda_debuginfo = true