diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..41dfb0f --- /dev/null +++ b/.editorconfig @@ -0,0 +1,456 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 4 +indent_style = space +insert_final_newline = true +max_line_length = 120 +tab_width = 4 +ij_continuation_indent_size = 8 +ij_formatter_off_tag = @formatter:off +ij_formatter_on_tag = @formatter:on +ij_formatter_tags_enabled = true +ij_smart_tabs = false +ij_visual_guides = +ij_wrap_on_typing = false + +[.editorconfig] +ij_editorconfig_align_group_field_declarations = false +ij_editorconfig_space_after_colon = false +ij_editorconfig_space_after_comma = true +ij_editorconfig_space_before_colon = false +ij_editorconfig_space_before_comma = false +ij_editorconfig_spaces_around_assignment_operators = true + +[{*.c,*.c++,*.c++m,*.cc,*.ccm,*.cp,*.cpp,*.cppm,*.cu,*.cuh,*.cxx,*.cxxm,*.h,*.h++,*.hh,*.hp,*.hpp,*.hxx,*.i,*.icc,*.ii,*.inl,*.ino,*.ipp,*.ixx,*.m,*.mm,*.mxx,*.pch,*.tcc,*.tpp}] +ij_c_add_brief_tag = false +ij_c_add_getter_prefix = true +ij_c_add_setter_prefix = true +ij_c_align_dictionary_pair_values = false +ij_c_align_group_field_declarations = false +ij_c_align_init_list_in_columns = true +ij_c_align_multiline_array_initializer_expression = true +ij_c_align_multiline_assignment = true +ij_c_align_multiline_binary_operation = true +ij_c_align_multiline_chained_methods = false +ij_c_align_multiline_for = true +ij_c_align_multiline_ternary_operation = true +ij_c_array_initializer_comma_on_next_line = false +ij_c_array_initializer_new_line_after_left_brace = false +ij_c_array_initializer_right_brace_on_new_line = false +ij_c_array_initializer_wrap = normal +ij_c_assignment_wrap = off +ij_c_binary_operation_sign_on_next_line = false +ij_c_binary_operation_wrap = normal +ij_c_blank_lines_after_class_header = 0 +ij_c_blank_lines_after_imports = 1 +ij_c_blank_lines_around_class = 1 +ij_c_blank_lines_around_field = 0 +ij_c_blank_lines_around_field_in_interface = 0 +ij_c_blank_lines_around_method = 1 +ij_c_blank_lines_around_method_in_interface = 1 +ij_c_blank_lines_around_namespace = 0 +ij_c_blank_lines_around_properties_in_declaration = 0 +ij_c_blank_lines_around_properties_in_interface = 0 +ij_c_blank_lines_before_imports = 1 +ij_c_blank_lines_before_method_body = 0 +ij_c_block_brace_placement = end_of_line +ij_c_block_brace_style = end_of_line +ij_c_block_comment_at_first_column = true +ij_c_catch_on_new_line = false +ij_c_class_brace_style = end_of_line +ij_c_class_constructor_init_list_align_multiline = true +ij_c_class_constructor_init_list_comma_on_next_line = false +ij_c_class_constructor_init_list_new_line_after_colon = never +ij_c_class_constructor_init_list_new_line_before_colon = if_long +ij_c_class_constructor_init_list_wrap = normal +ij_c_copy_is_deep = false +ij_c_create_interface_for_categories = true +ij_c_declare_generated_methods = true +ij_c_description_include_member_names = true +ij_c_discharged_short_ternary_operator = false +ij_c_do_not_add_breaks = false +ij_c_do_while_brace_force = never +ij_c_else_on_new_line = false +ij_c_enum_constants_comma_on_next_line = false +ij_c_enum_constants_wrap = on_every_item +ij_c_for_brace_force = never +ij_c_for_statement_new_line_after_left_paren = false +ij_c_for_statement_right_paren_on_new_line = false +ij_c_for_statement_wrap = off +ij_c_function_brace_placement = end_of_line +ij_c_function_call_arguments_align_multiline = true +ij_c_function_call_arguments_align_multiline_pars = false 
+ij_c_function_call_arguments_comma_on_next_line = false +ij_c_function_call_arguments_new_line_after_lpar = false +ij_c_function_call_arguments_new_line_before_rpar = false +ij_c_function_call_arguments_wrap = normal +ij_c_function_non_top_after_return_type_wrap = normal +ij_c_function_parameters_align_multiline = true +ij_c_function_parameters_align_multiline_pars = false +ij_c_function_parameters_comma_on_next_line = false +ij_c_function_parameters_new_line_after_lpar = false +ij_c_function_parameters_new_line_before_rpar = false +ij_c_function_parameters_wrap = normal +ij_c_function_top_after_return_type_wrap = normal +ij_c_generate_additional_eq_operators = true +ij_c_generate_additional_rel_operators = true +ij_c_generate_class_constructor = true +ij_c_generate_comparison_operators_use_std_tie = false +ij_c_generate_instance_variables_for_properties = ask +ij_c_generate_operators_as_members = true +ij_c_header_guard_style_pattern = ${PROJECT_NAME}_${FILE_NAME}_${EXT} +ij_c_if_brace_force = never +ij_c_in_line_short_ternary_operator = true +ij_c_indent_block_comment = true +ij_c_indent_c_struct_members = 4 +ij_c_indent_case_from_switch = true +ij_c_indent_class_members = 4 +ij_c_indent_directive_as_code = false +ij_c_indent_implementation_members = 0 +ij_c_indent_inside_code_block = 4 +ij_c_indent_interface_members = 0 +ij_c_indent_interface_members_except_ivars_block = false +ij_c_indent_namespace_members = 4 +ij_c_indent_preprocessor_directive = 0 +ij_c_indent_visibility_keywords = 0 +ij_c_insert_override = true +ij_c_insert_virtual_with_override = false +ij_c_introduce_auto_consts = false +ij_c_introduce_auto_vars = false +ij_c_introduce_const_params = false +ij_c_introduce_const_vars = false +ij_c_introduce_constexpr_consts = false +ij_c_introduce_generate_property = false +ij_c_introduce_generate_synthesize = true +ij_c_introduce_globals_to_header = true +ij_c_introduce_prop_to_private_category = false +ij_c_introduce_static_consts = true +ij_c_introduce_use_ns_types = false +ij_c_ivars_prefix = _ +ij_c_ivars_suffix = +ij_c_keep_blank_lines_before_end = 2 +ij_c_keep_blank_lines_before_right_brace = 2 +ij_c_keep_blank_lines_in_code = 2 +ij_c_keep_blank_lines_in_declarations = 2 +ij_c_keep_case_expressions_in_one_line = false +ij_c_keep_control_statement_in_one_line = true +ij_c_keep_directive_at_first_column = true +ij_c_keep_first_column_comment = true +ij_c_keep_line_breaks = true +ij_c_keep_nested_namespaces_in_one_line = false +ij_c_keep_simple_blocks_in_one_line = true +ij_c_keep_simple_methods_in_one_line = true +ij_c_keep_structures_in_one_line = false +ij_c_lambda_capture_list_align_multiline = false +ij_c_lambda_capture_list_align_multiline_bracket = false +ij_c_lambda_capture_list_comma_on_next_line = false +ij_c_lambda_capture_list_new_line_after_lbracket = false +ij_c_lambda_capture_list_new_line_before_rbracket = false +ij_c_lambda_capture_list_wrap = off +ij_c_line_comment_add_space = false +ij_c_line_comment_at_first_column = true +ij_c_method_brace_placement = end_of_line +ij_c_method_call_arguments_align_by_colons = true +ij_c_method_call_arguments_align_multiline = false +ij_c_method_call_arguments_special_dictionary_pairs_treatment = true +ij_c_method_call_arguments_wrap = off +ij_c_method_call_chain_wrap = off +ij_c_method_parameters_align_by_colons = true +ij_c_method_parameters_align_multiline = false +ij_c_method_parameters_wrap = off +ij_c_namespace_brace_placement = end_of_line +ij_c_parentheses_expression_new_line_after_left_paren = false 
+ij_c_parentheses_expression_right_paren_on_new_line = false +ij_c_place_assignment_sign_on_next_line = false +ij_c_property_nonatomic = true +ij_c_put_ivars_to_implementation = true +ij_c_refactor_compatibility_aliases_and_classes = true +ij_c_refactor_properties_and_ivars = true +ij_c_release_style = ivar +ij_c_retain_object_parameters_in_constructor = true +ij_c_semicolon_after_method_signature = false +ij_c_shift_operation_align_multiline = true +ij_c_shift_operation_wrap = normal +ij_c_show_non_virtual_functions = false +ij_c_space_after_colon = true +ij_c_space_after_colon_in_foreach = true +ij_c_space_after_colon_in_selector = false +ij_c_space_after_comma = true +ij_c_space_after_cup_in_blocks = false +ij_c_space_after_dictionary_literal_colon = true +ij_c_space_after_for_semicolon = true +ij_c_space_after_init_list_colon = true +ij_c_space_after_method_parameter_type_parentheses = false +ij_c_space_after_method_return_type_parentheses = false +ij_c_space_after_pointer_in_declaration = false +ij_c_space_after_quest = true +ij_c_space_after_reference_in_declaration = false +ij_c_space_after_reference_in_rvalue = false +ij_c_space_after_structures_rbrace = true +ij_c_space_after_superclass_colon = true +ij_c_space_after_type_cast = true +ij_c_space_after_visibility_sign_in_method_declaration = true +ij_c_space_before_autorelease_pool_lbrace = true +ij_c_space_before_catch_keyword = true +ij_c_space_before_catch_left_brace = true +ij_c_space_before_catch_parentheses = true +ij_c_space_before_category_parentheses = true +ij_c_space_before_chained_send_message = true +ij_c_space_before_class_left_brace = true +ij_c_space_before_colon = true +ij_c_space_before_colon_in_foreach = false +ij_c_space_before_comma = false +ij_c_space_before_dictionary_literal_colon = false +ij_c_space_before_do_left_brace = true +ij_c_space_before_else_keyword = true +ij_c_space_before_else_left_brace = true +ij_c_space_before_export_lbrace = true +ij_c_space_before_for_left_brace = true +ij_c_space_before_for_parentheses = true +ij_c_space_before_for_semicolon = false +ij_c_space_before_if_left_brace = true +ij_c_space_before_if_parentheses = true +ij_c_space_before_init_list = false +ij_c_space_before_init_list_colon = true +ij_c_space_before_method_call_parentheses = false +ij_c_space_before_method_left_brace = true +ij_c_space_before_method_parentheses = false +ij_c_space_before_namespace_lbrace = true +ij_c_space_before_pointer_in_declaration = true +ij_c_space_before_property_attributes_parentheses = false +ij_c_space_before_protocols_brackets = true +ij_c_space_before_quest = true +ij_c_space_before_reference_in_declaration = true +ij_c_space_before_superclass_colon = true +ij_c_space_before_switch_left_brace = true +ij_c_space_before_switch_parentheses = true +ij_c_space_before_template_call_lt = false +ij_c_space_before_template_declaration_lt = false +ij_c_space_before_try_left_brace = true +ij_c_space_before_while_keyword = true +ij_c_space_before_while_left_brace = true +ij_c_space_before_while_parentheses = true +ij_c_space_between_adjacent_brackets = false +ij_c_space_between_operator_and_punctuator = false +ij_c_space_within_empty_array_initializer_braces = false +ij_c_spaces_around_additive_operators = true +ij_c_spaces_around_assignment_operators = true +ij_c_spaces_around_bitwise_operators = true +ij_c_spaces_around_equality_operators = true +ij_c_spaces_around_lambda_arrow = true +ij_c_spaces_around_logical_operators = true +ij_c_spaces_around_multiplicative_operators = true 
+ij_c_spaces_around_pm_operators = false +ij_c_spaces_around_relational_operators = true +ij_c_spaces_around_shift_operators = true +ij_c_spaces_around_unary_operator = false +ij_c_spaces_within_array_initializer_braces = false +ij_c_spaces_within_braces = true +ij_c_spaces_within_brackets = false +ij_c_spaces_within_cast_parentheses = false +ij_c_spaces_within_catch_parentheses = false +ij_c_spaces_within_category_parentheses = false +ij_c_spaces_within_empty_braces = false +ij_c_spaces_within_empty_function_call_parentheses = false +ij_c_spaces_within_empty_function_declaration_parentheses = false +ij_c_spaces_within_empty_lambda_capture_list_bracket = false +ij_c_spaces_within_empty_template_call_ltgt = false +ij_c_spaces_within_empty_template_declaration_ltgt = false +ij_c_spaces_within_for_parentheses = false +ij_c_spaces_within_function_call_parentheses = false +ij_c_spaces_within_function_declaration_parentheses = false +ij_c_spaces_within_if_parentheses = false +ij_c_spaces_within_lambda_capture_list_bracket = false +ij_c_spaces_within_method_parameter_type_parentheses = false +ij_c_spaces_within_method_return_type_parentheses = false +ij_c_spaces_within_parentheses = false +ij_c_spaces_within_property_attributes_parentheses = false +ij_c_spaces_within_protocols_brackets = false +ij_c_spaces_within_send_message_brackets = false +ij_c_spaces_within_structured_binding_list_bracket = false +ij_c_spaces_within_switch_parentheses = false +ij_c_spaces_within_template_call_ltgt = false +ij_c_spaces_within_template_declaration_ltgt = false +ij_c_spaces_within_template_double_gt = true +ij_c_spaces_within_while_parentheses = false +ij_c_special_else_if_treatment = true +ij_c_structured_binding_list_align_multiline = false +ij_c_structured_binding_list_align_multiline_bracket = false +ij_c_structured_binding_list_comma_on_next_line = false +ij_c_structured_binding_list_new_line_after_lbracket = false +ij_c_structured_binding_list_new_line_before_rbracket = false +ij_c_structured_binding_list_wrap = off +ij_c_superclass_list_after_colon = never +ij_c_superclass_list_align_multiline = true +ij_c_superclass_list_before_colon = if_long +ij_c_superclass_list_comma_on_next_line = false +ij_c_superclass_list_wrap = on_every_item +ij_c_tag_prefix_of_block_comment = at +ij_c_tag_prefix_of_line_comment = back_slash +ij_c_template_call_arguments_align_multiline = false +ij_c_template_call_arguments_align_multiline_pars = false +ij_c_template_call_arguments_comma_on_next_line = false +ij_c_template_call_arguments_new_line_after_lt = false +ij_c_template_call_arguments_new_line_before_gt = false +ij_c_template_call_arguments_wrap = off +ij_c_template_declaration_function_body_indent = false +ij_c_template_declaration_function_wrap = split_into_lines +ij_c_template_declaration_struct_body_indent = false +ij_c_template_declaration_struct_wrap = split_into_lines +ij_c_template_parameters_align_multiline = false +ij_c_template_parameters_align_multiline_pars = false +ij_c_template_parameters_comma_on_next_line = false +ij_c_template_parameters_new_line_after_lt = false +ij_c_template_parameters_new_line_before_gt = false +ij_c_template_parameters_wrap = off +ij_c_ternary_operation_signs_on_next_line = true +ij_c_ternary_operation_wrap = normal +ij_c_type_qualifiers_placement = before +ij_c_use_modern_casts = true +ij_c_use_setters_in_constructor = true +ij_c_while_brace_force = never +ij_c_while_on_new_line = false +ij_c_wrap_property_declaration = off + +[{*.cmake,CMakeLists.txt}] 
+ij_cmake_align_command_call_r_par = false +ij_cmake_align_control_flow_r_par = false +ij_cmake_align_multiline_parameters_in_calls = false +ij_cmake_force_commands_case = 2 +ij_cmake_keep_blank_lines_in_code = 2 +ij_cmake_space_before_for_parentheses = true +ij_cmake_space_before_if_parentheses = true +ij_cmake_space_before_method_call_parentheses = false +ij_cmake_space_before_method_parentheses = false +ij_cmake_space_before_while_parentheses = true +ij_cmake_spaces_within_for_parentheses = false +ij_cmake_spaces_within_if_parentheses = false +ij_cmake_spaces_within_method_call_parentheses = false +ij_cmake_spaces_within_method_parentheses = false +ij_cmake_spaces_within_while_parentheses = false + + +[{*.kt,*.kts}] +ij_kotlin_align_in_columns_case_branch = false +ij_kotlin_align_multiline_binary_operation = false +ij_kotlin_align_multiline_extends_list = false +ij_kotlin_align_multiline_method_parentheses = false +ij_kotlin_align_multiline_parameters = true +ij_kotlin_align_multiline_parameters_in_calls = false +ij_kotlin_allow_trailing_comma = false +ij_kotlin_allow_trailing_comma_on_call_site = false +ij_kotlin_assignment_wrap = normal +ij_kotlin_blank_lines_after_class_header = 0 +ij_kotlin_blank_lines_around_block_when_branches = 0 +ij_kotlin_blank_lines_before_declaration_with_comment_or_annotation_on_separate_line = 1 +ij_kotlin_block_comment_add_space = false +ij_kotlin_block_comment_at_first_column = true +ij_kotlin_call_parameters_new_line_after_left_paren = true +ij_kotlin_call_parameters_right_paren_on_new_line = true +ij_kotlin_call_parameters_wrap = on_every_item +ij_kotlin_catch_on_new_line = false +ij_kotlin_class_annotation_wrap = split_into_lines +ij_kotlin_code_style_defaults = KOTLIN_OFFICIAL +ij_kotlin_continuation_indent_for_chained_calls = false +ij_kotlin_continuation_indent_for_expression_bodies = false +ij_kotlin_continuation_indent_in_argument_lists = false +ij_kotlin_continuation_indent_in_elvis = false +ij_kotlin_continuation_indent_in_if_conditions = false +ij_kotlin_continuation_indent_in_parameter_lists = false +ij_kotlin_continuation_indent_in_supertype_lists = false +ij_kotlin_else_on_new_line = false +ij_kotlin_enum_constants_wrap = off +ij_kotlin_extends_list_wrap = normal +ij_kotlin_field_annotation_wrap = split_into_lines +ij_kotlin_finally_on_new_line = false +ij_kotlin_if_rparen_on_new_line = true +ij_kotlin_import_nested_classes = false +ij_kotlin_imports_layout = *,java.**,javax.**,kotlin.**,^ +ij_kotlin_insert_whitespaces_in_simple_one_line_method = true +ij_kotlin_keep_blank_lines_before_right_brace = 2 +ij_kotlin_keep_blank_lines_in_code = 2 +ij_kotlin_keep_blank_lines_in_declarations = 2 +ij_kotlin_keep_first_column_comment = true +ij_kotlin_keep_indents_on_empty_lines = false +ij_kotlin_keep_line_breaks = true +ij_kotlin_lbrace_on_next_line = false +ij_kotlin_line_break_after_multiline_when_entry = true +ij_kotlin_line_comment_add_space = false +ij_kotlin_line_comment_add_space_on_reformat = false +ij_kotlin_line_comment_at_first_column = true +ij_kotlin_method_annotation_wrap = split_into_lines +ij_kotlin_method_call_chain_wrap = normal +ij_kotlin_method_parameters_new_line_after_left_paren = true +ij_kotlin_method_parameters_right_paren_on_new_line = true +ij_kotlin_method_parameters_wrap = on_every_item +ij_kotlin_name_count_to_use_star_import = 5 +ij_kotlin_name_count_to_use_star_import_for_members = 3 +ij_kotlin_packages_to_use_import_on_demand = java.util.*,kotlinx.android.synthetic.**,io.ktor.** +ij_kotlin_parameter_annotation_wrap = 
off +ij_kotlin_space_after_comma = true +ij_kotlin_space_after_extend_colon = true +ij_kotlin_space_after_type_colon = true +ij_kotlin_space_before_catch_parentheses = true +ij_kotlin_space_before_comma = false +ij_kotlin_space_before_extend_colon = true +ij_kotlin_space_before_for_parentheses = true +ij_kotlin_space_before_if_parentheses = true +ij_kotlin_space_before_lambda_arrow = true +ij_kotlin_space_before_type_colon = false +ij_kotlin_space_before_when_parentheses = true +ij_kotlin_space_before_while_parentheses = true +ij_kotlin_spaces_around_additive_operators = true +ij_kotlin_spaces_around_assignment_operators = true +ij_kotlin_spaces_around_equality_operators = true +ij_kotlin_spaces_around_function_type_arrow = true +ij_kotlin_spaces_around_logical_operators = true +ij_kotlin_spaces_around_multiplicative_operators = true +ij_kotlin_spaces_around_range = false +ij_kotlin_spaces_around_relational_operators = true +ij_kotlin_spaces_around_unary_operator = false +ij_kotlin_spaces_around_when_arrow = true +ij_kotlin_variable_annotation_wrap = off +ij_kotlin_while_on_new_line = false +ij_kotlin_wrap_elvis_expressions = 1 +ij_kotlin_wrap_expression_body_functions = 1 +ij_kotlin_wrap_first_method_in_call_chain = false + +[{*.markdown,*.md}] +ij_markdown_force_one_space_after_blockquote_symbol = true +ij_markdown_force_one_space_after_header_symbol = true +ij_markdown_force_one_space_after_list_bullet = true +ij_markdown_force_one_space_between_words = true +ij_markdown_format_tables = true +ij_markdown_insert_quote_arrows_on_wrap = true +ij_markdown_keep_indents_on_empty_lines = false +ij_markdown_keep_line_breaks_inside_text_blocks = true +ij_markdown_max_lines_around_block_elements = 1 +ij_markdown_max_lines_around_header = 1 +ij_markdown_max_lines_between_paragraphs = 1 +ij_markdown_min_lines_around_block_elements = 1 +ij_markdown_min_lines_around_header = 1 +ij_markdown_min_lines_between_paragraphs = 1 +ij_markdown_wrap_text_if_long = true +ij_markdown_wrap_text_inside_blockquotes = true + +[{*.properties,spring.handlers,spring.schemas}] +ij_properties_align_group_field_declarations = false +ij_properties_keep_blank_lines = false +ij_properties_key_value_delimiter = equals +ij_properties_spaces_around_key_value_delimiter = false + +[{*.yaml,*.yml}] +indent_size = 2 +ij_yaml_align_values_properties = do_not_align +ij_yaml_autoinsert_sequence_marker = true +ij_yaml_block_mapping_on_new_line = false +ij_yaml_indent_sequence_value = true +ij_yaml_keep_indents_on_empty_lines = false +ij_yaml_keep_line_breaks = true +ij_yaml_sequence_on_new_line = false +ij_yaml_space_before_colon = false +ij_yaml_spaces_within_braces = true +ij_yaml_spaces_within_brackets = true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5dbefb8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +### IDE +.idea/ + +### Gradle +.gradle/ +.kotlin/ +build/ + +### CMake +cmake-build-*/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..16006b4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,26 @@ +3-Clause BSD NON-AI License + +Copyright 2024 Martmists + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +4. The source code and the binary form, and any modifications made to them may not be used for the purpose of training or improving machine learning algorithms, +including but not limited to artificial intelligence, natural language processing, or data mining. This condition applies to any derivatives, +modifications, or updates based on the Software code. Any usage of the source code or the binary form in an AI-training dataset is considered a breach of this License. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..5637456 --- /dev/null +++ b/README.md @@ -0,0 +1,19 @@ +# NDArray.SIMD + +A Kotlin NDArray library with built-in SIMD support. + +### Installation + +Coming soon:tm: to my Maven repo. + +### Motivation + +I made this primarily because [Viktor](https://github.com/JetBrains-Research/viktor) doesn't really utilize its SIMD capabilities. + +As such, the JVM code for NDArray is largely the same as Viktor's. + +### License + +The nativeMain and jvmMain sourcesets are licensed under the [3-Clause BSD NON-AI License](https://github.com/non-ai-licenses/non-ai-licenses/blob/main/NON-AI-BSD3), with @Martmists-GH as the copyright holder. + +The commonMain sourceset is mostly copied/adapted from Viktor, and as such is licensed under the [original MIT license](https://github.com/JetBrains-Research/viktor/blob/master/LICENSE), with JetBrains BioLabs as the copyright holder.
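+
+### Example
+
+A rough sketch of the current API (names taken from the `F64Array` interface in this repo; the exact surface may change before release):
+
+```kotlin
+val a = F64Array.of(1.0, 2.0, 3.0, 4.0).reshape(2, 2)
+val b = F64Array.identity(2)
+val c = a + b  // element-wise, copying; `a += b` mutates in place
+println(c.sum())  // 12.0
+```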
diff --git a/build.gradle.kts b/build.gradle.kts new file mode 100644 index 0000000..6f268aa --- /dev/null +++ b/build.gradle.kts @@ -0,0 +1,148 @@ +import com.github.tomtzook.gcmake.tasks.CmakeBuildTask +import org.gradle.jvm.tasks.Jar +import org.jetbrains.kotlin.gradle.plugin.mpp.KotlinNativeTarget +import org.jetbrains.kotlin.gradle.plugin.mpp.KotlinNativeTargetWithSimulatorTests +import org.jetbrains.kotlin.gradle.targets.native.tasks.artifact.KotlinNativeLinkArtifactTask +import org.jetbrains.kotlin.gradle.tasks.KotlinNativeCompile +import org.jetbrains.kotlin.gradle.tasks.KotlinNativeLink + +plugins { + kotlin("multiplatform") version "2.0.0" + id("io.github.tomtzook.gradle-cmake") version "1.2.2" +} + +group = "com.martmists" +version = "1.0-SNAPSHOT" + +repositories { + mavenCentral() +} + +cmake { + targets { + val simd by creating { + cmakeLists = file("cmake/CMakeLists.txt") + + val linuxX64 by machines.customMachines.registering { + toolchainFile = file("cmake/toolchains/linux-x64.cmake") + } + val linuxArm64 by machines.customMachines.registering { + toolchainFile = file("cmake/toolchains/linux-arm64.cmake") + } + val mingwX64 by machines.customMachines.registering { + toolchainFile = file("cmake/toolchains/mingw-x64.cmake") + } + val mingwArm64 by machines.customMachines.registering { + toolchainFile = file("cmake/toolchains/mingw-arm64.cmake") + } + val macosX64 by machines.customMachines.registering { + toolchainFile = file("cmake/toolchains/macos-x64.cmake") + } + val macosArm64 by machines.customMachines.registering { + toolchainFile = file("cmake/toolchains/macos-arm64.cmake") + } + + if (project.hasProperty("production")) { + targetMachines.add(linuxX64) + targetMachines.add(linuxArm64) + targetMachines.add(mingwX64) +// targetMachines.add(mingwArm64) + targetMachines.add(macosX64) + targetMachines.add(macosArm64) + } else { + targetMachines.add(linuxX64) + } + + cmakeArgs = if (project.hasProperty("production")) { + listOf("-DCMAKE_BUILD_TYPE=Release") + } else { + listOf("-DCMAKE_BUILD_TYPE=Debug") + } + } + } +} + +kotlin { + jvm() + + val natives = if (project.hasProperty("production")) { + listOf( + linuxX64(), + linuxArm64(), + mingwX64(), +// mingwArm64(), + macosX64(), + macosArm64(), + ) + } else { + val osName = System.getProperty("os.name") + when { + osName == "Linux" -> listOf(linuxX64()) + osName.startsWith("Windows") -> listOf(mingwX64()) + osName == "Mac OS X" -> listOf(macosX64()) + else -> error("Unsupported OS") + } + } + + for (native in natives) { + native.apply { + binaries { + sharedLib { + baseName = "ndarray_simd" + } + } + + compilerOptions { + optIn = listOf( + "kotlin.experimental.ExperimentalNativeApi", + "kotlinx.cinterop.ExperimentalForeignApi", + ) + } + + compilations.named("main") { + val jni by cinterops.creating { + val javaHome = File(System.getProperty("java.home")!!)
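+                    // The JNI headers ship with the JDK under ${'$'}{java.home}/include; the
+                    // platform-specific subdirectories below cover every targeted OS, and
+                    // only the one matching the build host is expected to exist.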
+ defFile(project.projectDir.resolve("src/nativeMain/cinterops/jni.def")) + includeDirs( + javaHome.resolve("include"), + javaHome.resolve("include/linux"), + javaHome.resolve("include/darwin"), + javaHome.resolve("include/win32"), + ) + } + + val simd by cinterops.creating { + defFile(project.projectDir.resolve("src/nativeMain/cinterops/simd.def")) + includeDirs( + project.projectDir.resolve("src/lib"), + ) + + extraOpts("-libraryPath", projectDir.resolve("build/cmake/simd/${target.name}/").absolutePath) + } + } + } + } +} + +tasks { + val cmakeBuild by existing + + withType<KotlinNativeCompile> { + dependsOn(cmakeBuild) + } + + val jvmProcessResources by existing(Copy::class) { + val binaryName = if (project.hasProperty("production")) { + "releaseShared" + } else { + "debugShared" + } + + for (native in kotlin.targets.withType<KotlinNativeTarget>()) { + into("META-INF/natives/${native.targetName}") { + from(named(native.binaries.getByName(binaryName).linkTaskName)) { + exclude("**/*.h") + } + } + } + } +} diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt new file mode 100644 index 0000000..3e894cf --- /dev/null +++ b/cmake/CMakeLists.txt @@ -0,0 +1,10 @@ +cmake_minimum_required(VERSION 3.30.0) +project(simd) + +file(GLOB_RECURSE source_files ../src/lib/cpp/*.cpp) + +include_directories(../src/lib/public) + +add_library( + simd STATIC ${source_files} +) diff --git a/cmake/arch/arm64.cmake b/cmake/arch/arm64.cmake new file mode 100644 index 0000000..0ca6d1d --- /dev/null +++ b/cmake/arch/arm64.cmake @@ -0,0 +1,2 @@ +set(CMAKE_SYSTEM_PROCESSOR aarch64) +set("CMAKE_C_FLAGS" "-march=armv8-a --no-standard-libraries") diff --git a/cmake/arch/x64.cmake b/cmake/arch/x64.cmake new file mode 100644 index 0000000..3f8b54f --- /dev/null +++ b/cmake/arch/x64.cmake @@ -0,0 +1,2 @@ +set(CMAKE_SYSTEM_PROCESSOR x86_64) +set("CMAKE_C_FLAGS" "-march=x86-64 --no-standard-libraries") diff --git a/cmake/platform/linux.cmake b/cmake/platform/linux.cmake new file mode 100644 index 0000000..3058d51 --- /dev/null +++ b/cmake/platform/linux.cmake @@ -0,0 +1,2 @@ +set(CMAKE_SYSTEM_NAME Linux) + diff --git a/cmake/platform/macos.cmake b/cmake/platform/macos.cmake new file mode 100644 index 0000000..8c0a861 --- /dev/null +++ b/cmake/platform/macos.cmake @@ -0,0 +1 @@ +set(CMAKE_SYSTEM_NAME Darwin) diff --git a/cmake/platform/windows.cmake b/cmake/platform/windows.cmake new file mode 100644 index 0000000..572f3fb --- /dev/null +++ b/cmake/platform/windows.cmake @@ -0,0 +1 @@ +set(CMAKE_SYSTEM_NAME Windows) diff --git a/cmake/toolchains/common.cmake b/cmake/toolchains/common.cmake new file mode 100644 index 0000000..612f58a --- /dev/null +++ b/cmake/toolchains/common.cmake @@ -0,0 +1,2 @@ +set(CMAKE_C_FLAGS "-O2 -fPIC") +set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}") diff --git a/cmake/toolchains/linux-arm64.cmake b/cmake/toolchains/linux-arm64.cmake new file mode 100644 index 0000000..c2cd4cd --- /dev/null +++ b/cmake/toolchains/linux-arm64.cmake @@ -0,0 +1,3 @@ +include("${CMAKE_CURRENT_LIST_DIR}/common.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/../platform/linux.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/../arch/arm64.cmake") diff --git a/cmake/toolchains/linux-x64.cmake b/cmake/toolchains/linux-x64.cmake new file mode 100644 index 0000000..dfae332 --- /dev/null +++ b/cmake/toolchains/linux-x64.cmake @@ -0,0 +1,3 @@ +include("${CMAKE_CURRENT_LIST_DIR}/common.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/../platform/linux.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/../arch/x64.cmake") diff --git a/cmake/toolchains/macos-arm64.cmake
b/cmake/toolchains/macos-arm64.cmake new file mode 100644 index 0000000..a1e4401 --- /dev/null +++ b/cmake/toolchains/macos-arm64.cmake @@ -0,0 +1,3 @@ +include("${CMAKE_CURRENT_LIST_DIR}/common.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/../platform/macos.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/../arch/arm64.cmake") diff --git a/cmake/toolchains/macos-x64.cmake b/cmake/toolchains/macos-x64.cmake new file mode 100644 index 0000000..b2d4e5f --- /dev/null +++ b/cmake/toolchains/macos-x64.cmake @@ -0,0 +1,3 @@ +include("${CMAKE_CURRENT_LIST_DIR}/common.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/../platform/macos.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/../arch/x64.cmake") diff --git a/cmake/toolchains/mingw-arm64.cmake b/cmake/toolchains/mingw-arm64.cmake new file mode 100644 index 0000000..d02f294 --- /dev/null +++ b/cmake/toolchains/mingw-arm64.cmake @@ -0,0 +1,3 @@ +include("${CMAKE_CURRENT_LIST_DIR}/common.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/../platform/windows.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/../arch/arm64.cmake") diff --git a/cmake/toolchains/mingw-x64.cmake b/cmake/toolchains/mingw-x64.cmake new file mode 100644 index 0000000..c611168 --- /dev/null +++ b/cmake/toolchains/mingw-x64.cmake @@ -0,0 +1,3 @@ +include("${CMAKE_CURRENT_LIST_DIR}/common.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/../platform/windows.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/../arch/x64.cmake") diff --git a/gradle.properties b/gradle.properties new file mode 100644 index 0000000..4d40fac --- /dev/null +++ b/gradle.properties @@ -0,0 +1,9 @@ +org.gradle.parallel=true + +kotlin.code.style=official +kotlin.native.ignoreDisabledTargets=true +kotlin.mpp.enableCInteropCommonization=true +kotlin.mpp.enableCInteropCommonization.nowarn=true +kotlin.native.binary.sourceInfoType=libbacktrace + +kotlin.native.cacheKind.linuxX64=none diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..249e583 Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..d681a49 --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Mon Jul 15 14:03:33 CEST 2024 +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100755 index 0000000..6fea4ec --- /dev/null +++ b/gradlew @@ -0,0 +1,234 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. 
If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit + +APP_NAME="Gradle" +APP_BASE_NAME=${0##*/} + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 
+ +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the lib class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + +# Collect all arguments for the java command; +# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of +# shell script including quotes and variable substitutions, so put them in +# double quotes to make sure that they get re-expanded; and +# * put everything else in single quotes, so that it's not re-expanded. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000..ac1b06f --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,89 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. 
+@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/settings.gradle.kts b/settings.gradle.kts new file mode 100644 index 0000000..2188e23 --- /dev/null +++ b/settings.gradle.kts @@ -0,0 +1,9 @@ +pluginManagement { + repositories { + gradlePluginPortal() + google() + } +} + +rootProject.name = "kt-ndarray-simd" + diff --git a/src/commonMain/kotlin/com/martmists/ndarray/simd/F64Array.kt b/src/commonMain/kotlin/com/martmists/ndarray/simd/F64Array.kt new file mode 100644 index 0000000..ced145d --- /dev/null +++ b/src/commonMain/kotlin/com/martmists/ndarray/simd/F64Array.kt @@ -0,0 +1,258 @@ +package com.martmists.ndarray.simd + +import com.martmists.ndarray.simd.impl.create +import com.martmists.ndarray.simd.impl.product +import com.martmists.ndarray.simd.impl.unsupported +import kotlin.jvm.JvmName +import kotlin.math.* + + +/** + * The default methods are naive implementations for anyone wishing to implement their own NDArrays. + * However, I would recommend using delegation to [F64ArrayImpl] for most use-cases. 
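+ *
+ * A small usage sketch (using the companion-object factories declared below):
+ *
+ * ```
+ * val a = F64Array.of(1.0, 2.0, 3.0, 4.0).reshape(2, 2)
+ * a += F64Array.identity(2)  // element-wise, in place
+ * val mask = a gt 2.0        // comparisons produce 1.0/0.0 masks
+ * ```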
+ */ +interface F64Array { + val data: DoubleArray + val offset: Int + val strides: IntArray + val shape: IntArray + val unrollDim: Int + val unrollStride: Int + val unrollSize: Int + + val nDim: Int + get() = shape.size + val length: Int + get() = shape[0] + val isFlattenable: Boolean + + fun checkShape(other: F64Array): F64Array { + check(this === other || shape.contentEquals(other.shape)) { + "operands shapes do not match: ${shape.contentToString()} vs ${other.shape.contentToString()}" + } + return other + } + + operator fun get(vararg indices: Int): Double + operator fun get(r: Int, c: Int): Double + operator fun get(d: Int, r: Int, c: Int): Double + + operator fun set(vararg indices: Int, value: Double) + operator fun set(r: Int, c: Int, value: Double) + operator fun set(d: Int, r: Int, c: Int, value: Double) + + fun along(axis: Int): Sequence<F64Array> = (0 until shape[axis]).asSequence().map { view(it, axis) } + fun view(index: Int, axis: Int = 0): F64Array = unsupported() + + val V: Viewer + + fun copy(): F64Array = F64Array.create(data.copyOf(), offset, strides.copyOf(), shape.copyOf()) + fun copyTo(other: F64Array) = other.zipTransformInPlace(this) { _, d -> d } + fun reshape(vararg shape: Int): F64Array = flatten().reshape(*shape) + fun flatten(): F64FlatArray = unsupported() + fun slice(from: Int = 0, to: Int = -1, step: Int = 1, axis: Int = 0): F64Array + operator fun contains(other: Double): Boolean + fun fill(value: Double) = transformInPlace { value } + fun reorder(indices: IntArray, axis: Int = 0): Unit = unsupported() + infix fun dot(other: F64Array): Double = unsupported() + fun sum(): Double = reduce { acc, d -> acc + d } + fun min(): Double = fold(Double.POSITIVE_INFINITY) { acc, d -> if (d < acc) d else acc } + fun max(): Double = fold(Double.NEGATIVE_INFINITY) { acc, d -> if (d > acc) d else acc } + fun product(): Double = reduce { acc, d -> acc * d } + fun mean(): Double = sum() / shape.product() + fun variance(): Double = fold(0.0) { acc, d -> acc + (d - mean()).pow(2) } / shape.product() + fun stdDev(): Double = sqrt(variance()) + fun cumSumInPlace() { + var sum = 0.0 + transformInPlace { sum += it; sum } + } + fun cumSum(): F64Array = copy().apply { cumSumInPlace() } + fun coerceInPlace(min: Double, max: Double) = transformInPlace { it.coerceIn(min, max) } + fun coerce(min: Double, max: Double): F64Array = copy().apply { coerceInPlace(min, max) } + fun transformInPlace(transform: (Double) -> Double) + fun transform(transform: (Double) -> Double): F64Array = copy().apply { transformInPlace(transform) } + fun zipTransformInPlace(other: F64Array, transform: (Double, Double) -> Double) + fun zipTransform(other: F64Array, transform: (Double, Double) -> Double): F64Array = copy().apply { zipTransformInPlace(other, transform) } + fun <T> fold(initial: T, operation: (acc: T, Double) -> T): T + fun reduce(operation: (Double, Double) -> Double): Double + fun expInPlace() = transformInPlace(::exp) + fun exp(): F64Array = copy().apply { expInPlace() } + fun expm1InPlace() = transformInPlace(::expm1) + fun expm1(): F64Array = copy().apply { expm1InPlace() } + fun logInPlace() = transformInPlace(::ln) + fun log(): F64Array = copy().apply { logInPlace() } + fun log1pInPlace() = transformInPlace(::ln1p) + fun log1p(): F64Array = copy().apply { log1pInPlace() } + fun log2InPlace() = transformInPlace(::log2) + fun log2(): F64Array = copy().apply { log2InPlace() } + fun log10InPlace() = transformInPlace(::log10) + fun log10(): F64Array = copy().apply { log10InPlace() } + fun 
logBaseInPlace(base: Double) = transformInPlace { log2(it) / log2(base) } // On some systems this is fastest, on others it's slowest? + fun logBase(base: Double): F64Array = copy().apply { logBaseInPlace(base) } + fun sqrtInPlace() = transformInPlace(::sqrt) + fun sqrt(): F64Array = copy().apply { sqrtInPlace() } + fun powInPlace(power: Double) = transformInPlace { it.pow(power) } + fun pow(power: Double): F64Array = copy().apply { powInPlace(power) } + fun ipowInPlace(base: Double) = transformInPlace { base.pow(it) } + fun ipow(base: Double): F64Array = copy().apply { ipowInPlace(base) } + + operator fun unaryPlus(): F64Array = this + fun unaryMinusInPlace() = transformInPlace(Double::unaryMinus) + operator fun unaryMinus(): F64Array = copy().apply { unaryMinusInPlace() } + operator fun plusAssign(other: F64Array) = zipTransformInPlace(other) { a, b -> a + b } + operator fun plus(other: F64Array): F64Array = copy().apply { plusAssign(other) } + operator fun plusAssign(other: Double) = transformInPlace { it + other } + operator fun plus(other: Double): F64Array = copy().apply { plusAssign(other) } + operator fun minusAssign(other: F64Array) = zipTransformInPlace(other) { a, b -> a - b } + operator fun minus(other: F64Array): F64Array = copy().apply { minusAssign(other) } + operator fun minusAssign(other: Double) = transformInPlace { it - other } + operator fun minus(other: Double): F64Array = copy().apply { minusAssign(other) } + operator fun timesAssign(other: F64Array) = zipTransformInPlace(other) { a, b -> a * b } + operator fun times(other: F64Array): F64Array = copy().apply { timesAssign(other) } + operator fun timesAssign(other: Double) = transformInPlace { it * other } + operator fun times(other: Double): F64Array = copy().apply { timesAssign(other) } + operator fun divAssign(other: F64Array) = zipTransformInPlace(other) { a, b -> a / b } + operator fun div(other: F64Array): F64Array = copy().apply { divAssign(other) } + operator fun divAssign(other: Double) = transformInPlace { it / other } + operator fun div(other: Double): F64Array = copy().apply { divAssign(other) } + fun absInPlace() = transformInPlace(::abs) + fun abs(): F64Array = copy().apply { absInPlace() } + + fun ltInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> if (a < b) 1.0 else 0.0 } + infix fun lt(other: F64Array): F64Array = copy().apply { ltInPlace(other) } + fun ltInPlace(other: Double) = transformInPlace { if (it < other) 1.0 else 0.0 } + infix fun lt(other: Double): F64Array = copy().apply { ltInPlace(other) } + fun lteInPlace(other: F64Array) { + gtInPlace(other) + xorInPlace(1) + } + infix fun lte(other: F64Array): F64Array = copy().apply { lteInPlace(other) } + fun lteInPlace(other: Double) { + gtInPlace(other) + xorInPlace(1) + } + infix fun lte(other: Double): F64Array = copy().apply { lteInPlace(other) } + fun gtInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> if (a > b) 1.0 else 0.0 } + infix fun gt(other: F64Array): F64Array = copy().apply { gtInPlace(other) } + fun gtInPlace(other: Double) = transformInPlace { if (it > other) 1.0 else 0.0 } + infix fun gt(other: Double): F64Array = copy().apply { gtInPlace(other) } + fun gteInPlace(other: F64Array) { + ltInPlace(other) + xorInPlace(1) + } + infix fun gte(other: F64Array): F64Array = copy().apply { gteInPlace(other) } + fun gteInPlace(other: Double) { + ltInPlace(other) + xorInPlace(1) + } + infix fun gte(other: Double): F64Array = copy().apply { gteInPlace(other) } + fun eqInPlace(other: F64Array) = 
zipTransformInPlace(other) { a, b -> if (a == b) 1.0 else 0.0 } + infix fun eq(other: F64Array): F64Array = copy().apply { eqInPlace(other) } + fun eqInPlace(other: Double) = transformInPlace { if (it == other) 1.0 else 0.0 } + infix fun eq(other: Double): F64Array = copy().apply { eqInPlace(other) } + fun neqInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> if (a != b) 1.0 else 0.0 } + infix fun neq(other: F64Array): F64Array = copy().apply { neqInPlace(other) } + fun neqInPlace(other: Double) = transformInPlace { if (it != other) 1.0 else 0.0 } + infix fun neq(other: Double): F64Array = copy().apply { neqInPlace(other) } + + fun andInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> if (a != 0.0 && b != 0.0) 1.0 else 0.0 } + infix fun and(other: F64Array): F64Array = copy().apply { andInPlace(other) } + fun andInPlace(other: Int) = transformInPlace { (it.toInt() and other).toDouble() } + infix fun and(other: Int): F64Array = copy().apply { andInPlace(other) } + fun orInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> if (a != 0.0 || b != 0.0) 1.0 else 0.0 } + infix fun or(other: F64Array): F64Array = copy().apply { orInPlace(other) } + fun orInPlace(other: Int) = transformInPlace { (it.toInt() or other).toDouble() } + infix fun or(other: Int): F64Array = copy().apply { orInPlace(other) } + fun xorInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> if (a != b) 1.0 else 0.0 } + infix fun xor(other: F64Array): F64Array = copy().apply { xorInPlace(other) } + fun xorInPlace(other: Int) = transformInPlace { (it.toInt() xor other).toDouble() } + infix fun xor(other: Int): F64Array = copy().apply { xorInPlace(other) } + fun notInPlace() = transformInPlace { it.toInt().inv().toDouble() } + fun not(): F64Array = copy().apply { notInPlace() } + fun shlInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> (a.toInt() shl b.toInt()).toDouble() } + infix fun shl(other: F64Array): F64Array = copy().apply { shlInPlace(other) } + fun shlInPlace(other: Int) = transformInPlace { (it.toInt() shl other).toDouble() } + infix fun shl(other: Int): F64Array = copy().apply { shlInPlace(other) } + fun shrInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> (a.toInt() shr b.toInt()).toDouble() } + infix fun shr(other: F64Array): F64Array = copy().apply { shrInPlace(other) } + fun shrInPlace(other: Int) = transformInPlace { (it.toInt() shr other).toDouble() } + infix fun shr(other: Int): F64Array = copy().apply { shrInPlace(other) } + + fun sinInPlace() = transformInPlace(::sin) + fun sin(): F64Array = copy().apply { sinInPlace() } + fun cosInPlace() = transformInPlace(::cos) + fun cos(): F64Array = copy().apply { cosInPlace() } + fun tanInPlace() = transformInPlace(::tan) + fun tan(): F64Array = copy().apply { tanInPlace() } + fun asinInPlace() = transformInPlace(::asin) + fun asin(): F64Array = copy().apply { asinInPlace() } + fun acosInPlace() = transformInPlace(::acos) + fun acos(): F64Array = copy().apply { acosInPlace() } + fun atanInPlace() = transformInPlace(::atan) + fun atan(): F64Array = copy().apply { atanInPlace() } + fun atan2InPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> atan2(a, b) } + fun atan2(other: F64Array): F64Array = copy().apply { atan2InPlace(other) } + fun sinhInPlace() = transformInPlace(::sinh) + fun sinh(): F64Array = copy().apply { sinhInPlace() } + fun coshInPlace() = transformInPlace(::cosh) + fun cosh(): F64Array = copy().apply { coshInPlace() } + fun tanhInPlace() = transformInPlace(::tanh) + fun 
tanh(): F64Array = copy().apply { tanhInPlace() } + fun asinhInPlace() = transformInPlace(::asinh) + fun asinh(): F64Array = copy().apply { asinhInPlace() } + fun acoshInPlace() = transformInPlace(::acosh) + fun acosh(): F64Array = copy().apply { acoshInPlace() } + fun atanhInPlace() = transformInPlace(::atanh) + fun atanh(): F64Array = copy().apply { atanhInPlace() } + fun hypotInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> hypot(a, b) } + fun hypot(other: F64Array): F64Array = copy().apply { hypotInPlace(other) } + + infix fun matmul(other: F64Array): F64Array = unsupported() + + fun toDoubleArray(): DoubleArray = unsupported() + + companion object { + val simdSize by lazy { NativeSpeedup.getSimdSize() * 2 } + + operator fun invoke(vararg shape: Int) = F64FlatArray.create(DoubleArray(shape.product())).reshape(*shape) + operator fun invoke(size: Int, init: (Int) -> Double) = F64Array(size).apply { + for (i in 0 until size) { + this[i] = init(i) + } + } + operator fun invoke(numRows: Int, numColumns: Int, init: (Int, Int) -> Double) = F64Array(numRows, numColumns).apply { + for (r in 0 until numRows) { + for (c in 0 until numColumns) { + this[r, c] = init(r, c) + } + } + } + + fun of(vararg values: Double) = F64FlatArray.of(values) + + @JvmName("ofArray") + fun of(data: DoubleArray) = F64FlatArray.of(data) + + fun full(shape: IntArray, init: Double): F64Array { + return F64FlatArray.create(DoubleArray(shape.product()).apply { fill(init) }).reshape(*shape) + } + + fun identity(n: Int): F64Array = zeros(intArrayOf(n, n)).apply { + for (i in 0 until n) { + this[i, i] = 1.0 + } + } + + fun diagonal(values: DoubleArray): F64Array { + val n = values.size + val result = zeros(intArrayOf(n, n)) + for (i in 0 until n) { + result[i, i] = values[i] + } + return result + } + + fun zeros(shape: IntArray): F64Array = full(shape, 0.0) + fun ones(shape: IntArray): F64Array = full(shape, 1.0) + } +} diff --git a/src/commonMain/kotlin/com/martmists/ndarray/simd/F64FlatArray.kt b/src/commonMain/kotlin/com/martmists/ndarray/simd/F64FlatArray.kt new file mode 100644 index 0000000..51a09f1 --- /dev/null +++ b/src/commonMain/kotlin/com/martmists/ndarray/simd/F64FlatArray.kt @@ -0,0 +1,30 @@ +package com.martmists.ndarray.simd + +import com.martmists.ndarray.simd.impl.create + +/** + * A 1D specialization type for [F64Array]. 
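+ *
+ * For example (a sketch; note that [argMin]/[argMax] below return indices, not values):
+ *
+ * ```
+ * val v = F64FlatArray.of(doubleArrayOf(3.0, 1.0, 2.0))
+ * v.argMin()  // 1, the index of the smallest element
+ * ```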
+ */ +interface F64FlatArray : F64Array { + override val isFlattenable: Boolean + get() = true + + override fun checkShape(other: F64Array): F64FlatArray { + check(this === other || (other is F64FlatArray && shape[0] == other.shape[0])) { + "operands shapes do not match: ${shape.contentToString()} vs ${other.shape.contentToString()}" + } + return other as F64FlatArray + } + + override fun flatten(): F64FlatArray = this + + operator fun get(pos: Int): Double + operator fun set(pos: Int, value: Double) + + fun argMin(): Int = (0 until length).minBy(::get) + fun argMax(): Int = (0 until length).maxBy(::get) + + companion object { + fun of(data: DoubleArray): F64FlatArray = F64FlatArray.create(data) + } +} diff --git a/src/commonMain/kotlin/com/martmists/ndarray/simd/NativeSpeedup.kt b/src/commonMain/kotlin/com/martmists/ndarray/simd/NativeSpeedup.kt new file mode 100644 index 0000000..ccf5ccd --- /dev/null +++ b/src/commonMain/kotlin/com/martmists/ndarray/simd/NativeSpeedup.kt @@ -0,0 +1,76 @@ +package com.martmists.ndarray.simd + +internal expect object NativeSpeedup { + fun vecAddVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + fun vecAddScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + fun vecSubVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + fun vecSubScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + fun vecMulVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + fun vecMulScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + fun vecDivVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + fun vecDivScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + fun vecNegate(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecAbs(a: DoubleArray, aOffset: Int, aSize: Int) + + fun vecAndVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + fun vecAndScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) + fun vecOrVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + fun vecOrScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) + fun vecXorVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + fun vecXorScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) + fun vecNot(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecLShiftVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + fun vecLShiftScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) + fun vecRShiftVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + fun vecRShiftScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) + + fun vecEqVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + fun vecEqScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + fun vecNeqVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + fun vecNeqScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + fun vecLtVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + fun vecLtScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + fun vecGtVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + fun vecGtScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + + fun vecSqrt(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecPow(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + fun veciPow(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + fun vecLog(a: DoubleArray, aOffset: 
Int, aSize: Int) + fun vecLogBase(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + fun vecExp(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecExpm1(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecLog1p(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecLog2(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecLog10(a: DoubleArray, aOffset: Int, aSize: Int) + + fun vecCopy(dest: DoubleArray, destOffset: Int, destSize: Int, src: DoubleArray, srcOffset: Int) + fun getSimdSize(): Int + + fun vecSum(a: DoubleArray, aOffset: Int, aSize: Int): Double + fun vecMin(a: DoubleArray, aOffset: Int, aSize: Int): Double + fun vecMax(a: DoubleArray, aOffset: Int, aSize: Int): Double + fun vecProduct(a: DoubleArray, aOffset: Int, aSize: Int): Double + fun vecMean(a: DoubleArray, aOffset: Int, aSize: Int): Double + fun vecVariance(a: DoubleArray, aOffset: Int, aSize: Int): Double + fun vecStdDev(a: DoubleArray, aOffset: Int, aSize: Int): Double + fun vecCoerce(a: DoubleArray, aOffset: Int, aSize: Int, min: Double, max: Double) + + fun vecSin(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecCos(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecTan(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecAsin(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecAcos(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecAtan(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecAtan2(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + fun vecSinh(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecCosh(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecTanh(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecAsinh(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecAcosh(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecAtanh(a: DoubleArray, aOffset: Int, aSize: Int) + fun vecHypot(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + + fun vecDot(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int): Double + fun vecMatMul(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int, n: Int, m: Int, p: Int): DoubleArray +} diff --git a/src/commonMain/kotlin/com/martmists/ndarray/simd/Viewer.kt b/src/commonMain/kotlin/com/martmists/ndarray/simd/Viewer.kt new file mode 100644 index 0000000..4c7b02a --- /dev/null +++ b/src/commonMain/kotlin/com/martmists/ndarray/simd/Viewer.kt @@ -0,0 +1,12 @@ +package com.martmists.ndarray.simd + +interface Viewer { + operator fun get(vararg indices: Int): F64Array + operator fun set(vararg indices: Int, other: F64Array) + operator fun set(vararg indices: Int, init: Double) + operator fun set(any: _I, other: F64Array) + operator fun set(any: _I, other: Double) + operator fun get(any: _I, c: Int): F64Array + operator fun set(any: _I, c: Int, other: F64Array) + operator fun set(any: _I, c: Int, init: Double) +} diff --git a/src/commonMain/kotlin/com/martmists/ndarray/simd/_I.kt b/src/commonMain/kotlin/com/martmists/ndarray/simd/_I.kt new file mode 100644 index 0000000..d2621d3 --- /dev/null +++ b/src/commonMain/kotlin/com/martmists/ndarray/simd/_I.kt @@ -0,0 +1,3 @@ +package com.martmists.ndarray.simd + +object _I diff --git a/src/commonMain/kotlin/com/martmists/ndarray/simd/compat.kt b/src/commonMain/kotlin/com/martmists/ndarray/simd/compat.kt new file mode 100644 index 0000000..95ccc85 --- /dev/null +++ b/src/commonMain/kotlin/com/martmists/ndarray/simd/compat.kt @@ -0,0 +1,10 @@ +package com.martmists.ndarray.simd + +operator fun Double.plus(arr: F64Array): F64Array = arr.plus(this) +operator fun 
Double.minus(arr: F64Array): F64Array = arr.copy().apply { transformInPlace { this@minus - it } } +operator fun Double.times(arr: F64Array): F64Array = arr.times(this) +operator fun Double.div(arr: F64Array): F64Array = arr.copy().apply { transformInPlace { this@div / it } } + +fun Double.pow(arr: F64Array): F64Array = arr.ipow(this) + +fun DoubleArray.toF64Array() = F64Array.of(this) diff --git a/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64ArrayImpl.kt b/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64ArrayImpl.kt new file mode 100644 index 0000000..7fde269 --- /dev/null +++ b/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64ArrayImpl.kt @@ -0,0 +1,436 @@ +package com.martmists.ndarray.simd.impl + +import com.martmists.ndarray.simd.* + +internal open class F64ArrayImpl internal constructor( + override val data: DoubleArray, + override val offset: Int, + override val strides: IntArray, + override val shape: IntArray, + override val unrollDim: Int, + override val unrollStride: Int, + override val unrollSize: Int ) : F64Array { + override val isFlattenable = unrollDim == nDim + + protected inline fun F64Array.unsafeIndex(r: Int, c: Int): Int { + return offset + r * strides[0] + c * strides[1] + } + + protected inline fun F64Array.unsafeIndex(d: Int, r: Int, c: Int): Int { + return offset + d * strides[0] + r * strides[1] + c * strides[2] + } + + protected inline fun F64Array.unsafeIndex(indices: IntArray): Int { + return strides.foldIndexed(offset) { i, acc, stride -> acc + indices[i] * stride } + } + + override fun get(vararg indices: Int): Double { + check(indices.size == nDim) { "broadcasting get is not supported" } + for (d in 0 until nDim) { + checkIndex("index", indices[d], shape[d]) + } + return data[unsafeIndex(indices)] + } + + override fun get(r: Int, c: Int): Double { + check(nDim == 2) { "broadcasting get is not supported" } + checkIndex("row", r, shape[0]) + checkIndex("column", c, shape[1]) + return data[unsafeIndex(r, c)] + } + + override fun get(d: Int, r: Int, c: Int): Double { + check(nDim == 3) { "broadcasting get is not supported" } + checkIndex("depth", d, shape[0]) + checkIndex("row", r, shape[1]) + checkIndex("column", c, shape[2]) + return data[unsafeIndex(d, r, c)] + } + + override fun set(vararg indices: Int, value: Double) { + check(indices.size == nDim) { "broadcasting set is not supported" } + for (d in 0 until nDim) { + checkIndex("index", indices[d], shape[d]) + } + data[unsafeIndex(indices)] = value + } + + override operator fun set(r: Int, c: Int, value: Double) { + check(nDim == 2) { "broadcasting set is not supported" } + checkIndex("row", r, shape[0]) + checkIndex("column", c, shape[1]) + data[unsafeIndex(r, c)] = value + } + + override operator fun set(d: Int, r: Int, c: Int, value: Double) { + check(nDim == 3) { "broadcasting set is not supported" } + checkIndex("depth", d, shape[0]) + checkIndex("row", r, shape[1]) + checkIndex("column", c, shape[2]) + data[unsafeIndex(d, r, c)] = value + } + + override fun view(index: Int, axis: Int): F64Array { + checkIndex("axis", axis, nDim) + checkIndex("index", index, shape[axis]) + return F64Array.create( + data, offset + strides[axis] * index, + strides.remove(axis), shape.remove(axis) + ) + } + + override val V: Viewer by lazy(LazyThreadSafetyMode.PUBLICATION) { ViewerImpl(this) } + + private class ViewerImpl(private val a: F64Array) : Viewer { + override fun get(vararg indices: Int): F64Array = a.view0(indices) + + override fun get(any: _I, c: Int): F64Array = a.view(c, axis=1) + + override fun set(vararg indices: Int, other: F64Array) { +
other.copyTo(a.view0(indices)) + } + + override fun set(vararg indices: Int, init: Double) { + a.view0(indices).fill(init) + } + + override fun set(any: _I, other: F64Array) { + other.copyTo(a) + } + + override fun set(any: _I, other: Double) { + a.fill(other) + } + + override fun set(any: _I, c: Int, other: F64Array) { + other.copyTo(a.view(c, axis=1)) + } + + override fun set(any: _I, c: Int, init: Double) { + a.view(c, axis=1).fill(init) + } + } + + override fun copy(): F64Array { + return F64Array.full(shape, 0.0).also { this.copyTo(it) } + } + + override fun copyTo(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.copyTo(b) } + } + + override fun flatten(): F64FlatArray { + check(isFlattenable) { "array can't be flattened" } + return F64FlatArray.create(data, offset, unrollStride, unrollSize) + } + + override fun slice(from: Int, to: Int, step: Int, axis: Int): F64Array { + require(step > 0) { "slicing step must be positive, but was $step" } + require(axis in 0 until nDim) { "axis out of bounds: $axis" } + require(from >= 0) { "slicing start index must be positive, but was $from" } + val actualTo = if (to != -1) { + require(to > from) { "slicing end index $to must be greater than start index $from" } + check(to <= shape[axis]) { "slicing end index out of bounds: $to > ${shape[axis]}" } + to + } else { + check(shape[axis] > from) { "slicing start index out of bounds: $from >= ${shape[axis]}" } + shape[axis] + } + + val sliceStrides = strides.copyOf().apply { this[axis] *= step } + val sliceShape = shape.copyOf().apply { + this[axis] = (actualTo - from + step - 1) / step + } + return F64Array.create(data, offset + from * strides[axis], sliceStrides, sliceShape) + } + + override fun contains(other: Double): Boolean = unrollToFlat().any { it.contains(other) } + + override fun fill(value: Double) = unrollToFlat().forEach { it.fill(value) } + + override fun reorder(indices: IntArray, axis: Int) { + reorderInternal( + this, indices, axis, + get = { pos -> view(pos, axis).copy() }, + set = { pos, value -> value.copyTo(view(pos, axis)) } + ) + } + + override fun sum(): Double = unrollToFlat().map { it.sum() }.sum() + + override fun min(): Double = unrollToFlat().map { it.min() }.minOrNull() ?: Double.POSITIVE_INFINITY + + override fun max(): Double = unrollToFlat().map { it.max() }.maxOrNull() ?: Double.NEGATIVE_INFINITY + + override fun product(): Double = unrollToFlat().map { it.product() }.reduce(Double::times) + + override fun coerceInPlace(min: Double, max: Double) { + unrollToFlat().forEach { it.coerceInPlace(min, max) } + } + + override fun transformInPlace(transform: (Double) -> Double) { + unrollToFlat().forEach { it.transformInPlace(transform) } + } + + override fun zipTransformInPlace(other: F64Array, transform: (Double, Double) -> Double) { + commonUnrollToFlat(other) { a, b -> a.zipTransformInPlace(b, transform) } + } + + override fun <T> fold(initial: T, operation: (acc: T, Double) -> T): T { + if (isFlattenable) { + return flatten().fold(initial, operation) + } + return unrollToFlat().fold(initial) { acc, f64FlatArray -> f64FlatArray.fold(acc, operation) } + } + + override fun reduce(operation: (Double, Double) -> Double): Double { + if (isFlattenable) { + return flatten().reduce(operation) + } + val sequence = unrollToFlat() + val initial = sequence.first().reduce(operation) + return sequence.drop(1).fold(initial) { acc, f64FlatArray -> f64FlatArray.fold(acc, operation) } + } + + override fun expInPlace() { + unrollToFlat().forEach { it.expInPlace() } + } + + override fun expm1InPlace() { +
unrollToFlat().forEach { it.expm1InPlace() } + } + + override fun logInPlace() { + unrollToFlat().forEach { it.logInPlace() } + } + + override fun log1pInPlace() { + unrollToFlat().forEach { it.log1pInPlace() } + } + + override fun log2InPlace() { + unrollToFlat().forEach { it.log2InPlace() } + } + + override fun log10InPlace() { + unrollToFlat().forEach { it.log10InPlace() } + } + + override fun logBaseInPlace(base: Double) { + unrollToFlat().forEach { it.logBaseInPlace(base) } + } + + override fun sqrtInPlace() { + unrollToFlat().forEach { it.sqrtInPlace() } + } + + override fun powInPlace(power: Double) { + unrollToFlat().forEach { it.powInPlace(power) } + } + + override fun ipowInPlace(base: Double) { + unrollToFlat().forEach { it.ipowInPlace(base) } + } + + override fun unaryMinusInPlace() { + unrollToFlat().forEach { it.unaryMinusInPlace() } + } + + override fun plusAssign(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.plusAssign(b) } + } + + override fun plusAssign(other: Double) { + unrollToFlat().forEach { it.plusAssign(other) } + } + + override fun minusAssign(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.minusAssign(b) } + } + + override fun minusAssign(other: Double) { + unrollToFlat().forEach { it.minusAssign(other) } + } + + override fun timesAssign(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.timesAssign(b) } + } + + override fun timesAssign(other: Double) { + unrollToFlat().forEach { it.timesAssign(other) } + } + + override fun divAssign(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.divAssign(b) } + } + + override fun divAssign(other: Double) { + unrollToFlat().forEach { it.divAssign(other) } + } + + override fun absInPlace() { + unrollToFlat().forEach { it.absInPlace() } + } + + override fun ltInPlace(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.ltInPlace(b) } + } + + override fun ltInPlace(other: Double) { + unrollToFlat().forEach { it.ltInPlace(other) } + } + + override fun gtInPlace(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.gtInPlace(b) } + } + + override fun gtInPlace(other: Double) { + unrollToFlat().forEach { it.gtInPlace(other) } + } + + override fun eqInPlace(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.eqInPlace(b) } + } + + override fun eqInPlace(other: Double) { + unrollToFlat().forEach { it.eqInPlace(other) } + } + + override fun neqInPlace(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.neqInPlace(b) } + } + + override fun neqInPlace(other: Double) { + unrollToFlat().forEach { it.neqInPlace(other) } + } + + override fun andInPlace(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.andInPlace(b) } + } + + override fun andInPlace(other: Int) { + unrollToFlat().forEach { it.andInPlace(other) } + } + + override fun orInPlace(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.orInPlace(b) } + } + + override fun orInPlace(other: Int) { + unrollToFlat().forEach { it.orInPlace(other) } + } + + override fun xorInPlace(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.xorInPlace(b) } + } + + override fun xorInPlace(other: Int) { + unrollToFlat().forEach { it.xorInPlace(other) } + } + + override fun notInPlace() { + unrollToFlat().forEach { it.notInPlace() } + } + + override fun shlInPlace(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.shlInPlace(b) } + } + + override fun shlInPlace(other: Int) { + unrollToFlat().forEach { it.shlInPlace(other) } + } + + override fun shrInPlace(other: F64Array) { + commonUnrollToFlat(other) { a, 
b -> a.shrInPlace(b) } + } + + override fun shrInPlace(other: Int) { + unrollToFlat().forEach { it.shrInPlace(other) } + } + + override fun sinInPlace() { + unrollToFlat().forEach { it.sinInPlace() } + } + + override fun cosInPlace() { + unrollToFlat().forEach { it.cosInPlace() } + } + + override fun tanInPlace() { + unrollToFlat().forEach { it.tanInPlace() } + } + + override fun asinInPlace() { + unrollToFlat().forEach { it.asinInPlace() } + } + + override fun acosInPlace() { + unrollToFlat().forEach { it.acosInPlace() } + } + + override fun atanInPlace() { + unrollToFlat().forEach { it.atanInPlace() } + } + + override fun atan2InPlace(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.atan2InPlace(b) } + } + + override fun sinhInPlace() { + unrollToFlat().forEach { it.sinhInPlace() } + } + + override fun coshInPlace() { + unrollToFlat().forEach { it.coshInPlace() } + } + + override fun tanhInPlace() { + unrollToFlat().forEach { it.tanhInPlace() } + } + + override fun asinhInPlace() { + unrollToFlat().forEach { it.asinhInPlace() } + } + + override fun acoshInPlace() { + unrollToFlat().forEach { it.acoshInPlace() } + } + + override fun atanhInPlace() { + unrollToFlat().forEach { it.atanhInPlace() } + } + + override fun hypotInPlace(other: F64Array) { + commonUnrollToFlat(other) { a, b -> a.hypotInPlace(b) } + } + + override fun matmul(other: F64Array): F64Array { + check(nDim == 2) { "matmul is only supported for 2D arrays" } + check(other.nDim == 2) { "matmul is only supported for 2D arrays" } + check(shape[1] == other.shape[0]) { + "matmul dimensions do not match: ${shape[1]} != ${other.shape[0]}" + } + val resultShape = intArrayOf(shape[0], other.shape[1]) + val result = F64Array.full(resultShape, 0.0) + for (i in 0 until shape[0]) { + for (j in 0 until other.shape[1]) { + for (k in 0 until shape[1]) { + result[i, j] += this[i, k] * other[k, j] + } + } + } + return result + } + + override fun equals(other: Any?): Boolean = when { + this === other -> true + other !is F64Array -> false + !shape.contentEquals(other.shape) -> false + else -> (0 until length).all { view(it) == other.view(it) } + } + + override fun hashCode(): Int = (0 until length).fold(1) { acc, r -> + 31 * acc + view(r).hashCode() + } +} diff --git a/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64DenseFlatArrayBase.kt b/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64DenseFlatArrayBase.kt new file mode 100644 index 0000000..367640c --- /dev/null +++ b/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64DenseFlatArrayBase.kt @@ -0,0 +1,98 @@ +package com.martmists.ndarray.simd.impl + +import com.martmists.ndarray.simd.F64Array +import com.martmists.ndarray.simd.F64FlatArray + +// TODO: Dense array for ND? 
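+// Dispatch note (see F64FlatArray.Companion.create in impl/internal.kt below): +// stride-1 data of size <= F64Array.simdSize becomes F64SmallDenseFlatArrayImpl +// (plain Kotlin loops), larger dense data becomes F64LargeDenseFlatArrayImpl +// (NativeSpeedup SIMD calls), and strided views fall back to F64FlatArrayImpl.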
+internal abstract class F64DenseFlatArrayBase( + data: DoubleArray, + offset: Int, + size: Int +) : F64FlatArrayImpl(data, offset, 1, size) { + override fun fill(init: Double) = data.fill(init, offset, offset + length) + + override fun copy(): F64FlatArray { + val copyData = DoubleArray(length) + data.copyInto(copyData, 0, offset, offset + length) + return F64FlatArray.create(copyData, 0) + } + + override fun copyTo(other: F64Array) { + if (other is F64DenseFlatArrayBase) { + checkShape(other) + // both sides are dense, so delegate to the stdlib array copy + data.copyInto(other.data, other.offset, offset, offset + length) + } else { + super.copyTo(other) + } + } + + override fun toDoubleArray(): DoubleArray { + return data.copyOfRange(offset, offset + length) + } + + override fun transformInPlace(transform: (Double) -> Double) { + var dstOffset = offset + val dstEnd = dstOffset + length + while (dstOffset < dstEnd) { + data[dstOffset] = transform(data[dstOffset]) + dstOffset++ + } + } + + override fun transform(transform: (Double) -> Double): F64FlatArray { + val dst = DoubleArray(length) + var srcOffset = offset + for (i in 0 until length) { + dst[i] = transform(data[srcOffset]) + srcOffset++ + } + return F64FlatArray.create(dst, 0, 1, length) + } + + override fun zipTransformInPlace(other: F64Array, transform: (Double, Double) -> Double) { + if (other is F64DenseFlatArrayBase) { + checkShape(other) + if (offset == 0 && other.offset == 0) { + for (i in 0 until length) { + this[i] = transform(data[i], other.data[i]) + } + } else { + var dstOffset = offset + var srcOffset = other.offset + val dstEnd = offset + length + while (dstOffset < dstEnd) { + data[dstOffset] = transform(data[dstOffset], other.data[srcOffset]) + dstOffset++ + srcOffset++ + } + } + } else { + super.zipTransformInPlace(other, transform) + } + } + + override fun zipTransform(other: F64Array, transform: (Double, Double) -> Double): F64FlatArray { + if (other is F64DenseFlatArrayBase) { + checkShape(other) + val dst = DoubleArray(length) + if (offset == 0 && other.offset == 0) { + for (i in 0 until length) { + dst[i] = transform(data[i], other.data[i]) + } + } else { + var dstOffset = 0 + var srcOffset = offset + var otherOffset = other.offset + for (i in 0 until length) { + dst[dstOffset] = transform(data[srcOffset], other.data[otherOffset]) + dstOffset++ + srcOffset++ + otherOffset++ + } + } + return F64FlatArray.create(dst, 0, 1, length) + } else { + return super.zipTransform(other, transform) + } + } +}
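+ +// Note on the branches above: the offset == 0 fast paths index the backing +// arrays directly, while the general branches advance explicit cursors so +// non-zero offsets into a shared backing array are honoured.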
 diff --git a/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64FlatArrayImpl.kt b/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64FlatArrayImpl.kt new file mode 100644 index 0000000..602d547 --- /dev/null +++ b/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64FlatArrayImpl.kt @@ -0,0 +1,256 @@ +package com.martmists.ndarray.simd.impl + +import com.martmists.ndarray.simd.F64Array +import com.martmists.ndarray.simd.F64FlatArray +import kotlin.math.* + +internal open class F64FlatArrayImpl internal constructor( + data: DoubleArray, + offset: Int, + stride: Int, + size: Int ) : F64ArrayImpl(data, offset, intArrayOf(stride), intArrayOf(size), 1, stride, size), F64FlatArray { + override val isFlattenable: Boolean = true + + protected val unsafeGet: (Int) -> Double = { data[it * stride + offset] } + protected val unsafeSet: (Int, Double) -> Unit = { i, v -> data[i * stride + offset] = v } + + override fun flatten(): F64FlatArray = this + + override operator fun get(pos: Int): Double { + checkIndex("pos", pos, length) + return unsafeGet(pos) + } + + override operator fun set(pos: Int, value: Double) { + checkIndex("pos", pos, length) + unsafeSet(pos, value) + } + + override fun contains(other: Double): Boolean { + for (pos in 0 until length) { + if (unsafeGet(pos) == other) { + return true + } + } + + return false + } + + override fun along(axis: Int) = unsupported() + + override fun view(index: Int, axis: Int) = unsupported() + + override fun copyTo(other: F64Array) { + val o = checkShape(other) + o as F64FlatArrayImpl + for (pos in 0 until length) { + o.unsafeSet(pos, unsafeGet(pos)) + } + } + + override fun copy(): F64FlatArray = F64FlatArray.create(toDoubleArray(), 0, 1, length) + + override fun fill(init: Double) { + for (pos in 0 until length) { + unsafeSet(pos, init) + } + } + + override fun reorder(indices: IntArray, axis: Int) { + if (axis == 0) { + reorderInternal(this, indices, axis, + get = { pos -> unsafeGet(pos) }, + set = { pos, value -> unsafeSet(pos, value) }) + } else { + unsupported() + } + } + + private inline fun balancedSum(getter: (Int) -> Double): Double { + var accUnaligned = 0.0 + var remaining = length + while (remaining % 4 > 0) { + remaining-- + accUnaligned += getter(remaining) + } + val stack = DoubleArray(31 - 2) + var p = 0 + var i = 0 + while (i < remaining) { + // Shift. + var v = getter(i) + getter(i + 1) + val w = getter(i + 2) + getter(i + 3) + v += w + + // Reduce. + var bitmask = 4 + while (i and bitmask != 0) { + v += stack[--p] + bitmask = bitmask shl 1 + } + stack[p++] = v + i += 4 + } + var acc = 0.0 + while (p > 0) { + acc += stack[--p] + } + return acc + accUnaligned + } + + override fun dot(other: F64Array) = balancedSum { unsafeGet(it) * other[it] } + + override fun sum(): Double = balancedSum { unsafeGet(it) } + + override fun min() = unsafeGet(argMin()) + + override fun argMin(): Int { + var minValue = Double.POSITIVE_INFINITY + var res = 0 + for (pos in 0 until length) { + val value = unsafeGet(pos) + if (value <= minValue) { + minValue = value + res = pos + } + } + return res + } + + override fun max() = unsafeGet(argMax()) + + override fun argMax(): Int { + var maxValue = Double.NEGATIVE_INFINITY + var res = 0 + for (pos in 0 until length) { + val value = unsafeGet(pos) + if (value >= maxValue) { + maxValue = value + res = pos + } + } + return res + } + + override fun transformInPlace(transform: (Double) -> Double) { + for (pos in 0 until length) { + unsafeSet(pos, transform.invoke(unsafeGet(pos))) + } + } + + override fun transform(transform: (Double) -> Double): F64FlatArray { + val res = DoubleArray(length) + for (pos in 0 until length) { + res[pos] = transform.invoke(unsafeGet(pos)) + } + return F64FlatArray.create(res) + } + + override fun zipTransformInPlace(other: F64Array, transform: (Double, Double) -> Double) { + val o = checkShape(other) + o as F64FlatArrayImpl + for (pos in 0 until length) { + unsafeSet(pos, transform.invoke(unsafeGet(pos), o.unsafeGet(pos))) + } + } + + override fun zipTransform(other: F64Array, transform: (Double, Double) -> Double): F64FlatArray { + val o = checkShape(other) + o as F64FlatArrayImpl + val res = DoubleArray(length) + for (pos in 0 until length) { + res[pos] = transform.invoke(unsafeGet(pos), o.unsafeGet(pos)) + } + return F64FlatArray.create(res, 0, 1, length) + } + + override fun <T> fold(initial: T, operation: (T, Double) -> T): T { + var res = initial + for (pos in 0 until length) { + res = operation(res, unsafeGet(pos)) + } + return res + } + + override fun reduce(operation: (Double, Double) -> Double): Double { + var res = unsafeGet(0) + for (pos in 1 until
length) { + res = operation(res, unsafeGet(pos)) + } + return res + } + + override fun coerceInPlace(min: Double, max: Double) = transformInPlace { it.coerceIn(min, max) } + override fun expInPlace() = transformInPlace(::exp) + override fun expm1InPlace() = transformInPlace(::expm1) + override fun logInPlace() = transformInPlace(::ln) + override fun log1pInPlace() = transformInPlace(::ln1p) + override fun log2InPlace() = transformInPlace(::log2) + override fun log10InPlace() = transformInPlace(::log10) + override fun logBaseInPlace(base: Double) = log2(base).let { lb -> transformInPlace { log2(it) / lb } } + override fun sqrtInPlace() = transformInPlace(::sqrt) + override fun powInPlace(power: Double) = transformInPlace { it.pow(power) } + override fun ipowInPlace(base: Double) = transformInPlace { base.pow(it) } + override fun unaryMinusInPlace() = transformInPlace(Double::unaryMinus) + override fun plusAssign(other: F64Array) = zipTransformInPlace(other, Double::plus) + override fun plusAssign(other: Double) = transformInPlace { it + other } + override fun minusAssign(other: F64Array) = zipTransformInPlace(other, Double::minus) + override fun minusAssign(other: Double) = transformInPlace { it - other } + override fun timesAssign(other: F64Array) = zipTransformInPlace(other, Double::times) + override fun timesAssign(other: Double) = transformInPlace { it * other } + override fun divAssign(other: F64Array) = zipTransformInPlace(other, Double::div) + override fun divAssign(other: Double) = transformInPlace { it / other } + override fun absInPlace() = transformInPlace(Double::absoluteValue) + override fun ltInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> if (a < b) 1.0 else 0.0 } + override fun ltInPlace(other: Double) = transformInPlace { if (it < other) 1.0 else 0.0 } + override fun gtInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> if (a > b) 1.0 else 0.0 } + override fun gtInPlace(other: Double) = transformInPlace { if (it > other) 1.0 else 0.0 } + override fun eqInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> if (a == b) 1.0 else 0.0 } + override fun eqInPlace(other: Double) = transformInPlace { if (it == other) 1.0 else 0.0 } + override fun neqInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> if (a != b) 1.0 else 0.0 } + override fun neqInPlace(other: Double) = transformInPlace { if (it != other) 1.0 else 0.0 } + override fun andInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> (a.toInt() and b.toInt()).toDouble() } + override fun andInPlace(other: Int) = transformInPlace { (it.toInt() and other).toDouble() } + override fun orInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> (a.toInt() or b.toInt()).toDouble() } + override fun orInPlace(other: Int) = transformInPlace { (it.toInt() or other).toDouble() } + override fun xorInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> (a.toInt() xor b.toInt()).toDouble() } + override fun xorInPlace(other: Int) = transformInPlace { (it.toInt() xor other).toDouble() } + override fun notInPlace() = transformInPlace { it.toInt().inv().toDouble() } + override fun shlInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> (a.toInt() shl b.toInt()).toDouble() } + override fun shlInPlace(other: Int) = transformInPlace { (it.toInt() shl other).toDouble() } + override fun shrInPlace(other: F64Array) = zipTransformInPlace(other) { a, b -> (a.toInt() shr b.toInt()).toDouble() } + override fun shrInPlace(other: Int) = transformInPlace { (it.toInt() shr 
other).toDouble() } + override fun sinInPlace() = transformInPlace(::sin) + override fun cosInPlace() = transformInPlace(::cos) + override fun tanInPlace() = transformInPlace(::tan) + override fun asinInPlace() = transformInPlace(::asin) + override fun acosInPlace() = transformInPlace(::acos) + override fun atanInPlace() = transformInPlace(::atan) + override fun atan2InPlace(other: F64Array) = zipTransformInPlace(other, ::atan2) + override fun sinhInPlace() = transformInPlace(::sinh) + override fun coshInPlace() = transformInPlace(::cosh) + override fun tanhInPlace() = transformInPlace(::tanh) + override fun asinhInPlace() = transformInPlace(::asinh) + override fun acoshInPlace() = transformInPlace(::acosh) + override fun atanhInPlace() = transformInPlace(::atanh) + override fun hypotInPlace(other: F64Array) = zipTransformInPlace(other, ::hypot) + + override fun toDoubleArray() = DoubleArray(length) { unsafeGet(it) } + + override fun equals(other: Any?) = when { + this === other -> true + other !is F64FlatArrayImpl -> false // an instance of F64Array can't be flat + length != other.length -> false + else -> (0 until length).all { + (unsafeGet(it) - other.unsafeGet(it)).absoluteValue < 1e-10 + } + } + + override fun hashCode() = (0 until length).fold(1) { acc, pos -> + // XXX calling #hashCode results in boxing, see KT-7571. + 31 * acc + unsafeGet(pos).hashCode() + } +} diff --git a/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64LargeDenseFlatArrayImpl.kt b/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64LargeDenseFlatArrayImpl.kt new file mode 100644 index 0000000..d70984d --- /dev/null +++ b/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64LargeDenseFlatArrayImpl.kt @@ -0,0 +1,190 @@ +package com.martmists.ndarray.simd.impl + +import com.martmists.ndarray.simd.F64Array +import com.martmists.ndarray.simd.NativeSpeedup + +internal class F64LargeDenseFlatArrayImpl( + data: DoubleArray, + offset: Int, + size: Int +) : F64DenseFlatArrayBase(data, offset, size) { + override fun dot(other: F64Array): Double { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + return NativeSpeedup.vecDot(data, offset, length, other.data, other.offset) + } else { + return super.dot(other) + } + } + override fun sum() = NativeSpeedup.vecSum(data, offset, length) + override fun max() = NativeSpeedup.vecMax(data, offset, length) + override fun min() = NativeSpeedup.vecMin(data, offset, length) + override fun product() = NativeSpeedup.vecProduct(data, offset, length) + override fun mean() = NativeSpeedup.vecMean(data, offset, length) + override fun variance() = NativeSpeedup.vecVariance(data, offset, length) + override fun stdDev() = NativeSpeedup.vecStdDev(data, offset, length) + override fun coerceInPlace(min: Double, max: Double) = NativeSpeedup.vecCoerce(data, offset, length, min, max) + + override fun expInPlace() = NativeSpeedup.vecExp(data, offset, length) + override fun expm1InPlace() = NativeSpeedup.vecExpm1(data, offset, length) + override fun logInPlace() = NativeSpeedup.vecLog(data, offset, length) + override fun log1pInPlace() = NativeSpeedup.vecLog1p(data, offset, length) + override fun log2InPlace() = NativeSpeedup.vecLog2(data, offset, length) + override fun log10InPlace() = NativeSpeedup.vecLog10(data, offset, length) + override fun logBaseInPlace(base: Double) = NativeSpeedup.vecLogBase(data, offset, length, base) + override fun sqrtInPlace() = NativeSpeedup.vecSqrt(data, offset, length) + override fun powInPlace(power: Double) = NativeSpeedup.vecPow(data, 
offset, length, power) + override fun ipowInPlace(base: Double) = NativeSpeedup.veciPow(data, offset, length, base) + + override fun unaryMinusInPlace() = NativeSpeedup.vecNegate(data, offset, length) + override fun plusAssign(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecAddVec(data, offset, length, other.data, other.offset) + } else { + super.plusAssign(other) + } + } + override fun plusAssign(other: Double) = NativeSpeedup.vecAddScalar(data, offset, length, other) + override fun minusAssign(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecSubVec(data, offset, length, other.data, other.offset) + } else { + super.minusAssign(other) + } + } + override fun minusAssign(other: Double) = NativeSpeedup.vecSubScalar(data, offset, length, other) + override fun timesAssign(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecMulVec(data, offset, length, other.data, other.offset) + } else { + super.timesAssign(other) + } + } + override fun timesAssign(other: Double) = NativeSpeedup.vecMulScalar(data, offset, length, other) + override fun divAssign(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecDivVec(data, offset, length, other.data, other.offset) + } else { + super.divAssign(other) + } + } + override fun divAssign(other: Double) = NativeSpeedup.vecDivScalar(data, offset, length, other) + override fun absInPlace() = NativeSpeedup.vecAbs(data, offset, length) + + override fun ltInPlace(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecLtVec(data, offset, length, other.data, other.offset) + } else { + super.ltInPlace(other) + } + } + override fun ltInPlace(other: Double) = NativeSpeedup.vecLtScalar(data, offset, length, other) + override fun gtInPlace(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecGtVec(data, offset, length, other.data, other.offset) + } else { + super.gtInPlace(other) + } + } + override fun gtInPlace(other: Double) = NativeSpeedup.vecGtScalar(data, offset, length, other) + override fun eqInPlace(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecEqVec(data, offset, length, other.data, other.offset) + } else { + super.eqInPlace(other) + } + } + override fun eqInPlace(other: Double) = NativeSpeedup.vecEqScalar(data, offset, length, other) + override fun neqInPlace(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecNeqVec(data, offset, length, other.data, other.offset) + } else { + super.neqInPlace(other) + } + } + override fun neqInPlace(other: Double) = NativeSpeedup.vecNeqScalar(data, offset, length, other) + + override fun andInPlace(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecAndVec(data, offset, length, other.data, other.offset) + } else { + super.andInPlace(other) + } + } + override fun andInPlace(other: Int) = NativeSpeedup.vecAndScalar(data, offset, length, other) + override fun orInPlace(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecOrVec(data, offset, length, other.data, other.offset) + } else { + super.orInPlace(other) + } + } + override fun orInPlace(other: Int) = NativeSpeedup.vecOrScalar(data, offset, length, other) + override fun 
xorInPlace(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecXorVec(data, offset, length, other.data, other.offset) + } else { + super.xorInPlace(other) + } + } + override fun xorInPlace(other: Int) = NativeSpeedup.vecXorScalar(data, offset, length, other) + override fun notInPlace() = NativeSpeedup.vecNot(data, offset, length) + override fun shlInPlace(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecLShiftVec(data, offset, length, other.data, other.offset) + } else { + super.shlInPlace(other) + } + } + override fun shlInPlace(other: Int) = NativeSpeedup.vecLShiftScalar(data, offset, length, other) + override fun shrInPlace(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecRShiftVec(data, offset, length, other.data, other.offset) + } else { + super.shrInPlace(other) + } + } + override fun shrInPlace(other: Int) = NativeSpeedup.vecRShiftScalar(data, offset, length, other) + + override fun sinInPlace() = NativeSpeedup.vecSin(data, offset, length) + override fun cosInPlace() = NativeSpeedup.vecCos(data, offset, length) + override fun tanInPlace() = NativeSpeedup.vecTan(data, offset, length) + override fun asinInPlace() = NativeSpeedup.vecAsin(data, offset, length) + override fun acosInPlace() = NativeSpeedup.vecAcos(data, offset, length) + override fun atanInPlace() = NativeSpeedup.vecAtan(data, offset, length) + override fun atan2InPlace(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecAtan2(data, offset, length, other.data, other.offset) + } else { + super.atan2InPlace(other) + } + } + override fun sinhInPlace() = NativeSpeedup.vecSinh(data, offset, length) + override fun coshInPlace() = NativeSpeedup.vecCosh(data, offset, length) + override fun tanhInPlace() = NativeSpeedup.vecTanh(data, offset, length) + override fun asinhInPlace() = NativeSpeedup.vecAsinh(data, offset, length) + override fun acoshInPlace() = NativeSpeedup.vecAcosh(data, offset, length) + override fun atanhInPlace() = NativeSpeedup.vecAtanh(data, offset, length) + override fun hypotInPlace(other: F64Array) { + if (other is F64LargeDenseFlatArrayImpl) { + checkShape(other) + NativeSpeedup.vecHypot(data, offset, length, other.data, other.offset) + } else { + super.hypotInPlace(other) + } + } +} diff --git a/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64SmallDenseFlatArrayImpl.kt b/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64SmallDenseFlatArrayImpl.kt new file mode 100644 index 0000000..749dccd --- /dev/null +++ b/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/F64SmallDenseFlatArrayImpl.kt @@ -0,0 +1,9 @@ +package com.martmists.ndarray.simd.impl + +internal class F64SmallDenseFlatArrayImpl( + data: DoubleArray, + offset: Int, + size: Int +) : F64DenseFlatArrayBase(data, offset, size) { + +} diff --git a/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/internal.kt b/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/internal.kt new file mode 100644 index 0000000..3d57555 --- /dev/null +++ b/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/internal.kt @@ -0,0 +1,136 @@ +package com.martmists.ndarray.simd.impl + +import com.martmists.ndarray.simd.F64Array +import com.martmists.ndarray.simd.F64FlatArray + +internal fun F64Array.view0(indices: IntArray): F64Array { + require(indices.size < nDim) { "too many indices" } + return indices.fold(this) { m, pos -> m.view(pos) } +} + 
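+// Worked example for intuition: a [3, 2] view with strides [1, 3] (a transposed +// 2x3 array) is not flattenable; calculateUnroll reports unrollDim = 1, so +// unrollOnce splits it into three flat runs of length 2, each striding by 3 over +// the backing array. +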
+internal fun F64Array.unrollOnce(n: Int = unrollDim): Sequence<F64Array> { + val newStrides = strides.slice(n until nDim).toIntArray() + val newShape = shape.slice(n until nDim).toIntArray() + val currentUnrollStride = if (n == unrollDim) unrollStride else run { + var nonTrivialN = n - 1 + while (nonTrivialN >= 0 && shape[nonTrivialN] <= 1) nonTrivialN-- + if (nonTrivialN >= 0) strides[nonTrivialN] else 0 + } + val currentUnrollSize = if (n == unrollDim) unrollSize else shape.slice(0 until n).toIntArray().product() + + return (0 until currentUnrollSize).asSequence().map { i -> + F64Array.create(data, offset + currentUnrollStride * i, newStrides, newShape) + } +} + +internal fun F64Array.unrollToFlat(): Sequence<F64FlatArray> { + if (isFlattenable) return sequenceOf(flatten()) + return unrollOnce().flatMap { it.unrollToFlat() } +} + +internal fun F64Array.commonUnrollToFlat( + other: F64Array, + action: (F64FlatArray, F64FlatArray) -> Unit ) { + checkShape(other) + val commonUnrollDim = kotlin.math.min(unrollDim, other.unrollDim) + if (commonUnrollDim == nDim) { + action(flatten(), other.flatten()) + } else { + unrollOnce(commonUnrollDim).zip(other.unrollOnce(commonUnrollDim)).forEach { (a, b) -> + a.commonUnrollToFlat(b, action) + } + } +} + +internal data class Unroll(val dim: Int, val stride: Int, val size: Int) + +internal fun calculateUnroll(strides: IntArray, shape: IntArray): Unroll { + var prevStride = 0 + var unrollable = true + var d = 0 + var s = 0 + for (i in strides.indices) { + if (shape[i] == 1) { + if (unrollable) d = i + 1 + continue + } + if (unrollable && (prevStride == 0 || prevStride == strides[i] * shape[i])) { + d = i + 1 + s = strides[i] + } else { + unrollable = false + } + prevStride = strides[i] + } + return Unroll(d, s, shape.slice(0 until d).toIntArray().product()) +} + +internal fun IntArray.remove(pos: Int) = when (pos) { + 0 -> sliceArray(1..lastIndex) + lastIndex -> sliceArray(0 until lastIndex) + else -> sliceArray(0 until pos) + sliceArray(pos + 1..lastIndex) +} + +internal inline fun unsupported(): Nothing = throw UnsupportedOperationException() + +internal inline fun <T> reorderInternal( + a: F64Array, + indices: IntArray, + axis: Int, + get: (Int) -> T, + set: (Int, T) -> Unit ) { + require(indices.size == a.shape[axis]) + + val copy = indices.copyOf() + for (pos in 0 until a.shape[axis]) { + val value = get(pos) + var j = pos + while (true) { + val k = copy[j] + copy[j] = j + if (k == pos) { + set(j, value) + break + } else { + set(j, get(k)) + j = k + } + } + } +} + +internal fun F64Array.Companion.create( + data: DoubleArray, + offset: Int, + strides: IntArray, + shape: IntArray, +): F64Array { + require(strides.size == shape.size) { "strides and shape size don't match" } + require(strides.isNotEmpty()) { "singleton arrays are not supported" } + return if (shape.size == 1) { + F64FlatArray.create(data, offset, strides.single(), shape.single()) + } else { + val (unrollDim, unrollStride, unrollSize) = calculateUnroll(strides, shape) + F64ArrayImpl(data, offset, strides, shape, unrollDim, unrollStride, unrollSize) + } +} + +internal fun F64FlatArray.Companion.create( + data: DoubleArray, + offset: Int = 0, + stride: Int = 1, + size: Int = data.size ): F64FlatArray { + require(size > 0) { "empty arrays not supported" } + return if (stride == 1) { + if (size <= F64Array.simdSize) { + F64SmallDenseFlatArrayImpl(data, offset, size) + } else { + F64LargeDenseFlatArrayImpl(data, offset, size) + } + } else { + F64FlatArrayImpl(data, offset, stride, size) + } +} diff --git
a/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/util.kt b/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/util.kt new file mode 100644 index 0000000..6abd667 --- /dev/null +++ b/src/commonMain/kotlin/com/martmists/ndarray/simd/impl/util.kt @@ -0,0 +1,9 @@ +package com.martmists.ndarray.simd.impl + +inline fun checkIndex(label: String, pos: Int, size: Int) { + if (pos < 0 || pos >= size) { + throw IndexOutOfBoundsException("$label must be in [0, $size), but was $pos") + } +} + +inline fun IntArray.product() = fold(1, Int::times) diff --git a/src/jvmMain/kotlin/com/martmists/ndarray/simd/Main.kt b/src/jvmMain/kotlin/com/martmists/ndarray/simd/Main.kt new file mode 100644 index 0000000..ab47103 --- /dev/null +++ b/src/jvmMain/kotlin/com/martmists/ndarray/simd/Main.kt @@ -0,0 +1,13 @@ +package com.martmists.ndarray.simd + +import kotlin.random.Random + +fun main() { + val arr = DoubleArray(23) { Random.nextDouble() } + val f64Array = F64Array.of(arr) + + println(f64Array::class.simpleName) + + println((-f64Array).toDoubleArray().contentToString()) + println(f64Array.abs().toDoubleArray().contentToString()) +} diff --git a/src/jvmMain/kotlin/com/martmists/ndarray/simd/NativeSpeedup.jvm.kt b/src/jvmMain/kotlin/com/martmists/ndarray/simd/NativeSpeedup.jvm.kt new file mode 100644 index 0000000..ee66707 --- /dev/null +++ b/src/jvmMain/kotlin/com/martmists/ndarray/simd/NativeSpeedup.jvm.kt @@ -0,0 +1,101 @@ +package com.martmists.ndarray.simd + +import java.io.File + +internal actual object NativeSpeedup { + init { + val osName = System.getProperty("os.name") + val platform = when { + osName.startsWith("Linux") -> "linux" + osName.startsWith("Mac") -> "macos" + osName.startsWith("Windows") -> "windows" + else -> throw UnsupportedOperationException("Unsupported platform: $osName") + } + val arch = when (val osArch = System.getProperty("os.arch")) { + "x86_64", "amd64" -> "X64" + "aarch64" -> "Arm64" + else -> throw UnsupportedOperationException("Unsupported architecture: $osArch") + } + + val tmp = File.createTempFile("libndarray_simd", ".so") + tmp.deleteOnExit() + + val lib = NativeSpeedup::class.java.getResourceAsStream("/META-INF/natives/$platform$arch/libndarray_simd.so")!! 
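+ // copy the bundled library out to a real file; use {} closes both streams, + // so the library is fully written to disk before System.load maps it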
+ lib.use { input -> tmp.outputStream().use { output -> input.copyTo(output) } } + + System.load(tmp.absolutePath) + } + + actual external fun vecAddVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + actual external fun vecAddScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + actual external fun vecSubVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + actual external fun vecSubScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + actual external fun vecMulVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + actual external fun vecMulScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + actual external fun vecDivVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + actual external fun vecDivScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + actual external fun vecNegate(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecAbs(a: DoubleArray, aOffset: Int, aSize: Int) + + actual external fun vecAndVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + actual external fun vecAndScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) + actual external fun vecOrVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + actual external fun vecOrScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) + actual external fun vecXorVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + actual external fun vecXorScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) + actual external fun vecNot(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecLShiftVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + actual external fun vecLShiftScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) + actual external fun vecRShiftVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + actual external fun vecRShiftScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) + + actual external fun vecEqVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + actual external fun vecEqScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + actual external fun vecNeqVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + actual external fun vecNeqScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + actual external fun vecLtVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + actual external fun vecLtScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + actual external fun vecGtVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + actual external fun vecGtScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + + actual external fun vecSqrt(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecPow(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + actual external fun veciPow(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + actual external fun vecLog(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecLogBase(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) + actual external fun vecExp(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecExpm1(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecLog1p(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecLog2(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecLog10(a: DoubleArray, aOffset: Int, aSize: Int) + + actual
external fun vecCopy(dest: DoubleArray, destOffset: Int, destSize: Int, src: DoubleArray, srcOffset: Int) + actual external fun getSimdSize(): Int + + actual external fun vecSum(a: DoubleArray, aOffset: Int, aSize: Int): Double + actual external fun vecMin(a: DoubleArray, aOffset: Int, aSize: Int): Double + actual external fun vecMax(a: DoubleArray, aOffset: Int, aSize: Int): Double + actual external fun vecProduct(a: DoubleArray, aOffset: Int, aSize: Int): Double + actual external fun vecMean(a: DoubleArray, aOffset: Int, aSize: Int): Double + actual external fun vecVariance(a: DoubleArray, aOffset: Int, aSize: Int): Double + actual external fun vecStdDev(a: DoubleArray, aOffset: Int, aSize: Int): Double + actual external fun vecCoerce(a: DoubleArray, aOffset: Int, aSize: Int, min: Double, max: Double) + + actual external fun vecSin(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecCos(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecTan(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecAsin(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecAcos(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecAtan(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecAtan2(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + actual external fun vecSinh(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecCosh(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecTanh(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecAsinh(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecAcosh(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecAtanh(a: DoubleArray, aOffset: Int, aSize: Int) + actual external fun vecHypot(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) + + actual external fun vecDot(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int): Double + actual external fun vecMatMul(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int, n: Int, m: Int, p: Int): DoubleArray +} diff --git a/src/lib/cpp/arithmetic.cpp b/src/lib/cpp/arithmetic.cpp new file mode 100644 index 0000000..d599f3c --- /dev/null +++ b/src/lib/cpp/arithmetic.cpp @@ -0,0 +1,147 @@ +#include "common.h" + +extern "C" { + void vec_add_scalar(double* a, double b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = va + b; + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = a[i] + b; + } + } + + void vec_add_vec(double* a, double* b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto vb = xsimd::load_unaligned(&b[i]); + auto res = va + vb; + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = a[i] + b[i]; + } + } + + void vec_sub_scalar(double* a, double b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = va - b; + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = a[i] - b; + } + } + + void vec_sub_vec(double* a, double* b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = 
xsimd::load_unaligned(&a[i]); + auto vb = xsimd::load_unaligned(&b[i]); + auto res = va - vb; + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = a[i] - b[i]; + } + } + + void vec_mul_scalar(double* a, double b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = va * b; + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = a[i] * b; + } + } + + void vec_mul_vec(double* a, double* b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto vb = xsimd::load_unaligned(&b[i]); + auto res = va * vb; + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = a[i] * b[i]; + } + } + + void vec_div_scalar(double* a, double b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = va / b; + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = a[i] / b; + } + } + + void vec_div_vec(double* a, double* b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto vb = xsimd::load_unaligned(&b[i]); + auto res = va / vb; + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = a[i] / b[i]; + } + } + + void vec_negate(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = -va; + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = -a[i]; + } + } + + void vec_abs(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::abs(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::abs(a[i]); + } + } +} diff --git a/src/lib/cpp/bitwise.cpp b/src/lib/cpp/bitwise.cpp new file mode 100644 index 0000000..6356850 --- /dev/null +++ b/src/lib/cpp/bitwise.cpp @@ -0,0 +1,162 @@ +#include "common.h" + +extern "C" { + void vec_and_vec(double* arr, double* b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto vb = xsimd::load_unaligned(&b[i]); + auto res = to_int(va) & to_int(vb); + xsimd::store_unaligned(&arr[i], to_float(res)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = static_cast<double>(static_cast<int64_t>(arr[i]) & static_cast<int64_t>(b[i])); + } + } + + void vec_and_scalar(double* arr, int b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto res = to_int(va) & b; + xsimd::store_unaligned(&arr[i], to_float(res)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = static_cast<double>(static_cast<int64_t>(arr[i]) & b); + } + }
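+ + // to_int / to_float are conversion helpers from common.h (not shown in this + // hunk); the scalar tails here assume they convert values to and from 64-bit + // integers, which is what the static_cast<int64_t> fallbacks mirror.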
xsimd::store_unaligned(&arr[i], to_float(res)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = static_cast<double>(static_cast<int64_t>(arr[i]) | static_cast<int64_t>(b[i])); + } + } + + void vec_or_scalar(double* arr, int b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto res = to_int(va) | b; + xsimd::store_unaligned(&arr[i], to_float(res)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = static_cast<double>(static_cast<int64_t>(arr[i]) | b); + } + } + + void vec_xor_vec(double* arr, double* b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto vb = xsimd::load_unaligned(&b[i]); + auto res = to_int(va) ^ to_int(vb); + xsimd::store_unaligned(&arr[i], to_float(res)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = static_cast<double>(static_cast<int64_t>(arr[i]) ^ static_cast<int64_t>(b[i])); + } + } + + void vec_xor_scalar(double* arr, int b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto res = to_int(va) ^ b; + xsimd::store_unaligned(&arr[i], to_float(res)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = static_cast<double>(static_cast<int64_t>(arr[i]) ^ b); + } + } + + void vec_not(double* arr, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto res = ~to_int(va); + xsimd::store_unaligned(&arr[i], to_float(res)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = static_cast<double>(~static_cast<int64_t>(arr[i])); + } + } + + void vec_lshift_vec(double* arr, double* b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto vb = xsimd::load_unaligned(&b[i]); + auto res = to_int(va) << to_int(vb); + xsimd::store_unaligned(&arr[i], to_float(res)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = static_cast<double>(static_cast<int64_t>(arr[i]) << static_cast<int64_t>(b[i])); + } + } + + void vec_lshift_scalar(double* arr, int b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto res = to_int(va) << b; + xsimd::store_unaligned(&arr[i], to_float(res)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = static_cast<double>(static_cast<int64_t>(arr[i]) << b); + } + } + + void vec_rshift_vec(double* arr, double* b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto vb = xsimd::load_unaligned(&b[i]); + auto res = to_int(va) >> to_int(vb); + xsimd::store_unaligned(&arr[i], to_float(res)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = static_cast<double>(static_cast<int64_t>(arr[i]) >> static_cast<int64_t>(b[i])); + } + } + + void vec_rshift_scalar(double* arr, int b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto res = to_int(va) >> b; + xsimd::store_unaligned(&arr[i], to_float(res)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = static_cast<double>(static_cast<int64_t>(arr[i]) >> b); + } + } +} diff --git a/src/lib/cpp/common.h b/src/lib/cpp/common.h new file mode 100644 index 0000000..652c981 --- /dev/null +++ b/src/lib/cpp/common.h @@ -0,0 +1,8 
@@ +#pragma once + +#include <xsimd/xsimd.hpp> +#include "lib.h" + +constexpr std::size_t simd_size = xsimd::simd_type<double>::size; +#define MAKE_TRUE() xsimd::batch<double>(1.0) +#define MAKE_FALSE() xsimd::batch<double>(0.0) diff --git a/src/lib/cpp/compare.cpp b/src/lib/cpp/compare.cpp new file mode 100644 index 0000000..91e932c --- /dev/null +++ b/src/lib/cpp/compare.cpp @@ -0,0 +1,135 @@ +#include "common.h" + +extern "C" { + void vec_eq_vec(double* arr, double* b, int n) { + std::size_t size = n - n % simd_size; + auto TRUE = MAKE_TRUE(); + auto FALSE = MAKE_FALSE(); + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto vb = xsimd::load_unaligned(&b[i]); + auto res = va == vb; + xsimd::store_unaligned(&arr[i], select(res, TRUE, FALSE)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = arr[i] == b[i] ? 1.0 : 0.0; + } + } + + void vec_eq_scalar(double* arr, double b, int n) { + std::size_t size = n - n % simd_size; + auto TRUE = MAKE_TRUE(); + auto FALSE = MAKE_FALSE(); + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto res = va == b; + xsimd::store_unaligned(&arr[i], select(res, TRUE, FALSE)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = arr[i] == b ? 1.0 : 0.0; + } + } + + void vec_neq_vec(double* arr, double* b, int n) { + std::size_t size = n - n % simd_size; + auto TRUE = MAKE_TRUE(); + auto FALSE = MAKE_FALSE(); + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto vb = xsimd::load_unaligned(&b[i]); + auto res = va != vb; + xsimd::store_unaligned(&arr[i], select(res, TRUE, FALSE)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = arr[i] != b[i] ? 1.0 : 0.0; + } + } + + void vec_neq_scalar(double* arr, double b, int n) { + std::size_t size = n - n % simd_size; + auto TRUE = MAKE_TRUE(); + auto FALSE = MAKE_FALSE(); + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto res = va != b; + xsimd::store_unaligned(&arr[i], select(res, TRUE, FALSE)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = arr[i] != b ? 1.0 : 0.0; + } + } + + void vec_lt_vec(double* arr, double* b, int n) { + std::size_t size = n - n % simd_size; + auto TRUE = MAKE_TRUE(); + auto FALSE = MAKE_FALSE(); + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto vb = xsimd::load_unaligned(&b[i]); + auto res = va < vb; + xsimd::store_unaligned(&arr[i], select(res, TRUE, FALSE)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = arr[i] < b[i] ? 1.0 : 0.0; + } + } + + void vec_lt_scalar(double* arr, double b, int n) { + std::size_t size = n - n % simd_size; + auto TRUE = MAKE_TRUE(); + auto FALSE = MAKE_FALSE(); + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto res = va < b; + xsimd::store_unaligned(&arr[i], select(res, TRUE, FALSE)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = arr[i] < b ? 1.0 : 0.0; + } + } + + void vec_gt_vec(double* arr, double* b, int n) { + std::size_t size = n - n % simd_size; + auto TRUE = MAKE_TRUE(); + auto FALSE = MAKE_FALSE(); + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto vb = xsimd::load_unaligned(&b[i]); + auto res = va > vb; + xsimd::store_unaligned(&arr[i], select(res, TRUE, FALSE)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = arr[i] > b[i] ? 1.0 : 0.0; + } + } + 
void vec_gt_scalar(double* arr, double b, int n) { + std::size_t size = n - n % simd_size; + auto TRUE = MAKE_TRUE(); + auto FALSE = MAKE_FALSE(); + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&arr[i]); + auto res = va > b; + xsimd::store_unaligned(&arr[i], select(res, TRUE, FALSE)); + } + + for (std::size_t i = size; i < n; ++i) { + arr[i] = arr[i] > b ? 1.0 : 0.0; + } + } +} diff --git a/src/lib/cpp/math.cpp b/src/lib/cpp/math.cpp new file mode 100644 index 0000000..238ced3 --- /dev/null +++ b/src/lib/cpp/math.cpp @@ -0,0 +1,147 @@ +#include "common.h" + +extern "C" { + void vec_sqrt(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::sqrt(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::sqrt(a[i]); + } + } + + void vec_pow(double* a, double b, int n) { + std::size_t size = n - n % simd_size; + auto vb = xsimd::batch<double>(b); + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::pow(va, vb); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::pow(a[i], b); + } + } + + void vec_ipow(double* a, double b, int n) { + std::size_t size = n - n % simd_size; + auto vb = xsimd::batch<double>(b); + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::pow(vb, va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::pow(b, a[i]); // b ** a[i], matching the vectorized operand order above + } + } + + void vec_log(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::log(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::log(a[i]); + } + } + + void vec_logbase(double* a, double b, int n) { + std::size_t size = n - n % simd_size; + auto lb = std::log(b); + auto vb = xsimd::batch<double>(lb); + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::log(va) / vb; + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::log(a[i]) / lb; + } + } + + void vec_exp(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::exp(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::exp(a[i]); + } + } + + void vec_expm1(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::expm1(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::expm1(a[i]); + } + } + + void vec_log1p(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::log1p(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::log1p(a[i]); + } + } + + void vec_log2(double* a, int n) { + std::size_t size = n - n % simd_size; + auto vb = xsimd::batch<double>(std::log(2.0)); + + 
for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::log(va) / vb; + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::log2(a[i]); + } + } + + void vec_log10(double* a, int n) { + std::size_t size = n - n % simd_size; + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::log10(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::log10(a[i]); + } + } +} diff --git a/src/lib/cpp/misc.cpp b/src/lib/cpp/misc.cpp new file mode 100644 index 0000000..9a5791b --- /dev/null +++ b/src/lib/cpp/misc.cpp @@ -0,0 +1,20 @@ +#include "common.h" + +extern "C" { + void vec_copy(double* a, double* b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto vb = xsimd::load_unaligned(&b[i]); + xsimd::store_unaligned(&a[i], vb); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = b[i]; + } + } + + int get_simd_size() { + return simd_size; + } +} diff --git a/src/lib/cpp/procedure.cpp b/src/lib/cpp/procedure.cpp new file mode 100644 index 0000000..c9c22c8 --- /dev/null +++ b/src/lib/cpp/procedure.cpp @@ -0,0 +1,124 @@ +#include "common.h" + +extern "C" { + double vec_sum(double* a, int n) { + auto sum = xsimd::batch<double>(0.0); + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + sum += va; + } + + double result = xsimd::reduce_add(sum); + + for (std::size_t i = size; i < n; ++i) { + result += a[i]; + } + + return result; + } + + double vec_min(double* a, int n) { + auto min = xsimd::batch<double>(std::numeric_limits<double>::max()); + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + min = xsimd::min(min, va); + } + + double result = xsimd::reduce_min(min); + + for (std::size_t i = size; i < n; ++i) { + result = std::min(result, a[i]); + } + + return result; + } + + double vec_max(double* a, int n) { + auto max = xsimd::batch<double>(std::numeric_limits<double>::lowest()); + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + max = xsimd::max(max, va); + } + + double result = xsimd::reduce_max(max); + + for (std::size_t i = size; i < n; ++i) { + result = std::max(result, a[i]); + } + + return result; + } + + double vec_prod(double* a, int n) { + auto prod = xsimd::batch<double>(1.0); + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + prod *= va; + } + + double result = 1.0; + + // FIXME: Figure out how to do this with xsimd::reduce + for (std::size_t i = 0; i < simd_size; ++i) { + result *= prod.get(i); + } + + for (std::size_t i = size; i < n; ++i) { + result *= a[i]; + } + + return result; + } + + double vec_mean(double* a, int n) { + return vec_sum(a, n) / n; + } + + double vec_var(double* a, int n) { + double mean = vec_mean(a, n); + auto sum = xsimd::batch<double>(0.0); + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + sum += (va - mean) * (va - mean); + } + + double result = xsimd::reduce_add(sum); + + for (std::size_t i = size; i < n; ++i) { + result += (a[i] - mean) * (a[i] - mean); + } + + return result / n; + } + 
double vec_std(double* a, int n) { + return std::sqrt(vec_var(a, n)); + } + + void vec_coerce(double* a, int n, double min, double max) { + auto min_batch = xsimd::batch<double>(min); + auto max_batch = xsimd::batch<double>(max); + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + va = xsimd::min(va, max_batch); + va = xsimd::max(va, min_batch); + xsimd::store_unaligned(&a[i], va); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::min(std::max(a[i], min), max); + } + } +} diff --git a/src/lib/cpp/trigonometry.cpp b/src/lib/cpp/trigonometry.cpp new file mode 100644 index 0000000..76ca277 --- /dev/null +++ b/src/lib/cpp/trigonometry.cpp @@ -0,0 +1,201 @@ +#include "common.h" + +extern "C" { + void vec_sin(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::sin(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::sin(a[i]); + } + } + + void vec_cos(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::cos(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::cos(a[i]); + } + } + + void vec_tan(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::tan(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::tan(a[i]); + } + } + + void vec_asin(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::asin(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::asin(a[i]); + } + } + + void vec_acos(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::acos(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::acos(a[i]); + } + } + + void vec_atan(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::atan(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::atan(a[i]); + } + } + + void vec_atan2(double* a, double* b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto vb = xsimd::load_unaligned(&b[i]); + auto res = xsimd::atan2(va, vb); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::atan2(a[i], b[i]); + } + } + + void vec_sinh(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::sinh(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::sinh(a[i]); + } + } + + void vec_cosh(double* a, int n) { + std::size_t size = n - n % simd_size; + + 
for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::cosh(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::cosh(a[i]); + } + } + + void vec_tanh(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::tanh(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::tanh(a[i]); + } + } + + void vec_asinh(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::asinh(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::asinh(a[i]); + } + } + + void vec_acosh(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::acosh(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::acosh(a[i]); + } + } + + void vec_atanh(double* a, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto res = xsimd::atanh(va); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::atanh(a[i]); + } + } + + void vec_hypot(double* a, double* b, int n) { + std::size_t size = n - n % simd_size; + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto vb = xsimd::load_unaligned(&b[i]); + auto res = xsimd::hypot(va, vb); + xsimd::store_unaligned(&a[i], res); + } + + for (std::size_t i = size; i < n; ++i) { + a[i] = std::hypot(a[i], b[i]); + } + } +} diff --git a/src/lib/cpp/vector.cpp b/src/lib/cpp/vector.cpp new file mode 100644 index 0000000..6324257 --- /dev/null +++ b/src/lib/cpp/vector.cpp @@ -0,0 +1,43 @@ +#include "common.h" + +extern "C" { + double vec_dot(double* a, double* b, int n) { + std::size_t size = n - n % simd_size; + auto res = xsimd::batch<double>(0.0); + + for (std::size_t i = 0; i < size; i += simd_size) { + auto va = xsimd::load_unaligned(&a[i]); + auto vb = xsimd::load_unaligned(&b[i]); + res += va * vb; + } + + auto result = xsimd::reduce_add(res); + + for (std::size_t i = size; i < n; ++i) { + result += a[i] * b[i]; + } + + return result; + } + + void vec_matmul(double* a, double* b, double* c, int n, int m, int p) { + std::size_t size = p - p % simd_size; + for (int i = 0; i < n; ++i) { + for (int j = 0; j < p; ++j) { + c[i * p + j] = 0.0; + } + for (int k = 0; k < m; ++k) { + // Broadcast a[i][k] and stream along row k of b so every load is unit-stride; + // a contiguous load at &b[k * p + j] reads row elements, so accumulating it + // against a row of a (as a dot product) would multiply the wrong operands. + auto va = xsimd::batch<double>(a[i * m + k]); + std::size_t j = 0; + for (; j < size; j += simd_size) { + auto vb = xsimd::load_unaligned(&b[k * p + j]); + auto vc = xsimd::load_unaligned(&c[i * p + j]); + xsimd::store_unaligned(&c[i * p + j], vc + va * vb); + } + for (; j < p; ++j) { + c[i * p + j] += a[i * m + k] * b[k * p + j]; + } + } + } + } +} diff --git a/src/lib/public/arithmetic.h b/src/lib/public/arithmetic.h new file mode 100644 index 0000000..86721dc --- /dev/null +++ b/src/lib/public/arithmetic.h @@ -0,0 +1,12 @@ +#pragma once + +void vec_add_vec(double* arr, double* b, int n); +void vec_add_scalar(double* arr, double b, int n); +void vec_sub_vec(double* arr, double* b, int n); +void vec_sub_scalar(double* arr, double b, int n); +void vec_mul_vec(double* 
arr, double* b, int n); +void vec_mul_scalar(double* arr, double b, int n); +void vec_div_vec(double* arr, double* b, int n); +void vec_div_scalar(double* arr, double b, int n); +void vec_negate(double* arr, int n); +void vec_abs(double* arr, int n); diff --git a/src/lib/public/bitwise.h b/src/lib/public/bitwise.h new file mode 100644 index 0000000..5eebe96 --- /dev/null +++ b/src/lib/public/bitwise.h @@ -0,0 +1,13 @@ +#pragma once + +void vec_and_vec(double* arr, double* b, int n); +void vec_and_scalar(double* arr, int b, int n); +void vec_or_vec(double* arr, double* b, int n); +void vec_or_scalar(double* arr, int b, int n); +void vec_xor_vec(double* arr, double* b, int n); +void vec_xor_scalar(double* arr, int b, int n); +void vec_not(double* arr, int n); +void vec_lshift_vec(double* arr, double* b, int n); +void vec_lshift_scalar(double* arr, int b, int n); +void vec_rshift_vec(double* arr, double* b, int n); +void vec_rshift_scalar(double* arr, int b, int n); diff --git a/src/lib/public/compare.h b/src/lib/public/compare.h new file mode 100644 index 0000000..82ead3a --- /dev/null +++ b/src/lib/public/compare.h @@ -0,0 +1,10 @@ +#pragma once + +void vec_eq_vec(double* arr, double* b, int n); +void vec_eq_scalar(double* arr, double b, int n); +void vec_neq_vec(double* arr, double* b, int n); +void vec_neq_scalar(double* arr, double b, int n); +void vec_lt_vec(double* arr, double* b, int n); +void vec_lt_scalar(double* arr, double b, int n); +void vec_gt_vec(double* arr, double* b, int n); +void vec_gt_scalar(double* arr, double b, int n); diff --git a/src/lib/public/lib.h b/src/lib/public/lib.h new file mode 100644 index 0000000..ac5ad0d --- /dev/null +++ b/src/lib/public/lib.h @@ -0,0 +1,18 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "arithmetic.h" +#include "bitwise.h" +#include "compare.h" +#include "math.h" +#include "misc.h" +#include "procedure.h" +#include "trigonometry.h" +#include "vector.h" + +#ifdef __cplusplus +} +#endif diff --git a/src/lib/public/math.h b/src/lib/public/math.h new file mode 100644 index 0000000..3cecc55 --- /dev/null +++ b/src/lib/public/math.h @@ -0,0 +1,13 @@ +#pragma once + +void vec_sqrt(double* arr, int n); +void vec_pow(double* arr, double b, int n); +void vec_ipow(double* arr, double b, int n); +void vec_log(double* arr, int n); +void vec_logbase(double* arr, double b, int n); +void vec_exp(double* arr, int n); + +void vec_expm1(double* arr, int n); +void vec_log1p(double* arr, int n); +void vec_log2(double* arr, int n); +void vec_log10(double* arr, int n); diff --git a/src/lib/public/misc.h b/src/lib/public/misc.h new file mode 100644 index 0000000..2576b21 --- /dev/null +++ b/src/lib/public/misc.h @@ -0,0 +1,4 @@ +#pragma once + +void vec_copy(double* arr, double* b, int n); +int get_simd_size(); diff --git a/src/lib/public/procedure.h b/src/lib/public/procedure.h new file mode 100644 index 0000000..31f3a27 --- /dev/null +++ b/src/lib/public/procedure.h @@ -0,0 +1,12 @@ +#pragma once + +double vec_sum(double* arr, int n); +double vec_min(double* arr, int n); +double vec_max(double* arr, int n); +double vec_prod(double* arr, int n); + +double vec_mean(double* arr, int n); +double vec_var(double* arr, int n); +double vec_std(double* arr, int n); + +void vec_coerce(double* arr, int n, double min, double max); diff --git a/src/lib/public/trigonometry.h b/src/lib/public/trigonometry.h new file mode 100644 index 0000000..6c4132d --- /dev/null +++ b/src/lib/public/trigonometry.h @@ -0,0 +1,16 @@ +#pragma once + +void 
vec_sin(double* arr, int n); +void vec_cos(double* arr, int n); +void vec_tan(double* arr, int n); +void vec_asin(double* arr, int n); +void vec_acos(double* arr, int n); +void vec_atan(double* arr, int n); +void vec_atan2(double* arr, double* b, int n); +void vec_sinh(double* arr, int n); +void vec_cosh(double* arr, int n); +void vec_tanh(double* arr, int n); +void vec_asinh(double* arr, int n); +void vec_acosh(double* arr, int n); +void vec_atanh(double* arr, int n); +void vec_hypot(double* arr, double* b, int n); diff --git a/src/lib/public/vector.h b/src/lib/public/vector.h new file mode 100644 index 0000000..3c68c4b --- /dev/null +++ b/src/lib/public/vector.h @@ -0,0 +1,4 @@ +#pragma once + +double vec_dot(double* a, double* b, int n); +void vec_matmul(double* a, double* b, double* c, int n, int m, int p); diff --git a/src/nativeMain/cinterops/jni.def b/src/nativeMain/cinterops/jni.def new file mode 100644 index 0000000..8e21ac8 --- /dev/null +++ b/src/nativeMain/cinterops/jni.def @@ -0,0 +1,2 @@ +headers = jni.h +package = jni diff --git a/src/nativeMain/cinterops/simd.def b/src/nativeMain/cinterops/simd.def new file mode 100644 index 0000000..0cb3b1c --- /dev/null +++ b/src/nativeMain/cinterops/simd.def @@ -0,0 +1,4 @@ +package = simd +headers = public/lib.h + +linkerOpts.linux_x64 = -Lbuild/cmake/simd/linuxX64 -lsimd diff --git a/src/nativeMain/kotlin/com/martmists/ndarray/simd/NativeSpeedup.native.kt b/src/nativeMain/kotlin/com/martmists/ndarray/simd/NativeSpeedup.native.kt new file mode 100644 index 0000000..b223bd3 --- /dev/null +++ b/src/nativeMain/kotlin/com/martmists/ndarray/simd/NativeSpeedup.native.kt @@ -0,0 +1,434 @@ +package com.martmists.ndarray.simd + +import kotlinx.cinterop.addressOf +import kotlinx.cinterop.usePinned + +internal actual object NativeSpeedup { + actual fun vecAddVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_add_vec(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecAddScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) { + a.usePinned { pinA -> + simd.vec_add_scalar(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecSubVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_sub_vec(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecSubScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) { + a.usePinned { pinA -> + simd.vec_sub_scalar(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecMulVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_mul_vec(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecMulScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) { + a.usePinned { pinA -> + simd.vec_mul_scalar(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecDivVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_div_vec(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecDivScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) { + a.usePinned { pinA -> + simd.vec_div_scalar(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecNegate(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + 
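/* usePinned pins the DoubleArray for the duration of the lambda so the Kotlin/Native GC cannot move
   it while C code writes through the raw pointer, and addressOf(offset) then yields a pointer into
   the pinned storage. A minimal sketch of the same pattern as a standalone function (negateInPlace
   is hypothetical; simd.vec_negate is the cinterop binding used throughout this file):

   fun negateInPlace(values: DoubleArray) {
       values.usePinned { pinned ->
           // the pointer is only valid while the array stays pinned, i.e. inside this block
           simd.vec_negate(pinned.addressOf(0), values.size)
       }
   }
*/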
simd.vec_negate(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecAbs(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_abs(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecAndVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_and_vec(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecAndScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) { + a.usePinned { pinA -> + simd.vec_and_scalar(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecOrVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_or_vec(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecOrScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) { + a.usePinned { pinA -> + simd.vec_or_scalar(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecXorVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_xor_vec(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecXorScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) { + a.usePinned { pinA -> + simd.vec_xor_scalar(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecNot(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_not(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecLShiftVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_lshift_vec(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecLShiftScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) { + a.usePinned { pinA -> + simd.vec_lshift_scalar(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecRShiftVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_rshift_vec(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecRShiftScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Int) { + a.usePinned { pinA -> + simd.vec_rshift_scalar(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecEqVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_eq_vec(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecEqScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) { + a.usePinned { pinA -> + simd.vec_eq_scalar(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecNeqVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_neq_vec(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecNeqScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) { + a.usePinned { pinA -> + simd.vec_neq_scalar(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecLtVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_lt_vec(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecLtScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) { + a.usePinned { pinA -> + 
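/* The comparison entry points overwrite the input with 1.0/0.0 masks instead of returning booleans,
   which keeps every result in DoubleArray form and composable with the other kernels. A reference
   sketch of what such a mask means (select is a hypothetical helper, not part of this API):

   // out[i] = a[i] where mask[i] == 1.0, else b[i]
   fun select(mask: DoubleArray, a: DoubleArray, b: DoubleArray): DoubleArray =
       DoubleArray(mask.size) { i -> if (mask[i] == 1.0) a[i] else b[i] }
*/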
simd.vec_lt_scalar(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecGtVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_gt_vec(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecGtScalar(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) { + a.usePinned { pinA -> + simd.vec_gt_scalar(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecSqrt(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_sqrt(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecPow(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) { + a.usePinned { pinA -> + simd.vec_pow(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun veciPow(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) { + a.usePinned { pinA -> + simd.vec_ipow(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecLog(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_log(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecLogBase(a: DoubleArray, aOffset: Int, aSize: Int, b: Double) { + a.usePinned { pinA -> + simd.vec_logbase(pinA.addressOf(aOffset), b, aSize) + } + } + + actual fun vecExp(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_exp(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecExpm1(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_expm1(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecLog1p(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_log1p(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecLog2(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_log2(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecLog10(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_log10(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecCopy(dest: DoubleArray, destOffset: Int, destSize: Int, src: DoubleArray, srcOffset: Int) { + dest.usePinned { pinDest -> + src.usePinned { pinSrc -> + simd.vec_copy(pinDest.addressOf(destOffset), pinSrc.addressOf(srcOffset), destSize) + } + } + } + + actual fun getSimdSize(): Int { + return simd.get_simd_size() + } + + actual fun vecSum(a: DoubleArray, aOffset: Int, aSize: Int): Double { + a.usePinned { pinA -> + return simd.vec_sum(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecMin(a: DoubleArray, aOffset: Int, aSize: Int): Double { + a.usePinned { pinA -> + return simd.vec_min(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecMax(a: DoubleArray, aOffset: Int, aSize: Int): Double { + a.usePinned { pinA -> + return simd.vec_max(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecProduct(a: DoubleArray, aOffset: Int, aSize: Int): Double { + a.usePinned { pinA -> + return simd.vec_prod(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecMean(a: DoubleArray, aOffset: Int, aSize: Int): Double { + a.usePinned { pinA -> + return simd.vec_mean(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecVariance(a: DoubleArray, aOffset: Int, aSize: Int): Double { + a.usePinned { pinA -> + return simd.vec_var(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecStdDev(a: DoubleArray, aOffset: Int, aSize: Int): Double { + a.usePinned { pinA -> + return simd.vec_std(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecCoerce(a: DoubleArray, aOffset: Int, aSize: Int, min: Double, max: Double) { + 
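/* vec_coerce clamps every element into [min, max], i.e. the same semantics as Kotlin's
   Double.coerceIn applied elementwise. Scalar reference behaviour for comparison (sketch):

   fun coerceReference(a: DoubleArray, min: Double, max: Double) {
       for (i in a.indices) a[i] = a[i].coerceIn(min, max)
   }
*/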
a.usePinned { pinA -> + simd.vec_coerce(pinA.addressOf(aOffset), aSize, min, max) + } + } + + actual fun vecSin(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_sin(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecCos(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_cos(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecTan(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_tan(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecAsin(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_asin(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecAcos(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_acos(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecAtan(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_atan(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecAtan2(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_atan2(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecSinh(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_sinh(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecCosh(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_cosh(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecTanh(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_tanh(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecAsinh(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_asinh(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecAcosh(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_acosh(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecAtanh(a: DoubleArray, aOffset: Int, aSize: Int) { + a.usePinned { pinA -> + simd.vec_atanh(pinA.addressOf(aOffset), aSize) + } + } + + actual fun vecHypot(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int) { + a.usePinned { pinA -> + b.usePinned { pinB -> + simd.vec_hypot(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecDot(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int): Double { + a.usePinned { pinA -> + b.usePinned { pinB -> + return simd.vec_dot(pinA.addressOf(aOffset), pinB.addressOf(bOffset), aSize) + } + } + } + + actual fun vecMatMul(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int, n: Int, m: Int, p: Int): DoubleArray { + val c = DoubleArray(n * p) // an (n x m) * (m x p) product is an n x p matrix + a.usePinned { pinA -> + b.usePinned { pinB -> + c.usePinned { pinC -> + simd.vec_matmul(pinA.addressOf(aOffset), pinB.addressOf(bOffset), pinC.addressOf(0), n, m, p) + } + } + } + return c + } +} diff --git a/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/arithmetic.kt b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/arithmetic.kt new file mode 100644 index 0000000..25c1455 --- /dev/null +++ b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/arithmetic.kt @@ -0,0 +1,139 @@ +package com.martmists.ndarray.simd.jni + +import jni.* +import kotlinx.cinterop.* +import simd.* + 
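/* Each bridge function below is exported with @CName set to the JNI-mangled symbol
   Java_<package, dots as underscores>_<class>_<method>, so the JVM can bind the external
   declarations on NativeSpeedup without RegisterNatives. The bodies all follow one pattern:
   GetPrimitiveArrayCritical exposes the array storage (usually without a copy), the element
   pointer is offset manually, and ReleasePrimitiveArrayCritical publishes the writes back.
   Sketch of the JVM-side declaration this mangling assumes (hypothetical, jvmMain not shown here):

   // class com.martmists.ndarray.simd.NativeSpeedup (JVM side)
   // external fun vecAddVec(a: DoubleArray, aOffset: Int, aSize: Int, b: DoubleArray, bOffset: Int)
*/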
@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecAddVec") +fun jni_vec_add_vec(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>() + val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>()) + + vec_add_vec(refA, refB, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecAddScalar") +fun jni_vec_add_scalar(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdouble) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_add_scalar(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecSubVec") +fun jni_vec_sub_vec(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>() + val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>()) + + vec_sub_vec(refA, refB, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecSubScalar") +fun jni_vec_sub_scalar(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdouble) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_sub_scalar(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecMulVec") +fun jni_vec_mul_vec(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>() + val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>()) + + vec_mul_vec(refA, refB, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecMulScalar") +fun jni_vec_mul_scalar(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdouble) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_mul_scalar(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + 
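/* Note the offset arithmetic above is done on raw pointers in bytes, not elements: aOffset is
   scaled by sizeOf<DoubleVar>() (8 bytes) before being added to rawValue. A worked instance of
   the scaling, as a sketch: for aOffset = 3 the element pointer sits 24 bytes past the base:

   // val refA = interpretCPointer<DoubleVar>(arrA.rawValue + 3 * sizeOf<DoubleVar>())  // base + 24
*/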
@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecDivVec") +fun jni_vec_div_vec(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>() + val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>()) + + vec_div_vec(refA, refB, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecDivScalar") +fun jni_vec_div_scalar(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdouble) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_div_scalar(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecNegate") +fun jni_vec_negate(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_negate(refA, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecAbs") +fun jni_vec_abs(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_abs(refA, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} diff --git a/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/bitwise.kt b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/bitwise.kt new file mode 100644 index 0000000..dce8d3b --- /dev/null +++ b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/bitwise.kt @@ -0,0 +1,152 @@ +package com.martmists.ndarray.simd.jni + +import jni.* +import kotlinx.cinterop.* +import simd.* + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecAndVec") +fun jni_vec_and_vec(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>() + val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>()) + + vec_and_vec(refA, refB, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0) + } +} + 
@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecAndScalar") +fun jni_vec_and_scalar(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_and_scalar(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecOrVec") +fun jni_vec_or_vec(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>() + val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>()) + + vec_or_vec(refA, refB, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecOrScalar") +fun jni_vec_or_scalar(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_or_scalar(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecXorVec") +fun jni_vec_xor_vec(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>() + val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>()) + + vec_xor_vec(refA, refB, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecXorScalar") +fun jni_vec_xor_scalar(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_xor_scalar(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecNot") +fun jni_vec_not(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_not(refA, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + 
@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecLShiftVec") +fun jni_vec_lshift_vec(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>() + val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>()) + + vec_lshift_vec(refA, refB, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecLShiftScalar") +fun jni_vec_lshift_scalar(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_lshift_scalar(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecRShiftVec") +fun jni_vec_rshift_vec(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>() + val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>()) + + vec_rshift_vec(refA, refB, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecRShiftScalar") +fun jni_vec_rshift_scalar(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_rshift_scalar(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} diff --git a/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/compare.kt b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/compare.kt new file mode 100644 index 0000000..c7acd58 --- /dev/null +++ b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/compare.kt @@ -0,0 +1,113 @@ +package com.martmists.ndarray.simd.jni + +import jni.* +import kotlinx.cinterop.* +import simd.* + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecEqVec") +fun jni_vec_eq_vec(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>() + val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>()) + + vec_eq_vec(refA, refB, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0) + } +} + 
@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecEqScalar") +fun jni_vec_eq_scalar(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdouble) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_eq_scalar(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecNeqVec") +fun jni_vec_neq_vec(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>() + val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>()) + + vec_neq_vec(refA, refB, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecNeqScalar") +fun jni_vec_neq_scalar(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdouble) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_neq_scalar(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecGtVec") +fun jni_vec_gt_vec(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>() + val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>()) + + vec_gt_vec(refA, refB, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecGtScalar") +fun jni_vec_gt_scalar(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdouble) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_gt_scalar(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecLtVec") +fun jni_vec_lt_vec(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>() + val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>()) + + vec_lt_vec(refA, refB, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0) + } +} + 
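/* Between GetPrimitiveArrayCritical and ReleasePrimitiveArrayCritical the thread must not call
   other JNI functions or block, which is why each bridge does nothing but the single vec_* call.
   Release mode 0 means "copy back if the VM handed out a copy, then free it"; a read-only caller
   could pass JNI_ABORT to skip the copy-back. Sketch of that variant (not used by this library,
   since every kernel here mutates the array in place):

   // env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, JNI_ABORT)
*/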
@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecLtScalar") +fun jni_vec_lt_scalar(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdouble) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_lt_scalar(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} diff --git a/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/math.kt b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/math.kt new file mode 100644 index 0000000..e672b3d --- /dev/null +++ b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/math.kt @@ -0,0 +1,125 @@ +package com.martmists.ndarray.simd.jni + +import jni.* +import kotlinx.cinterop.* +import simd.* + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecSqrt") +fun jni_vec_sqrt(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_sqrt(refA, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecPow") +fun jni_vec_pow(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdouble) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_pow(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_veciPow") +fun jni_veci_pow(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdouble) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_ipow(refA, b, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecExp") +fun jni_vec_exp(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_exp(refA, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecLog") +fun jni_vec_log(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>() + val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>()) + + vec_log(refA, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + 
diff --git a/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/math.kt b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/math.kt
new file mode 100644
index 0000000..e672b3d
--- /dev/null
+++ b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/math.kt
@@ -0,0 +1,125 @@
+package com.martmists.ndarray.simd.jni
+
+import jni.*
+import kotlinx.cinterop.*
+import simd.*
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecSqrt")
+fun jni_vec_sqrt(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_sqrt(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecPow")
+fun jni_vec_pow(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdouble) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_pow(refA, b, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_veciPow")
+fun jni_veci_pow(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdouble) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_ipow(refA, b, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecExp")
+fun jni_vec_exp(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_exp(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecLog")
+fun jni_vec_log(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_log(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecLogBase")
+fun jni_vec_log_base(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdouble) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_logbase(refA, b, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecExpm1")
+fun jni_vec_expm1(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_expm1(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecLog1p")
+fun jni_vec_log1p(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_log1p(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecLog2")
+fun jni_vec_log2(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_log2(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecLog10")
+fun jni_vec_log10(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_log10(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
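All of these wrappers compute the element pointer by hand from rawValue plus a byte offset. Since arrA is a CPointer<DoubleVar>, kotlinx.cinterop's typed pointer arithmetic should express the same thing more directly; a sketch:

    // Equivalent offset computation using typed pointer arithmetic (sketch):
    val refA: CPointer<DoubleVar>? = arrA + aOffset.toLong()  // plus() scales by sizeOf<DoubleVar>()

Both forms yield the address of a[aOffset]; the rawValue form merely spells out the byte arithmetic explicitly.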
diff --git a/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/misc.kt b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/misc.kt
new file mode 100644
index 0000000..3f98b26
--- /dev/null
+++ b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/misc.kt
@@ -0,0 +1,28 @@
+package com.martmists.ndarray.simd.jni
+
+import jni.JNIEnvVar
+import jni.jdoubleArray
+import jni.jint
+import jni.jobject
+import kotlinx.cinterop.*
+import simd.*
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecCopy")
+fun jni_vec_copy(env: CPointer<JNIEnvVar>, thisObject: jobject, dest: jdoubleArray, destOffset: jint, destSize: jint, src: jdoubleArray, srcOffset: jint) {
+    memScoped {
+        val arrSrc = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, src, null)!!.reinterpret<DoubleVar>()
+        val refSrc = interpretCPointer<DoubleVar>(arrSrc.rawValue + srcOffset * sizeOf<DoubleVar>())
+        val arrDest = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, dest, null)!!.reinterpret<DoubleVar>()
+        val refDest = interpretCPointer<DoubleVar>(arrDest.rawValue + destOffset * sizeOf<DoubleVar>())
+
+        vec_copy(refSrc, refDest, destSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, src, arrSrc, 0)
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, dest, arrDest, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_getSimdSize")
+fun jni_get_simd_size(env: CPointer<JNIEnvVar>, thisObject: jobject): jint {
+    return get_simd_size().convert()
+}
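getSimdSize exposes the native vector width so the JVM side can decide when a JNI round-trip is worth it. A hypothetical caller (threshold and names assumed, not part of this diff):

    // Only cross into native code for arrays spanning several SIMD registers.
    val lanes = NativeSpeedup.getSimdSize()
    val useNative = size >= lanes * 4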
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecStd") +fun jni_vec_std(env: CPointer, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint): Double { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret() + val refA = interpretCPointer(arrA.rawValue + aOffset * sizeOf()) + + val result = vec_std(refA, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + + return result + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecCoerce") +fun jni_vec_coerce(env: CPointer, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, min: jdouble, max: jdouble) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret() + val refA = interpretCPointer(arrA.rawValue + aOffset * sizeOf()) + + val result = vec_coerce(refA, aSize, min, max) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + + return result + } +} diff --git a/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/trigonometry.kt b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/trigonometry.kt new file mode 100644 index 0000000..0c3b2b4 --- /dev/null +++ b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/trigonometry.kt @@ -0,0 +1,179 @@ +package com.martmists.ndarray.simd.jni + +import jni.* +import kotlinx.cinterop.* +import simd.* + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecSin") +fun jni_vec_sin(env: CPointer, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret() + val refA = interpretCPointer(arrA.rawValue + aOffset * sizeOf()) + + vec_sin(refA, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecCos") +fun jni_vec_cos(env: CPointer, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret() + val refA = interpretCPointer(arrA.rawValue + aOffset * sizeOf()) + + vec_cos(refA, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecTan") +fun jni_vec_tan(env: CPointer, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret() + val refA = interpretCPointer(arrA.rawValue + aOffset * sizeOf()) + + vec_tan(refA, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecAsin") +fun jni_vec_asin(env: CPointer, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret() + val refA = interpretCPointer(arrA.rawValue + aOffset * sizeOf()) + + vec_asin(refA, aSize) + + env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0) + } +} + +@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecAcos") +fun jni_vec_acos(env: CPointer, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) { + memScoped { + val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, 
diff --git a/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/trigonometry.kt b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/trigonometry.kt
new file mode 100644
index 0000000..0c3b2b4
--- /dev/null
+++ b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/trigonometry.kt
@@ -0,0 +1,179 @@
+package com.martmists.ndarray.simd.jni
+
+import jni.*
+import kotlinx.cinterop.*
+import simd.*
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecSin")
+fun jni_vec_sin(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_sin(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecCos")
+fun jni_vec_cos(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_cos(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecTan")
+fun jni_vec_tan(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_tan(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecAsin")
+fun jni_vec_asin(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_asin(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecAcos")
+fun jni_vec_acos(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_acos(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecAtan")
+fun jni_vec_atan(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_atan(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecAtan2")
+fun jni_vec_atan2(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+        val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>()
+        val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>())
+
+        vec_atan2(refA, refB, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecSinh")
+fun jni_vec_sinh(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_sinh(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecCosh")
+fun jni_vec_cosh(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_cosh(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecTanh")
+fun jni_vec_tanh(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_tanh(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecAsinh")
+fun jni_vec_asinh(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_asinh(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecAcosh")
+fun jni_vec_acosh(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_acosh(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecAtanh")
+fun jni_vec_atanh(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+
+        vec_atanh(refA, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecHypot")
+fun jni_vec_hypot(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint) {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+        val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>()
+        val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>())
+
+        vec_hypot(refA, refB, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0)
+    }
+}
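vecAtan2 and vecHypot consume two arrays but take only aSize; nothing on the native side checks bounds, so the caller must guarantee that b holds at least aSize elements past bOffset. A hypothetical JVM-side call (names assumed):

    // y is overwritten in place with atan2(y[i], x[i]) for i in 0 until n.
    NativeSpeedup.vecAtan2(y, 0, n, x, 0)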
diff --git a/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/vector.kt b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/vector.kt
new file mode 100644
index 0000000..aa2be26
--- /dev/null
+++ b/src/nativeMain/kotlin/com/martmists/ndarray/simd/jni/vector.kt
@@ -0,0 +1,43 @@
+package com.martmists.ndarray.simd.jni
+
+import jni.*
+import kotlinx.cinterop.*
+import simd.*
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecDot")
+fun jni_vec_dot(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint): jdouble {
+    memScoped {
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+        val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>()
+        val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>())
+
+        val res = vec_dot(refA, refB, aSize)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0)
+
+        return res
+    }
+}
+
+@CName("Java_com_martmists_ndarray_simd_NativeSpeedup_vecMatMul")
+fun jni_vec_mat_mul(env: CPointer<JNIEnvVar>, thisObject: jobject, a: jdoubleArray, aOffset: jint, aSize: jint, b: jdoubleArray, bOffset: jint, m: jint, n: jint, p: jint): jdoubleArray? {
+    memScoped {
+        // Allocate the result before pinning: JNI calls must not happen inside a critical region.
+        val c = env.pointed.pointed!!.NewDoubleArray!!.invoke(env, m * p)
+        val arrA = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, a, null)!!.reinterpret<DoubleVar>()
+        val refA = interpretCPointer<DoubleVar>(arrA.rawValue + aOffset * sizeOf<DoubleVar>())
+        val arrB = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, b, null)!!.reinterpret<DoubleVar>()
+        val refB = interpretCPointer<DoubleVar>(arrB.rawValue + bOffset * sizeOf<DoubleVar>())
+        val arrC = env.pointed.pointed!!.GetPrimitiveArrayCritical!!.invoke(env, c, null)!!.reinterpret<DoubleVar>()
+
+        vec_matmul(refA, refB, arrC, m, n, p)
+
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, a, arrA, 0)
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, b, arrB, 0)
+        env.pointed.pointed!!.ReleasePrimitiveArrayCritical!!.invoke(env, c, arrC, 0)
+
+        return c
+    }
+}
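vecMatMul is the one entry point that allocates its own result: an m*p array created with NewDoubleArray before the inputs are pinned. A hypothetical JVM-side call, assuming row-major flattening as the symbol's parameter order suggests:

    // (m x n) * (n x p) -> (m x p), all matrices flattened row-major.
    val c: DoubleArray = NativeSpeedup.vecMatMul(a, 0, a.size, b, 0, m, n, p)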