From 58841a0f02fee18ad989787c7b4a7d8f931e8486 Mon Sep 17 00:00:00 2001 From: Patrick Longa Date: Tue, 25 May 2021 21:46:27 -0700 Subject: [PATCH] Add SIKE software --- Python_script/script_security45nm.py | 1147 ++++++ README.md | 15 +- SIKE_sw/Makefile | 263 ++ SIKE_sw/README.md | 57 + SIKE_sw/Visual Studio/SIDH/SIDH.sln | 293 ++ SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj | 590 +++ .../SIDH/SIDHp377.vcxproj.filters | 81 + SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj | 587 +++ .../SIDH/SIDHp434.vcxproj.filters | 81 + SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj | 598 ++++ .../SIDH/SIDHp503.vcxproj.filters | 81 + SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj | 584 +++ .../SIDH/SIDHp546.vcxproj.filters | 81 + SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj | 577 +++ .../SIDH/SIDHp610.vcxproj.filters | 81 + SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj | 572 +++ .../SIDH/SIDHp697.vcxproj.filters | 81 + SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj | 598 ++++ .../SIDH/SIDHp751.vcxproj.filters | 81 + .../arith_tests/arith_tests-P377.vcxproj | 432 +++ .../arith_tests-P377.vcxproj.filters | 30 + .../arith_tests/arith_tests-P434.vcxproj | 432 +++ .../arith_tests-P434.vcxproj.filters | 30 + .../arith_tests/arith_tests-P503.vcxproj | 432 +++ .../arith_tests-P503.vcxproj.filters | 30 + .../arith_tests/arith_tests-P546.vcxproj | 432 +++ .../arith_tests-P546.vcxproj.filters | 30 + .../arith_tests/arith_tests-P610.vcxproj | 432 +++ .../arith_tests-P610.vcxproj.filters | 30 + .../arith_tests/arith_tests-P697.vcxproj | 432 +++ .../arith_tests-P697.vcxproj.filters | 30 + .../arith_tests/arith_tests-P751.vcxproj | 432 +++ .../arith_tests-P751.vcxproj.filters | 30 + .../kem_tests/test-SIKEp377.vcxproj | 487 +++ .../kem_tests/test-SIKEp377.vcxproj.filters | 33 + .../kem_tests/test-SIKEp434.vcxproj | 483 +++ .../kem_tests/test-SIKEp434.vcxproj.filters | 33 + .../kem_tests/test-SIKEp503.vcxproj | 483 +++ .../kem_tests/test-SIKEp503.vcxproj.filters | 33 + .../kem_tests/test-SIKEp546.vcxproj | 483 
+++ .../kem_tests/test-SIKEp546.vcxproj.filters | 33 + .../kem_tests/test-SIKEp610.vcxproj | 487 +++ .../kem_tests/test-SIKEp610.vcxproj.filters | 33 + .../kem_tests/test-SIKEp697.vcxproj | 487 +++ .../kem_tests/test-SIKEp697.vcxproj.filters | 33 + .../kem_tests/test-SIKEp751.vcxproj | 487 +++ .../kem_tests/test-SIKEp751.vcxproj.filters | 33 + SIKE_sw/src/P377/AMD64/fp_x64.c | 439 +++ SIKE_sw/src/P377/AMD64/fp_x64_asm.S | 747 ++++ SIKE_sw/src/P377/P377.c | 114 + SIKE_sw/src/P377/P377_api.h | 112 + SIKE_sw/src/P377/P377_internal.h | 165 + SIKE_sw/src/P377/generic/fp_generic.c | 259 ++ SIKE_sw/src/P434/AMD64/fp_x64.c | 491 +++ SIKE_sw/src/P434/AMD64/fp_x64_asm.S | 1024 ++++++ SIKE_sw/src/P434/P434.c | 133 + SIKE_sw/src/P434/P434_api.h | 112 + SIKE_sw/src/P434/P434_internal.h | 175 + SIKE_sw/src/P434/generic/fp_generic.c | 259 ++ SIKE_sw/src/P503/AMD64/fp_x64.c | 572 +++ SIKE_sw/src/P503/AMD64/fp_x64_asm.S | 1824 ++++++++++ SIKE_sw/src/P503/P503.c | 138 + SIKE_sw/src/P503/P503_api.h | 112 + SIKE_sw/src/P503/P503_internal.h | 175 + SIKE_sw/src/P503/generic/fp_generic.c | 259 ++ SIKE_sw/src/P546/AMD64/fp_x64.c | 634 ++++ SIKE_sw/src/P546/AMD64/fp_x64_asm.S | 1353 +++++++ SIKE_sw/src/P546/P546.c | 135 + SIKE_sw/src/P546/P546_api.h | 112 + SIKE_sw/src/P546/P546_internal.h | 175 + SIKE_sw/src/P546/generic/fp_generic.c | 259 ++ SIKE_sw/src/P610/AMD64/fp_x64.c | 729 ++++ SIKE_sw/src/P610/AMD64/fp_x64_asm.S | 1314 +++++++ SIKE_sw/src/P610/P610.c | 140 + SIKE_sw/src/P610/P610_api.h | 112 + SIKE_sw/src/P610/P610_internal.h | 174 + SIKE_sw/src/P610/generic/fp_generic.c | 259 ++ SIKE_sw/src/P697/AMD64/fp_x64.c | 802 +++++ SIKE_sw/src/P697/AMD64/fp_x64_asm.S | 1681 +++++++++ SIKE_sw/src/P697/P697.c | 139 + SIKE_sw/src/P697/P697_api.h | 112 + SIKE_sw/src/P697/P697_internal.h | 175 + SIKE_sw/src/P697/generic/fp_generic.c | 259 ++ SIKE_sw/src/P751/AMD64/fp_x64.c | 910 +++++ SIKE_sw/src/P751/AMD64/fp_x64_asm.S | 3147 +++++++++++++++++ SIKE_sw/src/P751/P751.c | 142 + 
SIKE_sw/src/P751/P751_api.h | 112 + SIKE_sw/src/P751/P751_internal.h | 175 + SIKE_sw/src/P751/generic/fp_generic.c | 259 ++ SIKE_sw/src/config.h | 271 ++ SIKE_sw/src/ec_isogeny.c | 416 +++ SIKE_sw/src/fpx.c | 1103 ++++++ SIKE_sw/src/internal.h | 116 + SIKE_sw/src/random/random.c | 61 + SIKE_sw/src/random/random.h | 9 + SIKE_sw/src/sha3/fips202.c | 573 +++ SIKE_sw/src/sha3/fips202.h | 27 + SIKE_sw/src/sidh.c | 263 ++ SIKE_sw/src/sike.c | 98 + SIKE_sw/tests/arith_tests-p377.c | 616 ++++ SIKE_sw/tests/arith_tests-p434.c | 616 ++++ SIKE_sw/tests/arith_tests-p503.c | 616 ++++ SIKE_sw/tests/arith_tests-p546.c | 616 ++++ SIKE_sw/tests/arith_tests-p610.c | 617 ++++ SIKE_sw/tests/arith_tests-p697.c | 616 ++++ SIKE_sw/tests/arith_tests-p751.c | 617 ++++ SIKE_sw/tests/test_SIKEp377.c | 17 + SIKE_sw/tests/test_SIKEp434.c | 19 + SIKE_sw/tests/test_SIKEp503.c | 19 + SIKE_sw/tests/test_SIKEp546.c | 17 + SIKE_sw/tests/test_SIKEp610.c | 19 + SIKE_sw/tests/test_SIKEp697.c | 17 + SIKE_sw/tests/test_SIKEp751.c | 19 + SIKE_sw/tests/test_extras.c | 283 ++ SIKE_sw/tests/test_extras.h | 76 + SIKE_sw/tests/test_sike.c | 132 + .../Montgomery_multiplier_add.v | 0 .../Montgomery_multiplier_sub.v | 0 .../Montgomery_multiplier_tb/.gitignore | 0 .../Montgomery_multiplier_tb/Makefile | 0 .../Montgomery_multiplier_tb.v | 0 .../gen_test_add.sage | 0 .../gen_test_sub.sage | 0 .../README | 0 .../Vivado/.gitignore | 0 .../Vivado/Makefile | 0 .../Vivado/board.tcl | 0 .../Vivado/board.xdc | 0 .../Vivado/program.tcl | 0 .../Vivado/proj.src | 0 .../multiplier.v | 0 .../step_add.v | 0 .../step_sub.v | 0 {src => SIKE_vOW_hw-sw}/hardware/README | 0 .../hardware/controller_eval_4_isog/README | 0 .../controller_eval_4_isog/Vivado/.gitignore | 0 .../controller_eval_4_isog/Vivado/Makefile | 0 .../Vivado/batch-synth.sh | 0 .../controller_eval_4_isog/Vivado/board.tcl | 0 .../controller_eval_4_isog/Vivado/board.xdc | 0 .../controller_eval_4_isog/Vivado/params.mk | 0 .../controller_eval_4_isog/Vivado/program.tcl | 0 
.../controller_eval_4_isog/Vivado/proj.src | 0 .../controller_eval_4_isog/controller.v | 0 .../controller_tb/.gitignore | 0 .../controller_tb/Makefile | 0 .../controller_tb/batch-sim.sh | 0 .../controller_tb/controller_tb.v | 0 .../controller_tb/gen_test.sage | 0 .../controller_eval_4_isog/eval_4_isog_FSM.v | 0 .../single_to_double_memory_wrapper.v | 0 .../hardware/controller_get_4_isog/README | 0 .../controller_get_4_isog/Vivado/.gitignore | 0 .../controller_get_4_isog/Vivado/Makefile | 0 .../Vivado/batch-synth.sh | 0 .../controller_get_4_isog/Vivado/board.tcl | 0 .../controller_get_4_isog/Vivado/board.xdc | 0 .../controller_get_4_isog/Vivado/params.mk | 0 .../controller_get_4_isog/Vivado/program.tcl | 0 .../controller_get_4_isog/Vivado/proj.src | 0 .../controller_get_4_isog/controller.v | 0 .../controller_tb/.gitignore | 0 .../controller_tb/Makefile | 0 .../controller_tb/batch-sim.sh | 0 .../controller_tb/controller_tb.v | 0 .../controller_tb/gen_test.sage | 0 .../double_to_single_memory_wrapper.v | 0 .../controller_get_4_isog/get_4_isog_FSM.v | 0 .../hardware/controller_xADD/README | 0 .../hardware/controller_xADD/controller.v | 0 .../controller_xADD/controller_tb/Makefile | 0 .../controller_tb/batch-sim.sh | 0 .../controller_tb/controller_tb.v | 0 .../controller_tb/gen_test.sage | 0 .../double_to_single_memory_wrapper.v | 0 .../hardware/controller_xADD/xADD_FSM.v | 0 .../hardware/controller_xDBL/README | 0 .../controller_xDBL/Vivado/.gitignore | 0 .../hardware/controller_xDBL/Vivado/Makefile | 0 .../controller_xDBL/Vivado/batch-synth.sh | 0 .../hardware/controller_xDBL/Vivado/board.tcl | 0 .../hardware/controller_xDBL/Vivado/board.xdc | 0 .../hardware/controller_xDBL/Vivado/params.mk | 0 .../controller_xDBL/Vivado/program.tcl | 0 .../hardware/controller_xDBL/Vivado/proj.src | 0 .../hardware/controller_xDBL/controller.v | 0 .../controller_xDBL/controller_tb/.gitignore | 0 .../controller_xDBL/controller_tb/Makefile | 0 .../controller_tb/batch-sim.sh | 0 
.../controller_tb/controller_tb.v | 0 .../controller_tb/gen_test.sage | 0 .../double_to_single_memory_wrapper.v | 0 .../hardware/controller_xDBL/xDBL_FSM.v | 0 .../README | 0 .../Vivado/.gitignore | 0 .../Vivado/Makefile | 0 .../Vivado/batch-synth.sh | 0 .../Vivado/board.tcl | 0 .../Vivado/board.xdc | 0 .../Vivado/program.tcl | 0 .../Vivado/proj.src | 0 .../controller.v | 0 .../controller_tb/.gitignore | 0 .../controller_tb/Makefile | 0 .../controller_tb/batch-sim.sh | 0 .../controller_tb/controller_tb.v | 0 .../single_to_double_memory_wrapper.v | 0 .../fp2_mont_mul_one_cycle_pipeline/README | 0 .../Vivado/.gitignore | 0 .../Vivado/Makefile | 0 .../Vivado/batch-synth.sh | 0 .../Vivado/board.tcl | 0 .../Vivado/board.xdc | 0 .../Vivado/params.mk | 0 .../Vivado/program.tcl | 0 .../Vivado/proj.src | 0 .../fp2_mont_mul.v | 0 .../fp2_mont_mul_tb/.gitignore | 0 .../fp2_mont_mul_tb/Makefile | 0 .../fp2_mont_mul_tb/batch-sim.sh | 0 .../fp2_mont_mul_tb/fp2_mont_mul_tb.v | 0 .../fp2_mont_mul_tb/gen_input.sage | 0 .../fp2_sub_add_correction/.gitignore | 0 .../hardware/fp2_sub_add_correction/README | 0 .../fp2_sub_add_correction/Vivado/.gitignore | 0 .../fp2_sub_add_correction/Vivado/Makefile | 0 .../Vivado/batch-synth.sh | 0 .../fp2_sub_add_correction/Vivado/board.tcl | 0 .../fp2_sub_add_correction/Vivado/board.xdc | 0 .../fp2_sub_add_correction/Vivado/params.mk | 0 .../fp2_sub_add_correction/Vivado/program.tcl | 0 .../fp2_sub_add_correction/Vivado/proj.src | 0 .../fp2_sub_add_correction.v | 0 .../fp2_sub_add_correction_tb/.gitignore | 0 .../fp2_sub_add_correction_tb/Makefile | 0 .../fp2_sub_add_correction_tb/batch-sim.sh | 0 .../fp2_sub_add_correction_tb.v | 0 .../fp2_sub_add_correction_tb/gen_test.sage | 0 .../hardware/fp2_sub_add_correction/gen.mk | 0 .../gen_serial_comparator.py | 0 .../hardware/fp_sub_and_add/README | 0 .../fp_sub_and_add/fp_add_and_compare.v | 0 .../hardware/fp_sub_and_add/fp_adder.v | 0 .../fp_sub_and_add/fp_sub_and_compare.v | 0 
.../hardware/fp_sub_and_add/gen.mk | 0 .../fp_sub_and_add/gen_serial_comparator.py | 0 .../hardware/fp_sub_and_add/unit_adder.v | 0 .../hardware/top_controller/README | 0 .../hardware/top_controller/Vivado/.gitignore | 0 .../hardware/top_controller/Vivado/Makefile | 0 .../top_controller/Vivado/batch-synth.sh | 0 .../hardware/top_controller/Vivado/board.tcl | 0 .../hardware/top_controller/Vivado/board.xdc | 0 .../hardware/top_controller/Vivado/gen.mk | 0 .../top_controller/Vivado/gen_p_mem.sage | 0 .../top_controller/Vivado/program.tcl | 0 .../hardware/top_controller/Vivado/proj.src | 0 .../top_controller/gen_mem_wrapper.py | 0 .../top_controller/opt/top_controller.v | 0 .../hardware/top_controller/tb/.gitignore | 0 .../hardware/top_controller/tb/Makefile | 0 .../hardware/top_controller/tb/batch-sim.sh | 0 .../hardware/top_controller/tb/gen_test.sage | 0 .../hardware/top_controller/tb/top_tb.v | 0 {src => SIKE_vOW_hw-sw}/hardware/util/clog2.v | 0 {src => SIKE_vOW_hw-sw}/hardware/util/delay.v | 0 .../hardware/util/single_port_mem.v | 0 {src => SIKE_vOW_hw-sw}/murax/README | 0 .../murax/software/README.md | 0 .../software/VexRiscvSocSoftware/README.md | 0 .../software/VexRiscvSocSoftware/libs/gpio.h | 0 .../software/VexRiscvSocSoftware/libs/hex.h | 0 .../VexRiscvSocSoftware/libs/interrupt.h | 0 .../VexRiscvSocSoftware/libs/prescaler.h | 0 .../software/VexRiscvSocSoftware/libs/timer.h | 0 .../software/VexRiscvSocSoftware/libs/uart.h | 0 .../software/VexRiscvSocSoftware/libs/vga.h | 0 .../projects/murax/hex/cmd.gbd | 0 .../projects/murax/hex/makefile | 0 .../projects/murax/hex/src/crt.S | 0 .../projects/murax/hex/src/main.c | 0 .../projects/murax/libs/linker.ld | 0 .../projects/murax/libs/makefile | 0 .../projects/murax/libs/murax.h | 0 .../projects/murax/libs/murax_hex.h | 0 285 files changed, 41593 insertions(+), 5 deletions(-) create mode 100644 Python_script/script_security45nm.py create mode 100644 SIKE_sw/Makefile create mode 100644 SIKE_sw/README.md create mode 
100644 SIKE_sw/Visual Studio/SIDH/SIDH.sln create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj.filters create mode 100644 SIKE_sw/Visual 
Studio/arith_tests/arith_tests-P751.vcxproj create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj.filters create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj.filters create mode 100644 SIKE_sw/src/P377/AMD64/fp_x64.c create mode 100644 SIKE_sw/src/P377/AMD64/fp_x64_asm.S create mode 100644 SIKE_sw/src/P377/P377.c create mode 100644 SIKE_sw/src/P377/P377_api.h create mode 100644 SIKE_sw/src/P377/P377_internal.h create mode 100644 SIKE_sw/src/P377/generic/fp_generic.c create mode 100644 SIKE_sw/src/P434/AMD64/fp_x64.c create mode 100644 SIKE_sw/src/P434/AMD64/fp_x64_asm.S create mode 100644 SIKE_sw/src/P434/P434.c create mode 100644 SIKE_sw/src/P434/P434_api.h create mode 100644 SIKE_sw/src/P434/P434_internal.h create mode 100644 SIKE_sw/src/P434/generic/fp_generic.c create mode 100644 SIKE_sw/src/P503/AMD64/fp_x64.c create mode 100644 SIKE_sw/src/P503/AMD64/fp_x64_asm.S create mode 100644 SIKE_sw/src/P503/P503.c create mode 100644 
SIKE_sw/src/P503/P503_api.h create mode 100644 SIKE_sw/src/P503/P503_internal.h create mode 100644 SIKE_sw/src/P503/generic/fp_generic.c create mode 100644 SIKE_sw/src/P546/AMD64/fp_x64.c create mode 100644 SIKE_sw/src/P546/AMD64/fp_x64_asm.S create mode 100644 SIKE_sw/src/P546/P546.c create mode 100644 SIKE_sw/src/P546/P546_api.h create mode 100644 SIKE_sw/src/P546/P546_internal.h create mode 100644 SIKE_sw/src/P546/generic/fp_generic.c create mode 100644 SIKE_sw/src/P610/AMD64/fp_x64.c create mode 100644 SIKE_sw/src/P610/AMD64/fp_x64_asm.S create mode 100644 SIKE_sw/src/P610/P610.c create mode 100644 SIKE_sw/src/P610/P610_api.h create mode 100644 SIKE_sw/src/P610/P610_internal.h create mode 100644 SIKE_sw/src/P610/generic/fp_generic.c create mode 100644 SIKE_sw/src/P697/AMD64/fp_x64.c create mode 100644 SIKE_sw/src/P697/AMD64/fp_x64_asm.S create mode 100644 SIKE_sw/src/P697/P697.c create mode 100644 SIKE_sw/src/P697/P697_api.h create mode 100644 SIKE_sw/src/P697/P697_internal.h create mode 100644 SIKE_sw/src/P697/generic/fp_generic.c create mode 100644 SIKE_sw/src/P751/AMD64/fp_x64.c create mode 100644 SIKE_sw/src/P751/AMD64/fp_x64_asm.S create mode 100644 SIKE_sw/src/P751/P751.c create mode 100644 SIKE_sw/src/P751/P751_api.h create mode 100644 SIKE_sw/src/P751/P751_internal.h create mode 100644 SIKE_sw/src/P751/generic/fp_generic.c create mode 100644 SIKE_sw/src/config.h create mode 100644 SIKE_sw/src/ec_isogeny.c create mode 100644 SIKE_sw/src/fpx.c create mode 100644 SIKE_sw/src/internal.h create mode 100644 SIKE_sw/src/random/random.c create mode 100644 SIKE_sw/src/random/random.h create mode 100644 SIKE_sw/src/sha3/fips202.c create mode 100644 SIKE_sw/src/sha3/fips202.h create mode 100644 SIKE_sw/src/sidh.c create mode 100644 SIKE_sw/src/sike.c create mode 100644 SIKE_sw/tests/arith_tests-p377.c create mode 100644 SIKE_sw/tests/arith_tests-p434.c create mode 100644 SIKE_sw/tests/arith_tests-p503.c create mode 100644 SIKE_sw/tests/arith_tests-p546.c create 
mode 100644 SIKE_sw/tests/arith_tests-p610.c create mode 100644 SIKE_sw/tests/arith_tests-p697.c create mode 100644 SIKE_sw/tests/arith_tests-p751.c create mode 100644 SIKE_sw/tests/test_SIKEp377.c create mode 100644 SIKE_sw/tests/test_SIKEp434.c create mode 100644 SIKE_sw/tests/test_SIKEp503.c create mode 100644 SIKE_sw/tests/test_SIKEp546.c create mode 100644 SIKE_sw/tests/test_SIKEp610.c create mode 100644 SIKE_sw/tests/test_SIKEp697.c create mode 100644 SIKE_sw/tests/test_SIKEp751.c create mode 100644 SIKE_sw/tests/test_extras.c create mode 100644 SIKE_sw/tests/test_extras.h create mode 100644 SIKE_sw/tests/test_sike.c rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_add.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_sub.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Montgomery_multiplier_tb.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_add.sage (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_sub.sage (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/README (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.tcl (100%) rename {src => 
SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.xdc (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/program.tcl (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/proj.src (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/multiplier.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/step_add.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/step_sub.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/README (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/README (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/batch-synth.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/board.tcl (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/board.xdc (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/params.mk (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/program.tcl (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/proj.src (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/controller.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/controller_tb/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/controller_tb/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/controller_tb/batch-sim.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/controller_tb/controller_tb.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/controller_tb/gen_test.sage (100%) 
rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/eval_4_isog_FSM.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/single_to_double_memory_wrapper.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/README (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/batch-synth.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/board.tcl (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/board.xdc (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/params.mk (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/program.tcl (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/proj.src (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/controller.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/controller_tb/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/controller_tb/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/controller_tb/batch-sim.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/controller_tb/controller_tb.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/controller_tb/gen_test.sage (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/double_to_single_memory_wrapper.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/get_4_isog_FSM.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/README (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/controller.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/controller_tb/Makefile (100%) rename {src => 
SIKE_vOW_hw-sw}/hardware/controller_xADD/controller_tb/batch-sim.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/controller_tb/controller_tb.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/controller_tb/gen_test.sage (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/double_to_single_memory_wrapper.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/xADD_FSM.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/README (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/batch-synth.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/board.tcl (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/board.xdc (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/params.mk (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/program.tcl (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/proj.src (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/controller.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/controller_tb/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/controller_tb/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/controller_tb/batch-sim.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/controller_tb/controller_tb.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/controller_tb/gen_test.sage (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/double_to_single_memory_wrapper.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/xDBL_FSM.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/README (100%) rename {src => 
SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/batch-synth.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.tcl (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.xdc (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/program.tcl (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/proj.src (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/batch-sim.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/controller_tb.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/single_to_double_memory_wrapper.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/README (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/batch-synth.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.tcl (100%) rename {src => 
SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.xdc (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/params.mk (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/program.tcl (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/proj.src (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/batch-sim.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/fp2_mont_mul_tb.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/gen_input.sage (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/README (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/batch-synth.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/board.tcl (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/board.xdc (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/params.mk (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/program.tcl (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/proj.src (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/fp2_sub_add_correction.v (100%) rename {src => 
SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/batch-sim.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/fp2_sub_add_correction_tb.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/gen_test.sage (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/gen.mk (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/gen_serial_comparator.py (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp_sub_and_add/README (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp_sub_and_add/fp_add_and_compare.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp_sub_and_add/fp_adder.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp_sub_and_add/fp_sub_and_compare.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp_sub_and_add/gen.mk (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp_sub_and_add/gen_serial_comparator.py (100%) rename {src => SIKE_vOW_hw-sw}/hardware/fp_sub_and_add/unit_adder.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/README (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/batch-synth.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/board.tcl (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/board.xdc (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/gen.mk (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/gen_p_mem.sage (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/program.tcl (100%) rename {src => 
SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/proj.src (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/gen_mem_wrapper.py (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/opt/top_controller.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/tb/.gitignore (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/tb/Makefile (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/tb/batch-sim.sh (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/tb/gen_test.sage (100%) rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/tb/top_tb.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/util/clog2.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/util/delay.v (100%) rename {src => SIKE_vOW_hw-sw}/hardware/util/single_port_mem.v (100%) rename {src => SIKE_vOW_hw-sw}/murax/README (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/README.md (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/README.md (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/libs/gpio.h (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/libs/hex.h (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/libs/interrupt.h (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/libs/prescaler.h (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/libs/timer.h (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/libs/uart.h (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/libs/vga.h (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/hex/cmd.gbd (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/hex/makefile (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/crt.S (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/main.c 
(100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/libs/linker.ld (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/libs/makefile (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax.h (100%) rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax_hex.h (100%) diff --git a/Python_script/script_security45nm.py b/Python_script/script_security45nm.py new file mode 100644 index 0000000..75dd20a --- /dev/null +++ b/Python_script/script_security45nm.py @@ -0,0 +1,1147 @@ +##################################################################################################################### +# Python3 script to calculate security estimates using a budget-based cost model on ASICs +# Targeted primitives: SIKE, AES and SHA-3 +# Technology used by the hardware implementations used in the model: NanGate 45nm open-cell library +# +# The script produces all the figures and security estimates included in the paper: +# "The Cost to Break SIKE: A Comparative Hardware-Based Analysis with AES and SHA-3", +# Patrick Longa, Wen Wang, Jakub Szefer. CRYPTO 2021 +# https://eprint.iacr.org/2020/1457 +##################################################################################################################### + +import math +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.ticker import MultipleLocator + +# Assumptions and constants +NumberTransPerGate = 4 # Number of transistors per 2-NAND CMOS Gate Equivalent (GE) +SecondsPerYear = 3600*24*365 # Seconds in a year +MoneyOptions = [1e6, 10e6, 100e6, 1000e6, 10000e6, 100000e6, 1000000e6] # One million, ten million, hundred million, one billion, ten billion, hundred billion, one trillion (in US$) +titlefigure = "on" +dividepricebyfactor = "on" # Reduction factor applied to the transistor and memory release prices. 
+reductionpricefactor = 7.40 # This factor is obtained using the estimated transistor cost at production for year 2020 (reference: Khan and Mann (2020)) + # In contrast to release prices, the adjusted prices are expected to match more closely production costs in bulk. + +############################################################################################ +#### Historical prices of memory and transistors/gates (see paper for references) + +# Hard drive disk (HDD) cost US$, years 2000-2020 +CostHDD = [125.00, 259.00, 146.00, 89.99, 97.50, 130.00, 69.99, 99.99, 99.99, 69.99, 89.99, 54.99, 54.99, 54.99, 104.99, 84.99, 221.63, 99.99, 93.49, 149.99, 129.99] +# Hard drive disk (HDD) bytes, years 2000-2020 +BytesHDD = [3.07e10, 1e11, 1.2e11, 1.2e11, 1.6e11, 3.2e11, 3.2e11, 5.0e11, 1.0e12, 1.0e12, 2.0e12, 1.5e12, 1.5e12, 1.5e12, 3.0e12, 3.0e12, 8.0e12, 4.0e12, 4.0e12, 8.0e12, 8.0e12] + +# Dynamic random-access memory (RAM) cost US$, years 2000-2020 +CostDRAM = [89.00, 18.89, 34.19, 39.00, 39.00, 39.00, 148.99, 49.95, 39.99, 39.99, 39.99, 41.99, 29.99, 29.99, 29.99, 29.99, 44.99, 44.99, 44.99, 44.99, 44.99] +# Dynamic random-access memory (RAM) bytes, years 2000-2020 +BytesDRAM = [1.31e8, 1.31e8, 2.62e8, 5.24e8, 5.24e8, 5.24e8, 20.97e8, 20.97e8, 41.94e8, 41.94e8, 41.94e8, 83.89e8, 83.89e8, 83.89e8, 83.89e8, 83.89e8, 167.77e8, 167.77e8, 167.77e8, 167.77e8, 167.77e8] + +# Solid state drive (SSD) cost US$, years 2000-2020 +CostSSD = [None, None, None, None, None, None, None, None, None, None, None, None, None, 159.99, 179.99, 59.99, 194.99, 194.99, 49.99, 75.99, 75.99] +# Solid state drive (SSD) bytes, years 2000-2020 +BytesSSD = [None, None, None, None, None, None, None, None, None, None, None, None, None, 2.56e11, 4.80e11, 2.40e11, 9.60e11, 9.60e11, 4.80e11, 9.60e11, 9.60e11] + +# MPU cost US$ (Intel), years 2000-2020 +CostMPU_Intel = [112.0, 64.0, 33.0, 33.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 70.0, 42.0, 117.0, 122.0, 42.0, 42.0, None, None, None, None, None ] +# 
MPU cost US$ (AMD), years 2000-2020 +CostMPU_AMD = [None, None, None, None, None, None, None, None, None, None, None, 79.0, 71.0, 71.0, 101.0, 79.0, 58.0, 51.0, 51.0, 51.0, 60.0] +# Intel and AMD MPU costs US$, years 2000-2020 (corresponding to the lowest cost per transistor per year) +CostMPU = [112.0, 64.0, 33.0, 33.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 70.0, 79.0, 71.0, 71.0, 42.0, 42.0, 58.0, 51.0, 51.0, 51.0, 60.0] + +# MPU transistors (Intel), years 2000-2020 +TransMPU_Intel = [28.1e6, 28.1e6, 55e6, 55e6, 125e6, 125e6, 125e6, 125e6, 125e6, 125e6, 382e6, 624e6, 1400e6, 1400e6, 1400e6, 1400e6, None, None, None, None, None] +# MPU transistors (AMD), years 2000-2020 +TransMPU_AMD = [None, None, None, None, None, None, None, None, None, None, None, 1178e6, 1303e6, 1303e6, 2410e6, 2410e6, 3100e6, 3100e6, 3100e6, 3100e6, 4940e6] +# Intel and AMD MPU transistors, years 2000-2020 (corresponding to lowest cost per transistor per year) +TransMPU = [28.1e6, 28.1e6, 55e6, 55e6, 125e6, 125e6, 125e6, 125e6, 125e6, 125e6, 382e6, 1178e6, 1303e6, 1303e6, 1400e6, 1400e6, 3100e6, 3100e6, 3100e6, 3100e6, 4940e6] + +DollarsPerByte_HDD = [] +BytesPerDollar_HDD = [] +DollarsPerByte_DRAM = [] +BytesPerDollar_DRAM = [] +DollarsPerByte_SSD = [] +BytesPerDollar_SSD = [] +DollarsPerTrans_MPU = [] +TransPerDollar_MPU = [] +DollarsPerGate_MPU = [] +GatesPerDollar_MPU = [] +BytesPerGate = [] +for i in range(0,21): + DollarsPerByte_HDD.append(CostHDD[i]/BytesHDD[i]) + BytesPerDollar_HDD.append(1/DollarsPerByte_HDD[i]) + DollarsPerByte_DRAM.append(CostDRAM[i]/BytesDRAM[i]) + BytesPerDollar_DRAM.append(1/DollarsPerByte_DRAM[i]) + if CostSSD[i] == None: + DollarsPerByte_SSD.append(None) + BytesPerDollar_SSD.append(None) + else: + DollarsPerByte_SSD.append(CostSSD[i]/BytesSSD[i]) + BytesPerDollar_SSD.append(1/DollarsPerByte_SSD[i]) + DollarsPerTrans_MPU.append(CostMPU[i]/TransMPU[i]) + TransPerDollar_MPU.append(TransMPU[i]/CostMPU[i]) + 
DollarsPerGate_MPU.append((CostMPU[i]*NumberTransPerGate)/TransMPU[i]) + GatesPerDollar_MPU.append(1/DollarsPerGate_MPU[i]) + if dividepricebyfactor == 'on': + BytesPerDollar_HDD[i] *= reductionpricefactor + GatesPerDollar_MPU[i] *= reductionpricefactor + BytesPerDollar_DRAM[i] *= reductionpricefactor + if BytesPerDollar_SSD[i] != None: BytesPerDollar_SSD[i] *= reductionpricefactor + BytesPerGate.append(BytesPerDollar_HDD[i]/GatesPerDollar_MPU[i]) + +# Linley Group report (ITRS 2014) with costs of transistors, years 2002-2014 (every two years) +GatesPerDollar_Linley = [None, None, 2.6e6/NumberTransPerGate, None, 4.4e6/NumberTransPerGate, None, 7.3e6/NumberTransPerGate, None, 11.2e6/NumberTransPerGate, + None, 16.0e6/NumberTransPerGate, None, 20.0e6/NumberTransPerGate, None, 20.0e6/NumberTransPerGate, 19.0e6/NumberTransPerGate, None, None, None, None, None] + +# ITRS 2007 forecast for costs of transistors, years 2002-2014 (every two years) +GatesPerDollar_ITRS = [None, 1/(9.7e-7*NumberTransPerGate), 1/(6.9e-7*NumberTransPerGate), 1/(4.9e-7*NumberTransPerGate), 1/(3.4e-7*NumberTransPerGate), + 1/(2.44e-7*NumberTransPerGate), 1/(1.72e-7*NumberTransPerGate), 1/(1.22e-7*NumberTransPerGate), 1/(8.6e-8*NumberTransPerGate), + 1/(6.1e-8*NumberTransPerGate), 1/(4.3e-8*NumberTransPerGate), 1/(3.0e-8*NumberTransPerGate), 1/(2.2e-8*NumberTransPerGate), + 1/(1.5e-8*NumberTransPerGate), 1/(1.1e-8*NumberTransPerGate), 1/(7.6e-9*NumberTransPerGate), 1/(5.4e-9*NumberTransPerGate), + 1/(3.8e-9*NumberTransPerGate), 1/(2.7e-9*NumberTransPerGate), 1/(1.9e-9*NumberTransPerGate), 1/(1.3e-9*NumberTransPerGate)] + +print (BytesPerDollar_HDD) +print (BytesPerDollar_DRAM) +print (BytesPerDollar_SSD) +print (TransPerDollar_MPU) +print (GatesPerDollar_MPU) +print (BytesPerGate) + +######################################################################################################### +#### "Optimistic" projections for prices of memory and transistors/gates, years 2025-2040, every 5 years. 
+#### Based on a constant rate in cost reduction derived from data between years 2015 and 2020 +#### For memory (HDD): reduction factor = BytesPerDollar_SSD[20] / BytesPerDollar_SSD[15] +#### For gates (MPU): reduction factor = GatesPerDollar_MPU[20] / GatesPerDollar_MPU[15] + +memrate = BytesPerDollar_SSD[20] / BytesPerDollar_SSD[15] +transrate = GatesPerDollar_MPU[20] / GatesPerDollar_MPU[15] + +ProjBytesPerDollar_HDD = [BytesPerDollar_HDD[0], BytesPerDollar_HDD[5], BytesPerDollar_HDD[10], BytesPerDollar_HDD[15], BytesPerDollar_HDD[20], + BytesPerDollar_HDD[20]*memrate, BytesPerDollar_HDD[20]*memrate**2, BytesPerDollar_HDD[20]*memrate**3, BytesPerDollar_HDD[20]*memrate**4] + +ProjGatesPerDollar_MPU = [GatesPerDollar_MPU[0], GatesPerDollar_MPU[5], GatesPerDollar_MPU[10], GatesPerDollar_MPU[15], GatesPerDollar_MPU[20], + GatesPerDollar_MPU[20]*transrate, GatesPerDollar_MPU[20]*transrate**2, GatesPerDollar_MPU[20]*transrate**3, GatesPerDollar_MPU[20]*transrate**4] + +############################################################################################ +#### AES security estimator + +def AES_estimator(version, AESgates, AEStime, YearIndex, Money, BytesPerDollar_HDD, GatesPerDollar_MPU): + N=2**version # Number of AES operations (search space) + AESperYear=SecondsPerYear/AEStime # Number of AES operations per year per key-search engine + bytesIO=version/8 # Number of bytes to represent input and outputs + + p=Money*GatesPerDollar_MPU[YearIndex]/AESgates # Number of key-search engines I can buy + w=p*(2*bytesIO + bytesIO) # Required storage: two input buffers and one output buffer per engine + + if w*GatesPerDollar_MPU[YearIndex] > p*BytesPerDollar_HDD[YearIndex]*AESgates/8: # Check that cost of memory is relatively small + return 'failed', 0, 0, 0 + LogMemBytes = math.log2(w) + LogEngUnits = math.log2(p) + LogYears = math.log2(N/(p * AESperYear)) + + return 'passed', LogYears, LogMemBytes, LogEngUnits + 
+############################################################################################ +#### AES128 + +version = 128 # AES128 +AESgates = 11587 # Number of GEs occupied by Ueno et al.'s AES128 implementation +node = 45 # 45nm +NISTgates=2**15 # AES gate complexity according to NIST + +if version == 128: + AEStime = (13.97e-9 * 10/11) # InvThroughput of AES encryption implementation by Ueno et al. on 45nm +elif version == 192: + AEStime = (17.16e-9 * 12/13) +elif version == 256: + AEStime = (19.35e-9 * 14/15) + +print ("\nAES" +repr(version)) +print ("-------------------") +print ("\nSerial key-search (NIST):") +N=2**version +p=1 # Processor use +print ("N * AES in gates: 2 ^", math.log2((N * NISTgates) * p)) + +print ("\nParallel key-search, 45nm, based on Ueno et al.'s implementation:") +print ("Ueno et al. 2020 using 45nm: throughput of 13.97 * 10/11 = 12.7nsec/AES128 encryption, area of 11,587 GE") +print ("N * AES in seconds: 2 ^", math.log2(N * AEStime), "\n") + +YearsAES128 = [[None for i in range(21)] for j in range(7)] +MemBytesAES128 = [[None for i in range(21)] for j in range(7)] +EngUnitsAES128 = [[None for i in range(21)] for j in range(7)] + +for k in range(0, 7): + print ("AES128: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6)) + + for YearIndex in range(0, 21): + if CostMPU[YearIndex] != None: + test, LogYears, LogMemBytes, LogEngUnits = AES_estimator(version, AESgates, AEStime, YearIndex, MoneyOptions[k], BytesPerDollar_HDD, GatesPerDollar_MPU) + if test == 'passed': + YearsAES128[k][YearIndex] = LogYears + MemBytesAES128[k][YearIndex] = LogMemBytes + EngUnitsAES128[k][YearIndex] = LogEngUnits + else: + print ("ERROR: memory is not negligible") + print ("Log(years):", YearsAES128[k]); print ("Log(memory bytes):", MemBytesAES128[k]); print ("Log(engine units):", EngUnitsAES128[k], "\n") + +ProjYearsAES128 = [[None for i in range(9)] for j in range(7)] +ProjMemBytesAES128 = [[None for i in range(9)] 
for j in range(7)] +ProjEngUnitsAES128 = [[None for i in range(9)] for j in range(7)] + +for k in range(0, 7): + print ("AES128 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6)) + + for YearIndex in range(0, 9): + if CostMPU[YearIndex] != None: + test, LogYears, LogMemBytes, LogEngUnits = AES_estimator(version, AESgates, AEStime, YearIndex, MoneyOptions[k], ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU) + if test == 'passed': + ProjYearsAES128[k][YearIndex] = LogYears + ProjMemBytesAES128[k][YearIndex] = LogMemBytes + ProjEngUnitsAES128[k][YearIndex] = LogEngUnits + else: + print ("ERROR: memory is not negligible") + print ("Log(years):", ProjYearsAES128[k]); print ("Log(memory bytes):", ProjMemBytesAES128[k]); print ("Log(engine units):", ProjEngUnitsAES128[k], "\n") + +############################################################################################ +#### AES192 + +version = 192 # AES192 +AESgates = 13319 # Number of GEs occupied by Ueno et al.'s AES192 implementation +node = 45 # 45nm +NISTgates=2**15 # AES gate complexity according to NIST + +if version == 128: + AEStime = (13.97e-9 * 10/11) +elif version == 192: + AEStime = (17.16e-9 * 12/13) # InvThroughput of AES encryption implementation by Ueno et al. on 45nm +elif version == 256: + AEStime = (19.35e-9 * 14/15) + +print ("\nAES" +repr(version)) +print ("-------------------") +print ("\nSerial key-search (NIST):") +N=2**version +p=1 # Processor use +print ("N * AES in gates: 2 ^", math.log2((N * NISTgates) * p)) + +print ("\nParallel key-search, 45nm, based on Ueno et al.'s implementation:") +print ("Ueno et al. 
2020 using 45nm: throughput of 17.16 * 12/13 = 15.84nsec/AES192 encryption, area of 13,319 GE") +print ("N * AES in seconds: 2 ^", math.log2(N * AEStime), "\n") + +YearsAES192 = [[None for i in range(21)] for j in range(7)] +MemBytesAES192 = [[None for i in range(21)] for j in range(7)] +EngUnitsAES192 = [[None for i in range(21)] for j in range(7)] + +for k in range(0, 7): + print ("AES192: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6)) + + for YearIndex in range(0, 21): + if CostMPU[YearIndex] != None: + test, LogYears, LogMemBytes, LogEngUnits = AES_estimator(version, AESgates, AEStime, YearIndex, MoneyOptions[k], BytesPerDollar_HDD, GatesPerDollar_MPU) + if test == 'passed': + YearsAES192[k][YearIndex] = LogYears + MemBytesAES192[k][YearIndex] = LogMemBytes + EngUnitsAES192[k][YearIndex] = LogEngUnits + else: + print ("ERROR: memory is not negligible") + print ("Log(years):", YearsAES192[k]); print ("Log(memory bytes):", MemBytesAES192[k]); print ("Log(engine units):", EngUnitsAES192[k], "\n") + +ProjYearsAES192 = [[None for i in range(9)] for j in range(7)] +ProjMemBytesAES192 = [[None for i in range(9)] for j in range(7)] +ProjEngUnitsAES192 = [[None for i in range(9)] for j in range(7)] + +for k in range(0, 7): + print ("AES192 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6)) + + for YearIndex in range(0, 9): + if CostMPU[YearIndex] != None: + test, LogYears, LogMemBytes, LogEngUnits = AES_estimator(version, AESgates, AEStime, YearIndex, MoneyOptions[k], ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU) + if test == 'passed': + ProjYearsAES192[k][YearIndex] = LogYears + ProjMemBytesAES192[k][YearIndex] = LogMemBytes + ProjEngUnitsAES192[k][YearIndex] = LogEngUnits + else: + print ("ERROR: memory is not negligible") + print ("Log(years):", ProjYearsAES192[k]); print ("Log(memory bytes):", ProjMemBytesAES192[k]); print ("Log(engine units):", 
ProjEngUnitsAES192[k], "\n") + +############################################################################################ +#### AES256 + +version = 256 # AES256 +AESgates = 13974 # Number of GEs occupied by Ueno et al.'s AES256 implementation +node = 45 # 45nm +NISTgates=2**16 # AES gate complexity according to NIST + +if version == 128: + AEStime = (13.97e-9 * 10/11) +elif version == 192: + AEStime = (17.16e-9 * 12/13) +elif version == 256: + AEStime = (19.35e-9 * 14/15) # InvThroughput of AES encryption implementation by Ueno et al. on 45nm + +print ("\nAES" +repr(version)) +print ("-------------------") +print ("\nSerial key-search (NIST):") +N=2**version +p=1 # Processor use +print ("N * AES in gates: 2 ^", math.log2((N * NISTgates) * p)) + +print ("\nParallel key-search, 45nm, based on Ueno et al.'s implementation:") +print ("Ueno et al. 2020 using 45nm: throughput of 19.35 * 14/15 = 18.06nsec/AES256 encryption, area of 13,974 GE") +print ("N * AES in seconds: 2 ^", math.log2(N * AEStime), "\n") + +YearsAES256 = [[None for i in range(21)] for j in range(7)] +MemBytesAES256 = [[None for i in range(21)] for j in range(7)] +EngUnitsAES256 = [[None for i in range(21)] for j in range(7)] + +for k in range(0, 7): + print ("AES256: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6)) + + for YearIndex in range(0, 21): + if CostMPU[YearIndex] != None: + test, LogYears, LogMemBytes, LogEngUnits = AES_estimator(version, AESgates, AEStime, YearIndex, MoneyOptions[k], BytesPerDollar_HDD, GatesPerDollar_MPU) + if test == 'passed': + YearsAES256[k][YearIndex] = LogYears + MemBytesAES256[k][YearIndex] = LogMemBytes + EngUnitsAES256[k][YearIndex] = LogEngUnits + else: + print ("ERROR: memory is not negligible") + print ("Log(years):", YearsAES256[k]); print ("Log(memory bytes):", MemBytesAES256[k]); print ("Log(engine units):", EngUnitsAES256[k], "\n") + +ProjYearsAES256 = [[None for i in range(9)] for j in range(7)] 
+ProjMemBytesAES256 = [[None for i in range(9)] for j in range(7)] +ProjEngUnitsAES256 = [[None for i in range(9)] for j in range(7)] + +for k in range(0, 7): + print ("AES256 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6)) + + for YearIndex in range(0, 9): + if CostMPU[YearIndex] != None: + test, LogYears, LogMemBytes, LogEngUnits = AES_estimator(version, AESgates, AEStime, YearIndex, MoneyOptions[k], ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU) + if test == 'passed': + ProjYearsAES256[k][YearIndex] = LogYears + ProjMemBytesAES256[k][YearIndex] = LogMemBytes + ProjEngUnitsAES256[k][YearIndex] = LogEngUnits + else: + print ("ERROR: memory is not negligible") + print ("Log(years):", ProjYearsAES256[k]); print ("Log(memory bytes):", ProjMemBytesAES256[k]); print ("Log(engine units):", ProjEngUnitsAES256[k], "\n") + +############################################################################################ +#### SHA-3 security estimator + +def SHA3_estimator(version, SHA3gates, SHA3time, YearIndex, Money, p, top_zero_bits, BytesPerDollar_HDD, GatesPerDollar_MPU): + N=2**version # Number of SHA-3 operations (search space) + SHA3perYear=SecondsPerYear/SHA3time # Number of SHA-3 operations per year per collision-search engine + + theta = 2**-top_zero_bits + mem_unit=version/8 + (version/8 - math.floor(top_zero_bits/8)) + 6 # Bytes per memory unit + w=(Money - p * SHA3gates / GatesPerDollar_MPU[YearIndex]) * BytesPerDollar_HDD[YearIndex] / mem_unit # Number of memory units I can buy + LogYears = 0; LogMemUnits = 0; LogEngUnits = 0; SHA3inSeconds = 0 + if w > 0: + LogMemUnits = math.log2(w) + LogEngUnits = math.log2(p) + LogYears = math.log2((math.sqrt(math.pi*N/2)/p + 2.5/theta) * SHA3time/SecondsPerYear) + SHA3inSeconds = math.log2((math.sqrt(math.pi*N/2)/p + 2.5/theta) * SHA3time) + + return LogYears, LogMemUnits, mem_unit, LogEngUnits, SHA3inSeconds + 
+############################################################################################ +#### SHA3-256 + +version = 256 # SHA3-256 +SHA3gates = 10500 * 1.2 # Number of GEs occupied by Akin et al.'s implementation (SMH option), scaled to include initialization and absorb stages +SHA3time = (54.95e-9 * (45/90)**2)*1.5 # Latency of implementation (scaled to 45nm from 90nm, scaled to include initialization and absorb stages ) +node = 45 # 45nm + +print ("\nSHA3-" +repr(version)+ " on " +repr(node)+ "nm node") +print ("-------------------") +print ("Akin et al. implementation using 90nm: 54.95nsec/Keccak computation, 10.5KGE. Area and timing results are scaled to 45nm and SHA-3") + +MinYearsSHA3 = [[None for i in range(21)] for j in range(7)] +MemBytesSHA3 = [[None for i in range(21)] for j in range(7)] +EngUnitsSHA3 = [[None for i in range(21)] for j in range(7)] + +top_zero_bits = 74 #### NOTE: this can be tuned per option + +for k in range(0, 7): + print ("SHA3-256: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6)) + + for YearIndex in range(0, 21): + lock = 0 + if CostMPU[YearIndex] != None: + for i in range(10, 100): + for j in range(0, 10): + engines = 2**(i+j/10) + LogYears, LogMemUnits, mem_unit, LogEngUnits, SHA3inSeconds = SHA3_estimator(version, SHA3gates, SHA3time, YearIndex, MoneyOptions[k], engines, top_zero_bits, BytesPerDollar_HDD, GatesPerDollar_MPU) + if LogYears != 0: + if lock == 0: MinLogYears = LogYears; lock = 1 + if LogYears <= MinLogYears: MinLogYears = LogYears; MemBytes = math.log2(mem_unit*2**LogMemUnits); EngUnits = LogEngUnits; t = SHA3inSeconds + MinYearsSHA3[k][YearIndex] = MinLogYears + MemBytesSHA3[k][YearIndex] = MemBytes + EngUnitsSHA3[k][YearIndex] = EngUnits + #print ("(sqrt(Pi*N/2)/p + 2.5/theta) * SHA3-256 in seconds: 2 ^", t) + print ("Log(years):", MinYearsSHA3[k]); print ("Log(memory bytes):", MemBytesSHA3[k]); print ("Log(engine units):", EngUnitsSHA3[k], "\n") + 
+ProjMinYearsSHA3 = [[None for i in range(9)] for j in range(7)] +ProjMemBytesSHA3 = [[None for i in range(9)] for j in range(7)] +ProjEngUnitsSHA3 = [[None for i in range(9)] for j in range(7)] + +top_zero_bits = 77 #### NOTE: this can be tuned per option + +for k in range(0, 7): + print ("SHA3-256 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6)) + + for YearIndex in range(0, 9): + lock = 0 + for i in range(10, 100): + for j in range(0, 10): + engines = 2**(i+j/10) + LogYears, LogMemUnits, mem_unit, LogEngUnits, SHA3inSeconds = SHA3_estimator(version, SHA3gates, SHA3time, YearIndex, MoneyOptions[k], engines, top_zero_bits, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU) + if LogYears != 0: + if lock == 0: MinLogYears = LogYears; lock = 1 + if LogYears <= MinLogYears: MinLogYears = LogYears; MemBytes = math.log2(mem_unit*2**LogMemUnits); EngUnits = LogEngUnits; t = SHA3inSeconds + ProjMinYearsSHA3[k][YearIndex] = MinLogYears + ProjMemBytesSHA3[k][YearIndex] = MemBytes + ProjEngUnitsSHA3[k][YearIndex] = EngUnits + #print ("(sqrt(Pi*N/2)/p + 2.5/theta) * SHA3-256 in seconds: 2 ^", t) + print ("Log(years):", ProjMinYearsSHA3[k]); print ("Log(memory bytes):", ProjMemBytesSHA3[k]); print ("Log(engine units):", ProjEngUnitsSHA3[k], "\n") + +############################################################################################ +#### SHA3-384 + +version = 384 # SHA3-384 +SHA3gates = 10500 * 1.2 # Number of GEs occupied by Akin et al.'s implementation (SMH option), scaled to include initialization and absorb stages +SHA3time = (54.95e-9 * (45/90)**2)*1.5 # Latency of implementation (scaled to 45nm from 90nm, scaled to include initialization and absorb stages ) +node = 45 # 45nm + +print ("\nSHA3-" +repr(version)+ " on " +repr(node)+ "nm node") +print ("-------------------") +print ("Akin et al. implementation using 90nm: 54.95nsec/Keccak computation, 10.5KGE. 
Area and timing results are scaled to 45nm and SHA-3") + +MinYearsSHA3_384 = [[None for i in range(21)] for j in range(7)] +MemBytesSHA3_384 = [[None for i in range(21)] for j in range(7)] +EngUnitsSHA3_384 = [[None for i in range(21)] for j in range(7)] + +top_zero_bits = 74 #### NOTE: this can be tuned per option + +for k in range(0, 7): + print ("SHA3-384: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6)) + + for YearIndex in range(0, 21): + lock = 0 + if CostMPU[YearIndex] != None: + for i in range(10, 100): + for j in range(0, 10): + engines = 2**(i+j/10) + LogYears, LogMemUnits, mem_unit, LogEngUnits, SHA3inSeconds = SHA3_estimator(version, SHA3gates, SHA3time, YearIndex, MoneyOptions[k], engines, top_zero_bits, BytesPerDollar_HDD, GatesPerDollar_MPU) + if LogYears != 0: + if lock == 0: MinLogYears = LogYears; lock = 1 + if LogYears <= MinLogYears: MinLogYears = LogYears; MemBytes = math.log2(mem_unit*2**LogMemUnits); EngUnits = LogEngUnits; t = SHA3inSeconds + MinYearsSHA3_384[k][YearIndex] = MinLogYears + MemBytesSHA3_384[k][YearIndex] = MemBytes + EngUnitsSHA3_384[k][YearIndex] = EngUnits + #print ("(sqrt(Pi*N/2)/p + 2.5/theta) * SHA3-384 in seconds: 2 ^", t) + print ("Log(years):", MinYearsSHA3_384[k]); print ("Log(memory bytes):", MemBytesSHA3_384[k]); print ("Log(engine units):", EngUnitsSHA3_384[k], "\n") + +ProjMinYearsSHA3_384 = [[None for i in range(9)] for j in range(7)] +ProjMemBytesSHA3_384 = [[None for i in range(9)] for j in range(7)] +ProjEngUnitsSHA3_384 = [[None for i in range(9)] for j in range(7)] + +top_zero_bits = 77 #### NOTE: this can be tuned per option + +for k in range(0, 7): + print ("SHA3-384 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6)) + + for YearIndex in range(0, 9): + lock = 0 + for i in range(10, 100): + for j in range(0, 10): + engines = 2**(i+j/10) + LogYears, LogMemUnits, mem_unit, LogEngUnits, SHA3inSeconds = 
SHA3_estimator(version, SHA3gates, SHA3time, YearIndex, MoneyOptions[k], engines, top_zero_bits, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU) + if LogYears != 0: + if lock == 0: MinLogYears = LogYears; lock = 1 + if LogYears <= MinLogYears: MinLogYears = LogYears; MemBytes = math.log2(mem_unit*2**LogMemUnits); EngUnits = LogEngUnits; t = SHA3inSeconds + ProjMinYearsSHA3_384[k][YearIndex] = MinLogYears + ProjMemBytesSHA3_384[k][YearIndex] = MemBytes + ProjEngUnitsSHA3_384[k][YearIndex] = EngUnits + #print ("(sqrt(Pi*N/2)/p + 2.5/theta) * SHA3-384 in seconds: 2 ^", t) + print ("Log(years):", ProjMinYearsSHA3_384[k]); print ("Log(memory bytes):", ProjMemBytesSHA3_384[k]); print ("Log(engine units):", ProjEngUnitsSHA3_384[k], "\n") + +############################################################################################ +#### SIKE security estimator + +def SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, Money, memory, BytesPerDollar_HDD, GatesPerDollar_MPU): + SIKEtime45nm = SIKEtime # Latency of half-degree isogeny implementation by the proposed implementation on 45nm + t=SecondsPerYear/SIKEtime45nm # Number of half-degree isogeny operations per year per collision-search engine + + if version == 377: # Determine search space + if isogeny == 2: + e2 = 191 + N=2**((e2-1)/2) + else: + e3 = 117 + N=3**((e3-1)/2) + elif version == 434: + e2 = 216 + N=2**(e2/2-1) + elif version == 503: + e2 = 250 + N=2**(e2/2-1) + elif version == 546: + e2 = 273 + N=2**((e2-1)/2) + elif version == 610: + e2 = 305 + N=2**((e2-1)/2) + elif version == 697: + if isogeny == 2: + e2 = 356 + N=2**(e2/2-1) + else: + e3 = 215 + N=3**((e3-1)/2) + elif version == 751: + e2 = 372 + N=2**(e2/2-1) + + mem_unit=math.ceil((2*math.log2(N) + math.log2(20))/8); # Bytes per memory unit + w=memory/mem_unit # Memory units + p=(Money - (1/BytesPerDollar_HDD[YearIndex] * w * mem_unit))*GatesPerDollar_MPU[YearIndex]/SIKEgates # Number of engines I can buy + LogYears = 0; LogMemUnits = 0; 
LogEngUnits = 0; SIKEinSeconds = 0 + if p > 0: + LogMemUnits = math.log2(w) + LogEngUnits = math.log2(p) + LogYears = math.log2(2.5*math.sqrt(N**3/w)/(p * t)) + SIKEinSeconds = math.log2(2.5*math.sqrt(N**3/w) * SIKEtime45nm) + + return LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds + +############################################################################################ +#### SIKEp377, 2-isogeny attack + +version = 377 # SIKE377 +isogeny = 2 +SIKEgates = 341300 # Number of GEs occupied by the proposed implementation +SIKEtime = 2.347e-3 # Latency of half-degree isogeny implementation on 45nm +node = 45 # 45nm + +print ("\nSIKEp" +repr(version)+ " on " +repr(node)+ "nm node, using " +repr(isogeny)+ "-isogenies") +print ("-------------------") +print ("Proposed implementation using 45nm, radix = 32: 2.347msec/half-degree isogeny, area of 341,300 GE") + +MinYearsSIKEp377 = [[None for i in range(21)] for j in range(7)] +MemBytesSIKEp377 = [[None for i in range(21)] for j in range(7)] +EngUnitsSIKEp377 = [[None for i in range(21)] for j in range(7)] + +for k in range(0, 7): + print ("SIKEp377: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6)) + + for YearIndex in range(0, 21): + lock = 0 + if CostMPU[YearIndex] != None: + for i in range(10, 100): + for j in range(0, 10): + memory = 2**(i+j/10) + LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, BytesPerDollar_HDD, GatesPerDollar_MPU) + if LogYears != 0: + if lock == 0: MinLogYears = LogYears; lock = 1 + if LogYears <= MinLogYears: MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds + MinYearsSIKEp377[k][YearIndex] = MinLogYears + MemBytesSIKEp377[k][YearIndex] = MemBytes + EngUnitsSIKEp377[k][YearIndex] = EngUnits + #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t) + print ("Log(years):", MinYearsSIKEp377[k]); print ("Log(memory 
bytes):", MemBytesSIKEp377[k]); print ("Log(engine units):", EngUnitsSIKEp377[k], "\n")
+
+def _sike_security_sweep(version, isogeny, SIKEgates, SIKEtime, label, n_years, bytes_per_dollar, gates_per_dollar, cost_table=None, n_budgets=7):
+    """Grid-search the memory size that minimises Log(years) for every budget and year.
+
+    For each budget MoneyOptions[k] (k < n_budgets) and each year index, SIKE_estimator
+    is evaluated for memory = 2**(i + j/10) with i in [10, 100) and j in [0, 10).
+    Results whose LogYears equals 0 are ignored; among the others the smallest LogYears
+    is kept.
+
+    label:      text printed in front of ", budget (millions of dollars) = ..." per budget.
+    n_years:    number of year indices to evaluate (21 historical, 9 projection).
+    bytes_per_dollar, gates_per_dollar: price tables handed through to SIKE_estimator.
+    cost_table: optional per-year table; year indices whose entry is None are skipped.
+
+    Returns (min_years, mem_bytes, eng_units): three n_budgets x n_years tables with
+    LogYears, log2(memory) and LogEngUnits of the best point. An entry stays None when
+    the year was skipped or no result was accepted (the former inline loops stored the
+    values left over from the previous year in the latter case).
+    """
+    min_years = [[None for i in range(n_years)] for j in range(n_budgets)]
+    mem_bytes = [[None for i in range(n_years)] for j in range(n_budgets)]
+    eng_units = [[None for i in range(n_years)] for j in range(n_budgets)]
+
+    for k in range(0, n_budgets):
+        print (label + ", budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+        for YearIndex in range(0, n_years):
+            if cost_table is not None and cost_table[YearIndex] is None:
+                continue
+            best = None    # (LogYears, log2(memory), LogEngUnits) of the best point so far
+            for i in range(10, 100):
+                for j in range(0, 10):
+                    memory = 2**(i+j/10)
+                    LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, bytes_per_dollar, gates_per_dollar)
+                    # "<=" keeps the original tie-breaking: on equal LogYears the later (larger) memory wins
+                    if LogYears != 0 and (best is None or LogYears <= best[0]):
+                        best = (LogYears, math.log2(memory), LogEngUnits)
+            if best is not None:
+                min_years[k][YearIndex], mem_bytes[k][YearIndex], eng_units[k][YearIndex] = best
+        print ("Log(years):", min_years[k]); print ("Log(memory bytes):", mem_bytes[k]); print ("Log(engine units):", eng_units[k], "\n")
+
+    return min_years, mem_bytes, eng_units
+
+ProjMinYearsSIKEp377, ProjMemBytesSIKEp377, ProjEngUnitsSIKEp377 = _sike_security_sweep(version, isogeny, SIKEgates, SIKEtime, "SIKEp377 (projection): results every 5 years (2000-2040)", 9, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+
+############################################################################################
+#### SIKEp434
+
+version = 434           # SIKE434
+isogeny = 2
+SIKEgates = 372200      # Number of GEs occupied by the proposed implementation
+SIKEtime = 3.253e-3     # Latency of half-degree isogeny implementation on 45nm
+node = 45               # 45nm
+
+print ("\nSIKEp" +repr(version)+ " on " +repr(node)+ "nm node, using " +repr(isogeny)+ "-isogenies")
+print ("-------------------")
+print ("Proposed implementation using 45nm, radix = 32: 3.253msec/half-degree isogeny, area of 372,200 GE")
+
+MinYearsSIKEp434, MemBytesSIKEp434, EngUnitsSIKEp434 = _sike_security_sweep(version, isogeny, SIKEgates, SIKEtime, "SIKEp434: results per year (2000-2020)", 21, BytesPerDollar_HDD, GatesPerDollar_MPU, CostMPU)
+
+ProjMinYearsSIKEp434, ProjMemBytesSIKEp434, ProjEngUnitsSIKEp434 = _sike_security_sweep(version, isogeny, SIKEgates, SIKEtime, "SIKEp434 (projection): results every 5 years (2000-2040)", 9, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+
+############################################################################################
+#### SIKEp503
+
+version = 503           # SIKE503
+isogeny = 2
+SIKEgates = 409500      # Number of GEs occupied by the proposed implementation
+SIKEtime = 4.814e-3     # Latency of half-degree isogeny implementation on 45nm
+node = 45               # 45nm
+
+print ("\nSIKEp" +repr(version)+ " on " +repr(node)+ "nm node, using " +repr(isogeny)+ "-isogenies")
+print ("-------------------")
+print ("Proposed implementation using 45nm, radix = 32: 4.814msec/half-degree isogeny, area of 409,500 GE")
+
+MinYearsSIKEp503, MemBytesSIKEp503, EngUnitsSIKEp503 = _sike_security_sweep(version, isogeny, SIKEgates, SIKEtime, "SIKEp503: results per year (2000-2020)", 21, BytesPerDollar_HDD, GatesPerDollar_MPU, CostMPU)
+
+ProjMinYearsSIKEp503, ProjMemBytesSIKEp503, ProjEngUnitsSIKEp503 = _sike_security_sweep(version, isogeny, SIKEgates, SIKEtime, "SIKEp503 (projection): results every 5 years (2000-2040)", 9, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+
+############################################################################################
+#### SIKEp546
+
+version = 546           # SIKE546
+isogeny = 2
+SIKEgates = 441100      # Number of GEs occupied by the proposed implementation
+SIKEtime = 7.095e-3     # Latency of half-degree isogeny implementation on 45nm
+node = 45               # 45nm
+
+print ("\nSIKEp" +repr(version)+ " on " +repr(node)+ "nm node, using " +repr(isogeny)+ "-isogenies")
+print ("-------------------")
+print ("Proposed implementation using 45nm, radix = 32: 7.095msec/half-degree isogeny, area of 441,100 GE")
+
+MinYearsSIKEp546, MemBytesSIKEp546, EngUnitsSIKEp546 = _sike_security_sweep(version, isogeny, SIKEgates, SIKEtime, "SIKEp546: results per year (2000-2020)", 21, BytesPerDollar_HDD, GatesPerDollar_MPU, CostMPU)
+
+ProjMinYearsSIKEp546, ProjMemBytesSIKEp546, ProjEngUnitsSIKEp546 = _sike_security_sweep(version, isogeny, SIKEgates, SIKEtime, "SIKEp546 (projection): results every 5 years (2000-2040)", 9, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+
+############################################################################################
+#### SIKEp610
+
+version = 610           # SIKE610
+isogeny = 2
+SIKEgates = 748000      # Number of GEs occupied by the proposed implementation
+SIKEtime = 5.803e-3     # Latency of half-degree isogeny implementation on 45nm
+node = 45               # 45nm
+
+print ("\nSIKEp" +repr(version)+ " on " +repr(node)+ "nm node, using " +repr(isogeny)+ "-isogenies")
+print ("-------------------")
+print ("Proposed implementation using 45nm, radix = 64: 5.803msec/half-degree isogeny, area of 748,000 GE")
+
+MinYearsSIKEp610, MemBytesSIKEp610, EngUnitsSIKEp610 = _sike_security_sweep(version, isogeny, SIKEgates, SIKEtime, "SIKEp610: results per year (2000-2020)", 21, BytesPerDollar_HDD, GatesPerDollar_MPU, CostMPU)
+
+ProjMinYearsSIKEp610, ProjMemBytesSIKEp610, ProjEngUnitsSIKEp610 = _sike_security_sweep(version, isogeny, SIKEgates, SIKEtime, "SIKEp610 (projection): results every 5 years (2000-2040)", 9, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+
+############################################################################################
+#### SIKEp697
+
+version = 697           # SIKE697
+isogeny = 2
+SIKEgates = 798900      # Number of GEs occupied by the proposed implementation
+SIKEtime = 8.595e-3     # Latency of half-degree isogeny implementation on 45nm
+node = 45               # 45nm
+
+print ("\nSIKEp" +repr(version)+ " on " +repr(node)+ "nm node, using " +repr(isogeny)+ "-isogenies")
+print ("-------------------")
+print ("Proposed implementation using 45nm, radix = 64: 8.595msec/half-degree isogeny, area of 798,900 GE")
+
+MinYearsSIKEp697, MemBytesSIKEp697, EngUnitsSIKEp697 = _sike_security_sweep(version, isogeny, SIKEgates, SIKEtime, "SIKEp697: results per year (2000-2020)", 21, BytesPerDollar_HDD, GatesPerDollar_MPU, CostMPU)
+
+ProjMinYearsSIKEp697, ProjMemBytesSIKEp697, ProjEngUnitsSIKEp697 = _sike_security_sweep(version, isogeny, SIKEgates, SIKEtime, "SIKEp697 (projection): results every 5 years (2000-2040)", 9, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+
+############################################################################################
+#### SIKEp751
+
+version = 751           # SIKE751
+isogeny = 2
+SIKEgates = 822300      # Number of GEs occupied by the proposed implementation
+SIKEtime = 9.703e-3     # Latency of half-degree isogeny implementation on 45nm
+node = 45               # 45nm
+
+print ("\nSIKEp" +repr(version)+ " on " +repr(node)+ "nm node, using " +repr(isogeny)+ "-isogenies")
+print ("-------------------")
+print ("Proposed implementation using 45nm, radix = 64: 9.703msec/half-degree isogeny, area of 822,300 GE")
+
+MinYearsSIKEp751, MemBytesSIKEp751, EngUnitsSIKEp751 = _sike_security_sweep(version, isogeny, SIKEgates, SIKEtime, "SIKEp751: results per year (2000-2020)", 21, BytesPerDollar_HDD, GatesPerDollar_MPU, CostMPU)
+
+ProjMinYearsSIKEp751, ProjMemBytesSIKEp751, ProjEngUnitsSIKEp751 = _sike_security_sweep(version, isogeny, SIKEgates, SIKEtime, "SIKEp751 (projection): results every 5 years (2000-2040)", 9, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+
+############################################################################################
+#### Graph of security estimates (in years) using historical prices of memory (bytes) and
+#### computing resources (gates), years 2000-2020
+
+def grapher_historical(k):
+    """Plot, show and save the yearly (2000-2020) security estimates for budget index k.
+
+    One curve is drawn per primitive from row k of the result tables. The figure is
+    written to historical_estimates_<budget>million.png, where <budget> is
+    MoneyOptions[k] expressed in millions.
+    """
+    x = np.linspace(0, 20, 21)
+    y1, y2, y3, y4, y5 = YearsAES128[k], YearsAES192[k], YearsAES256[k], MinYearsSHA3[k], MinYearsSHA3_384[k]
+    y6, y7, y8, y9 = MinYearsSIKEp377[k], MinYearsSIKEp434[k], MinYearsSIKEp503[k], MinYearsSIKEp546[k]
+    y10, y11, y12 = MinYearsSIKEp610[k], MinYearsSIKEp697[k], MinYearsSIKEp751[k]
+
+    # Setting the figure size and resolution
+    fig, ax = plt.subplots(figsize=(10, 3), dpi=300)
+
+    # Minor ticks and grid; use the Axes returned by subplots() rather than plt.axes(),
+    # which may create a second Axes on top of it
+    ax.xaxis.set_minor_locator(MultipleLocator(1))
+    ax.yaxis.set_minor_locator(MultipleLocator(5))
+    plt.grid(color='gray', ls = '-.', lw = 0.25)
+
+    # Setting the color, linewidth, linestyle and legend
+    plt.plot(x, y1, color="crimson", linewidth=1.0, linestyle="-", label="AES128")
+    plt.plot(x, y2, color="crimson", linewidth=1.0, linestyle="--", label="AES192")
+    plt.plot(x, y3, color="crimson", linewidth=1.0, linestyle="-.", label="AES256")
+    plt.plot(x, y4, color="tab:brown", linewidth=1.0, linestyle="--", label="SHA3-256")
+    plt.plot(x, y5, color="tab:brown", linewidth=1.0, linestyle="-.", label="SHA3-384")
+    plt.plot(x, y6, color="royalblue", linewidth=1.0, linestyle="-", label="SIKEp377")
+    plt.plot(x, y7, color="royalblue", linewidth=1.0, linestyle="--", label="SIKEp434")
+    plt.plot(x, y8, color="royalblue", linewidth=1.0, linestyle="-.", label="SIKEp503")
+    plt.plot(x, y9, color="royalblue", linewidth=1.0, linestyle=(0, (5, 1)), label="SIKEp546")
+    plt.plot(x, y10, color="royalblue", linewidth=1.0, linestyle=(0, (5, 10)), label="SIKEp610")
+    plt.plot(x, y11, color="royalblue", linewidth=1.0, linestyle=(0, (5, 5)), label="SIKEp697")
+    plt.plot(x, y12, color="royalblue", linewidth=1.0, linestyle=(0, (1, 5)), label="SIKEp751")
+    leg = plt.legend(loc='upper right', prop={'size': 6}, frameon=True)
+    plt.draw() # Draw the figure so you can find the position of the legend
+
+    # Get the bounding box of the original legend in axes coordinates
+    # (transformed(inverted()) replaces the removed BboxBase.inverse_transformed())
+    bb = leg.get_bbox_to_anchor().transformed(ax.transAxes.inverted())
+
+    # Change to location of the legend
+    xOffset = -0.01
+    bb.x0 += xOffset
+    bb.x1 += xOffset
+    leg.set_bbox_to_anchor(bb, transform = ax.transAxes)
+
+    # Use Latex to set tick labels
+    plt.xticks([0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20], [r'$2000$', r'$2002$', r'$2004$', r'$2006$', r'$2008$', r'$2010$', r'$2012$', r'$2014$', r'$2016$', r'$2018$', r'$2020$'])
+    plt.xticks(fontsize=8, rotation=0)
+    plt.yticks(fontsize=8, rotation=0)
+    plt.xlabel('Year') # add x-label
+    plt.ylabel('Log(Years)') # add y-label
+    if titlefigure == 'on': plt.title('Security estimates in years, budget = US$' +repr(int(MoneyOptions[k]/1e6))+ ' million') # add title
+
+    # Setting the boundaries of the figure
+    plt.xlim(x.min()*1.0, x.max()*1.0)
+    plt.ylim(0, y3[19]*1.5)    # NOTE(review): scaled from index 19, not the last index 20 - confirm this is intended
+
+    plt.gcf().subplots_adjust(bottom=0.12)
+    plt.show() # show figure
+    fig.savefig("historical_estimates_" +repr(int(MoneyOptions[k]/1e6))+ "million.png", dpi = 300) # save figure
+
+    return
+
+############################################################################################
+#### Graphing for all the budget options
+
+for i in range(0, 7):
+    grapher_historical(i)
+
+############################################################################################
+#### Graph of security estimates (in years) using projection of prices of memory (bytes) and
+#### computing resources (gates), years 2000-2040
+#### Uses historical prices for 2000-2020, projections for 2025-2040
+
+def grapher_projection(k):
+    """Plot, show and save the 5-yearly (2000-2040) security estimates for budget index k.
+
+    One curve is drawn per primitive from row k of the projection tables and each curve
+    is annotated with its name. A legend is added only when titlefigure == 'on'. The
+    figure is written to projection_estimates_<budget>million.png, where <budget> is
+    MoneyOptions[k] expressed in millions.
+    """
+    x = np.linspace(0, 8, 9)
+    y1, y2, y3, y4, y5 = ProjYearsAES128[k], ProjYearsAES192[k], ProjYearsAES256[k], ProjMinYearsSHA3[k], ProjMinYearsSHA3_384[k]
+    y6, y7, y8, y9 = ProjMinYearsSIKEp377[k], ProjMinYearsSIKEp434[k], ProjMinYearsSIKEp503[k], ProjMinYearsSIKEp546[k]
+    y10, y11, y12 = ProjMinYearsSIKEp610[k], ProjMinYearsSIKEp697[k], ProjMinYearsSIKEp751[k]
+
+    # Setting the figure size and resolution
+    fig, ax = plt.subplots(figsize=(10, 3), dpi=300)
+
+    # Minor ticks and grid; use the Axes returned by subplots() rather than plt.axes(),
+    # which may create a second Axes on top of it
+    ax.xaxis.set_minor_locator(MultipleLocator(1))
+    ax.yaxis.set_minor_locator(MultipleLocator(5))
+    plt.grid(color='gray', ls = '-.', lw = 0.25)
+
+    # Setting the color, linewidth, linestyle and legend
+    plt.plot(x, y1, color="crimson", linewidth=1.3, linestyle="-", label="AES128")
+    plt.plot(x, y2, color="crimson", linewidth=1.3, linestyle="--", label="AES192")
+    plt.plot(x, y3, color="crimson", linewidth=1.3, linestyle="-.", label="AES256")
+    plt.plot(x, y4, color="tab:brown", linewidth=1.3, linestyle="--", label="SHA3-256")
+    plt.plot(x, y5, color="tab:brown", linewidth=1.3, linestyle="-.", label="SHA3-384")
+    plt.plot(x, y6, color="royalblue", linewidth=1.3, linestyle="-", label="SIKEp377")
+    plt.plot(x, y7, color="royalblue", linewidth=1.3, linestyle=(0, (5, 1)), label="SIKEp434")
+    plt.plot(x, y8, color="royalblue", linewidth=1.3, linestyle="-.", label="SIKEp503")
+    plt.plot(x, y9, color="royalblue", linewidth=1.3, linestyle="--", label="SIKEp546")
+    plt.plot(x, y10, color="royalblue", linewidth=1.3, linestyle=(0, (5, 5)), label="SIKEp610")
+    plt.plot(x, y11, color="royalblue", linewidth=1.3, linestyle=(0, (5, 10)), label="SIKEp697")
+    plt.plot(x, y12, color="royalblue", linewidth=1.3, linestyle=(0, (1, 5)), label="SIKEp751")
+
+    # Everything that touches the legend stays under this guard: "leg" only exists
+    # when the legend is actually created
+    if titlefigure == 'on':
+        leg = plt.legend(loc='upper right', prop={'size': 7}, frameon=True)
+        plt.draw() # Draw the figure so you can find the position of the legend
+
+        # Get the bounding box of the original legend in axes coordinates
+        # (transformed(inverted()) replaces the removed BboxBase.inverse_transformed())
+        bb = leg.get_bbox_to_anchor().transformed(ax.transAxes.inverted())
+
+        # Change to location of the legend
+        xOffset = -0.01
+        bb.x0 += xOffset
+        bb.x1 += xOffset
+        leg.set_bbox_to_anchor(bb, transform = ax.transAxes)
+
+    # Use Latex to set tick labels
+    plt.xticks([0, 1, 2, 3, 4, 5, 6, 7, 8], [r'$2000$', r'$2005$', r'$2010$', r'$2015$', r'$2020$', r'$2025$', r'$2030$', r'$2035$', r'$2040$'])
+    plt.xticks(fontsize=8, rotation=0)
+    plt.yticks(fontsize=8, rotation=0)
+    plt.xlabel('Year') # add x-label
+    plt.ylabel('Log(Years)') # add y-label
+    if titlefigure == 'on': plt.title('Security estimates in years (projection), budget = US$' +repr(int(MoneyOptions[k]/1e6))+ ' million') # add title
+
+    # Name each curve next to its line; offsets are in data units
+    def annotate(xpos, ypos, text, color):
+        plt.text(xpos, ypos, text, color=color, fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+
+    annotate(0.8, y1[1]-12, 'AES128', 'crimson')
+    annotate(1.7, y4[2]-12, 'SHA3-256', 'tab:brown')
+    annotate(0.8, y6[1]+2, 'SIKEp377', 'royalblue')
+    annotate(0.8, y7[1]+2, 'SIKEp434', 'royalblue')
+    annotate(0.8, y8[1]-12, 'SIKEp503', 'royalblue')
+    # AES192 and SHA3-384 are labelled above their curves for the first three budgets, below otherwise
+    if k < 3:
+        annotate(0.8, y2[1]+2, 'AES192', 'crimson')
+        annotate(1.7, y5[2]+1, 'SHA3-384', 'tab:brown')
+    else:
+        annotate(0.8, y2[1]-10, 'AES192', 'crimson')
+        annotate(1.7, y5[2]-11, 'SHA3-384', 'tab:brown')
+    annotate(0.8, y9[1]+2, 'SIKEp546', 'royalblue')
+    annotate(0.8, y10[1]+2, 'SIKEp610', 'royalblue')
+    annotate(0.8, y3[1]-12, 'AES256', 'crimson')
+    annotate(0.8, y11[1]+2, 'SIKEp697', 'royalblue')
+    annotate(0.8, y12[1]+2, 'SIKEp751', 'royalblue')
+
+    # Setting the boundaries of the figure
+    plt.xlim(x.min()*1.0, x.max()*1.0)
+    plt.ylim(0, y3[8]*1.5)
+
+    plt.gcf().subplots_adjust(bottom=0.12)
+    plt.show() # show figure
+    fig.savefig("projection_estimates_" +repr(int(MoneyOptions[k]/1e6))+ "million.png", dpi = 300) # save figure
+
+    return
+
+############################################################################################
+#### Graphing for all the budget options
+
+for i in range(0, 7):
+    grapher_projection(i)
+
+############################################################################################
+#### Historical graph of number of components (bytes/gates) that can be bought per dollar
+
+def _log2_or_none(values, count=21):
+    """Return [log2(values[i]) for i < count], keeping None where values[i] is None."""
+    return [math.log2(values[i]) if values[i] is not None else None for i in range(count)]
+
+x = np.linspace(0, 20, 21)
+LogBytesPerDollar_HDD = _log2_or_none(BytesPerDollar_HDD)
+LogBytesPerDollar_DRAM = _log2_or_none(BytesPerDollar_DRAM)
+LogBytesPerDollar_SSD = _log2_or_none(BytesPerDollar_SSD)
+LogGatesPerDollar_MPU = _log2_or_none(GatesPerDollar_MPU)
+LogGatesPerDollar_Linley = _log2_or_none(GatesPerDollar_Linley)
+LogGatesPerDollar_ITRS = _log2_or_none(GatesPerDollar_ITRS)
+LogBytesPerGate = _log2_or_none(BytesPerGate)
+
+y1, y2, y3, y4, y5, y6, y7 = LogBytesPerDollar_HDD, LogGatesPerDollar_MPU, LogGatesPerDollar_Linley, LogGatesPerDollar_ITRS, LogBytesPerGate, LogBytesPerDollar_DRAM, LogBytesPerDollar_SSD
+
+print (LogBytesPerDollar_HDD)
+print (LogBytesPerDollar_DRAM)
+print (LogBytesPerDollar_SSD)
+print (LogGatesPerDollar_MPU)
+print (LogGatesPerDollar_Linley)
+print (LogGatesPerDollar_ITRS)
+print (LogBytesPerGate)
+
+# Setting the figure size and resolution
+fig, ax = plt.subplots(figsize=(10, 3), dpi=300)
+
+# Minor ticks and grid; use the Axes returned by subplots() rather than plt.axes(),
+# which may create a second Axes on top of it
+ax.xaxis.set_minor_locator(MultipleLocator(1))
+ax.yaxis.set_minor_locator(MultipleLocator(5))
+plt.grid(color='gray', ls = '-.', lw = 0.25)
+
+# Setting the color, linewidth, linestyle and legend
+plt.plot(x, y1, color="crimson", linewidth=1.0, linestyle="-", label="Bytes/dollar (HDD)")
+plt.plot(x, y2, color="royalblue", linewidth=1.0, linestyle="-", label="Gates/dollar (MPU)")
+plt.scatter(x, y3, color="royalblue", s = 7.0, marker='^', label="Gates/dollar (Linley Group)")
+plt.scatter(x, y4, color="aquamarine", s = 5.0, marker='x', label="Gates/dollar (ITRS 2001-2007, forecast)")
+plt.plot(x, y5, color="olivedrab", linewidth=1.5, linestyle="-", label="Bytes (HDD)/gate (MPU) ratio")
+plt.plot(x, y6, color="crimson", linewidth=1.0, linestyle="--", label="Bytes/dollar (DRAM)")
+plt.plot(x, y7, color="crimson", linewidth=1.0, linestyle="-.", label="Bytes/dollar (SSD)")
+plt.legend(loc='upper left', prop={'size': 6}, frameon=True)
+
+# Use Latex to set tick labels
+plt.xticks([0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20], [r'$2000$', r'$2002$', r'$2004$', r'$2006$', r'$2008$', r'$2010$', r'$2012$', r'$2014$', r'$2016$', r'$2018$', r'$2020$'])
+plt.xticks(fontsize=8, rotation=0)
+plt.yticks(fontsize=8, rotation=0)
+plt.xlabel('Year') # add x-label
+plt.ylabel('Log(components/dollar)') # add y-label
+if titlefigure == 'on': plt.title('Historical prices of memory and gates (MPUs), 2000-2020') # add title
+
+# Setting the boundaries of the figure
+plt.xlim(x.min()*1.0, x.max()*1.0)
+plt.ylim(5, y1[20]*1.5)    # NOTE(review): requires an HDD price for index 20 (a None entry would raise here) - confirm
+
+plt.gcf().subplots_adjust(bottom=0.12)
+plt.show() # show figure
+fig.savefig("historical_mpu_hdd.png", dpi = 300) # save figure
\ No newline at end of file
diff --git a/README.md b/README.md
index a55e297..7fc1ef9 100644
--- a/README.md
+++ b/README.md
@@ -26,19 +26,24 @@ relative to AES and SHA-3.
- - - ## File Organization -- `platform/AC701/` contains hardware development files targeting the Artix-7 AC701 XC7A200TFBG676 FPGA. +- `platforms/AC701/` contains hardware development files targeting the Artix-7 AC701 XC7A200TFBG676 FPGA. - `platforms/Murax/` contains the scala source code for generating the Murax SoC. - `platforms/rtl` contains the APB bridge modules developed for the communication between the software and hardware. -- `src/hardware` contains hardware accelerators source code. +- `Python_script` contains the Python3 script for the security estimation of SIKE relative to AES and SHA-3. -- `src/murax` contains Murax library files. +- `SIKE_sw` contains the software implementation of SIKE, including the new parameter sets SIKEp377, SIKEp546 and SIKEp697. -- `src/ref_c` contains the software implementation of vOW on SIKE, which is based on [3] and the [vOW4SIKE library](https://github.com/microsoft/vOW4SIKE). +- `SIKE_vOW_hw-sw/hardware` contains the hardware accelerators source code. -- `src/ref_c_riscv` contains the software libraries for calling the hardware accelerators and RISC-V testing files. +- `SIKE_vOW_hw-sw/murax` contains the Murax library files. + +- `SIKE_vOW_hw-sw/ref_c` contains the software implementation of vOW on SIKE, which is based on [3] and the [vOW4SIKE library](https://github.com/microsoft/vOW4SIKE). + +- `SIKE_vOW_hw-sw/ref_c_riscv` contains the hardware/software co-design of vOW on SIKE. + It contains the software libraries for calling the hardware accelerators and RISC-V testing files. - `LICENSE` MIT license covering all the implementations, except for the files that are labeled as created by third parties. 
diff --git a/SIKE_sw/Makefile b/SIKE_sw/Makefile
new file mode 100644
index 0000000..33a75fe
--- /dev/null
+++ b/SIKE_sw/Makefile
@@ -0,0 +1,289 @@
+####  Makefile for compilation on Linux  ####
+
+OPT=-O3     # Optimization option by default
+
+CC=clang
+
+ifeq "$(CC)" "gcc"
+    COMPILER=gcc
+else ifeq "$(CC)" "clang"
+    COMPILER=clang
+endif
+
+# Target selection: ARCH=x64 (default) or ARCH=x86; x86 always uses the generic C arithmetic.
+ARCHITECTURE=_AMD64_
+USE_OPT_LEVEL=_FAST_
+ifeq "$(ARCH)" "x64"
+    ARCHITECTURE=_AMD64_
+    USE_OPT_LEVEL=_FAST_
+else ifeq "$(ARCH)" "x86"
+    ARCHITECTURE=_X86_
+    USE_OPT_LEVEL=_GENERIC_
+endif
+
+# OPT_LEVEL=GENERIC forces the generic C arithmetic regardless of ARCH.
+ifeq "$(OPT_LEVEL)" "GENERIC"
+    USE_OPT_LEVEL=_GENERIC_
+endif
+
+# MULX and ADX are enabled by default on the fast x64 build: USE_MULX=FALSE disables both, USE_ADX=FALSE disables ADX only.
+ifeq "$(ARCHITECTURE)" "_AMD64_"
+    ifeq "$(USE_OPT_LEVEL)" "_FAST_"
+        MULX=-D _MULX_
+        ifeq "$(USE_MULX)" "FALSE"
+            MULX=
+        else
+            ADX=-D _ADX_
+            ifeq "$(USE_ADX)" "FALSE"
+                ADX=
+            endif
+        endif
+    endif
+endif
+
+# SET=EXTENDED adds extra compiler settings.
+ifeq "$(SET)" "EXTENDED"
+    ADDITIONAL_SETTINGS=-fwrapv -fomit-frame-pointer -march=native
+endif
+
+AR=ar rcs
+RANLIB=ranlib
+
+CFLAGS=$(OPT) $(ADDITIONAL_SETTINGS) -D $(ARCHITECTURE) -D __LINUX__ -D $(USE_OPT_LEVEL) $(MULX) $(ADX) -fPIE
+LDFLAGS=-lm
+
+ifeq "$(ARCHITECTURE)" "_AMD64_"
+ifeq "$(USE_OPT_LEVEL)" "_FAST_"
+CFLAGS += -mavx2 -maes -msse2
+endif
+endif
+
+ifeq "$(USE_OPT_LEVEL)" "_GENERIC_"
+    EXTRA_OBJECTS_377=objs377/fp_generic.o
+    EXTRA_OBJECTS_434=objs434/fp_generic.o
+    EXTRA_OBJECTS_503=objs503/fp_generic.o
+    EXTRA_OBJECTS_546=objs546/fp_generic.o
+    EXTRA_OBJECTS_610=objs610/fp_generic.o
+    EXTRA_OBJECTS_697=objs697/fp_generic.o
+    EXTRA_OBJECTS_751=objs751/fp_generic.o
+else ifeq "$(USE_OPT_LEVEL)" "_FAST_"
+ifeq "$(ARCHITECTURE)" "_AMD64_"
+    EXTRA_OBJECTS_377=objs377/fp_x64.o objs377/fp_x64_asm.o
+    EXTRA_OBJECTS_434=objs434/fp_x64.o objs434/fp_x64_asm.o
+    EXTRA_OBJECTS_503=objs503/fp_x64.o objs503/fp_x64_asm.o
+    EXTRA_OBJECTS_546=objs546/fp_x64.o objs546/fp_x64_asm.o
+    EXTRA_OBJECTS_610=objs610/fp_x64.o objs610/fp_x64_asm.o
+    EXTRA_OBJECTS_697=objs697/fp_x64.o objs697/fp_x64_asm.o
+    EXTRA_OBJECTS_751=objs751/fp_x64.o objs751/fp_x64_asm.o
+    CFLAGS+= -fPIC
+endif
+endif
+OBJECTS_377=objs377/P377.o $(EXTRA_OBJECTS_377) objs/random.o objs/fips202.o
+OBJECTS_434=objs434/P434.o $(EXTRA_OBJECTS_434) objs/random.o objs/fips202.o
+OBJECTS_503=objs503/P503.o $(EXTRA_OBJECTS_503) objs/random.o objs/fips202.o
+OBJECTS_546=objs546/P546.o $(EXTRA_OBJECTS_546) objs/random.o objs/fips202.o
+OBJECTS_610=objs610/P610.o $(EXTRA_OBJECTS_610) objs/random.o objs/fips202.o
+OBJECTS_697=objs697/P697.o $(EXTRA_OBJECTS_697) objs/random.o objs/fips202.o
+OBJECTS_751=objs751/P751.o $(EXTRA_OBJECTS_751) objs/random.o objs/fips202.o
+
+all: lib377 lib434 lib503 lib546 lib610 lib697 lib751 tests tests_sike
+
+objs377/%.o: src/P377/%.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+objs434/%.o: src/P434/%.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+objs503/%.o: src/P503/%.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+objs546/%.o: src/P546/%.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+objs610/%.o: src/P610/%.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+objs697/%.o: src/P697/%.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+objs751/%.o: src/P751/%.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+# Each explicit rule below creates its own output directory so it does not rely on a pattern rule having run first (e.g. under "make -j").
+ifeq "$(USE_OPT_LEVEL)" "_GENERIC_"
+objs377/fp_generic.o: src/P377/generic/fp_generic.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P377/generic/fp_generic.c -o objs377/fp_generic.o
+
+objs434/fp_generic.o: src/P434/generic/fp_generic.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P434/generic/fp_generic.c -o objs434/fp_generic.o
+
+objs503/fp_generic.o: src/P503/generic/fp_generic.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P503/generic/fp_generic.c -o objs503/fp_generic.o
+
+objs546/fp_generic.o: src/P546/generic/fp_generic.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P546/generic/fp_generic.c -o objs546/fp_generic.o
+
+objs610/fp_generic.o: src/P610/generic/fp_generic.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P610/generic/fp_generic.c -o objs610/fp_generic.o
+
+objs697/fp_generic.o: src/P697/generic/fp_generic.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P697/generic/fp_generic.c -o objs697/fp_generic.o
+
+objs751/fp_generic.o: src/P751/generic/fp_generic.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P751/generic/fp_generic.c -o objs751/fp_generic.o
+else ifeq "$(USE_OPT_LEVEL)" "_FAST_"
+ifeq "$(ARCHITECTURE)" "_AMD64_"
+objs377/fp_x64.o: src/P377/AMD64/fp_x64.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P377/AMD64/fp_x64.c -o objs377/fp_x64.o
+
+objs377/fp_x64_asm.o: src/P377/AMD64/fp_x64_asm.S
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P377/AMD64/fp_x64_asm.S -o objs377/fp_x64_asm.o
+
+objs434/fp_x64.o: src/P434/AMD64/fp_x64.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P434/AMD64/fp_x64.c -o objs434/fp_x64.o
+
+objs434/fp_x64_asm.o: src/P434/AMD64/fp_x64_asm.S
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P434/AMD64/fp_x64_asm.S -o objs434/fp_x64_asm.o
+
+objs503/fp_x64.o: src/P503/AMD64/fp_x64.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P503/AMD64/fp_x64.c -o objs503/fp_x64.o
+
+objs503/fp_x64_asm.o: src/P503/AMD64/fp_x64_asm.S
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P503/AMD64/fp_x64_asm.S -o objs503/fp_x64_asm.o
+
+objs546/fp_x64.o: src/P546/AMD64/fp_x64.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P546/AMD64/fp_x64.c -o objs546/fp_x64.o
+
+objs546/fp_x64_asm.o: src/P546/AMD64/fp_x64_asm.S
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P546/AMD64/fp_x64_asm.S -o objs546/fp_x64_asm.o
+
+objs610/fp_x64.o: src/P610/AMD64/fp_x64.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P610/AMD64/fp_x64.c -o objs610/fp_x64.o
+
+objs610/fp_x64_asm.o: src/P610/AMD64/fp_x64_asm.S
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P610/AMD64/fp_x64_asm.S -o objs610/fp_x64_asm.o
+
+objs697/fp_x64.o: src/P697/AMD64/fp_x64.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P697/AMD64/fp_x64.c -o objs697/fp_x64.o
+
+objs697/fp_x64_asm.o: src/P697/AMD64/fp_x64_asm.S
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P697/AMD64/fp_x64_asm.S -o objs697/fp_x64_asm.o
+
+objs751/fp_x64.o: src/P751/AMD64/fp_x64.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P751/AMD64/fp_x64.c -o objs751/fp_x64.o
+
+objs751/fp_x64_asm.o: src/P751/AMD64/fp_x64_asm.S
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) src/P751/AMD64/fp_x64_asm.S -o objs751/fp_x64_asm.o
+endif
+endif
+
+INDEPENDENT_OBJS=objs/random.o objs/fips202.o
+objs/random.o: src/random/random.c
+objs/fips202.o: src/sha3/fips202.c
+
+$(INDEPENDENT_OBJS): + @mkdir -p $(@D) + $(CC) -c $(CFLAGS) $< -o $@ + +lib377: $(OBJECTS_377) + rm -rf lib377 sike377 + mkdir lib377 sike377 + $(AR) lib377/libsidh.a $^ + $(RANLIB) lib377/libsidh.a + +lib434: $(OBJECTS_434) + rm -rf lib434 sike434 + mkdir lib434 sike434 + $(AR) lib434/libsidh.a $^ + $(RANLIB) lib434/libsidh.a + +lib503: $(OBJECTS_503) + rm -rf lib503 sike503 + mkdir lib503 sike503 + $(AR) lib503/libsidh.a $^ + $(RANLIB) lib503/libsidh.a + +lib546: $(OBJECTS_546) + rm -rf lib546 sike546 + mkdir lib546 sike546 + $(AR) lib546/libsidh.a $^ + $(RANLIB) lib546/libsidh.a + +lib610: $(OBJECTS_610) + rm -rf lib610 sike610 + mkdir lib610 sike610 + $(AR) lib610/libsidh.a $^ + $(RANLIB) lib610/libsidh.a + +lib697: $(OBJECTS_697) + rm -rf lib697 sike697 + mkdir lib697 sike697 + $(AR) lib697/libsidh.a $^ + $(RANLIB) lib697/libsidh.a + +lib751: $(OBJECTS_751) + rm -rf lib751 sike751 + mkdir lib751 sike751 + $(AR) lib751/libsidh.a $^ + $(RANLIB) lib751/libsidh.a + +tests: lib377 lib434 lib503 lib546 lib610 lib697 lib751 + $(CC) $(CFLAGS) -L./lib377 tests/arith_tests-p377.c tests/test_extras.c -lsidh $(LDFLAGS) -o arith_tests-p377 $(ARM_SETTING) + $(CC) $(CFLAGS) -L./lib434 tests/arith_tests-p434.c tests/test_extras.c -lsidh $(LDFLAGS) -o arith_tests-p434 $(ARM_SETTING) + $(CC) $(CFLAGS) -L./lib503 tests/arith_tests-p503.c tests/test_extras.c -lsidh $(LDFLAGS) -o arith_tests-p503 $(ARM_SETTING) + $(CC) $(CFLAGS) -L./lib546 tests/arith_tests-p546.c tests/test_extras.c -lsidh $(LDFLAGS) -o arith_tests-p546 $(ARM_SETTING) + $(CC) $(CFLAGS) -L./lib610 tests/arith_tests-p610.c tests/test_extras.c -lsidh $(LDFLAGS) -o arith_tests-p610 $(ARM_SETTING) + $(CC) $(CFLAGS) -L./lib697 tests/arith_tests-p697.c tests/test_extras.c -lsidh $(LDFLAGS) -o arith_tests-p697 $(ARM_SETTING) + $(CC) $(CFLAGS) -L./lib751 tests/arith_tests-p751.c tests/test_extras.c -lsidh $(LDFLAGS) -o arith_tests-p751 $(ARM_SETTING) + +tests_sike377: lib377 + $(CC) $(CFLAGS) -L./lib377 
tests/test_SIKEp377.c tests/test_extras.c -lsidh $(LDFLAGS) -o sike377/test_SIKE $(ARM_SETTING) +tests_sike434: lib434 + $(CC) $(CFLAGS) -L./lib434 tests/test_SIKEp434.c tests/test_extras.c -lsidh $(LDFLAGS) -o sike434/test_SIKE $(ARM_SETTING) +tests_sike503: lib503 + $(CC) $(CFLAGS) -L./lib503 tests/test_SIKEp503.c tests/test_extras.c -lsidh $(LDFLAGS) -o sike503/test_SIKE $(ARM_SETTING) +tests_sike546: lib546 + $(CC) $(CFLAGS) -L./lib546 tests/test_SIKEp546.c tests/test_extras.c -lsidh $(LDFLAGS) -o sike546/test_SIKE $(ARM_SETTING) +tests_sike610: lib610 + $(CC) $(CFLAGS) -L./lib610 tests/test_SIKEp610.c tests/test_extras.c -lsidh $(LDFLAGS) -o sike610/test_SIKE $(ARM_SETTING) +tests_sike697: lib697 + $(CC) $(CFLAGS) -L./lib697 tests/test_SIKEp697.c tests/test_extras.c -lsidh $(LDFLAGS) -o sike697/test_SIKE $(ARM_SETTING) +tests_sike751: lib751 + $(CC) $(CFLAGS) -L./lib751 tests/test_SIKEp751.c tests/test_extras.c -lsidh $(LDFLAGS) -o sike751/test_SIKE $(ARM_SETTING) + +tests_sike: tests_sike377 tests_sike434 tests_sike503 tests_sike546 tests_sike610 tests_sike697 tests_sike751 + +check: tests tests_sike + +.PHONY: clean + +clean: + rm -rf *.req objs* lib* sike* arith_tests-* diff --git a/SIKE_sw/README.md b/SIKE_sw/README.md new file mode 100644 index 0000000..fd454ae --- /dev/null +++ b/SIKE_sw/README.md @@ -0,0 +1,57 @@ +# Software implementation of SIKE +# Paper "The Cost to Break SIKE: A Comparative Hardware-Based Analysis with AES and SHA-3" + +This library contains efficient C implementations of the CCA-secure key encapsulation mechanism "SIKE". +This scheme is conjectured to be secure against quantum computer attacks. +The software is based on the SIDH library version 3.3 (https://github.com/microsoft/PQCrypto-SIDH). + +This library includes the following KEM schemes: + +* SIKEp377: matching the post-quantum security of AES128 (NEW, level 1). +* SIKEp434: matching the post-quantum security of AES128 (level 1). 
+* SIKEp503: matching the post-quantum security of SHA3-256 (level 2).
+* SIKEp546: matching the post-quantum security of AES192 (NEW, level 3).
+* SIKEp610: matching the post-quantum security of AES192 (level 3).
+* SIKEp697: matching the post-quantum security of AES256 (NEW, level 5).
+* SIKEp751: matching the post-quantum security of AES256 (level 5).
+
+## Contents
+
+In the remainder, pXXX is one of {p377,p434,p503,p546,p610,p697,p751}.
+
+* [`src folder`](src/): C and header files. Public APIs can be found in src/PXXX/PXXX_api.h.
+* Optimized x64 implementation for pXXX (src/PXXX/AMD64/): optimized implementation of the field arithmetic over the prime pXXX for x64 platforms.
+* Generic implementation for pXXX (src/PXXX/generic/): implementation of the field arithmetic over the prime pXXX in portable C.
+* [`random folder`](src/random/): randombytes function using the system random number generator.
+* [`sha3 folder`](src/sha3/): SHAKE256 implementation.
+* [`Test folder`](tests/): test files.
+* [`Visual Studio folder`](Visual%20Studio/): Visual Studio 2015 files for compilation on Windows.
+* [`Makefile`](Makefile): Makefile for compilation using the GNU GCC or clang compilers on Linux.
+* [`Readme`](README.md): this readme file.
+
+## Instructions for Linux
+
+By executing:
+
+```sh
+$ make
+```
+
+the library is compiled by default for x64 using clang at optimization level `FAST`, which uses assembly-optimized arithmetic
+(this option requires CPU support for the instructions MULX and ADX).
+
+Other options for x64:
+
+```sh
+$ make CC=[gcc/clang] OPT_LEVEL=[FAST/GENERIC]
+```
+
+The use of `OPT_LEVEL=GENERIC` disables the use of assembly-optimized arithmetic.
+
+To run the tests and benchmarks, execute:
+
+```sh
+$ ./arith_tests-pXXX
+$ ./sikeXXX/test_SIKE
+```
+
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDH.sln b/SIKE_sw/Visual Studio/SIDH/SIDH.sln
new file mode 100644
index 0000000..f82ec43
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDH.sln
@@ -0,0 +1,293 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.25420.1
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "P751", "SIDHp751.vcxproj", "{8283DD76-E88A-4B63-ABDE-33F014178413}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "P503", "SIDHp503.vcxproj", "{BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-SIKEp503", "..\kem_tests\test-SIKEp503.vcxproj", "{EF9FE361-D94D-4CE0-8873-739A925326A3}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-SIKEp751", "..\kem_tests\test-SIKEp751.vcxproj", "{0D570915-7551-4D5F-A2F0-A4A6200185F9}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "P434", "SIDHp434.vcxproj", "{E46FD055-7619-4C50-8360-FA3BC2F650FB}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-SIKEp434", "..\kem_tests\test-SIKEp434.vcxproj", "{53B2CD97-2FE6-4927-86A7-B16E436CFBD5}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "P377", "SIDHp377.vcxproj", "{05CEF530-F410-4C21-AC70-A7EF991DEE6A}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-SIKEp377", "..\kem_tests\test-SIKEp377.vcxproj", "{0D497554-D408-4061-BA26-2A65F4272841}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arith_tests-P377", "..\arith_tests\arith_tests-P377.vcxproj", "{10C4B543-0224-43D3-B84D-390665AA6C25}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arith_tests-P434", "..\arith_tests\arith_tests-P434.vcxproj", "{8944AC47-A218-4F4D-8AF1-AF704160A727}"
+EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arith_tests-P503", "..\arith_tests\arith_tests-P503.vcxproj", "{464B689B-7C93-47A2-B2F5-FE162A4EF404}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arith_tests-P610", "..\arith_tests\arith_tests-P610.vcxproj", "{2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arith_tests-P751", "..\arith_tests\arith_tests-P751.vcxproj", "{C9639168-C3FF-4427-BC3B-D907FF11DE73}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "P610", "SIDHp610.vcxproj", "{ED1BA17A-58EA-4D9F-9B19-7061395E22BB}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-SIKEp610", "..\kem_tests\test-SIKEp610.vcxproj", "{DC10CB31-A905-402E-B466-46ADCD1AD61C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "P546", "SIDHp546.vcxproj", "{48010B78-5594-4FE9-81AC-909B670C1516}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arith_tests-P546", "..\arith_tests\arith_tests-P546.vcxproj", "{5572CD5B-7F2F-4F44-B7AC-844291850C6E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-SIKEp546", "..\kem_tests\test-SIKEp546.vcxproj", "{E52D3FE9-FD9F-4D93-9712-8172DB469831}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arith_tests-P697", "..\arith_tests\arith_tests-P697.vcxproj", "{C0892335-7EB7-48D4-83A1-500953D0526B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-SIKEp697", "..\kem_tests\test-SIKEp697.vcxproj", "{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "P697", "SIDHp697.vcxproj", "{F7447653-7518-4BF0-934D-65801C74D42A}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Fast|x64 = Fast|x64 + Fast|x86 = Fast|x86 + Generic|x64 = Generic|x64 + Generic|x86 = Generic|x86 + EndGlobalSection + 
GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8283DD76-E88A-4B63-ABDE-33F014178413}.Debug|x64.ActiveCfg = Debug|x64 + {8283DD76-E88A-4B63-ABDE-33F014178413}.Debug|x64.Build.0 = Debug|x64 + {8283DD76-E88A-4B63-ABDE-33F014178413}.Debug|x86.ActiveCfg = Debug|Win32 + {8283DD76-E88A-4B63-ABDE-33F014178413}.Debug|x86.Build.0 = Debug|Win32 + {8283DD76-E88A-4B63-ABDE-33F014178413}.Fast|x64.ActiveCfg = Fast|x64 + {8283DD76-E88A-4B63-ABDE-33F014178413}.Fast|x64.Build.0 = Fast|x64 + {8283DD76-E88A-4B63-ABDE-33F014178413}.Fast|x86.ActiveCfg = Fast|Win32 + {8283DD76-E88A-4B63-ABDE-33F014178413}.Generic|x64.ActiveCfg = Generic|x64 + {8283DD76-E88A-4B63-ABDE-33F014178413}.Generic|x64.Build.0 = Generic|x64 + {8283DD76-E88A-4B63-ABDE-33F014178413}.Generic|x86.ActiveCfg = Generic|Win32 + {8283DD76-E88A-4B63-ABDE-33F014178413}.Generic|x86.Build.0 = Generic|Win32 + {BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Debug|x64.ActiveCfg = Debug|x64 + {BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Debug|x64.Build.0 = Debug|x64 + {BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Debug|x86.ActiveCfg = Debug|Win32 + {BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Debug|x86.Build.0 = Debug|Win32 + {BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Fast|x64.ActiveCfg = Fast|x64 + {BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Fast|x64.Build.0 = Fast|x64 + {BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Fast|x86.ActiveCfg = Fast|Win32 + {BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Generic|x64.ActiveCfg = Generic|x64 + {BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Generic|x64.Build.0 = Generic|x64 + {BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Generic|x86.ActiveCfg = Generic|Win32 + {BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Generic|x86.Build.0 = Generic|Win32 + {EF9FE361-D94D-4CE0-8873-739A925326A3}.Debug|x64.ActiveCfg = Debug|x64 + {EF9FE361-D94D-4CE0-8873-739A925326A3}.Debug|x64.Build.0 = Debug|x64 + {EF9FE361-D94D-4CE0-8873-739A925326A3}.Debug|x86.ActiveCfg = Debug|Win32 + {EF9FE361-D94D-4CE0-8873-739A925326A3}.Debug|x86.Build.0 = Debug|Win32 + 
{EF9FE361-D94D-4CE0-8873-739A925326A3}.Fast|x64.ActiveCfg = Fast|x64 + {EF9FE361-D94D-4CE0-8873-739A925326A3}.Fast|x64.Build.0 = Fast|x64 + {EF9FE361-D94D-4CE0-8873-739A925326A3}.Fast|x86.ActiveCfg = Fast|Win32 + {EF9FE361-D94D-4CE0-8873-739A925326A3}.Generic|x64.ActiveCfg = Generic|x64 + {EF9FE361-D94D-4CE0-8873-739A925326A3}.Generic|x64.Build.0 = Generic|x64 + {EF9FE361-D94D-4CE0-8873-739A925326A3}.Generic|x86.ActiveCfg = Generic|Win32 + {EF9FE361-D94D-4CE0-8873-739A925326A3}.Generic|x86.Build.0 = Generic|Win32 + {0D570915-7551-4D5F-A2F0-A4A6200185F9}.Debug|x64.ActiveCfg = Debug|x64 + {0D570915-7551-4D5F-A2F0-A4A6200185F9}.Debug|x64.Build.0 = Debug|x64 + {0D570915-7551-4D5F-A2F0-A4A6200185F9}.Debug|x86.ActiveCfg = Debug|Win32 + {0D570915-7551-4D5F-A2F0-A4A6200185F9}.Debug|x86.Build.0 = Debug|Win32 + {0D570915-7551-4D5F-A2F0-A4A6200185F9}.Fast|x64.ActiveCfg = Fast|x64 + {0D570915-7551-4D5F-A2F0-A4A6200185F9}.Fast|x64.Build.0 = Fast|x64 + {0D570915-7551-4D5F-A2F0-A4A6200185F9}.Fast|x86.ActiveCfg = Fast|Win32 + {0D570915-7551-4D5F-A2F0-A4A6200185F9}.Generic|x64.ActiveCfg = Generic|x64 + {0D570915-7551-4D5F-A2F0-A4A6200185F9}.Generic|x64.Build.0 = Generic|x64 + {0D570915-7551-4D5F-A2F0-A4A6200185F9}.Generic|x86.ActiveCfg = Generic|Win32 + {0D570915-7551-4D5F-A2F0-A4A6200185F9}.Generic|x86.Build.0 = Generic|Win32 + {E46FD055-7619-4C50-8360-FA3BC2F650FB}.Debug|x64.ActiveCfg = Debug|x64 + {E46FD055-7619-4C50-8360-FA3BC2F650FB}.Debug|x64.Build.0 = Debug|x64 + {E46FD055-7619-4C50-8360-FA3BC2F650FB}.Debug|x86.ActiveCfg = Debug|Win32 + {E46FD055-7619-4C50-8360-FA3BC2F650FB}.Debug|x86.Build.0 = Debug|Win32 + {E46FD055-7619-4C50-8360-FA3BC2F650FB}.Fast|x64.ActiveCfg = Fast|x64 + {E46FD055-7619-4C50-8360-FA3BC2F650FB}.Fast|x64.Build.0 = Fast|x64 + {E46FD055-7619-4C50-8360-FA3BC2F650FB}.Fast|x86.ActiveCfg = Fast|Win32 + {E46FD055-7619-4C50-8360-FA3BC2F650FB}.Generic|x64.ActiveCfg = Generic|x64 + {E46FD055-7619-4C50-8360-FA3BC2F650FB}.Generic|x64.Build.0 = Generic|x64 + 
{E46FD055-7619-4C50-8360-FA3BC2F650FB}.Generic|x86.ActiveCfg = Generic|Win32 + {E46FD055-7619-4C50-8360-FA3BC2F650FB}.Generic|x86.Build.0 = Generic|Win32 + {53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Debug|x64.ActiveCfg = Debug|x64 + {53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Debug|x64.Build.0 = Debug|x64 + {53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Debug|x86.ActiveCfg = Debug|Win32 + {53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Debug|x86.Build.0 = Debug|Win32 + {53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Fast|x64.ActiveCfg = Fast|x64 + {53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Fast|x64.Build.0 = Fast|x64 + {53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Fast|x86.ActiveCfg = Fast|Win32 + {53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Generic|x64.ActiveCfg = Generic|x64 + {53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Generic|x64.Build.0 = Generic|x64 + {53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Generic|x86.ActiveCfg = Generic|Win32 + {53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Generic|x86.Build.0 = Generic|Win32 + {05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Debug|x64.ActiveCfg = Debug|x64 + {05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Debug|x64.Build.0 = Debug|x64 + {05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Debug|x86.ActiveCfg = Debug|Win32 + {05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Debug|x86.Build.0 = Debug|Win32 + {05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Fast|x64.ActiveCfg = Fast|x64 + {05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Fast|x64.Build.0 = Fast|x64 + {05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Fast|x86.ActiveCfg = Fast|Win32 + {05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Generic|x64.ActiveCfg = Generic|x64 + {05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Generic|x64.Build.0 = Generic|x64 + {05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Generic|x86.ActiveCfg = Generic|Win32 + {05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Generic|x86.Build.0 = Generic|Win32 + {0D497554-D408-4061-BA26-2A65F4272841}.Debug|x64.ActiveCfg = Debug|x64 + {0D497554-D408-4061-BA26-2A65F4272841}.Debug|x64.Build.0 = Debug|x64 + {0D497554-D408-4061-BA26-2A65F4272841}.Debug|x86.ActiveCfg = Debug|Win32 
+ {0D497554-D408-4061-BA26-2A65F4272841}.Debug|x86.Build.0 = Debug|Win32 + {0D497554-D408-4061-BA26-2A65F4272841}.Fast|x64.ActiveCfg = Fast|x64 + {0D497554-D408-4061-BA26-2A65F4272841}.Fast|x64.Build.0 = Fast|x64 + {0D497554-D408-4061-BA26-2A65F4272841}.Fast|x86.ActiveCfg = Fast|Win32 + {0D497554-D408-4061-BA26-2A65F4272841}.Generic|x64.ActiveCfg = Generic|x64 + {0D497554-D408-4061-BA26-2A65F4272841}.Generic|x64.Build.0 = Generic|x64 + {0D497554-D408-4061-BA26-2A65F4272841}.Generic|x86.ActiveCfg = Generic|Win32 + {0D497554-D408-4061-BA26-2A65F4272841}.Generic|x86.Build.0 = Generic|Win32 + {10C4B543-0224-43D3-B84D-390665AA6C25}.Debug|x64.ActiveCfg = Debug|x64 + {10C4B543-0224-43D3-B84D-390665AA6C25}.Debug|x64.Build.0 = Debug|x64 + {10C4B543-0224-43D3-B84D-390665AA6C25}.Debug|x86.ActiveCfg = Debug|Win32 + {10C4B543-0224-43D3-B84D-390665AA6C25}.Debug|x86.Build.0 = Debug|Win32 + {10C4B543-0224-43D3-B84D-390665AA6C25}.Fast|x64.ActiveCfg = Fast|x64 + {10C4B543-0224-43D3-B84D-390665AA6C25}.Fast|x64.Build.0 = Fast|x64 + {10C4B543-0224-43D3-B84D-390665AA6C25}.Fast|x86.ActiveCfg = Fast|Win32 + {10C4B543-0224-43D3-B84D-390665AA6C25}.Generic|x64.ActiveCfg = Generic|x64 + {10C4B543-0224-43D3-B84D-390665AA6C25}.Generic|x64.Build.0 = Generic|x64 + {10C4B543-0224-43D3-B84D-390665AA6C25}.Generic|x86.ActiveCfg = Generic|Win32 + {10C4B543-0224-43D3-B84D-390665AA6C25}.Generic|x86.Build.0 = Generic|Win32 + {8944AC47-A218-4F4D-8AF1-AF704160A727}.Debug|x64.ActiveCfg = Debug|x64 + {8944AC47-A218-4F4D-8AF1-AF704160A727}.Debug|x64.Build.0 = Debug|x64 + {8944AC47-A218-4F4D-8AF1-AF704160A727}.Debug|x86.ActiveCfg = Debug|Win32 + {8944AC47-A218-4F4D-8AF1-AF704160A727}.Debug|x86.Build.0 = Debug|Win32 + {8944AC47-A218-4F4D-8AF1-AF704160A727}.Fast|x64.ActiveCfg = Fast|x64 + {8944AC47-A218-4F4D-8AF1-AF704160A727}.Fast|x64.Build.0 = Fast|x64 + {8944AC47-A218-4F4D-8AF1-AF704160A727}.Fast|x86.ActiveCfg = Fast|Win32 + {8944AC47-A218-4F4D-8AF1-AF704160A727}.Generic|x64.ActiveCfg = Generic|x64 + 
{8944AC47-A218-4F4D-8AF1-AF704160A727}.Generic|x64.Build.0 = Generic|x64 + {8944AC47-A218-4F4D-8AF1-AF704160A727}.Generic|x86.ActiveCfg = Generic|Win32 + {8944AC47-A218-4F4D-8AF1-AF704160A727}.Generic|x86.Build.0 = Generic|Win32 + {464B689B-7C93-47A2-B2F5-FE162A4EF404}.Debug|x64.ActiveCfg = Debug|x64 + {464B689B-7C93-47A2-B2F5-FE162A4EF404}.Debug|x64.Build.0 = Debug|x64 + {464B689B-7C93-47A2-B2F5-FE162A4EF404}.Debug|x86.ActiveCfg = Debug|Win32 + {464B689B-7C93-47A2-B2F5-FE162A4EF404}.Debug|x86.Build.0 = Debug|Win32 + {464B689B-7C93-47A2-B2F5-FE162A4EF404}.Fast|x64.ActiveCfg = Fast|x64 + {464B689B-7C93-47A2-B2F5-FE162A4EF404}.Fast|x64.Build.0 = Fast|x64 + {464B689B-7C93-47A2-B2F5-FE162A4EF404}.Fast|x86.ActiveCfg = Fast|Win32 + {464B689B-7C93-47A2-B2F5-FE162A4EF404}.Generic|x64.ActiveCfg = Generic|x64 + {464B689B-7C93-47A2-B2F5-FE162A4EF404}.Generic|x64.Build.0 = Generic|x64 + {464B689B-7C93-47A2-B2F5-FE162A4EF404}.Generic|x86.ActiveCfg = Generic|Win32 + {464B689B-7C93-47A2-B2F5-FE162A4EF404}.Generic|x86.Build.0 = Generic|Win32 + {2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Debug|x64.ActiveCfg = Debug|x64 + {2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Debug|x64.Build.0 = Debug|x64 + {2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Debug|x86.ActiveCfg = Debug|Win32 + {2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Debug|x86.Build.0 = Debug|Win32 + {2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Fast|x64.ActiveCfg = Fast|x64 + {2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Fast|x64.Build.0 = Fast|x64 + {2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Fast|x86.ActiveCfg = Fast|Win32 + {2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Generic|x64.ActiveCfg = Generic|x64 + {2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Generic|x64.Build.0 = Generic|x64 + {2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Generic|x86.ActiveCfg = Generic|Win32 + {2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Generic|x86.Build.0 = Generic|Win32 + {C9639168-C3FF-4427-BC3B-D907FF11DE73}.Debug|x64.ActiveCfg = Debug|x64 + {C9639168-C3FF-4427-BC3B-D907FF11DE73}.Debug|x64.Build.0 = Debug|x64 
+ {C9639168-C3FF-4427-BC3B-D907FF11DE73}.Debug|x86.ActiveCfg = Debug|Win32 + {C9639168-C3FF-4427-BC3B-D907FF11DE73}.Debug|x86.Build.0 = Debug|Win32 + {C9639168-C3FF-4427-BC3B-D907FF11DE73}.Fast|x64.ActiveCfg = Fast|x64 + {C9639168-C3FF-4427-BC3B-D907FF11DE73}.Fast|x64.Build.0 = Fast|x64 + {C9639168-C3FF-4427-BC3B-D907FF11DE73}.Fast|x86.ActiveCfg = Fast|Win32 + {C9639168-C3FF-4427-BC3B-D907FF11DE73}.Generic|x64.ActiveCfg = Generic|x64 + {C9639168-C3FF-4427-BC3B-D907FF11DE73}.Generic|x64.Build.0 = Generic|x64 + {C9639168-C3FF-4427-BC3B-D907FF11DE73}.Generic|x86.ActiveCfg = Generic|Win32 + {C9639168-C3FF-4427-BC3B-D907FF11DE73}.Generic|x86.Build.0 = Generic|Win32 + {ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Debug|x64.ActiveCfg = Debug|x64 + {ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Debug|x64.Build.0 = Debug|x64 + {ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Debug|x86.ActiveCfg = Debug|Win32 + {ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Debug|x86.Build.0 = Debug|Win32 + {ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Fast|x64.ActiveCfg = Fast|x64 + {ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Fast|x64.Build.0 = Fast|x64 + {ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Fast|x86.ActiveCfg = Fast|Win32 + {ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Generic|x64.ActiveCfg = Generic|x64 + {ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Generic|x64.Build.0 = Generic|x64 + {ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Generic|x86.ActiveCfg = Generic|Win32 + {ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Generic|x86.Build.0 = Generic|Win32 + {DC10CB31-A905-402E-B466-46ADCD1AD61C}.Debug|x64.ActiveCfg = Debug|x64 + {DC10CB31-A905-402E-B466-46ADCD1AD61C}.Debug|x64.Build.0 = Debug|x64 + {DC10CB31-A905-402E-B466-46ADCD1AD61C}.Debug|x86.ActiveCfg = Debug|Win32 + {DC10CB31-A905-402E-B466-46ADCD1AD61C}.Debug|x86.Build.0 = Debug|Win32 + {DC10CB31-A905-402E-B466-46ADCD1AD61C}.Fast|x64.ActiveCfg = Fast|x64 + {DC10CB31-A905-402E-B466-46ADCD1AD61C}.Fast|x64.Build.0 = Fast|x64 + {DC10CB31-A905-402E-B466-46ADCD1AD61C}.Fast|x86.ActiveCfg = Fast|Win32 + 
{DC10CB31-A905-402E-B466-46ADCD1AD61C}.Generic|x64.ActiveCfg = Generic|x64 + {DC10CB31-A905-402E-B466-46ADCD1AD61C}.Generic|x64.Build.0 = Generic|x64 + {DC10CB31-A905-402E-B466-46ADCD1AD61C}.Generic|x86.ActiveCfg = Generic|Win32 + {DC10CB31-A905-402E-B466-46ADCD1AD61C}.Generic|x86.Build.0 = Generic|Win32 + {48010B78-5594-4FE9-81AC-909B670C1516}.Debug|x64.ActiveCfg = Debug|x64 + {48010B78-5594-4FE9-81AC-909B670C1516}.Debug|x64.Build.0 = Debug|x64 + {48010B78-5594-4FE9-81AC-909B670C1516}.Debug|x86.ActiveCfg = Debug|Win32 + {48010B78-5594-4FE9-81AC-909B670C1516}.Debug|x86.Build.0 = Debug|Win32 + {48010B78-5594-4FE9-81AC-909B670C1516}.Fast|x64.ActiveCfg = Fast|x64 + {48010B78-5594-4FE9-81AC-909B670C1516}.Fast|x64.Build.0 = Fast|x64 + {48010B78-5594-4FE9-81AC-909B670C1516}.Fast|x86.ActiveCfg = Fast|Win32 + {48010B78-5594-4FE9-81AC-909B670C1516}.Generic|x64.ActiveCfg = Generic|x64 + {48010B78-5594-4FE9-81AC-909B670C1516}.Generic|x64.Build.0 = Generic|x64 + {48010B78-5594-4FE9-81AC-909B670C1516}.Generic|x86.ActiveCfg = Generic|Win32 + {48010B78-5594-4FE9-81AC-909B670C1516}.Generic|x86.Build.0 = Generic|Win32 + {5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Debug|x64.ActiveCfg = Debug|x64 + {5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Debug|x64.Build.0 = Debug|x64 + {5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Debug|x86.ActiveCfg = Debug|Win32 + {5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Debug|x86.Build.0 = Debug|Win32 + {5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Fast|x64.ActiveCfg = Fast|x64 + {5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Fast|x64.Build.0 = Fast|x64 + {5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Fast|x86.ActiveCfg = Fast|Win32 + {5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Generic|x64.ActiveCfg = Generic|x64 + {5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Generic|x64.Build.0 = Generic|x64 + {5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Generic|x86.ActiveCfg = Generic|Win32 + {5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Generic|x86.Build.0 = Generic|Win32 + {E52D3FE9-FD9F-4D93-9712-8172DB469831}.Debug|x64.ActiveCfg = 
Debug|x64 + {E52D3FE9-FD9F-4D93-9712-8172DB469831}.Debug|x64.Build.0 = Debug|x64 + {E52D3FE9-FD9F-4D93-9712-8172DB469831}.Debug|x86.ActiveCfg = Debug|Win32 + {E52D3FE9-FD9F-4D93-9712-8172DB469831}.Debug|x86.Build.0 = Debug|Win32 + {E52D3FE9-FD9F-4D93-9712-8172DB469831}.Fast|x64.ActiveCfg = Fast|x64 + {E52D3FE9-FD9F-4D93-9712-8172DB469831}.Fast|x64.Build.0 = Fast|x64 + {E52D3FE9-FD9F-4D93-9712-8172DB469831}.Fast|x86.ActiveCfg = Fast|Win32 + {E52D3FE9-FD9F-4D93-9712-8172DB469831}.Generic|x64.ActiveCfg = Generic|x64 + {E52D3FE9-FD9F-4D93-9712-8172DB469831}.Generic|x64.Build.0 = Generic|x64 + {E52D3FE9-FD9F-4D93-9712-8172DB469831}.Generic|x86.ActiveCfg = Generic|Win32 + {E52D3FE9-FD9F-4D93-9712-8172DB469831}.Generic|x86.Build.0 = Generic|Win32 + {C0892335-7EB7-48D4-83A1-500953D0526B}.Debug|x64.ActiveCfg = Debug|x64 + {C0892335-7EB7-48D4-83A1-500953D0526B}.Debug|x64.Build.0 = Debug|x64 + {C0892335-7EB7-48D4-83A1-500953D0526B}.Debug|x86.ActiveCfg = Debug|Win32 + {C0892335-7EB7-48D4-83A1-500953D0526B}.Debug|x86.Build.0 = Debug|Win32 + {C0892335-7EB7-48D4-83A1-500953D0526B}.Fast|x64.ActiveCfg = Fast|x64 + {C0892335-7EB7-48D4-83A1-500953D0526B}.Fast|x64.Build.0 = Fast|x64 + {C0892335-7EB7-48D4-83A1-500953D0526B}.Fast|x86.ActiveCfg = Fast|Win32 + {C0892335-7EB7-48D4-83A1-500953D0526B}.Generic|x64.ActiveCfg = Generic|x64 + {C0892335-7EB7-48D4-83A1-500953D0526B}.Generic|x64.Build.0 = Generic|x64 + {C0892335-7EB7-48D4-83A1-500953D0526B}.Generic|x86.ActiveCfg = Generic|Win32 + {C0892335-7EB7-48D4-83A1-500953D0526B}.Generic|x86.Build.0 = Generic|Win32 + {1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Debug|x64.ActiveCfg = Debug|x64 + {1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Debug|x64.Build.0 = Debug|x64 + {1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Debug|x86.ActiveCfg = Debug|Win32 + {1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Debug|x86.Build.0 = Debug|Win32 + {1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Fast|x64.ActiveCfg = Fast|x64 + {1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Fast|x64.Build.0 = Fast|x64 + 
{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Fast|x86.ActiveCfg = Fast|Win32 + {1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Generic|x64.ActiveCfg = Generic|x64 + {1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Generic|x64.Build.0 = Generic|x64 + {1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Generic|x86.ActiveCfg = Generic|Win32 + {1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Generic|x86.Build.0 = Generic|Win32 + {F7447653-7518-4BF0-934D-65801C74D42A}.Debug|x64.ActiveCfg = Debug|x64 + {F7447653-7518-4BF0-934D-65801C74D42A}.Debug|x64.Build.0 = Debug|x64 + {F7447653-7518-4BF0-934D-65801C74D42A}.Debug|x86.ActiveCfg = Debug|Win32 + {F7447653-7518-4BF0-934D-65801C74D42A}.Debug|x86.Build.0 = Debug|Win32 + {F7447653-7518-4BF0-934D-65801C74D42A}.Fast|x64.ActiveCfg = Fast|x64 + {F7447653-7518-4BF0-934D-65801C74D42A}.Fast|x64.Build.0 = Fast|x64 + {F7447653-7518-4BF0-934D-65801C74D42A}.Fast|x86.ActiveCfg = Fast|Win32 + {F7447653-7518-4BF0-934D-65801C74D42A}.Generic|x64.ActiveCfg = Generic|x64 + {F7447653-7518-4BF0-934D-65801C74D42A}.Generic|x64.Build.0 = Generic|x64 + {F7447653-7518-4BF0-934D-65801C74D42A}.Generic|x86.ActiveCfg = Generic|Win32 + {F7447653-7518-4BF0-934D-65801C74D42A}.Generic|x86.Build.0 = Generic|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj b/SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj new file mode 100644 index 0000000..cb653eb --- /dev/null +++ b/SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj @@ -0,0 +1,590 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + {05CEF530-F410-4C21-AC70-A7EF991DEE6A} + Win32Proj + isoECClib + 8.1 + P377 + + + + StaticLibrary + true + v140 + Unicode + + + StaticLibrary + true + v140 + 
Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp377\ + $(Platform)\$(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp377\ + $(Platform)\$(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp377\ + $(Platform)\$(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp377\ + $(Platform)\$(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp377\ + $(Platform)\$(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp377\ + $(Platform)\$(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Configuration)\SIDHp377\ + $(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Configuration)\SIDHp377\ + $(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Configuration)\SIDHp377\ + $(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Configuration)\SIDHp377\ + $(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Configuration)\SIDHp377\ + $(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Configuration)\SIDHp377\ + $(Configuration)\SIDHp377\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + ProgramDatabase + false + false + false + Default + MultiThreadedDLL + true + + + Windows + true + + + bcrypt.lib + + + + + + + Level4 + Disabled + __WINDOWS__; _FAST_; _AMD64_; + ProgramDatabase + false + false + true + Default + MultiThreadedDLL + + + 
AdvancedVectorExtensions + + + Windows + true + + + + + + + bcrypt.lib + + + + + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + true + + + + + 
true + true + true + + + + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + true + true + true + true + true + true + + + + + + + + + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj.filters b/SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj.filters new file mode 100644 index 0000000..6c8f8bb --- /dev/null +++ b/SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj.filters @@ -0,0 +1,81 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + {c12e408e-2171-41d7-8815-33244cd7b1db} + + + {e81738a2-8bd8-449a-8918-07266c29f2b7} + + + {41f562a7-d335-4517-9c95-c6a4ce94c2f0} + + + {d2fd7e3f-38db-40e6-9994-0c979863e36b} + + + {75ac45a5-2e31-48af-986d-44d27e6a2a42} + + + + + Source Files\random + + + Source Files + + + Source Files + + + Source Files\sha3 + + + Source Files + + + Source Files + + + Source Files + + + Source Files\generic + + + Source Files\x64 + + + + + Header Files\random + + + Source Files\sha3 + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj b/SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj new file mode 100644 index 0000000..db4e574 --- /dev/null +++ b/SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj @@ -0,0 +1,587 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + {E46FD055-7619-4C50-8360-FA3BC2F650FB} + Win32Proj + isoECClib + 8.1 + P434 + + + + 
StaticLibrary + true + v140 + Unicode + + + StaticLibrary + true + v140 + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp434\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + ProgramDatabase + false + false + false + Default + MultiThreadedDLL + true + + + Windows + true + + + bcrypt.lib + + + + + + + Level4 + Disabled + __WINDOWS__; _FAST_; _AMD64_; + ProgramDatabase + false + false + true + Default + MultiThreadedDLL + + + AdvancedVectorExtensions + + + Windows + true + + 
+ + + + + bcrypt.lib + + + + + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + true + + + true + true + + + + + + true + true + true + true + true 
+ true + true + true + true + true + true + true + + + true + true + true + true + true + true + true + true + true + true + true + true + + + + + + + + + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj.filters b/SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj.filters new file mode 100644 index 0000000..37c7bb1 --- /dev/null +++ b/SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj.filters @@ -0,0 +1,81 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + {c12e408e-2171-41d7-8815-33244cd7b1db} + + + {e81738a2-8bd8-449a-8918-07266c29f2b7} + + + {41f562a7-d335-4517-9c95-c6a4ce94c2f0} + + + {d2fd7e3f-38db-40e6-9994-0c979863e36b} + + + {75ac45a5-2e31-48af-986d-44d27e6a2a42} + + + + + Source Files\random + + + Source Files + + + Source Files + + + Source Files\sha3 + + + Source Files + + + Source Files + + + Source Files + + + Source Files\generic + + + Source Files\x64 + + + + + Header Files\random + + + Source Files\sha3 + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj b/SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj new file mode 100644 index 0000000..3b570eb --- /dev/null +++ b/SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj @@ -0,0 +1,598 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + {BBC8647D-B9E2-469F-A9A4-BB55B614ADBE} + Win32Proj + isoECClib + 8.1 + P503 + + + + StaticLibrary + true + v140 + Unicode + + + StaticLibrary + 
true + v140 + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + ProgramDatabase + false + false + false + Default + MultiThreadedDLL + true + + + Windows + true + + + bcrypt.lib + + + + + + + Level4 + Disabled + __WINDOWS__; _FAST_; _AMD64_; + ProgramDatabase + false + false + true + Default + MultiThreadedDLL + + + AdvancedVectorExtensions + + + Windows + true + + + + + + + bcrypt.lib + + + + + + + + + Level4 + + + MaxSpeed 
+ true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + + + true + true + true + true + true + true + true + + + + + + + + + true + true + true + true + + + + + + true + true + true + true + true + true + true + true + 
true + true + true + true + + + true + true + true + true + true + true + true + true + true + true + true + true + + + + + + + + + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj.filters b/SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj.filters new file mode 100644 index 0000000..78d3cbe --- /dev/null +++ b/SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj.filters @@ -0,0 +1,81 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + {c12e408e-2171-41d7-8815-33244cd7b1db} + + + {e81738a2-8bd8-449a-8918-07266c29f2b7} + + + {41f562a7-d335-4517-9c95-c6a4ce94c2f0} + + + {d2fd7e3f-38db-40e6-9994-0c979863e36b} + + + {75ac45a5-2e31-48af-986d-44d27e6a2a42} + + + + + Source Files\random + + + Source Files + + + Source Files + + + Source Files\sha3 + + + Source Files + + + Source Files + + + Source Files\generic + + + Source Files\x64 + + + Source Files + + + + + Header Files\random + + + Source Files\sha3 + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj b/SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj new file mode 100644 index 0000000..285372f --- /dev/null +++ b/SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj @@ -0,0 +1,584 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + {48010B78-5594-4FE9-81AC-909B670C1516} + Win32Proj + isoECClib + 8.1 + P546 + + + + StaticLibrary + true + v140 + Unicode + + + StaticLibrary + true + v140 + Unicode + + 
+ StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp546\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp546\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp546\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp546\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp546\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp546\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + ProgramDatabase + false + false + false + Default + MultiThreadedDLL + true + + + Windows + true + + + bcrypt.lib + + + + + + + Level4 + Disabled + __WINDOWS__; _FAST_; _AMD64_; + ProgramDatabase + false + false + true + Default + MultiThreadedDLL + + + AdvancedVectorExtensions + + + Windows + true + + + + + + + bcrypt.lib + + + + + + + + + Level4 + + + MaxSpeed + true + true + 
__WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + + + true + true + + + + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + 
true + true + true + true + true + true + + + + + + + + + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj.filters b/SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj.filters new file mode 100644 index 0000000..844bde6 --- /dev/null +++ b/SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj.filters @@ -0,0 +1,81 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + {c12e408e-2171-41d7-8815-33244cd7b1db} + + + {e81738a2-8bd8-449a-8918-07266c29f2b7} + + + {41f562a7-d335-4517-9c95-c6a4ce94c2f0} + + + {d2fd7e3f-38db-40e6-9994-0c979863e36b} + + + {75ac45a5-2e31-48af-986d-44d27e6a2a42} + + + + + Source Files\random + + + Source Files + + + Source Files + + + Source Files\sha3 + + + Source Files + + + Source Files + + + Source Files + + + Source Files\x64 + + + Source Files\generic + + + + + Header Files\random + + + Source Files\sha3 + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj b/SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj new file mode 100644 index 0000000..976809f --- /dev/null +++ b/SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj @@ -0,0 +1,577 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + {ED1BA17A-58EA-4D9F-9B19-7061395E22BB} + Win32Proj + isoECClib + 8.1 + P610 + + + + StaticLibrary + true + v140 + Unicode + + + StaticLibrary + true + v140 + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + 
false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Configuration)\SIDHp610\ + $(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Configuration)\SIDHp610\ + $(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Configuration)\SIDHp610\ + $(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Configuration)\SIDHp610\ + $(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Configuration)\SIDHp610\ + $(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Configuration)\SIDHp610\ + $(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\ + $(Platform)\$(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\ + $(Platform)\$(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\ + $(Platform)\$(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\ + $(Platform)\$(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\ + $(Platform)\$(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\ + $(Platform)\$(Configuration)\SIDHp610\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + ProgramDatabase + false + false + false + Default + MultiThreadedDLL + true + + + Windows + true + + + bcrypt.lib + + + + + + + Level4 + Disabled + __WINDOWS__; _FAST_; _AMD64_; + ProgramDatabase + false + false + true + Default + MultiThreadedDLL + + + AdvancedVectorExtensions + + + Windows + true + + + + + + + bcrypt.lib + + + + + + + + + Level4 + 
+ + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + true + true + true + true + true + true + + + + + true + true + true + true + true + true + true + + + true + true + + + + + + true + true + true + true + true + true + + + true + true + true + true + 
true + true + + + + + + + + + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj.filters b/SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj.filters new file mode 100644 index 0000000..8d9ac78 --- /dev/null +++ b/SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj.filters @@ -0,0 +1,81 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + {c12e408e-2171-41d7-8815-33244cd7b1db} + + + {e81738a2-8bd8-449a-8918-07266c29f2b7} + + + {41f562a7-d335-4517-9c95-c6a4ce94c2f0} + + + {d2fd7e3f-38db-40e6-9994-0c979863e36b} + + + {597545a3-c4e5-4065-9d91-c7bec60b6da4} + + + + + Source Files\random + + + Source Files + + + Source Files + + + Source Files\sha3 + + + Source Files\x64 + + + Source Files\generic + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files\random + + + Source Files\sha3 + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj b/SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj new file mode 100644 index 0000000..cbdb6fb --- /dev/null +++ b/SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj @@ -0,0 +1,572 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + {F7447653-7518-4BF0-934D-65801C74D42A} + Win32Proj + isoECClib + 8.1 + P697 + + + + StaticLibrary + true + v140 + Unicode + + + StaticLibrary + true + v140 + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + 
+ + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Configuration)\SIDHp697\ + $(Configuration)\SIDHp697\ + + + $(SolutionDir)$(Configuration)\SIDHp610\ + $(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Configuration)\SIDHp697\ + $(Configuration)\SIDHp697\ + + + $(SolutionDir)$(Configuration)\SIDHp610\ + $(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Configuration)\SIDHp610\ + $(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Configuration)\SIDHp697\ + $(Configuration)\SIDHp697\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp697\ + $(Platform)\$(Configuration)\SIDHp697\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\ + $(Platform)\$(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp697\ + $(Platform)\$(Configuration)\SIDHp697\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\ + $(Platform)\$(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\ + $(Platform)\$(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Platform)\$(Configuration)\SIDHp697\ + $(Platform)\$(Configuration)\SIDHp697\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + ProgramDatabase + false + false + false + Default + MultiThreadedDLL + true + + + Windows + true + + + bcrypt.lib + + + + + + + Level4 + Disabled + __WINDOWS__; _FAST_; _AMD64_; + ProgramDatabase + false + false + true + Default + MultiThreadedDLL + + + AdvancedVectorExtensions + + + Windows + true + + + + + + + bcrypt.lib + + + + + + + + + Level4 + + + MaxSpeed + true + true + 
__WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + + + true + true + + + + + + true + true + true + true + true + true + + + true + true + true + true + true + true + + + + + + + + + + + + + + \ No newline at 
end of file diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj.filters b/SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj.filters new file mode 100644 index 0000000..c15458a --- /dev/null +++ b/SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj.filters @@ -0,0 +1,81 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + {c12e408e-2171-41d7-8815-33244cd7b1db} + + + {e81738a2-8bd8-449a-8918-07266c29f2b7} + + + {41f562a7-d335-4517-9c95-c6a4ce94c2f0} + + + {d2fd7e3f-38db-40e6-9994-0c979863e36b} + + + {597545a3-c4e5-4065-9d91-c7bec60b6da4} + + + + + Source Files\random + + + Source Files + + + Source Files + + + Source Files\sha3 + + + Source Files + + + Source Files + + + Source Files + + + Source Files\generic + + + Source Files\x64 + + + + + Header Files\random + + + Source Files\sha3 + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj b/SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj new file mode 100644 index 0000000..c26ea03 --- /dev/null +++ b/SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj @@ -0,0 +1,598 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + {8283DD76-E88A-4B63-ABDE-33F014178413} + Win32Proj + isoECClib + 8.1 + P751 + + + + StaticLibrary + true + v140 + Unicode + + + StaticLibrary + true + v140 + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + 
StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + StaticLibrary + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp751\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + ProgramDatabase + false + false + false + Default + MultiThreadedDLL + true + + + Windows + true + + + bcrypt.lib + + + + + + + Level4 + Disabled + __WINDOWS__; _FAST_; _AMD64_; + ProgramDatabase + false + false + true + Default + MultiThreadedDLL + + + AdvancedVectorExtensions + + + Windows + true + + + + + + + bcrypt.lib + + + + + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; 
_X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + Level4 + + + MaxSpeed + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Windows + true + true + true + + + bcrypt.lib + + + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + + + + + + + + + true + true + + + true + true + true + true + + + + + + true + true + true + true + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + true + true + true + true + true + true + + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj.filters b/SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj.filters new file mode 100644 index 0000000..348c901 --- /dev/null +++ b/SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj.filters @@ -0,0 +1,81 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + {c12e408e-2171-41d7-8815-33244cd7b1db} + + + {e81738a2-8bd8-449a-8918-07266c29f2b7} + + + {41f562a7-d335-4517-9c95-c6a4ce94c2f0} + + + {d2fd7e3f-38db-40e6-9994-0c979863e36b} + + + {597545a3-c4e5-4065-9d91-c7bec60b6da4} + + + + + Source Files\random + + + Source Files + + + Source Files + + + Source Files\sha3 + + + Source Files + + + Source Files + + + Source Files\x64 + + + Source Files\generic + + + Source Files + + + + + Header Files\random + + + Source Files\sha3 + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj new file mode 100644 index 0000000..09521b0 --- /dev/null +++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj @@ -0,0 +1,432 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + {05cef530-f410-4c21-ac70-a7ef991dee6a} + + + + {10C4B543-0224-43D3-B84D-390665AA6C25} + Win32Proj + fp_tests + arith_tests-P377 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + 
false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + v140 + + + v140 + + + v140 + + + v140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp377\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp377\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp377\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp377\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp377\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp377\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp377\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp377\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp377\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + false + Default + MultiThreadedDLL + true + ProgramDatabase + + + Console + true + + + + + + + Level4 + Disabled + __WINDOWS__; _FAST_; _AMD64_; + false + Default + MultiThreadedDLL + true + ProgramDatabase + AdvancedVectorExtensions + + + Console + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; 
_GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + true + true + __WINDOWS__; _X86_; _GENERIC_; + true + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__; _X86_; _GENERIC_; + true + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + true + AdvancedVectorExtensions + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + true + AdvancedVectorExtensions + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj.filters b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj.filters new file mode 100644 index 0000000..e5e4541 --- /dev/null +++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj.filters @@ -0,0 +1,30 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj new 
file mode 100644 index 0000000..9dd17c1 --- /dev/null +++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj @@ -0,0 +1,432 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + {10224e47-baef-430e-a8a0-969cc6ceb96b} + + + + {8944AC47-A218-4F4D-8AF1-AF704160A727} + Win32Proj + fp_tests + arith_tests-P434 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + v140 + + + v140 + + + v140 + + + v140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp434\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp434\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp434\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp434\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Configuration)\ + 
$(Configuration)\SIDHp434\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp434\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + false + Default + MultiThreadedDLL + true + ProgramDatabase + + + Console + true + + + + + + + Level4 + Disabled + __WINDOWS__; _GENERIC_; _AMD64_; + false + Default + MultiThreadedDLL + true + ProgramDatabase + AdvancedVectorExtensions + + + Console + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + true + true + __WINDOWS__; _X86_; _GENERIC_; + true + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__; _X86_; _GENERIC_; + true + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + true + AdvancedVectorExtensions + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + true + AdvancedVectorExtensions + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj.filters 
b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj.filters new file mode 100644 index 0000000..66b8267 --- /dev/null +++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj.filters @@ -0,0 +1,30 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj new file mode 100644 index 0000000..3a7b23d --- /dev/null +++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj @@ -0,0 +1,432 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + {bbc8647d-b9e2-469f-a9a4-bb55b614adbe} + + + + {464B689B-7C93-47A2-B2F5-FE162A4EF404} + Win32Proj + fp_tests + arith_tests-P503 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + v140 + + + v140 + + + v140 + + + v140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + false 
+ $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + false + Default + MultiThreadedDLL + true + ProgramDatabase + + + Console + true + + + + + + + Level4 + Disabled + __WINDOWS__; _GENERIC_; _AMD64_; + false + Default + MultiThreadedDLL + true + ProgramDatabase + AdvancedVectorExtensions + + + Console + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _FAST_; _AMD64_; + MultiThreadedDLL + 
AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + true + true + __WINDOWS__; _X86_; _GENERIC_; + true + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__; _X86_; _GENERIC_; + true + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + true + AdvancedVectorExtensions + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + true + AdvancedVectorExtensions + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj.filters b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj.filters new file mode 100644 index 0000000..4716cee --- /dev/null +++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj.filters @@ -0,0 +1,30 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj new file mode 100644 index 0000000..9da4cc2 --- /dev/null +++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj @@ -0,0 +1,432 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + {48010b78-5594-4fe9-81ac-909b670c1516} + + + + 
{5572CD5B-7F2F-4F44-B7AC-844291850C6E} + Win32Proj + fp_tests + arith_tests-P546 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + v140 + + + v140 + + + v140 + + + v140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp546\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp546\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp546\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp546\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp546\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp503\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp546\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp503\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + false + Default + MultiThreadedDLL + true + ProgramDatabase + + + Console + true + + + + + + + Level4 + Disabled + __WINDOWS__; _GENERIC_; _AMD64_; + false + Default + MultiThreadedDLL + true + ProgramDatabase + AdvancedVectorExtensions + + + Console + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + 
MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + true + true + __WINDOWS__; _X86_; _GENERIC_; + true + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__; _X86_; _GENERIC_; + true + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + true + AdvancedVectorExtensions + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + true + AdvancedVectorExtensions + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj.filters b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj.filters new file mode 100644 index 0000000..9abce0e --- /dev/null +++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj.filters @@ -0,0 +1,30 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + 
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj new file mode 100644 index 0000000..b0ed324 --- /dev/null +++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj @@ -0,0 +1,432 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + {ed1ba17a-58ea-4d9f-9b19-7061395e22bb} + + + + {2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F} + Win32Proj + fp_tests + arith_tests-P610 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + v140 + + + v140 + + + v140 + + + v140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + false + 
$(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + false + Default + MultiThreadedDLL + true + ProgramDatabase + + + Console + true + + + + + + + Level4 + Disabled + __WINDOWS__; _GENERIC_; _AMD64_; + false + Default + MultiThreadedDLL + true + ProgramDatabase + AdvancedVectorExtensions + + + Console + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + true + true + __WINDOWS__; _X86_; _GENERIC_; + true + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__; _X86_; _GENERIC_; + true + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + true + AdvancedVectorExtensions + Disabled + + + 
UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + true + AdvancedVectorExtensions + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj.filters b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj.filters new file mode 100644 index 0000000..fc52aa6 --- /dev/null +++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj.filters @@ -0,0 +1,30 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj new file mode 100644 index 0000000..4414eae --- /dev/null +++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj @@ -0,0 +1,432 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + {f7447653-7518-4bf0-934d-65801c74d42a} + + + + {C0892335-7EB7-48D4-83A1-500953D0526B} + Win32Proj + fp_tests + arith_tests-P697 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + 
Application + false + v140 + true + Unicode + + + v140 + + + v140 + + + v140 + + + v140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp697\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp697\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp697\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp697\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp697\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp697\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + false + Default + MultiThreadedDLL + true + ProgramDatabase + + + Console + true + + + + + + + Level4 + Disabled + __WINDOWS__; _GENERIC_; _AMD64_; + false + Default + MultiThreadedDLL + true + ProgramDatabase + AdvancedVectorExtensions + + + Console + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + 
AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + true + true + __WINDOWS__; _X86_; _GENERIC_; + true + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__; _X86_; _GENERIC_; + true + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + true + AdvancedVectorExtensions + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + true + AdvancedVectorExtensions + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj.filters b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj.filters new file mode 100644 index 0000000..f1efc19 --- /dev/null +++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj.filters @@ -0,0 +1,30 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj new file mode 100644 index 0000000..28d7031 --- /dev/null +++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj @@ -0,0 +1,432 @@ + + + + + Debug + Win32 + + + Debug + 
x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + {8283dd76-e88a-4b63-abde-33f014178413} + + + + {C9639168-C3FF-4427-BC3B-D907FF11DE73} + Win32Proj + fp_tests + arith_tests-P751 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + v140 + + + v140 + + + v140 + + + v140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp751\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp751\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp751\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp751\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp751\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp751\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp751\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp751\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp751\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + false + Default + MultiThreadedDLL + 
true + ProgramDatabase + + + Console + true + + + + + + + Level4 + Disabled + __WINDOWS__; _GENERIC_; _AMD64_; + false + Default + MultiThreadedDLL + true + ProgramDatabase + AdvancedVectorExtensions + + + Console + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + MultiThreadedDLL + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _FAST_; _AMD64_; + MultiThreadedDLL + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + true + true + __WINDOWS__; _X86_; _GENERIC_; + true + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__; _X86_; _GENERIC_; + true + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + true + AdvancedVectorExtensions + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + Level4 + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + true + AdvancedVectorExtensions + Disabled + + + UseLinkTimeCodeGeneration + true + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj.filters b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj.filters new file mode 100644 index 0000000..456616e --- /dev/null +++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj.filters @@ 
-0,0 +1,30 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj new file mode 100644 index 0000000..4508e20 --- /dev/null +++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj @@ -0,0 +1,487 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + + true + true + true + true + true + true + true + true + true + true + true + true + + + + + + + + + + {05cef530-f410-4c21-ac70-a7ef991dee6a} + + + + {0D497554-D408-4061-BA26-2A65F4272841} + Win32Proj + kex_tests + 8.1 + test-SIKEp377 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp377\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ 
+ $(Platform)\$(Configuration)\test_SIKEp377\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp434\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp434\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp377\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp377\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp434\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp377\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp377\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp434\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + + + + + + + Level4 + Disabled + __WINDOWS__; _FAST_; _AMD64_; + AdvancedVectorExtensions + MultiThreadedDLL + + + Console + true + + + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + 
AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj.filters b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj.filters new file mode 100644 index 0000000..493d88c --- /dev/null +++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj new file mode 100644 index 0000000..a7563b7 --- /dev/null +++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj @@ -0,0 +1,483 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + 
Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + + true + true + true + true + true + true + true + true + + + + + + + + + + {e46fd055-7619-4c50-8360-fa3bc2f650fb} + + + + {53B2CD97-2FE6-4927-86A7-B16E436CFBD5} + Win32Proj + kex_tests + 8.1 + test-SIKEp434 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp434\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp434\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp434\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp434\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp434\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp434\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp434\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp434\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp434\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp434\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp434\ + + + false + 
$(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp434\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + + + + + + + Level4 + Disabled + __WINDOWS__; _FAST_; _AMD64_; + AdvancedVectorExtensions + MultiThreadedDLL + + + Console + true + + + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + + \ No newline at end of 
file diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj.filters b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj.filters new file mode 100644 index 0000000..c8ef8f7 --- /dev/null +++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj new file mode 100644 index 0000000..02557a3 --- /dev/null +++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj @@ -0,0 +1,483 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + + true + true + true + true + true + true + true + true + + + + + + + + + + {bbc8647d-b9e2-469f-a9a4-bb55b614adbe} + + + + {EF9FE361-D94D-4CE0-8873-739A925326A3} + Win32Proj + kex_tests + 8.1 + test-SIKEp503 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + 
+ + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp503\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp503\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp503\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp503\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp503\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp503\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp503\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp503\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp503\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp503\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp503\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp503\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + + + + + + + Level4 + Disabled + __WINDOWS__; _FAST_; _AMD64_; + AdvancedVectorExtensions + MultiThreadedDLL + + + Console + true + + + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; 
_X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj.filters b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj.filters new file mode 100644 index 0000000..a38608b --- /dev/null +++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj new file mode 100644 index 
0000000..2bb1ce8 --- /dev/null +++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj @@ -0,0 +1,483 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + + true + true + true + true + true + true + true + true + + + + + + + + + + {48010b78-5594-4fe9-81ac-909b670c1516} + + + + {E52D3FE9-FD9F-4D93-9712-8172DB469831} + Win32Proj + kex_tests + 8.1 + test-SIKEp546 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp546\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp546\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp503\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp503\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp546\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp546\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp503\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp503\ + + + false + 
$(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp503\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp546\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp546\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp503\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + + + + + + + Level4 + Disabled + __WINDOWS__; _FAST_; _AMD64_; + AdvancedVectorExtensions + MultiThreadedDLL + + + Console + true + + + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + 
Disabled + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj.filters b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj.filters new file mode 100644 index 0000000..90a64f2 --- /dev/null +++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj new file mode 100644 index 0000000..997f20e --- /dev/null +++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj @@ -0,0 +1,487 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + + true + true + true + true + true + true + true + true + true + true + true + true + + + + + + + + + + {ed1ba17a-58ea-4d9f-9b19-7061395e22bb} + + + + {DC10CB31-A905-402E-B466-46ADCD1AD61C} + Win32Proj + kex_tests + 8.1 + test-SIKEp610 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + 
Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + + + + + + + Level4 + Disabled + __WINDOWS__; _GENERIC_; _AMD64_; + AdvancedVectorExtensions + MultiThreadedDLL + + + Console + true + + + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + true + + + + + Level4 + + + Disabled + true + true + 
__WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj.filters b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj.filters new file mode 100644 index 0000000..14944a1 --- /dev/null +++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + 
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj new file mode 100644 index 0000000..b596a73 --- /dev/null +++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj @@ -0,0 +1,487 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + + true + true + true + true + true + true + true + true + true + true + true + true + + + + + + + + + + {f7447653-7518-4bf0-934d-65801c74d42a} + + + + {1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B} + Win32Proj + kex_tests + 8.1 + test-SIKEp697 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp697\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp697\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + false + 
$(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp697\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp697\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp697\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp697\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\SIDHp610\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + + + + + + + Level4 + Disabled + __WINDOWS__; _GENERIC_; _AMD64_; + AdvancedVectorExtensions + MultiThreadedDLL + + + Console + true + + + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + 
Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj.filters b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj.filters new file mode 100644 index 0000000..8873662 --- /dev/null +++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj new file mode 100644 index 0000000..68f51c2 --- /dev/null +++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj @@ -0,0 +1,487 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Fast + Win32 + + + Fast + x64 + + + Generic + Win32 + + + Generic + x64 + + + Optimized-fast + Win32 + + + Optimized-fast + x64 + + + Optimized-generic + Win32 + + + Optimized-generic + x64 + + + Release + Win32 + + + Release + x64 + + + + + true + true + true + true + true + true + true + true + true + true + true + true + + + + + + + + + + 
{8283dd76-e88a-4b63-abde-33f014178413} + + + + {0D570915-7551-4D5F-A2F0-A4A6200185F9} + Win32Proj + kex_tests + 8.1 + test-SIKEp751 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp751\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp751\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp751\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp751\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp751\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp751\ + + + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\test_SIKEp751\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp751\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp751\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp751\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp751\ + + + false + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\test_SIKEp751\ + + + + + + Level4 + Disabled + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + 
Console + true + + + + + + + Level4 + Disabled + __WINDOWS__; _FAST_; _AMD64_; + AdvancedVectorExtensions + MultiThreadedDLL + + + Console + true + + + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _X86_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _FAST_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__; _GENERIC_; _AMD64_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + Level4 + + + Disabled + true + true + __WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_; + AdvancedVectorExtensions + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj.filters b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj.filters new file mode 100644 index 
0000000..668ae12 --- /dev/null +++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/SIKE_sw/src/P377/AMD64/fp_x64.c b/SIKE_sw/src/P377/AMD64/fp_x64.c new file mode 100644 index 0000000..d49c8f8 --- /dev/null +++ b/SIKE_sw/src/P377/AMD64/fp_x64.c @@ -0,0 +1,439 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: modular arithmetic optimized for x64 platforms for P377 +*********************************************************************************************/ + +#include "../P377_internal.h" +#include "../../internal.h" + +// Global constants +extern const uint64_t p377[NWORDS_FIELD]; +extern const uint64_t p377p1[NWORDS_FIELD]; +extern const uint64_t p377x2[NWORDS_FIELD]; +extern const uint64_t p377x4[NWORDS_FIELD]; + + +__inline void mp_sub377_p2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. 
+#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p377x2)[i], borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + mp_sub377_p2_asm(a, b, c); + +#endif +} + + +__inline void mp_sub377_p4(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. +#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p377x4)[i], borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + mp_sub377_p4_asm(a, b, c); + +#endif +} + + +__inline void fpadd377(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p377. + // Inputs: a, b in [0, 2*p377-1] + // Output: c in [0, 2*p377-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p377x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p377x2)[i] & mask, carry, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + fpadd377_asm(a, b, c); + +#endif +} + + +__inline void fpsub377(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p377. 
+ // Inputs: a, b in [0, 2*p377-1] + // Output: c in [0, 2*p377-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p377x2)[i] & mask, borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + fpsub377_asm(a, b, c); + +#endif +} + + +__inline void fpneg377(digit_t* a) +{ // Modular negation, a = -a mod p377. + // Input/output: a in [0, 2*p377-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p377x2)[i], a[i], borrow, a[i]); + } +} + + +void fpdiv2_377(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p377. + // Input : a in [0, 2*p377-1] + // Output: c in [0, 2*p377-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p377 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p377)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection377(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p377-1] to [0, p377-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p377)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p377)[i] & mask, borrow, a[i]); + } +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords. 
+ + (void)nwords; + +#if (OS_TARGET == OS_WIN) + digit_t t = 0; + uint128_t uv = {0}; + unsigned int carry = 0; + + MULADD128(a[0], b[0], uv, carry, uv); + t += carry; + c[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[0], uv, carry, uv); + t += carry; + c[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[0], uv, carry, uv); + t += carry; + c[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[0], uv, carry, uv); + t += carry; + c[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[0], uv, carry, uv); + t += carry; + c[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[5], uv, carry, uv); + t += carry; + MULADD128(a[4], b[1], uv, carry, uv); + t += carry; + MULADD128(a[3], b[2], uv, carry, uv); + t += carry; + MULADD128(a[2], b[3], uv, carry, uv); + t += carry; + MULADD128(a[1], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[0], uv, carry, uv); + t += carry; + c[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[5], b[1], uv, carry, uv); + t += carry; + MULADD128(a[4], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[4], uv, carry, uv); + t += carry; + MULADD128(a[1], b[5], uv, carry, uv); + t += carry; + c[6] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[5], b[2], uv, carry, uv); + t += carry; + MULADD128(a[4], b[3], uv, 
carry, uv); + t += carry; + MULADD128(a[3], b[4], uv, carry, uv); + t += carry; + MULADD128(a[2], b[5], uv, carry, uv); + t += carry; + c[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[5], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[5], uv, carry, uv); + t += carry; + c[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[5], b[4], uv, carry, uv); + t += carry; + MULADD128(a[4], b[5], uv, carry, uv); + t += carry; + c[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + + MULADD128(a[5], b[5], uv, carry, uv); + c[10] = uv[0]; + c[11] = uv[1]; + +#elif (OS_TARGET == OS_LINUX) + + mul377_asm(a, b, c); + +#endif +} + + +void rdc_mont(digit_t* ma, digit_t* mc) +{ // Montgomery reduction exploiting special form of the prime. + // mc = ma*R^-1 mod p377x2, where R = 2^384. + // If ma < 2^384*p377, the output mc is in the range [0, 2*p377-1]. + // ma is assumed to be in Montgomery representation. + +#if (OS_TARGET == OS_WIN) + unsigned int carry; + digit_t t = 0; + uint128_t uv = {0}; + + mc[0] = ma[0]; + mc[1] = ma[1]; + MUL128(mc[0], ((digit_t*)p377p1)[2], uv); + ADDC(0, uv[0], ma[2], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + mc[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = 0; + + MULADD128(mc[0], ((digit_t*)p377p1)[3], uv, carry, uv); + MULADD128(mc[1], ((digit_t*)p377p1)[2], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[3], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p377p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p377p1)[3], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p377p1)[2], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[4], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p377p1)[5], uv, carry, uv); + t 
+= carry; + MULADD128(mc[1], ((digit_t*)p377p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p377p1)[3], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p377p1)[2], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[5], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[1], ((digit_t*)p377p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p377p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p377p1)[3], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p377p1)[2], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[6], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[2], ((digit_t*)p377p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p377p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p377p1)[3], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p377p1)[2], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[7], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[3], ((digit_t*)p377p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p377p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p377p1)[3], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[8], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[4], ((digit_t*)p377p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p377p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[9], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[5], ((digit_t*)p377p1)[5], uv, carry, uv); + t += carry; + ADDC(0, 
uv[0], ma[10], carry, mc[4]); + ADDC(carry, uv[1], 0, carry, uv[1]); + ADDC(0, uv[1], ma[11], carry, mc[5]); + +#elif (OS_TARGET == OS_LINUX) + + rdc377_asm(ma, mc); + +#endif +} \ No newline at end of file diff --git a/SIKE_sw/src/P377/AMD64/fp_x64_asm.S b/SIKE_sw/src/P377/AMD64/fp_x64_asm.S new file mode 100644 index 0000000..af1c4e4 --- /dev/null +++ b/SIKE_sw/src/P377/AMD64/fp_x64_asm.S @@ -0,0 +1,747 @@ +//******************************************************************************************* +// SIDH: an efficient supersingular isogeny cryptography library +// Copyright (c) Microsoft Corporation +// +// Website: https://github.com/microsoft/PQCrypto-SIDH +// Released under MIT license +// +// Abstract: field arithmetic in x64 assembly for P377 on Linux +//******************************************************************************************* + +.intel_syntax noprefix + +// Format function and variable names for Mac OS X +#if defined(__APPLE__) + #define fmt(f) _##f +#else + #define fmt(f) f +#endif + +// Registers that are used for parameter passing: +#define reg_p1 rdi +#define reg_p2 rsi +#define reg_p3 rdx + +// Define addition instructions +#ifdef _MULX_ +#ifdef _ADX_ + +#define ADD adcx +#define ADC adcx + +#else + +#define ADD add +#define ADC adc + +#endif +#endif + + +.text +//*********************************************************************** +// Field addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2], minus 2*p377 when that sum is >= 2*p377 (2*p377 is subtracted, then added back under a borrow mask; same contract as fpadd377 in fp_x64.c) +//*********************************************************************** +.global fmt(fpadd377_asm) +fmt(fpadd377_asm): + push r12 + push r13 + push r14 + push r15 + + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + + mov rcx, [rip+p377x2] + sub r8, rcx + mov rdi, [rip+p377x2+8] + sbb r9, rdi + sbb
r10, rdi + mov rsi, [rip+p377x2+24] + sbb r11, rsi + mov r14, [rip+p377x2+32] + sbb r12, r14 + mov r15, [rip+p377x2+40] + sbb r13, r15 + sbb rax, 0 + + and rcx, rax + and rdi, rax + and rsi, rax + and r14, rax + and r15, rax + + add r8, rcx + adc r9, rdi + adc r10, rdi + adc r11, rsi + adc r12, r14 + adc r13, r15 + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Field subtraction +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2], with 2*p377 added back (under a borrow mask held in rax) when the subtraction borrows +//*********************************************************************** +.global fmt(fpsub377_asm) +fmt(fpsub377_asm): + push r12 + push r13 + + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb rax, 0 + + mov rcx, [rip+p377x2] + mov rdi, [rip+p377x2+8] + mov rsi, [rip+p377x2+24] + and rcx, rax + and rdi, rax + and rsi, rax + add r8, rcx + adc r9, rdi + adc r10, rdi + adc r11, rsi + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + setc cl + + mov rdi, [rip+p377x2+32] + mov rsi, [rip+p377x2+40] + and rdi, rax + and rsi, rax + bt rcx, 0 + adc r12, rdi + adc r13, rsi + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + + pop r13 + pop r12 + ret + + +///////////////////////////////////////////////////////////////// MACRO: c [reg_p3] = a [reg_p1] - b [reg_p2], then the 6-word constant P0 is added unconditionally (no borrow check); the word loaded from P0+8 is reused for the word at P0+16, so those two words must be equal (true for p377x2 and p377x4 as defined in P377.c) +.macro SUB377_PX P0 + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov rax, [reg_p1+32] + mov rcx, [reg_p1+40] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb rax, [reg_p2+32] + sbb rcx, [reg_p2+40] + + mov rdi, [rip+\P0] + mov rsi, [rip+\P0+8] + add r8, rdi + adc r9,
rsi + adc r10, rsi + mov rdi, [rip+\P0+24] + mov rsi, [rip+\P0+32] + adc r11, rdi + mov rdi, [rip+\P0+40] + adc rax, rsi + adc rcx, rdi + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], rax + mov [reg_p3+40], rcx + .endm + + +//*********************************************************************** +// Multiprecision subtraction with correction with 2*p377 +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p377 +//*********************************************************************** +.global fmt(mp_sub377_p2_asm) +fmt(mp_sub377_p2_asm): + + SUB377_PX p377x2 + ret + + +//*********************************************************************** +// Multiprecision subtraction with correction with 4*p377 +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p377 +//*********************************************************************** +.global fmt(mp_sub377_p4_asm) +fmt(mp_sub377_p4_asm): + + SUB377_PX p377x4 + ret + + +#ifdef _MULX_ + +///////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: memory pointers M0 and M1 +// Outputs: memory pointer C and regs T1, T2, T0 +// Temps: regs T0:T6 +///////////////////////////////////////////////////////////////// + +#ifdef _ADX_ +.macro MUL192_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6 + mov rdx, \M0 + mulx \T0, \T1, \M1 // T0:T1 = A0*B0 + mov \C, \T1 // C0_final + mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 + xor rax, rax + adox \T0, \T2 + mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 + adox \T1, \T3 + + mov rdx, 8\M0 + mulx \T3, \T4, \M1 // T3:T4 = A1*B0 + adox \T2, rax + xor rax, rax + mulx \T5, \T6, 8\M1 // T5:T6 = A1*B1 + adox \T4, \T0 + mov 8\C, \T4 // C1_final + adcx \T3, \T6 + mulx \T6, \T0, 16\M1 // T6:T0 = A1*B2 + adox \T3, \T1 + adcx \T5, \T0 + adcx \T6, rax + adox \T5, \T2 + + mov rdx, 16\M0 + mulx \T1, \T0, \M1 // T1:T0 = A2*B0 + adox \T6, rax + xor rax, rax + mulx \T2, \T4, 8\M1 // T2:T4 = A2*B1 + 
adox \T0, \T3 + mov 16\C, \T0 // C2_final + adcx \T1, \T5 + mulx \T0, \T3, 16\M1 // T0:T3 = A2*B2 + adcx \T2, \T6 + adcx \T0, rax + adox \T1, \T4 + adox \T2, \T3 + adox \T0, rax +.endm + +#else + +.macro MUL192_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6 + mov rdx, \M0 + mulx \T0, \T1, \M1 // T0:T1 = A0*B0 + mov \C, \T1 // C0_final + mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 + add \T0, \T2 + mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 + adc \T1, \T3 + + mov rdx, 8\M0 + mulx \T3, \T4, \M1 // T3:T4 = A1*B0 + adc \T2, 0 + mulx \T5, \T6, 8\M1 // T5:T6 = A1*B1 + add \T4, \T0 + mov 8\C, \T4 // C1_final + adc \T3, \T1 + adc \T5, \T2 + mulx \T2, \T1, 16\M1 // T2:T1 = A1*B2 + adc \T2, 0 + + add \T3, \T6 + adc \T5, \T1 + adc \T2, 0 + + mov rdx, 16\M0 + mulx \T1, \T0, \M1 // T1:T0 = A2*B0 + add \T0, \T3 + mov 16\C, \T0 // C2_final + mulx \T4, \T6, 8\M1 // T4:T6 = A2*B1 + adc \T1, \T5 + adc \T2, \T4 + mulx \T0, \T3, 16\M1 // T0:T3 = A2*B2 + adc \T0, 0 + add \T1, \T6 + adc \T2, \T3 + adc \T0, 0 +.endm +#endif + + +//***************************************************************************** +// 377-bit multiplication using Karatsuba (one level), schoolbook (one level) +//***************************************************************************** +.global fmt(mul377_asm) +fmt(mul377_asm): + push r14 + push r15 + mov rcx, reg_p3 + + // r8-r10 <- AH + AL, rax <- mask + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + push rbx + push rbp + add r8, [reg_p1+24] + adc r9, [reg_p1+32] + adc r10, [reg_p1+40] + sbb rax, 0 + push r12 + push r13 + + // r11-r13 <- BH + BL, rbx <- mask + xor rbx, rbx + mov r11, [reg_p2] + mov r12, [reg_p2+8] + mov r13, [reg_p2+16] + sub rsp, 48 + add r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + sbb rbx, 0 + mov [rsp], r8 + mov [rsp+8], r9 + mov [rsp+16], r10 + mov [rsp+24], r11 + mov [rsp+32], r12 + mov [rsp+40], r13 + + // r11-r13 <- masked (BH + BL) + and r11, rax + and r12, rax + and r13, rax + + // r8-r10 <- 
masked (AH + AL) + and r8, rbx + and r9, rbx + and r10, rbx + + // r8-r10 <- masked (AH + AL) + masked (BH + BL) + add r8, r11 + adc r9, r12 + adc r10, r13 + + // [rcx+48] <- (AH+AL) x (BH+BL), low part + MUL192_SCHOOL [rsp], [rsp+24], [rcx+48], r15, rbx, rbp, r11, r12, r13, r14 + mov [rcx+72], rbx + mov [rcx+80], rbp + mov [rcx+88], r15 + + // [rcx] <- AL x BL + MUL192_SCHOOL [reg_p1], [reg_p2], [rcx], r15, rbx, rbp, r11, r12, r13, r14 // Result C0-C2 + mov [rcx+24], rbx + mov [rcx+32], rbp + mov [rcx+40], r15 + + // [rsp], rbx, rbp, r15 <- AH x BH + MUL192_SCHOOL [reg_p1+24], [reg_p2+24], [rsp], r15, rbx, rbp, r11, r12, r13, r14 + + // r8-r10 <- (AH+AL) x (BH+BL), final step + add r8, [rcx+72] + adc r9, [rcx+80] + adc r10, [rcx+88] + + // r11-r13, r8-r10 <- (AH+AL) x (BH+BL) - ALxBL + mov r11, [rcx+48] + mov r12, [rcx+56] + mov r13, [rcx+64] + sub r11, [rcx] + sbb r12, [rcx+8] + sbb r13, [rcx+16] + sbb r8, [rcx+24] + sbb r9, [rcx+32] + sbb r10, [rcx+40] + + // r11-r13, r8-r10 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + mov rdi, [rsp] + mov rsi, [rsp+8] + mov rdx, [rsp+16] + add rsp, 48 + sub r11, rdi + sbb r12, rsi + sbb r13, rdx + sbb r8, rbx + sbb r9, rbp + sbb r10, r15 + + add r11, [rcx+24] + adc r12, [rcx+32] + adc r13, [rcx+40] + mov [rcx+24], r11 // Result C3-C5 + mov [rcx+32], r12 + mov [rcx+40], r13 + pop r13 + pop r12 + adc r8, rdi + adc r9, rsi + adc r10, rdx + mov [rcx+48], r8 // Result C6-C8 + mov [rcx+56], r9 + mov [rcx+64], r10 + adc rbx, 0 + adc rbp, 0 + adc r15, 0 + mov [rcx+72], rbx // Result C9-C11 + mov [rcx+80], rbp + mov [rcx+88], r15 + + pop rbp + pop rbx + pop r15 + pop r14 + ret + +#else + +# error "CONFIGURATION NOT SUPPORTED.
TRY USE_MULX=TRUE" + +#endif + + +#ifdef _MULX_ + +///////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: regs I0, I1 and memory pointer M1; I0 is not referenced in the body: on entry rdx must already hold I0, and the caller must have computed T1:T0 = I0 x [M1] and T2:T4 = I0 x [M1+8] (as rdc377_asm does before each use) +// Outputs: regs T0:T4 +// Temp: regs T0:T5 +///////////////////////////////////////////////////////////////// + +.macro MUL128x192_SCHOOL I0, I1, M1, T0, T1, T2, T3, T4, T5 + xor rax, rax + mulx \T3, \T5, 16\M1 + ADD \T1, \T4 + ADC \T2, \T5 + ADC \T3, rax + + xor rax, rax + mov rdx, \I1 + mulx \T4, \T5, \M1 + ADD \T1, \T5 // T1 <- C1_final + ADC \T2, \T4 + mulx \T4, \T5, 8\M1 + ADC \T3, rax + ADD \T2, \T5 // T2 <- C2_final + ADC \T3, \T4 + mulx \T4, \T5, 16\M1 + ADC \T4, rax + ADD \T3, \T5 // T3 <- C3_final + ADC \T4, rax // T4 <- C4_final +.endm + + +//************************************************************************************** +// Montgomery reduction, shifted +// Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015 +// Operation: c [reg_p2] = a [reg_p1] * R^-1 mod p377x2, where R = 2^384 (the contract documented on rdc_mont in fp_x64.c, which calls this routine) +//************************************************************************************** +.global fmt(rdc377_asm) +fmt(rdc377_asm): + + // a[0-1] x 2xp377p1_nz --> result: r8:r12 + mov rdx, [reg_p1] + mov rcx, [reg_p1+8] + mulx r9, r8, [rip+p377p1x2] // result r8 + push r15 + push rbp + push r14 + push r12 + mulx r10, r12, [rip+p377p1x2+8] + push rbx + push r13 + MUL128x192_SCHOOL rdx, rcx, [rip+p377p1x2], r8, r9, r10, r11, r12, r13 + + xor rdx, rdx + shrd rdx, r8, 1 + shrd r8, r9, 1 + shrd r9, r10, 1 + shrd r10, r11, 1 + shrd r11, r12, 1 + shr r12, 1 + add rdx, [reg_p1+16] + adc r8, [reg_p1+24] + adc r9, [reg_p1+32] + adc r10, [reg_p1+40] + mulx rbx, rcx, [rip+p377p1x2] // result rcx + adc r11, [reg_p1+48] + adc r12, [reg_p1+56] + mov [reg_p2], r9 + mov [reg_p2+8], r10 + mov [reg_p2+16], r11 + mov r9, [reg_p1+64] + mov r10, [reg_p1+72] + mov r11, [reg_p1+80] + mov rdi, [reg_p1+88] + mulx rbp, r15, [rip+p377p1x2+8] + adc r9, 0 + adc r10, 0 + adc r11, 0 + adc rdi, 0 +
+ // a[2-3] x 2xp377p1_nz --> result: rcx, rbx, rbp, r14:r15 + MUL128x192_SCHOOL rdx, r8, [rip+p377p1x2], rcx, rbx, rbp, r14, r15, r13 + + xor rdx, rdx + shrd rdx, rcx, 1 + shrd rcx, rbx, 1 + shrd rbx, rbp, 1 + shrd rbp, r14, 1 + shrd r14, r15, 1 + shr r15, 1 + add rdx, [reg_p2] + adc rcx, [reg_p2+8] + adc rbx, [reg_p2+16] + mov [reg_p2+16], rbx + adc r12, rbp + mulx rbp, rbx, [rip+p377p1x2] // result rbx + adc r14, r9 + adc r15, r10 + mulx r10, r8, [rip+p377p1x2+8] + adc r11, 0 + adc rdi, 0 + + // a[4-5] x 2xp377p1_nz --> result: rbx, rbp, r10:r8 + MUL128x192_SCHOOL rdx, rcx, [rip+p377p1x2], rbx, rbp, r10, r9, r8, r13 + + xor rdx, rdx + pop r13 + shrd rdx, rbx, 1 + shrd rbx, rbp, 1 + shrd rbp, r10, 1 + shrd r10, r9, 1 + shrd r9, r8, 1 + shr r8, 1 + add rdx, [reg_p2+16] + adc rbx, r12 + mov [reg_p2+8], rbx + pop rbx + pop r12 + adc rbp, r14 + pop r14 + mov [reg_p2+16], rbp + pop rbp + adc r10, r15 + pop r15 + mov [reg_p2+24], r10 + adc r9, r11 + adc r8, rdi + mov [reg_p2], rdx // Final result c0-c5 + mov [reg_p2+32], r9 + mov [reg_p2+40], r8 + ret + + #else + + # error "CONFIGURATION NOT SUPPORTED. 
TRY USE_MULX=TRUE" + + #endif + + +//*********************************************************************** +// 377-bit multiprecision addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.global fmt(mp_add377_asm) +fmt(mp_add377_asm): + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + + mov r8, [reg_p1+32] + mov r9, [reg_p1+40] + adc r8, [reg_p2+32] + adc r9, [reg_p2+40] + mov [reg_p3+32], r8 + mov [reg_p3+40], r9 + ret + + +//*************************************************************************** +// 2x377-bit multiprecision subtraction/addition +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + p377*p377*2^4 (the 12-word constant p377x16p = 16*p377^2; its low word is 0x10 and words 1-2 are zero, hence the adc-with-0 steps below) +//*************************************************************************** +.global fmt(mp_subadd377x2_asm) +fmt(mp_subadd377x2_asm): + push r12 + push r13 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov rcx, [rip+p377x16p] + add r8, rcx + adc r9, 0 + adc r10, 0 + mov rcx, [rip+p377x16p+24] + adc r11, rcx + mov rcx, [rip+p377x16p+32] + adc r12, rcx + mov rcx, [rip+p377x16p+40] + adc r13, rcx + setc al + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + setc cl + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + + mov r8, [reg_p1+48] + mov r9, [reg_p1+56] + mov r10, [reg_p1+64] + mov r11, [reg_p1+72] + mov r12, [reg_p1+80] + mov r13, [reg_p1+88] + bt rax, 0 + adc r8, [rip+p377x16p+48] + adc r9, [rip+p377x16p+56] + adc r10, [rip+p377x16p+64] + adc r11, [rip+p377x16p+72] + adc r12, [rip+p377x16p+80] + adc r13,
[rip+p377x16p+88] + bt rcx, 0 + sbb r8, [reg_p2+48] + sbb r9, [reg_p2+56] + sbb r10, [reg_p2+64] + sbb r11, [reg_p2+72] + sbb r12, [reg_p2+80] + sbb r13, [reg_p2+88] + + mov [reg_p3+48], r8 + mov [reg_p3+56], r9 + mov [reg_p3+64], r10 + mov [reg_p3+72], r11 + mov [reg_p3+80], r12 + mov [reg_p3+88], r13 + + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Double 2x377-bit multiprecision subtraction +// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2] +//*********************************************************************** +.global fmt(mp_dblsub377x2_asm) +fmt(mp_dblsub377x2_asm): + push r12 + push r13 + + mov r8, [reg_p3] + mov r9, [reg_p3+8] + mov r10, [reg_p3+16] + mov r11, [reg_p3+24] + mov r12, [reg_p3+32] + mov r13, [reg_p3+40] + sub r8, [reg_p1] + sbb r9, [reg_p1+8] + sbb r10, [reg_p1+16] + sbb r11, [reg_p1+24] + sbb r12, [reg_p1+32] + sbb r13, [reg_p1+40] + setc al + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + setc cl + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + + mov r8, [reg_p3+48] + mov r9, [reg_p3+56] + mov r10, [reg_p3+64] + mov r11, [reg_p3+72] + mov r12, [reg_p3+80] + mov r13, [reg_p3+88] + bt rax, 0 + sbb r8, [reg_p1+48] + sbb r9, [reg_p1+56] + sbb r10, [reg_p1+64] + sbb r11, [reg_p1+72] + sbb r12, [reg_p1+80] + sbb r13, [reg_p1+88] + bt rcx, 0 + sbb r8, [reg_p2+48] + sbb r9, [reg_p2+56] + sbb r10, [reg_p2+64] + sbb r11, [reg_p2+72] + sbb r12, [reg_p2+80] + sbb r13, [reg_p2+88] + mov [reg_p3+48], r8 + mov [reg_p3+56], r9 + mov [reg_p3+64], r10 + mov [reg_p3+72], r11 + mov [reg_p3+80], r12 + mov [reg_p3+88], r13 + + pop r13 + pop r12 + ret \ No newline at end of file diff --git a/SIKE_sw/src/P377/P377.c b/SIKE_sw/src/P377/P377.c new file mode 100644 index 0000000..17a688e --- /dev/null +++ 
b/SIKE_sw/src/P377/P377.c @@ -0,0 +1,114 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: supersingular isogeny parameters and generation of functions for P377 +*********************************************************************************************/ + +#include "P377_api.h" +#include "P377_internal.h" +#include "../internal.h" + + +// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points: +// -------------------------------------------------------------------------------------------------- +// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). +// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position. +// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. +// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32. +// For example, a 377-bit field element is represented with Ceil(377 / 64) = 6 64-bit digits or Ceil(377 / 32) = 12 32-bit digits. + +// +// Curve isogeny system "SIDHp377". 
Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p377^2), where A=6, B=1, C=1 and p377 = 2^191*3^117-1 +// + +const uint64_t p377[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x0B46D546BC2A5699, 0xA879CC6988CE7CF5, 0x015B702E0C542196 }; +const uint64_t p377x2[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x168DAA8D7854AD32, 0x50F398D3119CF9EA, 0x02B6E05C18A8432D }; +const uint64_t p377x4[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x2D1B551AF0A95A65, 0xA1E731A62339F3D4, 0x056DC0B83150865A }; +const uint64_t p377p1[NWORDS64_FIELD] = { 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x0B46D546BC2A5699, 0xA879CC6988CE7CF5, 0x015B702E0C542196 }; +const uint64_t p377p1x2[NWORDS64_FIELD/2] = { 0x168DAA8D7854AD33, 0x50F398D3119CF9EA, 0x02B6E05C18A8432D }; +const uint64_t p377x16p[2*NWORDS64_FIELD] = { 0x0000000000000010, 0x0000000000000000, 0x0000000000000000, 0x972557287AB52CD0, 0xF0C672CEE630615E, 0xD491FA3E757BCD2A, + 0x2830123FBA97E0A3, 0x44E67AC0C81C9117, 0x942C5A8EFDDE690C, 0x63BDE5C206F0021D, 0xAA49E8B73CCD899E, 0x001D7894DFDBF251 }; +// Order of Alice's subgroup +const uint64_t Alice_order[NWORDS64_ORDER] = { 0x0000000000000000, 0x0000000000000000, 0x8000000000000000 }; +// Order of Bob's subgroup +const uint64_t Bob_order[NWORDS64_ORDER] = { 0x168DAA8D7854AD33, 0x50F398D3119CF9EA, 0x02B6E05C18A8432D }; +// Alice's generator values {XPA0 + XPA1*i, XQA0 + xQA1*i, XRA0 + XRA1*i} in GF(p377^2), expressed in Montgomery representation +const uint64_t A_gen[6*NWORDS64_FIELD] = { 0x8AE392AA8312F880, 0xDB7F6BA38CC56011, 0x896F67240AD52C67, 0x21B9C0BD6C0584FF, 0xF064B97DDD0B2BD4, 0x0102EA98B786D4CC, // XPA0 + 0x583DE90ED3D09845, 0x131B1BDFBBE25620, 0x054B16A62F3D59F1, 0x1C3A458EEFFD4A0B, 0x1FBC000608BE1F7A, 0x00225F4BEEF34209, // XPA1 + 0x8AA130E98FE00DE5, 0x6B54CC5A0A538778, 0x46D96D4F04F6605D, 0x069A3CAB971973AE, 0x8923D0F2112DA219, 
0x0085C1C47AD21A2A, // XQA0 + 0x50981EA202812D84, 0x61883F048CF1682A, 0x2DBC9EC88567E391, 0xD5E238E99DD189E7, 0x1BFE095BC910EA7D, 0x00203E87957453EB, // XQA1 + 0x296CA63890082DB3, 0x02E16D4D70C2C55A, 0xD4B8FE9CB9481E99, 0xF95F9798C3BECDFB, 0x71B3A2D8A38CB84B, 0x0118DD7682525B04, // XRA0 + 0xF64DD26CEC6E9DF5, 0xBC02B5979FF4F94C, 0x5D8B16849129DE49, 0xE44435C64BEFB9E9, 0x1077D183B5A4727B, 0x0019A2DF755CF268 }; // XRA1 +// Bob's generator values {XPB0, XQB0, XRB0 + XRB1*i} in GF(p377^2), expressed in Montgomery representation +const uint64_t B_gen[6*NWORDS64_FIELD] = { 0x436424EE3C9446F8, 0xB013A914D96E976D, 0x30C376697D926658, 0xE99792AFAA115E68, 0x935421EF522A946B, 0x0032474AECB8799E, // XPB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XPB1 + 0x5EDE445E538850BC, 0x5BA7DAD976595394, 0xF01F46B8519CD118, 0x9DFA5CB5B40775A1, 0xC7E535F99811B56B, 0x0025BF8D8B00A170, // XQB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XQB1 + 0xA35AA9C8EA887C42, 0xE5A1BF165361C81A, 0x719BB1C6D6C727C7, 0x348590861EB46882, 0xB57273062A50C238, 0x002C53E0163A1C34, // XRB0 + 0xF12E87A9F00803D8, 0x49C966997253584C, 0x58BBD82219B363ED, 0x6232DFE1A85929F5, 0xC85434A71BF3CC30, 0x005DE7FAB257510D }; // XRB1 +// Montgomery constant Montgomery_R2 = (2^384)^2 mod p377 +const uint64_t Montgomery_R2[NWORDS64_FIELD] = { 0x826E131D3839C923, 0x54892C7B7D73E7F7, 0x3F8957D221B867A3, 0xD1217CD71D03BB94, 0xDCCBFB71E3AE5457, 0x00FCC56B6CD4B219 }; +// Value one in Montgomery representation +const uint64_t Montgomery_one[NWORDS64_FIELD] = { 0x00000000000000BC, 0x0000000000000000, 0x0000000000000000, 0xB7FB600DD0E86746, 0x468DE27F885C3C0B, 0x00D99E2EF237555C }; + + +// Fixed parameters for isogeny tree computation +const unsigned int strat_Alice[MAX_Alice-1] = { +38, 26, 15, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 
1, 11, 7, 4, 2, 1, 1, 2, +1, 1, 3, 2, 1, 1, 1, 1, 4, 3, 2, 1, 1, 1, 1, 2, 1, 1, 17, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, +4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 }; + +const unsigned int strat_Bob[MAX_Bob-1] = { +54, 31, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 15, 8, 4, 2, 1, 1, +2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 23, 15, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, +1, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 9, 6, 4, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1 }; + +// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions +#define fpcopy fpcopy377 +#define fpzero fpzero377 +#define fpadd fpadd377 +#define fpsub fpsub377 +#define fpneg fpneg377 +#define fpdiv2 fpdiv2_377 +#define fpcorrection fpcorrection377 +#define fpmul_mont fpmul377_mont +#define fpsqr_mont fpsqr377_mont +#define fpinv_mont fpinv377_mont +#define fpinv_chain_mont fpinv377_chain_mont +#define fp2copy fp2copy377 +#define fp2zero fp2zero377 +#define fp2add fp2add377 +#define fp2sub fp2sub377 +#define mp_sub_p2 mp_sub377_p2 +#define mp_sub_p4 mp_sub377_p4 +#define sub_p4 mp_sub_p4 +#define fp2neg fp2neg377 +#define fp2div2 fp2div2_377 +#define fp2correction fp2correction377 +#define fp2mul_mont fp2mul377_mont +#define fp2sqr_mont fp2sqr377_mont +#define fp2inv_mont fp2inv377_mont +#define fp2inv_mont_ct fp2inv377_mont_ct +#define fp2inv_mont_bingcd fp2inv377_mont_bingcd +#define fpequal_non_constant_time fpequal377_non_constant_time +#define mp_add_asm mp_add377_asm +#define mp_subaddx2_asm mp_subadd377x2_asm +#define mp_dblsubx2_asm mp_dblsub377x2_asm +#define crypto_kem_keypair crypto_kem_keypair_SIKEp377 +#define crypto_kem_enc crypto_kem_enc_SIKEp377 +#define crypto_kem_dec crypto_kem_dec_SIKEp377 +#define random_mod_order_A random_mod_order_A_SIDHp377 +#define random_mod_order_B random_mod_order_B_SIDHp377 +#define 
EphemeralKeyGeneration_A EphemeralKeyGeneration_A_SIDHp377 +#define EphemeralKeyGeneration_B EphemeralKeyGeneration_B_SIDHp377 +#define EphemeralSecretAgreement_A EphemeralSecretAgreement_A_SIDHp377 +#define EphemeralSecretAgreement_B EphemeralSecretAgreement_B_SIDHp377 + +#include "../fpx.c" +#include "../ec_isogeny.c" +#include "../sidh.c" +#include "../sike.c" \ No newline at end of file diff --git a/SIKE_sw/src/P377/P377_api.h b/SIKE_sw/src/P377/P377_api.h new file mode 100644 index 0000000..e564224 --- /dev/null +++ b/SIKE_sw/src/P377/P377_api.h @@ -0,0 +1,112 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: API header file for P377 +*********************************************************************************************/ + +#ifndef P377_API_H +#define P377_API_H + + +/*********************** Key encapsulation mechanism API ***********************/ + +#define CRYPTO_SECRETKEYBYTES 328 // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes +#define CRYPTO_PUBLICKEYBYTES 288 +#define CRYPTO_BYTES 16 +#define CRYPTO_CIPHERTEXTBYTES 304 // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes + +// Algorithm name +#define CRYPTO_ALGNAME "SIKEp377" + +// SIKE's key generation +// It produces a private key sk and computes the public key pk. 
+// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 328 bytes) +// public key pk (CRYPTO_PUBLICKEYBYTES = 288 bytes) +int crypto_kem_keypair_SIKEp377(unsigned char *pk, unsigned char *sk); + +// SIKE's encapsulation +// Input: public key pk (CRYPTO_PUBLICKEYBYTES = 288 bytes) +// Outputs: shared secret ss (CRYPTO_BYTES = 16 bytes) +// ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 304 bytes) +int crypto_kem_enc_SIKEp377(unsigned char *ct, unsigned char *ss, const unsigned char *pk); + +// SIKE's decapsulation +// Input: secret key sk (CRYPTO_SECRETKEYBYTES = 328 bytes) +// ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 304 bytes) +// Outputs: shared secret ss (CRYPTO_BYTES = 16 bytes) +int crypto_kem_dec_SIKEp377(unsigned char *ss, const unsigned char *ct, const unsigned char *sk); + + +// Encoding of keys for KEM-based isogeny system "SIKEp377" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p377) are encoded in 48 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p377^2), where a and b are defined over GF(p377), are encoded as {a, b}, with a in the lowest memory portion. +// +// Private keys sk consist of the concatenation of a 16-byte random value, a value in the range [0, 2^Floor(Log(2,3^117))-1] and the public key pk. In the SIKE API, +// private keys are encoded in 16 + 24 + 288 = 328 octets in little endian format. +// Public keys pk consist of 3 elements in GF(p377^2). In the SIKE API, pk is encoded in 288 octets. +// Ciphertexts ct consist of the concatenation of a public key value and a 16-byte value. In the SIKE API, ct is encoded in 288 + 16 = 304 octets. +// Shared keys ss consist of a value of 16 octets. 
+ + +/*********************** Ephemeral key exchange API ***********************/ + +#define SIDH_SECRETKEYBYTES_A 24 +#define SIDH_SECRETKEYBYTES_B 24 +#define SIDH_PUBLICKEYBYTES 288 +#define SIDH_BYTES 96 + +// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys. +// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016. +// Extended version available at: http://eprint.iacr.org/2016/859 + +// Generation of Alice's secret key +// Outputs random value in [0, 2^191 - 1] to be used as Alice's private key +void random_mod_order_A_SIDHp377(unsigned char* random_digits); + +// Generation of Bob's secret key +// Outputs random value in [0, 2^Floor(Log(2,3^117)) - 1] to be used as Bob's private key +void random_mod_order_B_SIDHp377(unsigned char* random_digits); + +// Alice's ephemeral public key generation +// Input: a private key PrivateKeyA in the range [0, 2^191 - 1], stored in 24 bytes. +// Output: the public key PublicKeyA consisting of 3 GF(p377^2) elements encoded in 288 bytes. +int EphemeralKeyGeneration_A_SIDHp377(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA); + +// Bob's ephemeral public key generation +// It computes the public key PublicKeyB from the private key PrivateKeyB, which is passed in as a read-only (const) input. +// Input: a private key PrivateKeyB in the range [0, 2^Floor(Log(2,3^117)) - 1], stored in 24 bytes. +// Output: the public key PublicKeyB consisting of 3 GF(p377^2) elements encoded in 288 bytes. +int EphemeralKeyGeneration_B_SIDHp377(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB); + +// Alice's ephemeral shared secret computation +// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB +// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^191 - 1], stored in 24 bytes. +// Bob's PublicKeyB consists of 3 GF(p377^2) elements encoded in 288 bytes. 
+// Output: a shared secret SharedSecretA that consists of one element in GF(p377^2) encoded in 96 bytes. +int EphemeralSecretAgreement_A_SIDHp377(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA); + +// Bob's ephemeral shared secret computation +// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA +// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^117)) - 1], stored in 24 bytes. +// Alice's PublicKeyA consists of 3 GF(p377^2) elements encoded in 288 bytes. +// Output: a shared secret SharedSecretB that consists of one element in GF(p377^2) encoded in 96 bytes. +int EphemeralSecretAgreement_B_SIDHp377(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB); + + +// Encoding of keys for KEX-based isogeny system "SIDHp377" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p377) are encoded in 48 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p377^2), where a and b are defined over GF(p377), are encoded as {a, b}, with a in the lowest memory portion. +// +// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^191-1] and [0, 2^Floor(Log(2,3^117)) - 1], resp. In the SIDH API, +// Alice's and Bob's private keys are encoded in 24 octets in little endian format. +// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p377^2). In the SIDH API, they are encoded in 288 octets. +// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p377^2). In the SIDH API, they are encoded in 96 octets. 
+ + +#endif \ No newline at end of file diff --git a/SIKE_sw/src/P377/P377_internal.h b/SIKE_sw/src/P377/P377_internal.h new file mode 100644 index 0000000..896dbfb --- /dev/null +++ b/SIKE_sw/src/P377/P377_internal.h @@ -0,0 +1,165 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: internal header file for P377 +*********************************************************************************************/ + +#ifndef P377_INTERNAL_H +#define P377_INTERNAL_H + +#include "../config.h" + + +#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_ARM64) + #define NWORDS_FIELD 6 // Number of words of a 377-bit field element + #define p377_ZERO_WORDS 2 // Number of "0" digits in the least significant part of p377 + 1 +#elif (TARGET == TARGET_x86) + #define NWORDS_FIELD 12 + #define p377_ZERO_WORDS 5 +#endif + + +// Basic constants + +#define NBITS_FIELD 377 +#define MAXBITS_FIELD 384 +#define MAXWORDS_FIELD ((MAXBITS_FIELD+RADIX-1)/RADIX) // Max. number of words to represent field elements +#define NWORDS64_FIELD ((NBITS_FIELD+63)/64) // Number of 64-bit words of a 377-bit field element +#define NBITS_ORDER 192 +#define NWORDS_ORDER ((NBITS_ORDER+RADIX-1)/RADIX) // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp. 
+#define NWORDS64_ORDER ((NBITS_ORDER+63)/64) // Number of 64-bit words of a 192-bit element +#define MAXBITS_ORDER NBITS_ORDER +#define ALICE 0 +#define BOB 1 +#define OALICE_BITS 191 +#define OBOB_BITS 186 +#define OBOB_EXPON 117 +#define MASK_ALICE 0x7F +#define MASK_BOB 0x01 +#define PRIME p377 +#define PARAM_A 6 +#define PARAM_C 1 +// Fixed parameters for isogeny tree computation +#define MAX_INT_POINTS_ALICE 7 +#define MAX_INT_POINTS_BOB 8 +#define MAX_Alice 95 +#define MAX_Bob 117 +#define MSG_BYTES 16 +#define SECRETKEY_A_BYTES ((OALICE_BITS + 7) / 8) +#define SECRETKEY_B_BYTES ((OBOB_BITS - 1 + 7) / 8) +#define FP2_ENCODED_BYTES 2*((NBITS_FIELD + 7) / 8) + + +// SIDH's basic element definitions and point representations + +typedef digit_t felm_t[NWORDS_FIELD]; // Datatype for representing 377-bit field elements (384-bit max.) +typedef digit_t dfelm_t[2*NWORDS_FIELD]; // Datatype for representing double-precision 2x377-bit field elements (2x384-bit max.) +typedef felm_t f2elm_t[2]; // Datatype for representing quadratic extension field elements GF(p377^2) + +typedef struct { f2elm_t X; f2elm_t Z; } point_proj; // Point representation in projective XZ Montgomery coordinates. 
+typedef point_proj point_proj_t[1]; + + + +/**************** Function prototypes ****************/ +/************* Multiprecision functions **************/ + +// 377-bit multiprecision addition, c = a+b +void mp_add377(const digit_t* a, const digit_t* b, digit_t* c); +void mp_add377_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// 377-bit multiprecision subtraction, c = a-b+2p or c = a-b+4p +extern void mp_sub377_p2(const digit_t* a, const digit_t* b, digit_t* c); +extern void mp_sub377_p4(const digit_t* a, const digit_t* b, digit_t* c); +void mp_sub377_p2_asm(const digit_t* a, const digit_t* b, digit_t* c); +void mp_sub377_p4_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// 2x377-bit multiprecision subtraction followed by addition with p377*2^384, c = a-b+(p377*2^384) if a-b < 0, otherwise c=a-b +void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c); +void mp_subadd377x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Double 2x377-bit multiprecision subtraction, c = c-a-b, where c > a and c > b +void mp_dblsub377x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +/************ Field arithmetic functions *************/ + +// Copy of a field element, c = a +void fpcopy377(const digit_t* a, digit_t* c); + +// Zeroing a field element, a = 0 +void fpzero377(digit_t* a); + +// Non constant-time comparison of two field elements. 
If a = b return TRUE, otherwise, return FALSE +bool fpequal377_non_constant_time(const digit_t* a, const digit_t* b); + +// Modular addition, c = a+b mod p377 +extern void fpadd377(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpadd377_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular subtraction, c = a-b mod p377 +extern void fpsub377(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpsub377_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular negation, a = -a mod p377 +extern void fpneg377(digit_t* a); + +// Modular division by two, c = a/2 mod p377. +void fpdiv2_377(const digit_t* a, digit_t* c); + +// Modular correction to reduce field element a in [0, 2*p377-1] to [0, p377-1]. +void fpcorrection377(digit_t* a); + +// 377-bit Montgomery reduction, c = a mod p +void rdc377_asm(digit_t* ma, digit_t* mc); + +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p377, where R=2^384 +void fpmul377_mont(const digit_t* a, const digit_t* b, digit_t* c); +void mul377_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Field squaring using Montgomery arithmetic, mc = ma^2*R^-1 mod p377, where R=2^384 +void fpsqr377_mont(const digit_t* ma, digit_t* mc); + +// Field inversion, a = a^-1 in GF(p377) +void fpinv377_mont(digit_t* a); + +// Chain to compute (p377-3)/4 using Montgomery arithmetic +void fpinv377_chain_mont(digit_t* a); + +/************ GF(p^2) arithmetic functions *************/ + +// Copy of a GF(p377^2) element, c = a +void fp2copy377(const f2elm_t a, f2elm_t c); + +// Zeroing a GF(p377^2) element, a = 0 +void fp2zero377(f2elm_t a); + +// GF(p377^2) negation, a = -a in GF(p377^2) +void fp2neg377(f2elm_t a); + +// GF(p377^2) addition, c = a+b in GF(p377^2) +extern void fp2add377(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p377^2) subtraction, c = a-b in GF(p377^2) +extern void fp2sub377(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p377^2) division by two, c = 
a/2 in GF(p377^2) +void fp2div2_377(const f2elm_t a, f2elm_t c); + +// Modular correction, a = a in GF(p377^2) +void fp2correction377(f2elm_t a); + +// GF(p377^2) squaring using Montgomery arithmetic, c = a^2 in GF(p377^2) +void fp2sqr377_mont(const f2elm_t a, f2elm_t c); + +// GF(p377^2) multiplication using Montgomery arithmetic, c = a*b in GF(p377^2) +void fp2mul377_mont(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p377^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) +void fp2inv377_mont(f2elm_t a); + + +#endif diff --git a/SIKE_sw/src/P377/generic/fp_generic.c b/SIKE_sw/src/P377/generic/fp_generic.c new file mode 100644 index 0000000..57d1dc7 --- /dev/null +++ b/SIKE_sw/src/P377/generic/fp_generic.c @@ -0,0 +1,259 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: portable modular arithmetic for P377 +*********************************************************************************************/ + +#include "../P377_internal.h" +#include "../../internal.h" + +// Global constants +extern const uint64_t p377[NWORDS64_FIELD]; +extern const uint64_t p377p1[NWORDS64_FIELD]; +extern const uint64_t p377x2[NWORDS64_FIELD]; +extern const uint64_t p377x4[NWORDS64_FIELD]; + + +__inline void mp_sub377_p2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. 
+ unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p377x2)[i], borrow, c[i]); + } +} + + +__inline void mp_sub377_p4(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p377x4)[i], borrow, c[i]); + } +} + + +__inline void fpadd377(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p377. + // Inputs: a, b in [0, 2*p377-1] + // Output: c in [0, 2*p377-1] + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p377x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p377x2)[i] & mask, carry, c[i]); + } +} + + +__inline void fpsub377(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p377. + // Inputs: a, b in [0, 2*p377-1] + // Output: c in [0, 2*p377-1] + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p377x2)[i] & mask, borrow, c[i]); + } +} + + +__inline void fpneg377(digit_t* a) +{ // Modular negation, a = -a mod p377. 
+ // Input/output: a in [0, 2*p377-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p377x2)[i], a[i], borrow, a[i]); + } +} + + +void fpdiv2_377(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p377. + // Input : a in [0, 2*p377-1] + // Output: c in [0, 2*p377-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p377 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p377)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection377(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p377-1] to [0, p377-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p377)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p377)[i] & mask, borrow, a[i]); + } +} + + +void digit_x_digit(const digit_t a, const digit_t b, digit_t* c) +{ // Digit multiplication, digit * digit -> 2-digit result + register digit_t al, ah, bl, bh, temp; + digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4); + + al = a & mask_low; // Low part + ah = a >> (sizeof(digit_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(digit_t) * 4); + + albl = al*bl; + albh = al*bh; + ahbl = ah*bl; + ahbh = ah*bh; + c[0] = albl & mask_low; // C00 + + res1 = albl >> (sizeof(digit_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(digit_t) * 4); + c[0] ^= temp << (sizeof(digit_t) * 4); // C01 + + res1 = ahbl >> (sizeof(digit_t) * 4); + res2 = albh >> (sizeof(digit_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + c[1] = temp & mask_low; // C10 + carry = temp 
& mask_high; + c[1] ^= (ahbh & mask_high) + carry; // C11 +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. + unsigned int i, j; + digit_t t = 0, u = 0, v = 0, UV[2]; + unsigned int carry = 0; + + for (i = 0; i < nwords; i++) { + for (j = 0; j <= i; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = nwords; i < 2*nwords-1; i++) { + for (j = i-nwords+1; j < nwords; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + c[2*nwords-1] = v; +} + + +void rdc_mont(digit_t* ma, digit_t* mc) +{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p377. + // mc = ma*R^-1 mod p377x2, where R = 2^384. + // If ma < 2^384*p377, the output mc is in the range [0, 2*p377-1]. + // ma is assumed to be in Montgomery representation. 
+ unsigned int i, j, carry, count = p377_ZERO_WORDS; + digit_t UV[2], t = 0, u = 0, v = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + mc[i] = 0; + } + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j < i; j++) { + if (j < (i-p377_ZERO_WORDS+1)) { + MUL(mc[j], ((digit_t*)p377p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { + if (count > 0) { + count -= 1; + } + for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { + if (j < (NWORDS_FIELD-count)) { + MUL(mc[j], ((digit_t*)p377p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i-NWORDS_FIELD] = v; + v = u; + u = t; + t = 0; + } + ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); + mc[NWORDS_FIELD-1] = v; +} \ No newline at end of file diff --git a/SIKE_sw/src/P434/AMD64/fp_x64.c b/SIKE_sw/src/P434/AMD64/fp_x64.c new file mode 100644 index 0000000..8b18850 --- /dev/null +++ b/SIKE_sw/src/P434/AMD64/fp_x64.c @@ -0,0 +1,491 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: modular arithmetic optimized for x64 platforms for P434 +*********************************************************************************************/ + +#include "../P434_internal.h" +#include "../../internal.h" + +// Global constants +extern const uint64_t p434[NWORDS_FIELD]; +extern const uint64_t p434p1[NWORDS_FIELD]; +extern const uint64_t p434x2[NWORDS_FIELD]; +extern const uint64_t p434x4[NWORDS_FIELD]; + + +__inline void 
mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p434x2)[i], borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + mp_sub434_p2_asm(a, b, c); + +#endif +} + + +__inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p434x4)[i], borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + mp_sub434_p4_asm(a, b, c); + +#endif +} + + +__inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p434. + // Inputs: a, b in [0, 2*p434-1] + // Output: c in [0, 2*p434-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p434x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p434x2)[i] & mask, carry, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + fpadd434_asm(a, b, c); + +#endif +} + + +__inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p434. 
+ // Inputs: a, b in [0, 2*p434-1] + // Output: c in [0, 2*p434-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p434x2)[i] & mask, borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + fpsub434_asm(a, b, c); + +#endif +} + + +__inline void fpneg434(digit_t* a) +{ // Modular negation, a = -a mod p434. + // Input/output: a in [0, 2*p434-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p434x2)[i], a[i], borrow, a[i]); + } +} + + +void fpdiv2_434(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p434. + // Input : a in [0, 2*p434-1] + // Output: c in [0, 2*p434-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p434 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p434)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection434(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p434-1] to [0, p434-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p434)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p434)[i] & mask, borrow, a[i]); + } +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords. 
+ + (void)nwords; + +#if (OS_TARGET == OS_WIN) + digit_t t = 0; + uint128_t uv = {0}; + unsigned int carry = 0; + + MULADD128(a[0], b[0], uv, carry, uv); + t += carry; + c[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[0], uv, carry, uv); + t += carry; + c[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[0], uv, carry, uv); + t += carry; + c[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[0], uv, carry, uv); + t += carry; + c[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[0], uv, carry, uv); + t += carry; + c[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[5], uv, carry, uv); + t += carry; + MULADD128(a[4], b[1], uv, carry, uv); + t += carry; + MULADD128(a[3], b[2], uv, carry, uv); + t += carry; + MULADD128(a[2], b[3], uv, carry, uv); + t += carry; + MULADD128(a[1], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[0], uv, carry, uv); + t += carry; + c[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[1], uv, carry, uv); + t += carry; + MULADD128(a[4], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[4], uv, carry, uv); + t += carry; + MULADD128(a[1], b[5], uv, carry, uv); + t += carry; + MULADD128(a[6], b[0], uv, carry, uv); + t += carry; + c[6] = uv[0]; + uv[0] = 
uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[6], b[1], uv, carry, uv); + t += carry; + MULADD128(a[5], b[2], uv, carry, uv); + t += carry; + MULADD128(a[4], b[3], uv, carry, uv); + t += carry; + MULADD128(a[3], b[4], uv, carry, uv); + t += carry; + MULADD128(a[2], b[5], uv, carry, uv); + t += carry; + MULADD128(a[1], b[6], uv, carry, uv); + t += carry; + c[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[6], b[2], uv, carry, uv); + t += carry; + MULADD128(a[5], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[5], uv, carry, uv); + t += carry; + MULADD128(a[2], b[6], uv, carry, uv); + t += carry; + c[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[6], b[3], uv, carry, uv); + t += carry; + MULADD128(a[5], b[4], uv, carry, uv); + t += carry; + MULADD128(a[4], b[5], uv, carry, uv); + t += carry; + MULADD128(a[3], b[6], uv, carry, uv); + t += carry; + c[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[6], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[5], uv, carry, uv); + t += carry; + MULADD128(a[4], b[6], uv, carry, uv); + t += carry; + c[10] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[6], b[5], uv, carry, uv); + t += carry; + MULADD128(a[5], b[6], uv, carry, uv); + t += carry; + c[11] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + + MULADD128(a[6], b[6], uv, carry, uv); + c[12] = uv[0]; + c[13] = uv[1]; + +#elif (OS_TARGET == OS_LINUX) + + mul434_asm(a, b, c); + +#endif +} + + +void rdc_mont(digit_t* ma, digit_t* mc) +{ // Montgomery reduction exploiting special form of the prime. + // mc = ma*R^-1 mod p434x2, where R = 2^448. + // If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1]. + // ma is assumed to be in Montgomery representation. 
+ +#if (OS_TARGET == OS_WIN) + unsigned int carry; + digit_t t = 0; + uint128_t uv = {0}; + + mc[0] = ma[0]; + mc[1] = ma[1]; + mc[2] = ma[2]; + MUL128(mc[0], ((digit_t*)p434p1)[3], uv); + ADDC(0, uv[0], ma[3], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + mc[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = 0; + + MULADD128(mc[0], ((digit_t*)p434p1)[4], uv, carry, uv); + MULADD128(mc[1], ((digit_t*)p434p1)[3], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[4], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p434p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p434p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p434p1)[3], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[5], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p434p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p434p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p434p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p434p1)[3], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[6], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[6] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[1], ((digit_t*)p434p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p434p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p434p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p434p1)[3], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[7], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[2], ((digit_t*)p434p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p434p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[4], 
((digit_t*)p434p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p434p1)[3], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[8], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[3], ((digit_t*)p434p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p434p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p434p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p434p1)[3], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[9], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[4], ((digit_t*)p434p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p434p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p434p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[10], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[5], ((digit_t*)p434p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p434p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[11], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[6], ((digit_t*)p434p1)[6], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[12], carry, mc[5]); + ADDC(carry, uv[1], ma[13], carry, mc[6]); + +#elif (OS_TARGET == OS_LINUX) + + rdc434_asm(ma, mc); + +#endif +} \ No newline at end of file diff --git a/SIKE_sw/src/P434/AMD64/fp_x64_asm.S b/SIKE_sw/src/P434/AMD64/fp_x64_asm.S new file mode 100644 index 0000000..b04c0e5 --- /dev/null +++ b/SIKE_sw/src/P434/AMD64/fp_x64_asm.S @@ -0,0 +1,1024 @@ +//******************************************************************************************* +// SIDH: an efficient supersingular isogeny cryptography library +// 
Copyright (c) Microsoft Corporation +// +// Website: https://github.com/microsoft/PQCrypto-SIDH +// Released under MIT license +// +// Abstract: field arithmetic in x64 assembly for P434 on Linux +//******************************************************************************************* + +.intel_syntax noprefix + +// Format function and variable names for Mac OS X +#if defined(__APPLE__) + #define fmt(f) _##f +#else + #define fmt(f) f +#endif + +// Registers that are used for parameter passing: +#define reg_p1 rdi +#define reg_p2 rsi +#define reg_p3 rdx + +// Define addition instructions +#ifdef _MULX_ +#ifdef _ADX_ + +#define ADD1 adox +#define ADC1 adox +#define ADD2 adcx +#define ADC2 adcx + +#else + +#define ADD1 add +#define ADC1 adc +#define ADD2 add +#define ADC2 adc + +#endif +#endif + + +.text +//*********************************************************************** +// Field addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.global fmt(fpadd434_asm) +fmt(fpadd434_asm): + push r12 + push r13 + push r14 + push r15 + push rbx + push rbp + + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + adc r14, [reg_p2+48] + + mov rbx, [rip+fmt(p434x2)] + sub r8, rbx + mov rcx, [rip+fmt(p434x2)+8] + sbb r9, rcx + sbb r10, rcx + mov rdi, [rip+fmt(p434x2)+24] + sbb r11, rdi + mov rsi, [rip+fmt(p434x2)+32] + sbb r12, rsi + mov rbp, [rip+fmt(p434x2)+40] + sbb r13, rbp + mov r15, [rip+fmt(p434x2)+48] + sbb r14, r15 + sbb rax, 0 + + and rbx, rax + and rcx, rax + and rdi, rax + and rsi, rax + and rbp, rax + and r15, rax + + add r8, rbx + adc r9, rcx + adc r10, rcx + adc r11, rdi + adc r12, rsi + adc r13, rbp + adc r14, r15 + mov [reg_p3], 
r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + + pop rbp + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Field subtraction +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] +//*********************************************************************** +.global fmt(fpsub434_asm) +fmt(fpsub434_asm): + push r12 + push r13 + push r14 + + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb r14, [reg_p2+48] + sbb rax, 0 + + mov rcx, [rip+fmt(p434x2)] + mov rdi, [rip+fmt(p434x2)+8] + mov rsi, [rip+fmt(p434x2)+24] + and rcx, rax + and rdi, rax + and rsi, rax + add r8, rcx + adc r9, rdi + adc r10, rdi + adc r11, rsi + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + setc cl + + mov r8, [rip+fmt(p434x2)+32] + mov rdi, [rip+fmt(p434x2)+40] + mov rsi, [rip+fmt(p434x2)+48] + and r8, rax + and rdi, rax + and rsi, rax + bt rcx, 0 + adc r12, r8 + adc r13, rdi + adc r14, rsi + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + + pop r14 + pop r13 + pop r12 + ret + + +///////////////////////////////////////////////////////////////// MACRO +.macro SUB434_PX P0 + push r12 + push r13 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov rcx, [reg_p1+48] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb rcx, [reg_p2+48] + + mov rax, [rip+\P0] + mov rdi, [rip+\P0+8] + mov rsi, [rip+\P0+24] + add r8, rax + mov rax, [rip+\P0+32] + adc 
r9, rdi + adc r10, rdi + adc r11, rsi + mov rdi, [rip+\P0+40] + mov rsi, [rip+\P0+48] + adc r12, rax + adc r13, rdi + adc rcx, rsi + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], rcx + + pop r13 + pop r12 + .endm + + +//*********************************************************************** +// Multiprecision subtraction with correction with 2*p434 +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p434 +//*********************************************************************** +.global fmt(mp_sub434_p2_asm) +fmt(mp_sub434_p2_asm): + + SUB434_PX fmt(p434x2) + ret + + +//*********************************************************************** +// Multiprecision subtraction with correction with 4*p434 +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p434 +//*********************************************************************** +.global fmt(mp_sub434_p4_asm) +fmt(mp_sub434_p4_asm): + + SUB434_PX fmt(p434x4) + ret + + +#ifdef _MULX_ + +///////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: memory pointers M0 and M1 +// Outputs: memory pointer C and regs T1, T3, rax +// Temps: regs T0:T6 +///////////////////////////////////////////////////////////////// + +#ifdef _ADX_ +.macro MUL192_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6 + mov rdx, \M0 + mulx \T0, \T1, \M1 // T0:T1 = A0*B0 + mov \C, \T1 // C0_final + mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 + xor rax, rax + adox \T0, \T2 + mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 + adox \T1, \T3 + + mov rdx, 8\M0 + mulx \T3, \T4, \M1 // T3:T4 = A1*B0 + adox \T2, rax + xor rax, rax + mulx \T5, \T6, 8\M1 // T5:T6 = A1*B1 + adox \T4, \T0 + mov 8\C, \T4 // C1_final + adcx \T3, \T6 + mulx \T6, \T0, 16\M1 // T6:T0 = A1*B2 + adox \T3, \T1 + adcx \T5, \T0 + adcx \T6, rax + adox \T5, \T2 + + mov rdx, 16\M0 + mulx \T1, \T0, \M1 // T1:T0 = A2*B0 + adox \T6, rax + xor 
rax, rax + mulx \T4, \T2, 8\M1 // T4:T2 = A2*B1 + adox \T0, \T3 + mov 16\C, \T0 // C2_final + adcx \T1, \T5 + mulx \T0, \T3, 16\M1 // T0:T3 = A2*B2 + adcx \T4, \T6 + adcx \T0, rax + adox \T1, \T2 + adox \T3, \T4 + adox rax, \T0 +.endm + +///////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: memory pointers M0 and M1 +// Outputs: memory pointer C +// Temps: regs T0:T9 +///////////////////////////////////////////////////////////////// + +.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 + mov rdx, \M0 + mulx \T0, \T1, \M1 // T0:T1 = A0*B0 + mov \C, \T1 // C0_final + mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 + xor rax, rax + adox \T0, \T2 + mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 + adox \T1, \T3 + mulx \T3, \T4, 24\M1 // T3:T4 = A0*B3 + adox \T2, \T4 + + mov rdx, 8\M0 + mulx \T5, \T4, \M1 // T5:T4 = A1*B0 + adox \T3, rax + xor rax, rax + mulx \T6, \T7, 8\M1 // T6:T7 = A1*B1 + adox \T4, \T0 + mov 8\C, \T4 // C1_final + adcx \T5, \T7 + mulx \T7, \T8, 16\M1 // T7:T8 = A1*B2 + adcx \T6, \T8 + adox \T5, \T1 + mulx \T8, \T9, 24\M1 // T8:T9 = A1*B3 + adcx \T7, \T9 + adcx \T8, rax + adox \T6, \T2 + + mov rdx, 16\M0 + mulx \T1, \T0, \M1 // T1:T0 = A2*B0 + adox \T7, \T3 + adox \T8, rax + xor rax, rax + mulx \T2, \T3, 8\M1 // T2:T3 = A2*B1 + adox \T0, \T5 + mov 16\C, \T0 // C2_final + adcx \T1, \T3 + mulx \T3, \T4, 16\M1 // T3:T4 = A2*B2 + adcx \T2, \T4 + adox \T1, \T6 + mulx \T4,\T9, 24\M1 // T4:T9 = A2*B3 + adcx \T3, \T9 + mov rdx, 24\M0 + adcx \T4, rax + + adox \T2, \T7 + adox \T3, \T8 + adox \T4, rax + + mulx \T5, \T0, \M1 // T5:T0 = A3*B0 + xor rax, rax + mulx \T6, \T7, 8\M1 // T6:T7 = A3*B1 + adcx \T5, \T7 + adox \T1, \T0 + mulx \T7, \T8, 16\M1 // T7:T8 = A3*B2 + adcx \T6, \T8 + adox \T2, \T5 + mulx \T8, \T9, 24\M1 // T8:T9 = A3*B3 + adcx \T7, \T9 + adcx \T8, rax + + adox \T3, \T6 + adox \T4, \T7 + adox \T8, rax + mov 24\C, \T1 // C3_final + mov 32\C, \T2 // C4_final + mov 40\C, \T3 // 
C5_final + mov 48\C, \T4 // C6_final + mov 56\C, \T8 // C7_final +.endm + +#else + +.macro MUL192_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6 + mov rdx, \M0 + mulx \T0, \T1, \M1 // T0:T1 = A0*B0 + mov \C, \T1 // C0_final + mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 + add \T0, \T2 + mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 + adc \T1, \T3 + + mov rdx, 8\M0 + mulx \T3, \T4, \M1 // T3:T4 = A1*B0 + adc \T2, 0 + mulx \T5, \T6, 8\M1 // T5:T6 = A1*B1 + add \T4, \T0 + mov 8\C, \T4 // C1_final + adc \T3, \T1 + adc \T5, \T2 + mulx \T2, \T1, 16\M1 // T2:T1 = A1*B2 + adc \T2, 0 + + add \T3, \T6 + adc \T5, \T1 + adc \T2, 0 + + mov rdx, 16\M0 + mulx \T1, \T0, \M1 // T1:T0 = A2*B0 + add \T0, \T3 + mov 16\C, \T0 // C2_final + mulx \T4, \T6, 8\M1 // T4:T6 = A2*B1 + adc \T1, \T5 + adc \T2, \T4 + mulx rax, \T3, 16\M1 // rax:T3 = A2*B2 + adc rax, 0 + add \T1, \T6 + adc \T3, \T2 + adc rax, 0 +.endm + +.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 + mov rdx, \M0 + mulx \T0, \T1, \M1 // T0:T1 = A0*B0 + mov \C, \T1 // C0_final + mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 + add \T0, \T2 + mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 + adc \T1, \T3 + mulx \T3, \T4, 24\M1 // T3:T4 = A0*B3 + adc \T2, \T4 + mov rdx, 8\M0 + adc \T3, 0 + + mulx \T5, \T4, \M1 // T5:T4 = A1*B0 + mulx \T6, \T7, 8\M1 // T6:T7 = A1*B1 + add \T5, \T7 + mulx \T7, \T8, 16\M1 // T7:T8 = A1*B2 + adc \T6, \T8 + mulx \T8, \T9, 24\M1 // T8:T9 = A1*B3 + adc \T7, \T9 + adc \T8, 0 + + add \T4, \T0 + mov 8\C, \T4 // C1_final + adc \T5, \T1 + adc \T6, \T2 + adc \T7, \T3 + mov rdx, 16\M0 + adc \T8, 0 + + mulx \T1, \T0, \M1 // T1:T0 = A2*B0 + mulx \T2, \T3, 8\M1 // T2:T3 = A2*B1 + add \T1, \T3 + mulx \T3, \T4, 16\M1 // T3:T4 = A2*B2 + adc \T2, \T4 + mulx \T4,\T9, 24\M1 // T4:T9 = A2*B3 + adc \T3, \T9 + mov rdx, 24\M0 + adc \T4, 0 + + add \T0, \T5 + mov 16\C, \T0 // C2_final + adc \T1, \T6 + adc \T2, \T7 + adc \T3, \T8 + adc \T4, 0 + + mulx \T5, \T0, \M1 // T5:T0 = A3*B0 + mulx \T6, \T7, 8\M1 // T6:T7 = A3*B1 + add \T5, \T7 + 
mulx \T7, \T8, 16\M1 // T7:T8 = A3*B2 + adc \T6, \T8 + mulx \T8, \T9, 24\M1 // T8:T9 = A3*B3 + adc \T7, \T9 + adc \T8, 0 + + add \T1, \T0 + mov 24\C, \T1 // C3_final + adc \T2, \T5 + mov 32\C, \T2 // C4_final + adc \T3, \T6 + mov 40\C, \T3 // C5_final + adc \T4, \T7 + mov 48\C, \T4 // C6_final + adc \T8, 0 + mov 56\C, \T8 // C7_final +.endm +#endif + + +//***************************************************************************** +// 434-bit multiplication using Karatsuba (one level), schoolbook (one level) +//***************************************************************************** +.global fmt(mul434_asm) +fmt(mul434_asm): + push r12 + push r13 + push r14 + push r15 + mov rcx, reg_p3 + + // r8-r11 <- AH + AL, rax <- mask + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + push rbx + push rbp + sub rsp, 96 + add r8, [reg_p1+32] + adc r9, [reg_p1+40] + adc r10, [reg_p1+48] + adc r11, 0 + sbb rax, 0 + mov [rsp], r8 + mov [rsp+8], r9 + mov [rsp+16], r10 + mov [rsp+24], r11 + + // r12-r15 <- BH + BL, rbx <- mask + xor rbx, rbx + mov r12, [reg_p2] + mov r13, [reg_p2+8] + mov r14, [reg_p2+16] + mov r15, [reg_p2+24] + add r12, [reg_p2+32] + adc r13, [reg_p2+40] + adc r14, [reg_p2+48] + adc r15, 0 + sbb rbx, 0 + mov [rsp+32], r12 + mov [rsp+40], r13 + mov [rsp+48], r14 + mov [rsp+56], r15 + + // r12-r15 <- masked (BH + BL) + and r12, rax + and r13, rax + and r14, rax + and r15, rax + + // r8-r11 <- masked (AH + AL) + and r8, rbx + and r9, rbx + and r10, rbx + and r11, rbx + + // r8-r11 <- masked (AH + AL) + masked (BH + BL) + add r8, r12 + adc r9, r13 + adc r10, r14 + adc r11, r15 + mov [rsp+64], r8 + mov [rsp+72], r9 + mov [rsp+80], r10 + mov [rsp+88], r11 + + // [rsp] <- (AH+AL) x (BH+BL), low part + MUL256_SCHOOL [rsp], [rsp+32], [rsp], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rbp + + // [rcx] <- AL x BL + MUL256_SCHOOL [reg_p1], [reg_p2], [rcx], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rbp // Result C0-C3 + 
+ // [rcx+64], rbx, rbp, rax <- AH x BH + MUL192_SCHOOL [reg_p1+32], [reg_p2+32], [rcx+64], r8, rbx, r10, rbp, r12, r13, r14 + + // r8-r11 <- (AH+AL) x (BH+BL), final step + mov r8, [rsp+64] + mov r9, [rsp+72] + mov r10, [rsp+80] + mov r11, [rsp+88] + mov rdx, [rsp+32] + add r8, rdx + mov rdx, [rsp+40] + adc r9, rdx + mov rdx, [rsp+48] + adc r10, rdx + mov rdx, [rsp+56] + adc r11, rdx + + // r8-r15 <- (AH+AL) x (BH+BL) - ALxBL + mov r12, [rsp] + mov r13, [rsp+8] + mov r14, [rsp+16] + mov r15, [rsp+24] + sub r12, [rcx] + sbb r13, [rcx+8] + sbb r14, [rcx+16] + sbb r15, [rcx+24] + sbb r8, [rcx+32] + sbb r9, [rcx+40] + sbb r10, [rcx+48] + sbb r11, [rcx+56] + + // r8-r15 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + sub r12, [rcx+64] + sbb r13, [rcx+72] + sbb r14, [rcx+80] + sbb r15, rbx + sbb r8, rbp + sbb r9, rax + sbb r10, 0 + sbb r11, 0 + + add r12, [rcx+32] + mov [rcx+32], r12 // Result C4-C7 + adc r13, [rcx+40] + mov [rcx+40], r13 + adc r14, [rcx+48] + mov [rcx+48], r14 + adc r15, [rcx+56] + mov [rcx+56], r15 + adc r8, [rcx+64] + mov [rcx+64], r8 // Result C8-C13 + adc r9, [rcx+72] + mov [rcx+72], r9 + adc r10, [rcx+80] + mov [rcx+80], r10 + adc r11, rbx + mov [rcx+88], r11 + adc rbp, 0 + mov [rcx+96], rbp + adc rax, 0 + mov [rcx+104], rax + + add rsp, 96 + pop rbp + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret + +#else + +# error "CONFIGURATION NOT SUPPORTED. 
TRY USE_MULX=TRUE" + +#endif + + +#ifdef _MULX_ + +///////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: reg I0 and memory pointer M1 +// Outputs: regs T0:T4 +// Temps: regs T0:T5 +///////////////////////////////////////////////////////////////// +.macro MUL64x256_SCHOOL I0, M1, T0, T1, T2, T3, T4, T5 + mulx \T2, \T4, 8\M1 + xor rax, rax + mulx \T3, \T5, 16\M1 + ADD1 \T1, \T4 // T1 <- C1_final + ADC1 \T2, \T5 // T2 <- C2_final + mulx \T4, \T5, 24\M1 + ADC1 \T3, \T5 // T3 <- C3_final + ADC1 \T4, rax // T4 <- C4_final +.endm + +///////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: regs I0 and I1, and memory pointer M1 +// Outputs: regs T0:T5 +// Temps: regs T0:T5 +///////////////////////////////////////////////////////////////// + +#ifdef _ADX_ +.macro MUL128x256_SCHOOL I0, I1, M1, T0, T1, T2, T3, T4, T5 + mulx \T2, \T4, 8\M1 + xor rax, rax + mulx \T3, \T5, 16\M1 + ADD1 \T1, \T4 + ADC1 \T2, \T5 + mulx \T4, \T5, 24\M1 + ADC1 \T3, \T5 + ADC1 \T4, rax + + xor rax, rax + mov rdx, \I1 + mulx \I1, \T5, \M1 + ADD2 \T1, \T5 // T1 <- C1_final + ADC2 \T2, \I1 + mulx \T5, \I1, 8\M1 + ADC2 \T3, \T5 + ADD1 \T2, \I1 + mulx \T5, \I1, 16\M1 + ADC2 \T4, \T5 + ADC1 \T3, \I1 + mulx \T5, \I1, 24\M1 + ADC2 \T5, rax + ADC1 \T4, \I1 + ADC1 \T5, rax +.endm + +#else + +.macro MUL128x256_SCHOOL I0, I1, M1, T0, T1, T2, T3, T4, T5 + mulx \T2, \T4, 8\M1 + mulx \T3, \T5, 16\M1 + add \T1, \T4 + adc \T2, \T5 + mulx \T4, \T5, 24\M1 + adc \T3, \T5 + adc \T4, 0 + + mov rdx, \I1 + mulx \I1, \T5, \M1 + add \T1, \T5 // T1 <- C1_final + adc \T2, \I1 + mulx \T5, \I1, 8\M1 + adc \T3, \T5 + mulx \T5, rax, 16\M1 + adc \T4, \T5 + mulx \T5, rdx, 24\M1 + adc \T5, 0 + add \T2, \I1 + adc \T3, rax + adc \T4, rdx + adc \T5, 0 +.endm +#endif + + +//************************************************************************************** +// Montgomery reduction +// Based on method 
described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015 +// Operation: c [reg_p2] = a [reg_p1] * R^-1 mod 2*p434, with R = 2^448 (contract stated by the rdc_mont C wrapper) +//************************************************************************************** +.global fmt(rdc434_asm) +fmt(rdc434_asm): + push r14 + + // a[0-1] x p434p1_nz --> result: r8:r13 + mov rdx, [reg_p1] + mov r14, [reg_p1+8] + mulx r9, r8, [rip+fmt(p434p1)+24] // result r8 + push r12 + push r13 + push r15 + push rbp + push rbx + MUL128x256_SCHOOL rdx, r14, [rip+fmt(p434p1)+24], r8, r9, r10, r11, r12, r13 + + mov rdx, [reg_p1+16] + mov rcx, [reg_p1+72] + add r8, [reg_p1+24] + adc r9, [reg_p1+32] + adc r10, [reg_p1+40] + adc r11, [reg_p1+48] + adc r12, [reg_p1+56] + adc r13, [reg_p1+64] + adc rcx, 0 + mulx rbp, rbx, [rip+fmt(p434p1)+24] // result rbx + mov [reg_p2], r9 + mov [reg_p2+8], r10 + mov [reg_p2+16], r11 + mov [reg_p2+24], r12 + mov [reg_p2+32], r13 + mov r9, [reg_p1+80] + mov r10, [reg_p1+88] + mov r11, [reg_p1+96] + mov rdi, [reg_p1+104] + adc r9, 0 + adc r10, 0 + adc r11, 0 + adc rdi, 0 + + // a[2-3] x p434p1_nz --> result: rbx, rbp, r12:r15 + MUL128x256_SCHOOL rdx, r8, [rip+fmt(p434p1)+24], rbx, rbp, r12, r13, r14, r15 + + mov rdx, [reg_p2] + add rbx, [reg_p2+8] + adc rbp, [reg_p2+16] + adc r12, [reg_p2+24] + adc r13, [reg_p2+32] + adc r14, rcx + mov rcx, 0 + adc r15, r9 + adc rcx, r10 + mulx r9, r8, [rip+fmt(p434p1)+24] // result r8 + mov [reg_p2], rbp + mov [reg_p2+8], r12 + mov [reg_p2+16], r13 + adc r11, 0 + adc rdi, 0 + + // a[4-5] x p434p1_nz --> result: r8:r13 + MUL128x256_SCHOOL rdx, rbx, [rip+fmt(p434p1)+24], r8, r9, r10, rbp, r12, r13 + + mov rdx, [reg_p2] + add r8, [reg_p2+8] + adc r9, [reg_p2+16] + adc r10, r14 + adc rbp, r15 + adc r12, rcx + adc r13, r11 + adc rdi, 0 + mulx r15, r14, [rip+fmt(p434p1)+24] // result r14 + mov [reg_p2], r8 // Final result c0-c1 + mov [reg_p2+8], r9 + + // a[6] x p434p1_nz --> result: r14:r15, r8:r9, r11 + MUL64x256_SCHOOL rdx, [rip+fmt(p434p1)+24], r14, r15, r8, r9, r11, rcx + + // Final result 
c2:c6 + add r14, r10 + adc r15, rbp + pop rbx + pop rbp + adc r8, r12 + adc r9, r13 + adc r11, rdi + mov [reg_p2+16], r14 + mov [reg_p2+24], r15 + pop r15 + pop r13 + mov [reg_p2+32], r8 + mov [reg_p2+40], r9 + mov [reg_p2+48], r11 + + pop r12 + pop r14 + ret + + #else + + # error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE" + + #endif + + +//*********************************************************************** +// 434-bit multiprecision addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.global fmt(mp_add434_asm) +fmt(mp_add434_asm): + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + + mov r8, [reg_p1+32] + mov r9, [reg_p1+40] + mov r10, [reg_p1+48] + adc r8, [reg_p2+32] + adc r9, [reg_p2+40] + adc r10, [reg_p2+48] + mov [reg_p3+32], r8 + mov [reg_p3+40], r9 + mov [reg_p3+48], r10 + ret + + +//*************************************************************************** +// 2x434-bit multiprecision subtraction/addition +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. 
If c < 0, add p434*2^448 +//*************************************************************************** +.global fmt(mp_subadd434x2_asm) +fmt(mp_subadd434x2_asm): + push r12 + push r13 + push r14 + push r15 + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + + mov r8, [reg_p1+40] + mov r9, [reg_p1+48] + mov r10, [reg_p1+56] + mov r11, [reg_p1+64] + mov r12, [reg_p1+72] + sbb r8, [reg_p2+40] + sbb r9, [reg_p2+48] + sbb r10, [reg_p2+56] + sbb r11, [reg_p2+64] + sbb r12, [reg_p2+72] + mov [reg_p3+40], r8 + mov [reg_p3+48], r9 + mov [reg_p3+56], r10 + + mov r13, [reg_p1+80] + mov r14, [reg_p1+88] + mov r15, [reg_p1+96] + mov rcx, [reg_p1+104] + sbb r13, [reg_p2+80] + sbb r14, [reg_p2+88] + sbb r15, [reg_p2+96] + sbb rcx, [reg_p2+104] + sbb rax, 0 + + // Add p434 anded with the mask in rax + mov r8, [rip+fmt(p434)] + mov r9, [rip+fmt(p434)+24] + mov r10, [rip+fmt(p434)+32] + mov rdi, [rip+fmt(p434)+40] + mov rsi, [rip+fmt(p434)+48] + and r8, rax + and r9, rax + and r10, rax + and rdi, rax + and rsi, rax + mov rax, [reg_p3+56] + add rax, r8 + adc r11, r8 + adc r12, r8 + adc r13, r9 + adc r14, r10 + adc r15, rdi + adc rcx, rsi + + mov [reg_p3+56], rax + mov [reg_p3+64], r11 + mov [reg_p3+72], r12 + mov [reg_p3+80], r13 + mov [reg_p3+88], r14 + mov [reg_p3+96], r15 + mov [reg_p3+104], rcx + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Double 2x434-bit multiprecision subtraction +// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2] +//*********************************************************************** +.global fmt(mp_dblsub434x2_asm) +fmt(mp_dblsub434x2_asm): + push r12 + push r13 + push r14 
+ + mov r8, [reg_p3] + mov r9, [reg_p3+8] + mov r10, [reg_p3+16] + mov r11, [reg_p3+24] + mov r12, [reg_p3+32] + mov r13, [reg_p3+40] + mov r14, [reg_p3+48] + sub r8, [reg_p1] + sbb r9, [reg_p1+8] + sbb r10, [reg_p1+16] + sbb r11, [reg_p1+24] + sbb r12, [reg_p1+32] + sbb r13, [reg_p1+40] + sbb r14, [reg_p1+48] + setc al + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb r14, [reg_p2+48] + setc cl + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + + mov r8, [reg_p3+56] + mov r9, [reg_p3+64] + mov r10, [reg_p3+72] + mov r11, [reg_p3+80] + mov r12, [reg_p3+88] + mov r13, [reg_p3+96] + mov r14, [reg_p3+104] + bt rax, 0 + sbb r8, [reg_p1+56] + sbb r9, [reg_p1+64] + sbb r10, [reg_p1+72] + sbb r11, [reg_p1+80] + sbb r12, [reg_p1+88] + sbb r13, [reg_p1+96] + sbb r14, [reg_p1+104] + bt rcx, 0 + sbb r8, [reg_p2+56] + sbb r9, [reg_p2+64] + sbb r10, [reg_p2+72] + sbb r11, [reg_p2+80] + sbb r12, [reg_p2+88] + sbb r13, [reg_p2+96] + sbb r14, [reg_p2+104] + mov [reg_p3+56], r8 + mov [reg_p3+64], r9 + mov [reg_p3+72], r10 + mov [reg_p3+80], r11 + mov [reg_p3+88], r12 + mov [reg_p3+96], r13 + mov [reg_p3+104], r14 + + pop r14 + pop r13 + pop r12 + ret \ No newline at end of file diff --git a/SIKE_sw/src/P434/P434.c b/SIKE_sw/src/P434/P434.c new file mode 100644 index 0000000..761713d --- /dev/null +++ b/SIKE_sw/src/P434/P434.c @@ -0,0 +1,133 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: supersingular isogeny parameters and generation of functions for P434 
+*********************************************************************************************/ + +#include "P434_api.h" +#include "P434_internal.h" +#include "../internal.h" + + +// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points: +// -------------------------------------------------------------------------------------------------- +// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). +// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position. +// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. +// Internally, the number of digits used to represent all these elements is obtained by rounding the number of bits up to the next multiple of 32. +// For example, a 434-bit field element is represented with Ceil(434 / 64) = 7 64-bit digits or Ceil(434 / 32) = 14 32-bit digits. + +// +// Curve isogeny system "SIDHp434". 
Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p434^2), where A=6, B=1, C=1 and p434 = 2^216*3^137-1 +// + +const uint64_t p434[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFDC1767AE2FFFFFF, + 0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344 }; +const uint64_t p434p1[NWORDS64_FIELD] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFDC1767AE3000000, + 0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344 }; +const uint64_t p434x2[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFB82ECF5C5FFFFFF, + 0xF78CB8F062B15D47, 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688 }; +const uint64_t p434x4[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xF705D9EB8BFFFFFF, + 0xEF1971E0C562BA8F, 0xB3F17F5A07148159, 0x0008D07C9C5DCD11 }; +const uint64_t p434x16p[2*NWORDS64_FIELD] = { 0x0000000000000010, 0x0000000000000000, 0x0000000000000000, 0x47D130A3A0000000, + 0x873470F9D4EA2B80, 0x6074052FC75BF530, 0x54497C1B1D119772, 0xC55F373D2CDCA412, + 0x732CA2221C664B96, 0x6445AB96AF6359A5, 0x221708AB42ABE1B4, 0xAE3D3D0063244F01, + 0x18B920F2ECF68816, 0x0000004DB194809D }; +// Order of Alice's subgroup +const uint64_t Alice_order[NWORDS64_ORDER] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000001000000 }; +// Order of Bob's subgroup +const uint64_t Bob_order[NWORDS64_ORDER] = { 0x58AEA3FDC1767AE3, 0xC520567BC65C7831, 0x1773446CFC5FD681, 0x0000000002341F27 }; +// Alice's generator values {XPA0 + XPA1*i, XQA0 + xQA1*i, XRA0 + XRA1*i} in GF(p434^2), expressed in Montgomery representation +const uint64_t A_gen[6*NWORDS64_FIELD] = { 0x05ADF455C5C345BF, 0x91935C5CC767AC2B, 0xAFE4E879951F0257, 0x70E792DC89FA27B1, + 0xF797F526BB48C8CD, 0x2181DB6131AF621F, 0x00000A1C08B1ECC4, // XPA0 + 0x74840EB87CDA7788, 0x2971AA0ECF9F9D0B, 0xCB5732BDF41715D5, 0x8CD8E51F7AACFFAA, + 0xA7F424730D7E419F, 0xD671EB919A179E8C, 0x0000FFA26C5A924A, // 
XPA1 + 0xFEC6E64588B7273B, 0xD2A626D74CBBF1C6, 0xF8F58F07A78098C7, 0xE23941F470841B03, + 0x1B63EDA2045538DD, 0x735CFEB0FFD49215, 0x0001C4CB77542876, // XQA0 + 0xADB0F733C17FFDD6, 0x6AFFBD037DA0A050, 0x680EC43DB144E02F, 0x1E2E5D5FF524E374, + 0xE2DDA115260E2995, 0xA6E4B552E2EDE508, 0x00018ECCDDF4B53E, // XQA1 + 0x01BA4DB518CD6C7D, 0x2CB0251FE3CC0611, 0x259B0C6949A9121B, 0x60E17AC16D2F82AD, + 0x3AA41F1CE175D92D, 0x413FBE6A9B9BC4F3, 0x00022A81D8D55643, // XRA0 + 0xB8ADBC70FC82E54A, 0xEF9CDDB0D5FADDED, 0x5820C734C80096A0, 0x7799994BAA96E0E4, + 0x044961599E379AF8, 0xDB2B94FBF09F27E2, 0x0000B87FC716C0C6 }; // XRA1 +// Bob's generator values {XPB0, XQB0, XRB0 + XRB1*i} in GF(p434^2), expressed in Montgomery representation +const uint64_t B_gen[6*NWORDS64_FIELD] = { 0x6E5497556EDD48A3, 0x2A61B501546F1C05, 0xEB919446D049887D, 0x5864A4A69D450C4F, + 0xB883F276A6490D2B, 0x22CC287022D5F5B9, 0x0001BED4772E551F, // XPB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XPB1 + 0xFAE2A3F93D8B6B8E, 0x494871F51700FE1C, 0xEF1A94228413C27C, 0x498FF4A4AF60BD62, + 0xB00AD2A708267E8A, 0xF4328294E017837F, 0x000034080181D8AE, // XQB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XQB1 + 0x283B34FAFEFDC8E4, 0x9208F44977C3E647, 0x7DEAE962816F4E9A, 0x68A2BA8AA262EC9D, + 0x8176F112EA43F45B, 0x02106D022634F504, 0x00007E8A50F02E37, // XRB0 + 0xB378B7C1DA22CCB1, 0x6D089C99AD1D9230, 0xEBE15711813E2369, 0x2B35A68239D48A53, + 0x445F6FD138407C93, 0xBEF93B29A3F6B54B, 0x000173FA910377D3 }; // XRB1 +// Montgomery constant Montgomery_R2 = (2^448)^2 mod p434 +const uint64_t Montgomery_R2[NWORDS64_FIELD] = { 0x28E55B65DCD69B30, 0xACEC7367768798C2, 0xAB27973F8311688D, 0x175CC6AF8D6C7C0B, + 0xABCD92BF2DDE347E, 0x69E16A61C7686D9A, 0x000025A89BCDD12A }; +// Value one in Montgomery representation +const uint64_t 
Montgomery_one[NWORDS64_FIELD] = { 0x000000000000742C, 0x0000000000000000, 0x0000000000000000, 0xB90FF404FC000000, + 0xD801A4FB559FACD4, 0xE93254545F77410C, 0x0000ECEEA7BD2EDA }; + + +// Fixed parameters for isogeny tree computation +const unsigned int strat_Alice[MAX_Alice-1] = { +48, 28, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 13, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, +1, 1, 5, 4, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 21, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 9, 5, 3, 2, 1, 1, +1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1 }; + +const unsigned int strat_Bob[MAX_Bob-1] = { +66, 33, 17, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, +2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 32, 16, 8, 4, 3, 1, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, +1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 }; + +// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions +#define fpcopy fpcopy434 +#define fpzero fpzero434 +#define fpadd fpadd434 +#define fpsub fpsub434 +#define fpneg fpneg434 +#define fpdiv2 fpdiv2_434 +#define fpcorrection fpcorrection434 +#define fpmul_mont fpmul434_mont +#define fpsqr_mont fpsqr434_mont +#define fpinv_mont fpinv434_mont +#define fpinv_chain_mont fpinv434_chain_mont +#define fp2copy fp2copy434 +#define fp2zero fp2zero434 +#define fp2add fp2add434 +#define fp2sub fp2sub434 +#define mp_sub_p2 mp_sub434_p2 +#define mp_sub_p4 mp_sub434_p4 +#define sub_p4 mp_sub_p4 +#define fp2neg fp2neg434 +#define fp2div2 fp2div2_434 +#define fp2correction fp2correction434 +#define fp2mul_mont fp2mul434_mont +#define fp2sqr_mont fp2sqr434_mont +#define fp2inv_mont fp2inv434_mont +#define fp2inv_mont_ct fp2inv434_mont_ct +#define fp2inv_mont_bingcd 
fp2inv434_mont_bingcd +#define fpequal_non_constant_time fpequal434_non_constant_time +#define mp_add_asm mp_add434_asm +#define mp_subaddx2_asm mp_subadd434x2_asm +#define mp_dblsubx2_asm mp_dblsub434x2_asm +#define crypto_kem_keypair crypto_kem_keypair_SIKEp434 +#define crypto_kem_enc crypto_kem_enc_SIKEp434 +#define crypto_kem_dec crypto_kem_dec_SIKEp434 +#define random_mod_order_A random_mod_order_A_SIDHp434 +#define random_mod_order_B random_mod_order_B_SIDHp434 +#define EphemeralKeyGeneration_A EphemeralKeyGeneration_A_SIDHp434 +#define EphemeralKeyGeneration_B EphemeralKeyGeneration_B_SIDHp434 +#define EphemeralSecretAgreement_A EphemeralSecretAgreement_A_SIDHp434 +#define EphemeralSecretAgreement_B EphemeralSecretAgreement_B_SIDHp434 + +#include "../fpx.c" +#include "../ec_isogeny.c" +#include "../sidh.c" +#include "../sike.c" \ No newline at end of file diff --git a/SIKE_sw/src/P434/P434_api.h b/SIKE_sw/src/P434/P434_api.h new file mode 100644 index 0000000..ba78408 --- /dev/null +++ b/SIKE_sw/src/P434/P434_api.h @@ -0,0 +1,112 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: API header file for P434 +*********************************************************************************************/ + +#ifndef P434_API_H +#define P434_API_H + + +/*********************** Key encapsulation mechanism API ***********************/ + +#define CRYPTO_SECRETKEYBYTES 374 // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes +#define CRYPTO_PUBLICKEYBYTES 330 +#define CRYPTO_BYTES 16 +#define CRYPTO_CIPHERTEXTBYTES 346 // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes + +// Algorithm name +#define CRYPTO_ALGNAME "SIKEp434" + +// SIKE's key generation +// It produces a private key sk and computes the public key pk. 
+// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 374 bytes) +// public key pk (CRYPTO_PUBLICKEYBYTES = 330 bytes) +int crypto_kem_keypair_SIKEp434(unsigned char *pk, unsigned char *sk); + +// SIKE's encapsulation +// Input: public key pk (CRYPTO_PUBLICKEYBYTES = 330 bytes) +// Outputs: shared secret ss (CRYPTO_BYTES = 16 bytes) +// ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 346 bytes) +int crypto_kem_enc_SIKEp434(unsigned char *ct, unsigned char *ss, const unsigned char *pk); + +// SIKE's decapsulation +// Input: secret key sk (CRYPTO_SECRETKEYBYTES = 374 bytes) +// ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 346 bytes) +// Outputs: shared secret ss (CRYPTO_BYTES = 16 bytes) +int crypto_kem_dec_SIKEp434(unsigned char *ss, const unsigned char *ct, const unsigned char *sk); + + +// Encoding of keys for KEM-based isogeny system "SIKEp434" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p434) are encoded in 55 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are encoded as {a, b}, with a in the lowest memory portion. +// +// Private keys sk consist of the concatenation of a 16-byte random value, a value in the range [0, 2^Floor(Log(2,3^137))-1] and the public key pk. In the SIKE API, +// private keys are encoded in 374 octets in little endian format. +// Public keys pk consist of 3 elements in GF(p434^2). In the SIKE API, pk is encoded in 330 octets. +// Ciphertexts ct consist of the concatenation of a public key value and a 16-byte value. In the SIKE API, ct is encoded in 330 + 16 = 346 octets. +// Shared keys ss consist of a value of 16 octets. 
+ + +/*********************** Ephemeral key exchange API ***********************/ + +#define SIDH_SECRETKEYBYTES_A 27 +#define SIDH_SECRETKEYBYTES_B 28 +#define SIDH_PUBLICKEYBYTES 330 +#define SIDH_BYTES 110 + +// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys. +// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016. +// Extended version available at: http://eprint.iacr.org/2016/859 + +// Generation of Alice's secret key +// Outputs random value in [0, 2^216 - 1] to be used as Alice's private key +void random_mod_order_A_SIDHp434(unsigned char* random_digits); + +// Generation of Bob's secret key +// Outputs random value in [0, 2^Floor(Log(2,3^137)) - 1] to be used as Bob's private key +void random_mod_order_B_SIDHp434(unsigned char* random_digits); + +// Alice's ephemeral public key generation +// Input: a private key PrivateKeyA in the range [0, 2^216 - 1], stored in 27 bytes. +// Output: the public key PublicKeyA consisting of 3 GF(p434^2) elements encoded in 330 bytes. +int EphemeralKeyGeneration_A_SIDHp434(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA); + +// Bob's ephemeral public key generation +// It computes the public key PublicKeyB from the private key PrivateKeyB, which is passed in as a const input. +// Input: a private key PrivateKeyB in the range [0, 2^Floor(Log(2,3^137)) - 1], stored in 28 bytes. +// Output: the public key PublicKeyB consisting of 3 GF(p434^2) elements encoded in 330 bytes. +int EphemeralKeyGeneration_B_SIDHp434(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB); + +// Alice's ephemeral shared secret computation +// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB +// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^216 - 1], stored in 27 bytes. +// Bob's PublicKeyB consists of 3 GF(p434^2) elements encoded in 330 bytes. 
+// Output: a shared secret SharedSecretA that consists of one element in GF(p434^2) encoded in 110 bytes. +int EphemeralSecretAgreement_A_SIDHp434(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA); + +// Bob's ephemeral shared secret computation +// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA +// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^137)) - 1], stored in 28 bytes. +// Alice's PublicKeyA consists of 3 GF(p434^2) elements encoded in 330 bytes. +// Output: a shared secret SharedSecretB that consists of one element in GF(p434^2) encoded in 110 bytes. +int EphemeralSecretAgreement_B_SIDHp434(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB); + + +// Encoding of keys for KEX-based isogeny system "SIDHp434" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p434) are encoded in 55 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are encoded as {a, b}, with a in the lowest memory portion. +// +// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^216-1] and [0, 2^Floor(Log(2,3^137)) - 1], resp. In the SIDH API, +// Alice's and Bob's private keys are encoded in 27 and 28 octets, resp., in little endian format. +// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p434^2). In the SIDH API, they are encoded in 330 octets. +// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p434^2). In the SIDH API, they are encoded in 110 octets. 
+ + +#endif \ No newline at end of file diff --git a/SIKE_sw/src/P434/P434_internal.h b/SIKE_sw/src/P434/P434_internal.h new file mode 100644 index 0000000..357a487 --- /dev/null +++ b/SIKE_sw/src/P434/P434_internal.h @@ -0,0 +1,175 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: internal header file for P434 +*********************************************************************************************/ + +#ifndef P434_INTERNAL_H +#define P434_INTERNAL_H + +#include "../config.h" + + +#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_ARM64) + #define NWORDS_FIELD 7 // Number of words of a 434-bit field element + #define p434_ZERO_WORDS 3 // Number of "0" digits in the least significant part of p434 + 1 +#elif (TARGET == TARGET_x86) + #define NWORDS_FIELD 14 + #define p434_ZERO_WORDS 6 +#endif + + +// Basic constants + +#define NBITS_FIELD 434 +#define MAXBITS_FIELD 448 +#define MAXWORDS_FIELD ((MAXBITS_FIELD+RADIX-1)/RADIX) // Max. number of words to represent field elements +#define NWORDS64_FIELD ((NBITS_FIELD+63)/64) // Number of 64-bit words of a 434-bit field element +#define NBITS_ORDER 256 +#define NWORDS_ORDER ((NBITS_ORDER+RADIX-1)/RADIX) // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp. 
+#define NWORDS64_ORDER ((NBITS_ORDER+63)/64) // Number of 64-bit words of an NBITS_ORDER-bit (256-bit) element +#define MAXBITS_ORDER NBITS_ORDER +#define ALICE 0 +#define BOB 1 +#define OALICE_BITS 216 +#define OBOB_BITS 218 +#define OBOB_EXPON 137 +#define MASK_ALICE 0xFF +#define MASK_BOB 0x01 +#define PRIME p434 +#define PARAM_A 6 +#define PARAM_C 1 +// Fixed parameters for isogeny tree computation +#define MAX_INT_POINTS_ALICE 7 +#define MAX_INT_POINTS_BOB 8 +#define MAX_Alice 108 +#define MAX_Bob 137 +#define MSG_BYTES 16 +#define SECRETKEY_A_BYTES ((OALICE_BITS + 7) / 8) +#define SECRETKEY_B_BYTES ((OBOB_BITS - 1 + 7) / 8) +#define FP2_ENCODED_BYTES (2*((NBITS_FIELD + 7) / 8)) + + +// SIDH's basic element definitions and point representations + +typedef digit_t felm_t[NWORDS_FIELD]; // Datatype for representing 434-bit field elements (448-bit max.) +typedef digit_t dfelm_t[2*NWORDS_FIELD]; // Datatype for representing double-precision 2x434-bit field elements (2x448-bit max.) +typedef felm_t f2elm_t[2]; // Datatype for representing quadratic extension field elements GF(p434^2) + +typedef struct { f2elm_t X; f2elm_t Z; } point_proj; // Point representation in projective XZ Montgomery coordinates. +typedef point_proj point_proj_t[1]; + +#ifdef COMPRESS + typedef struct { f2elm_t X; f2elm_t Y; f2elm_t Z; } point_full_proj; // Point representation in full projective XYZ Montgomery coordinates + typedef point_full_proj point_full_proj_t[1]; + + typedef struct { f2elm_t x; f2elm_t y; } point_affine; // Point representation in affine coordinates. 
+ typedef point_affine point_t[1]; + + typedef f2elm_t publickey_t[3]; +#endif + + + +/**************** Function prototypes ****************/ +/************* Multiprecision functions **************/ + +// 434-bit multiprecision addition, c = a+b +void mp_add434(const digit_t* a, const digit_t* b, digit_t* c); +void mp_add434_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// 434-bit multiprecision subtraction, c = a-b+2p or c = a-b+4p +extern void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c); +extern void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c); +void mp_sub434_p2_asm(const digit_t* a, const digit_t* b, digit_t* c); +void mp_sub434_p4_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// 2x434-bit multiprecision subtraction followed by addition with p434*2^448, c = a-b+(p434*2^448) if a-b < 0, otherwise c=a-b +void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c); +void mp_subadd434x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Double 2x434-bit multiprecision subtraction, c = c-a-b, where c > a and c > b +void mp_dblsub434x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +/************ Field arithmetic functions *************/ + +// Copy of a field element, c = a +void fpcopy434(const digit_t* a, digit_t* c); + +// Zeroing a field element, a = 0 +void fpzero434(digit_t* a); + +// Non constant-time comparison of two field elements. 
If a = b return TRUE, otherwise, return FALSE +bool fpequal434_non_constant_time(const digit_t* a, const digit_t* b); + +// Modular addition, c = a+b mod p434 +extern void fpadd434(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpadd434_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular subtraction, c = a-b mod p434 +extern void fpsub434(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpsub434_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular negation, a = -a mod p434 +extern void fpneg434(digit_t* a); + +// Modular division by two, c = a/2 mod p434. +void fpdiv2_434(const digit_t* a, digit_t* c); + +// Modular correction to reduce field element a in [0, 2*p434-1] to [0, p434-1]. +void fpcorrection434(digit_t* a); + +// 434-bit Montgomery reduction, c = a mod p +void rdc434_asm(digit_t* ma, digit_t* mc); + +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^448 +void fpmul434_mont(const digit_t* a, const digit_t* b, digit_t* c); +void mul434_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Field squaring using Montgomery arithmetic, mc = ma^2*R^-1 mod p434, where R=2^448 +void fpsqr434_mont(const digit_t* ma, digit_t* mc); + +// Field inversion, a = a^-1 in GF(p434) +void fpinv434_mont(digit_t* a); + +// Chain to compute (p434-3)/4 using Montgomery arithmetic +void fpinv434_chain_mont(digit_t* a); + +/************ GF(p^2) arithmetic functions *************/ + +// Copy of a GF(p434^2) element, c = a +void fp2copy434(const f2elm_t a, f2elm_t c); + +// Zeroing a GF(p434^2) element, a = 0 +void fp2zero434(f2elm_t a); + +// GF(p434^2) negation, a = -a in GF(p434^2) +void fp2neg434(f2elm_t a); + +// GF(p434^2) addition, c = a+b in GF(p434^2) +extern void fp2add434(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p434^2) subtraction, c = a-b in GF(p434^2) +extern void fp2sub434(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p434^2) division by two, c = 
a/2 in GF(p434^2) +void fp2div2_434(const f2elm_t a, f2elm_t c); + +// Modular correction, a = a in GF(p434^2) +void fp2correction434(f2elm_t a); + +// GF(p434^2) squaring using Montgomery arithmetic, c = a^2 in GF(p434^2) +void fp2sqr434_mont(const f2elm_t a, f2elm_t c); + +// GF(p434^2) multiplication using Montgomery arithmetic, c = a*b in GF(p434^2) +void fp2mul434_mont(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p434^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) +void fp2inv434_mont(f2elm_t a); + + +#endif diff --git a/SIKE_sw/src/P434/generic/fp_generic.c b/SIKE_sw/src/P434/generic/fp_generic.c new file mode 100644 index 0000000..5d585c5 --- /dev/null +++ b/SIKE_sw/src/P434/generic/fp_generic.c @@ -0,0 +1,259 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: portable modular arithmetic for P434 +*********************************************************************************************/ + +#include "../P434_internal.h" +#include "../../internal.h" + +// Global constants +extern const uint64_t p434[NWORDS64_FIELD]; +extern const uint64_t p434p1[NWORDS64_FIELD]; +extern const uint64_t p434x2[NWORDS64_FIELD]; +extern const uint64_t p434x4[NWORDS64_FIELD]; + + +__inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. 
+ unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p434x2)[i], borrow, c[i]); + } +} + + +__inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p434x4)[i], borrow, c[i]); + } +} + + +__inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p434. + // Inputs: a, b in [0, 2*p434-1] + // Output: c in [0, 2*p434-1] + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p434x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p434x2)[i] & mask, carry, c[i]); + } +} + + +__inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p434. + // Inputs: a, b in [0, 2*p434-1] + // Output: c in [0, 2*p434-1] + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p434x2)[i] & mask, borrow, c[i]); + } +} + + +__inline void fpneg434(digit_t* a) +{ // Modular negation, a = -a mod p434. 
+ // Input/output: a in [0, 2*p434-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p434x2)[i], a[i], borrow, a[i]); + } +} + + +void fpdiv2_434(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p434. + // Input : a in [0, 2*p434-1] + // Output: c in [0, 2*p434-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p434 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p434)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection434(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p434-1] to [0, p434-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p434)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p434)[i] & mask, borrow, a[i]); + } +} + + +void digit_x_digit(const digit_t a, const digit_t b, digit_t* c) +{ // Digit multiplication, digit * digit -> 2-digit result + register digit_t al, ah, bl, bh, temp; + digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4); + + al = a & mask_low; // Low part + ah = a >> (sizeof(digit_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(digit_t) * 4); + + albl = al*bl; + albh = al*bh; + ahbl = ah*bl; + ahbh = ah*bh; + c[0] = albl & mask_low; // C00 + + res1 = albl >> (sizeof(digit_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(digit_t) * 4); + c[0] ^= temp << (sizeof(digit_t) * 4); // C01 + + res1 = ahbl >> (sizeof(digit_t) * 4); + res2 = albh >> (sizeof(digit_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + c[1] = temp & mask_low; // C10 + carry = temp 
& mask_high; + c[1] ^= (ahbh & mask_high) + carry; // C11 +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. + unsigned int i, j; + digit_t t = 0, u = 0, v = 0, UV[2]; + unsigned int carry = 0; + + for (i = 0; i < nwords; i++) { + for (j = 0; j <= i; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = nwords; i < 2*nwords-1; i++) { + for (j = i-nwords+1; j < nwords; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + c[2*nwords-1] = v; +} + + +void rdc_mont(digit_t* ma, digit_t* mc) +{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434. + // mc = ma*R^-1 mod p434x2, where R = 2^448. + // If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1]. + // ma is assumed to be in Montgomery representation. 
+ unsigned int i, j, carry, count = p434_ZERO_WORDS; + digit_t UV[2], t = 0, u = 0, v = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + mc[i] = 0; + } + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j < i; j++) { + if (j < (i-p434_ZERO_WORDS+1)) { + MUL(mc[j], ((digit_t*)p434p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { + if (count > 0) { + count -= 1; + } + for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { + if (j < (NWORDS_FIELD-count)) { + MUL(mc[j], ((digit_t*)p434p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i-NWORDS_FIELD] = v; + v = u; + u = t; + t = 0; + } + ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); + mc[NWORDS_FIELD-1] = v; +} \ No newline at end of file diff --git a/SIKE_sw/src/P503/AMD64/fp_x64.c b/SIKE_sw/src/P503/AMD64/fp_x64.c new file mode 100644 index 0000000..ead9e6c --- /dev/null +++ b/SIKE_sw/src/P503/AMD64/fp_x64.c @@ -0,0 +1,572 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: modular arithmetic optimized for x64 platforms for P503 +*********************************************************************************************/ + +#include "../P503_internal.h" +#include "../../internal.h" + +// Global constants +extern const uint64_t p503[NWORDS_FIELD]; +extern const uint64_t p503p1[NWORDS_FIELD]; +extern const uint64_t p503x2[NWORDS_FIELD]; +extern const uint64_t p503x4[NWORDS_FIELD]; + + +__inline void 
mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p503x2)[i], borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + mp_sub503_p2_asm(a, b, c); + +#endif +} + + +__inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p503x4)[i], borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + mp_sub503_p4_asm(a, b, c); + +#endif +} + + +__inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p503. + // Inputs: a, b in [0, 2*p503-1] + // Output: c in [0, 2*p503-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p503x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p503x2)[i] & mask, carry, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + fpadd503_asm(a, b, c); + +#endif +} + + +__inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p503. 
+ // Inputs: a, b in [0, 2*p503-1] + // Output: c in [0, 2*p503-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p503x2)[i] & mask, borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + fpsub503_asm(a, b, c); + +#endif +} + + +__inline void fpneg503(digit_t* a) +{ // Modular negation, a = -a mod p503, computed in place as a = p503x2 - a. + // Input/output: a in [0, 2*p503-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p503x2)[i], a[i], borrow, a[i]); + } +} + + +void fpdiv2_503(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p503. + // Input : a in [0, 2*p503-1] + // Output: c in [0, 2*p503-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p503 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p503)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection503(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p503)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p503)[i] & mask, borrow, a[i]); + } +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision multiply, c = a*b. NOTE: nwords is ignored (cast to void below); the OS_WIN path is unrolled for 8-digit a and b and writes 16 digits of c, and the OS_LINUX path forwards to mul503_asm(a, b, c). 
+ + (void)nwords; + +#if (OS_TARGET == OS_WIN) + digit_t t = 0; + uint128_t uv = {0}; + unsigned int carry = 0; + + MULADD128(a[0], b[0], uv, carry, uv); + t += carry; + c[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[0], uv, carry, uv); + t += carry; + c[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[0], uv, carry, uv); + t += carry; + c[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[0], uv, carry, uv); + t += carry; + c[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[0], uv, carry, uv); + t += carry; + c[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[5], uv, carry, uv); + t += carry; + MULADD128(a[4], b[1], uv, carry, uv); + t += carry; + MULADD128(a[3], b[2], uv, carry, uv); + t += carry; + MULADD128(a[2], b[3], uv, carry, uv); + t += carry; + MULADD128(a[1], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[0], uv, carry, uv); + t += carry; + c[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[1], uv, carry, uv); + t += carry; + MULADD128(a[4], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[4], uv, carry, uv); + t += carry; + MULADD128(a[1], b[5], uv, carry, uv); + t += carry; + MULADD128(a[6], b[0], uv, carry, uv); + t += carry; + c[6] = uv[0]; + uv[0] = 
uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[7], uv, carry, uv); + t += carry; + MULADD128(a[6], b[1], uv, carry, uv); + t += carry; + MULADD128(a[5], b[2], uv, carry, uv); + t += carry; + MULADD128(a[4], b[3], uv, carry, uv); + t += carry; + MULADD128(a[3], b[4], uv, carry, uv); + t += carry; + MULADD128(a[2], b[5], uv, carry, uv); + t += carry; + MULADD128(a[1], b[6], uv, carry, uv); + t += carry; + MULADD128(a[7], b[0], uv, carry, uv); + t += carry; + c[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[7], b[1], uv, carry, uv); + t += carry; + MULADD128(a[6], b[2], uv, carry, uv); + t += carry; + MULADD128(a[5], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[5], uv, carry, uv); + t += carry; + MULADD128(a[2], b[6], uv, carry, uv); + t += carry; + MULADD128(a[1], b[7], uv, carry, uv); + t += carry; + c[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[7], b[2], uv, carry, uv); + t += carry; + MULADD128(a[6], b[3], uv, carry, uv); + t += carry; + MULADD128(a[5], b[4], uv, carry, uv); + t += carry; + MULADD128(a[4], b[5], uv, carry, uv); + t += carry; + MULADD128(a[3], b[6], uv, carry, uv); + t += carry; + MULADD128(a[2], b[7], uv, carry, uv); + t += carry; + c[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[7], b[3], uv, carry, uv); + t += carry; + MULADD128(a[6], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[5], uv, carry, uv); + t += carry; + MULADD128(a[4], b[6], uv, carry, uv); + t += carry; + MULADD128(a[3], b[7], uv, carry, uv); + t += carry; + c[10] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[7], b[4], uv, carry, uv); + t += carry; + MULADD128(a[6], b[5], uv, carry, uv); + t += carry; + MULADD128(a[5], b[6], uv, carry, uv); + t += carry; + MULADD128(a[4], b[7], uv, carry, uv); + t += carry; + c[11] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[7], b[5], uv, carry, uv); + t += carry; + 
MULADD128(a[6], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[7], uv, carry, uv); + t += carry; + c[12] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[7], b[6], uv, carry, uv); + t += carry; + MULADD128(a[6], b[7], uv, carry, uv); + t += carry; + c[13] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + + MULADD128(a[7], b[7], uv, carry, uv); + c[14] = uv[0]; + c[15] = uv[1]; + +#elif (OS_TARGET == OS_LINUX) + + mul503_asm(a, b, c); + +#endif +} + + +void rdc_mont(digit_t* ma, digit_t* mc) +{ // Montgomery reduction exploiting special form of the prime. + // mc = ma*R^-1 mod p503x2, where R = 2^512. + // If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1]. + // ma is assumed to be in Montgomery representation. + +#if (OS_TARGET == OS_WIN) + unsigned int carry; + digit_t t = 0; + uint128_t uv = {0}; + + mc[0] = ma[0]; + mc[1] = ma[1]; + mc[2] = ma[2]; + MUL128(mc[0], ((digit_t*)p503p1)[3], uv); + ADDC(0, uv[0], ma[3], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + mc[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = 0; + + MULADD128(mc[0], ((digit_t*)p503p1)[4], uv, carry, uv); + MULADD128(mc[1], ((digit_t*)p503p1)[3], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[4], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p503p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p503p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p503p1)[3], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[5], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p503p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p503p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p503p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p503p1)[3], uv, carry, uv); + t += carry; + 
ADDC(0, uv[0], ma[6], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[6] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p503p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p503p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p503p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p503p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p503p1)[3], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[7], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[1], ((digit_t*)p503p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p503p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p503p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p503p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p503p1)[3], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[8], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[2], ((digit_t*)p503p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p503p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p503p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p503p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p503p1)[3], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[9], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[3], ((digit_t*)p503p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p503p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p503p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p503p1)[4], uv, carry, uv); + t += carry; + MULADD128(mc[7], 
((digit_t*)p503p1)[3], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[10], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[4], ((digit_t*)p503p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p503p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p503p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p503p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[11], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[5], ((digit_t*)p503p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p503p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p503p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[12], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[6], ((digit_t*)p503p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p503p1)[6], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[13], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[7], ((digit_t*)p503p1)[7], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[14], carry, mc[6]); + ADDC(carry, uv[1], 0, carry, uv[1]); + ADDC(0, uv[1], ma[15], carry, mc[7]); + +#elif (OS_TARGET == OS_LINUX) + + rdc503_asm(ma, mc); + +#endif +} \ No newline at end of file diff --git a/SIKE_sw/src/P503/AMD64/fp_x64_asm.S b/SIKE_sw/src/P503/AMD64/fp_x64_asm.S new file mode 100644 index 0000000..82ccd72 --- /dev/null +++ b/SIKE_sw/src/P503/AMD64/fp_x64_asm.S @@ -0,0 +1,1824 @@ +//******************************************************************************************* +// SIDH: an efficient supersingular isogeny cryptography library +// Copyright (c) Microsoft 
Corporation +// +// Website: https://github.com/microsoft/PQCrypto-SIDH +// Released under MIT license +// +// Abstract: field arithmetic in x64 assembly for P503 on Linux +//******************************************************************************************* + +.intel_syntax noprefix + +// Format function and variable names for Mac OS X +#if defined(__APPLE__) + #define fmt(f) _##f +#else + #define fmt(f) f +#endif + +// Registers that are used for parameter passing: +#define reg_p1 rdi +#define reg_p2 rsi +#define reg_p3 rdx + +// Define addition instructions +#ifdef _MULX_ +#ifdef _ADX_ + +#define ADD1 adox +#define ADC1 adox +#define ADD2 adcx +#define ADC2 adcx + +#else + +#define ADD1 add +#define ADC1 adc +#define ADD2 add +#define ADC2 adc + +#endif +#endif + + +.text +//*********************************************************************** +// Field addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.global fmt(fpadd503_asm) +fmt(fpadd503_asm): + push r12 + push r13 + push r14 + push r15 + + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + adc r14, [reg_p2+48] + adc r15, [reg_p2+56] + + mov rcx, [rip+fmt(p503x2)] + sub r8, rcx + mov rcx, [rip+fmt(p503x2)+8] + sbb r9, rcx + sbb r10, rcx + mov rcx, [rip+fmt(p503x2)+24] + sbb r11, rcx + mov rcx, [rip+fmt(p503x2)+32] + sbb r12, rcx + mov rcx, [rip+fmt(p503x2)+40] + sbb r13, rcx + mov rcx, [rip+fmt(p503x2)+48] + sbb r14, rcx + mov rcx, [rip+fmt(p503x2)+56] + sbb r15, rcx + sbb rax, 0 + + mov rdi, [rip+fmt(p503x2)] + and rdi, rax + mov rsi, [rip+fmt(p503x2)+8] + and rsi, rax + mov rcx, [rip+fmt(p503x2)+24] + and rcx, rax + + add r8, rdi + adc r9, 
rsi + adc r10, rsi + adc r11, rcx + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + setc cl + + mov r8, [rip+fmt(p503x2)+32] + and r8, rax + mov r9, [rip+fmt(p503x2)+40] + and r9, rax + mov r10, [rip+fmt(p503x2)+48] + and r10, rax + mov r11, [rip+fmt(p503x2)+56] + and r11, rax + + bt rcx, 0 + adc r12, r8 + adc r13, r9 + adc r14, r10 + adc r15, r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Field subtraction +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] +//*********************************************************************** +.global fmt(fpsub503_asm) +fmt(fpsub503_asm): + push r12 + push r13 + push r14 + push r15 + + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb r14, [reg_p2+48] + sbb r15, [reg_p2+56] + sbb rax, 0 + + mov rdi, [rip+fmt(p503x2)] + and rdi, rax + mov rsi, [rip+fmt(p503x2)+8] + and rsi, rax + mov rcx, [rip+fmt(p503x2)+24] + and rcx, rax + + add r8, rdi + adc r9, rsi + adc r10, rsi + adc r11, rcx + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + setc cl + + mov r8, [rip+fmt(p503x2)+32] + and r8, rax + mov r9, [rip+fmt(p503x2)+40] + and r9, rax + mov r10, [rip+fmt(p503x2)+48] + and r10, rax + mov r11, [rip+fmt(p503x2)+56] + and r11, rax + + bt rcx, 0 + adc r12, r8 + adc r13, r9 + adc r14, r10 + adc r15, r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +///////////////////////////////////////////////////////////////// 
MACRO +.macro SUB503_PX P0 + push r12 + push r13 + push r14 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov rcx, [reg_p1+56] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb r14, [reg_p2+48] + sbb rcx, [reg_p2+56] + + mov rax, [rip+\P0] + mov rdi, [rip+\P0+8] + mov rsi, [rip+\P0+24] + add r8, rax + mov rax, [rip+\P0+32] + adc r9, rdi + adc r10, rdi + adc r11, rsi + adc r12, rax + mov rdi, [rip+\P0+40] + mov rsi, [rip+\P0+48] + mov rax, [rip+\P0+56] + adc r13, rdi + adc r14, rsi + adc rcx, rax + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], rcx + + pop r14 + pop r13 + pop r12 + .endm + + +//*********************************************************************** +// Multiprecision subtraction with correction with 2*p503 +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p503 +//*********************************************************************** +.global fmt(mp_sub503_p2_asm) +fmt(mp_sub503_p2_asm): + + SUB503_PX fmt(p503x2) + ret + + +//*********************************************************************** +// Multiprecision subtraction with correction with 4*p503 +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p503 +//*********************************************************************** +.global fmt(mp_sub503_p4_asm) +fmt(mp_sub503_p4_asm): + + SUB503_PX fmt(p503x4) + ret + + +#ifdef _MULX_ + +///////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: memory pointers M0 and M1 +// Outputs: memory pointer C and regs T1, T2, T3, T4, T8 +// Temps: regs T0:T9 +///////////////////////////////////////////////////////////////// + +#ifdef _ADX_ +.macro MUL256_SCHOOL 
M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 + mov rdx, \M0 + mulx \T0, \T1, \M1 // T0:T1 = A0*B0 + mov \C, \T1 // C0_final + mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 + xor rax, rax + adox \T0, \T2 + mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 + adox \T1, \T3 + mulx \T3, \T4, 24\M1 // T3:T4 = A0*B3 + adox \T2, \T4 + + mov rdx, 8\M0 + mulx \T5, \T4, \M1 // T5:T4 = A1*B0 + adox \T3, rax + xor rax, rax + mulx \T6, \T7, 8\M1 // T6:T7 = A1*B1 + adox \T4, \T0 + mov 8\C, \T4 // C1_final + adcx \T5, \T7 + mulx \T7, \T8, 16\M1 // T7:T8 = A1*B2 + adcx \T6, \T8 + adox \T5, \T1 + mulx \T8, \T9, 24\M1 // T8:T9 = A1*B3 + adcx \T7, \T9 + adcx \T8, rax + adox \T6, \T2 + + mov rdx, 16\M0 + mulx \T1, \T0, \M1 // T1:T0 = A2*B0 + adox \T7, \T3 + adox \T8, rax + xor rax, rax + mulx \T2, \T3, 8\M1 // T2:T3 = A2*B1 + adox \T0, \T5 + mov 16\C, \T0 // C2_final + adcx \T1, \T3 + mulx \T3, \T4, 16\M1 // T3:T4 = A2*B2 + adcx \T2, \T4 + adox \T1, \T6 + mulx \T4,\T9, 24\M1 // T4:T9 = A2*B3 + adcx \T3, \T9 + mov rdx, 24\M0 + adcx \T4, rax + + adox \T2, \T7 + adox \T3, \T8 + adox \T4, rax + + mulx \T5, \T0, \M1 // T5:T0 = A3*B0 + xor rax, rax + mulx \T6, \T7, 8\M1 // T6:T7 = A3*B1 + adcx \T5, \T7 + adox \T1, \T0 + mulx \T7, \T8, 16\M1 // T7:T8 = A3*B2 + adcx \T6, \T8 + adox \T2, \T5 + mulx \T8, \T9, 24\M1 // T8:T9 = A3*B3 + adcx \T7, \T9 + adcx \T8, rax + + adox \T3, \T6 + adox \T4, \T7 + adox \T8, rax +.endm + +#else + +.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 + mov rdx, \M0 + mulx \T0, \T1, \M1 // T0:T1 = A0*B0 + mov \C, \T1 // C0_final + mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 + add \T0, \T2 + mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 + adc \T1, \T3 + mulx \T3, \T4, 24\M1 // T3:T4 = A0*B3 + adc \T2, \T4 + mov rdx, 8\M0 + adc \T3, 0 + + mulx \T5, \T4, \M1 // T5:T4 = A1*B0 + mulx \T6, \T7, 8\M1 // T6:T7 = A1*B1 + add \T5, \T7 + mulx \T7, \T8, 16\M1 // T7:T8 = A1*B2 + adc \T6, \T8 + mulx \T8, \T9, 24\M1 // T8:T9 = A1*B3 + adc \T7, \T9 + adc \T8, 0 + + add \T4, \T0 + mov 8\C, \T4
// C1_final + adc \T5, \T1 + adc \T6, \T2 + adc \T7, \T3 + mov rdx, 16\M0 + adc \T8, 0 + + mulx \T1, \T0, \M1 // T1:T0 = A2*B0 + mulx \T2, \T3, 8\M1 // T2:T3 = A2*B1 + add \T1, \T3 + mulx \T3, \T4, 16\M1 // T3:T4 = A2*B2 + adc \T2, \T4 + mulx \T4,\T9, 24\M1 // T4:T9 = A2*B3 + adc \T3, \T9 + mov rdx, 24\M0 + adc \T4, 0 + + add \T0, \T5 + mov 16\C, \T0 // C2_final + adc \T1, \T6 + adc \T2, \T7 + adc \T3, \T8 + adc \T4, 0 + + mulx \T5, \T0, \M1 // T5:T0 = A3*B0 + mulx \T6, \T7, 8\M1 // T6:T7 = A3*B1 + add \T5, \T7 + mulx \T7, \T8, 16\M1 // T7:T8 = A3*B2 + adc \T6, \T8 + mulx \T8, \T9, 24\M1 // T8:T9 = A3*B3 + adc \T7, \T9 + adc \T8, 0 + + add \T1, \T0 + adc \T2, \T5 + adc \T3, \T6 + adc \T4, \T7 + adc \T8, 0 +.endm +#endif + + +//***************************************************************************** +// 503-bit multiplication using Karatsuba (one level), schoolbook (one level) +//***************************************************************************** +.global fmt(mul503_asm) +fmt(mul503_asm): + push r12 + push r13 + push r14 + push r15 + mov rcx, reg_p3 + + // r8-r11 <- AH + AL, rax <- mask + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + push rbx + push rbp + sub rsp, 96 + add r8, [reg_p1+32] + adc r9, [reg_p1+40] + adc r10, [reg_p1+48] + adc r11, [reg_p1+56] + sbb rax, 0 + mov [rsp], r8 + mov [rsp+8], r9 + mov [rsp+16], r10 + mov [rsp+24], r11 + + // r12-r15 <- BH + BL, rbx <- mask + xor rbx, rbx + mov r12, [reg_p2] + mov r13, [reg_p2+8] + mov r14, [reg_p2+16] + mov r15, [reg_p2+24] + add r12, [reg_p2+32] + adc r13, [reg_p2+40] + adc r14, [reg_p2+48] + adc r15, [reg_p2+56] + sbb rbx, 0 + mov [rsp+32], r12 + mov [rsp+40], r13 + mov [rsp+48], r14 + mov [rsp+56], r15 + + // r12-r15 <- masked (BH + BL) + and r12, rax + and r13, rax + and r14, rax + and r15, rax + + // r8-r11 <- masked (AH + AL) + and r8, rbx + and r9, rbx + and r10, rbx + and r11, rbx + + // r8-r11 <- masked (AH + AL) + masked (BH + BL)
+ add r8, r12 + adc r9, r13 + adc r10, r14 + adc r11, r15 + mov [rsp+64], r8 + mov [rsp+72], r9 + mov [rsp+80], r10 + mov [rsp+88], r11 + + // [rcx+64], r9-r12, rbx <- (AH+AL) x (BH+BL), low part + MUL256_SCHOOL [rsp], [rsp+32], [rcx+64], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rbp + mov [rcx+88], r9 + mov [rcx+96], r10 + mov [rcx+104], r11 + mov [rcx+112], r12 + mov [rcx+120], rbx + + // [rcx], r9-r12, rbx <- AL x BL + MUL256_SCHOOL [reg_p1], [reg_p2], [rcx], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rbp // Result C0-C3 + mov [rcx+24], r9 + mov [rcx+32], r10 + mov [rcx+40], r11 + mov [rcx+48], r12 + mov [rcx+56], rbx + + // [rsp], rbx, rbp, r13-r15 <- AH x BH + MUL256_SCHOOL [reg_p1+32], [reg_p2+32], [rsp], r8, rbx, rbp, r13, r14, r9, r10, r11, r15, r12 + + // r8-r11 <- (AH+AL) x (BH+BL), final step + mov r8, [rsp+64] + mov r9, [rsp+72] + mov r10, [rsp+80] + mov r11, [rsp+88] + mov rax, [rcx+96] + add r8, rax + mov rax, [rcx+104] + adc r9, rax + mov rax, [rcx+112] + adc r10, rax + mov rax, [rcx+120] + adc r11, rax + + // r8-r12, rdi, rsi, rdx <- (AH+AL) x (BH+BL) - ALxBL + mov r12, [rcx+64] + mov rdi, [rcx+72] + mov rsi, [rcx+80] + mov rdx, [rcx+88] + sub r12, [rcx] + sbb rdi, [rcx+8] + sbb rsi, [rcx+16] + sbb rdx, [rcx+24] + sbb r8, [rcx+32] + sbb r9, [rcx+40] + sbb r10, [rcx+48] + sbb r11, [rcx+56] + + // r8-r12, rdi, rsi, rdx <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + sub r12, [rsp] + sbb rdi, [rsp+8] + sbb rsi, [rsp+16] + sbb rdx, rbx + sbb r8, rbp + sbb r9, r13 + sbb r10, r14 + sbb r11, r15 + + add r12, [rcx+32] + mov [rcx+32], r12 // Result C4-C7 + adc rdi, [rcx+40] + mov [rcx+40], rdi + adc rsi, [rcx+48] + mov [rcx+48], rsi + adc rdx, [rcx+56] + mov [rcx+56], rdx + mov rax, [rsp] + adc r8, rax + mov [rcx+64], r8 // Result C8-C15 + mov rax, [rsp+8] + adc r9, rax + mov [rcx+72], r9 + mov rax, [rsp+16] + adc r10, rax + mov [rcx+80], r10 + adc r11, rbx + mov [rcx+88], r11 + adc rbp, 0 + mov [rcx+96], rbp + adc r13, 0 + mov [rcx+104], r13 + adc r14, 0 + mov 
[rcx+112], r14 + adc r15, 0 + mov [rcx+120], r15 + + add rsp, 96 + pop rbp + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret + +#else + +//*********************************************************************** +// Integer multiplication +// Based on Karatsuba method +// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2] +// NOTE: a=c or b=c are not allowed +//*********************************************************************** +.global fmt(mul503_asm) +fmt(mul503_asm): + push r12 + push r13 + push r14 + mov rcx, reg_p3 + + // rcx[0-3] <- AH+AL + xor rax, rax + mov r8, [reg_p1+32] + mov r9, [reg_p1+40] + mov r10, [reg_p1+48] + mov r11, [reg_p1+56] + add r8, [reg_p1] + adc r9, [reg_p1+8] + adc r10, [reg_p1+16] + adc r11, [reg_p1+24] + push r15 + mov [rcx], r8 + mov [rcx+8], r9 + mov [rcx+16], r10 + mov [rcx+24], r11 + sbb rax, 0 + sub rsp, 80 // Allocating space in stack + + // r12-r15 <- BH+BL + xor rdx, rdx + mov r12, [reg_p2+32] + mov r13, [reg_p2+40] + mov r14, [reg_p2+48] + mov r15, [reg_p2+56] + add r12, [reg_p2] + adc r13, [reg_p2+8] + adc r14, [reg_p2+16] + adc r15, [reg_p2+24] + sbb rdx, 0 + mov [rsp+64], rax + mov [rsp+72], rdx + + // (rsp[0-3],r8,r9,r10,r11) <- (AH+AL)*(BH+BL) + mov rax, [rcx] + mul r12 + mov [rsp], rax // c0 + mov r8, rdx + + xor r9, r9 + mov rax, [rcx] + mul r13 + add r8, rax + adc r9, rdx + + xor r10, r10 + mov rax, [rcx+8] + mul r12 + add r8, rax + mov [rsp+8], r8 // c1 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [rcx] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+16] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+8] + mul r13 + add r9, rax + mov [rsp+16], r9 // c2 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [rcx] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+24] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+8] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+16] + mul r13 + add r10, rax + mov 
[rsp+24], r10 // c3 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [rcx+8] + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+24] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+16] + mul r14 + add r8, rax + mov [rsp+32], r8 // c4 + adc r9, rdx + adc r10, 0 + + xor r11, r11 + mov rax, [rcx+16] + mul r15 + add r9, rax + adc r10, rdx + adc r11, 0 + + mov rax, [rcx+24] + mul r14 + add r9, rax // c5 + adc r10, rdx + adc r11, 0 + + mov rax, [rcx+24] + mul r15 + add r10, rax // c6 + adc r11, rdx // c7 + + mov rax, [rsp+64] + and r12, rax + and r13, rax + and r14, rax + and r15, rax + add r12, r8 + adc r13, r9 + adc r14, r10 + adc r15, r11 + + mov rax, [rsp+72] + mov r8, [rcx] + mov r9, [rcx+8] + mov r10, [rcx+16] + mov r11, [rcx+24] + and r8, rax + and r9, rax + and r10, rax + and r11, rax + add r8, r12 + adc r9, r13 + adc r10, r14 + adc r11, r15 + mov [rsp+32], r8 + mov [rsp+40], r9 + mov [rsp+48], r10 + mov [rsp+56], r11 + + // rcx[0-7] <- AL*BL + mov r11, [reg_p1] + mov rax, [reg_p2] + mul r11 + xor r9, r9 + mov [rcx], rax // c0 + mov r8, rdx + + mov r14, [reg_p1+16] + mov rax, [reg_p2+8] + mul r11 + xor r10, r10 + add r8, rax + adc r9, rdx + + mov r12, [reg_p1+8] + mov rax, [reg_p2] + mul r12 + add r8, rax + mov [rcx+8], r8 // c1 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+16] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p2] + mov rax, r14 + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+8] + mul r12 + add r9, rax + mov [rcx+16], r9 // c2 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+24] + mul r11 + mov r15, [reg_p1+24] + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, r15 + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+16] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+8] + mul r14 + add r10, rax + mov [rcx+24], r10 // c3 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [reg_p2+24] + 
mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+8] + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+16] + mul r14 + add r8, rax + mov [rcx+32], r8 // c4 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+24] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+16] + mul r15 + add r9, rax + mov [rcx+40], r9 // c5 + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+24] + mul r15 + add r10, rax + mov [rcx+48], r10 // c6 + adc r8, rdx + mov [rcx+56], r8 // c7 + + // rcx[8-15] <- AH*BH + mov r11, [reg_p1+32] + mov rax, [reg_p2+32] + mul r11 + xor r9, r9 + mov [rcx+64], rax // c0 + mov r8, rdx + + mov r14, [reg_p1+48] + mov rax, [reg_p2+40] + mul r11 + xor r10, r10 + add r8, rax + adc r9, rdx + + mov r12, [reg_p1+40] + mov rax, [reg_p2+32] + mul r12 + add r8, rax + mov [rcx+72], r8 // c1 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+48] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p2+32] + mov rax, r14 + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+40] + mul r12 + add r9, rax + mov [rcx+80], r9 // c2 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+56] + mul r11 + mov r15, [reg_p1+56] + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, r15 + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+48] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+40] + mul r14 + add r10, rax + mov [rcx+88], r10 // c3 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [reg_p2+56] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+40] + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+48] + mul r14 + add r8, rax + mov [rcx+96], r8 // c4 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+56] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+48] + mul r15 + add r9, rax + mov [rcx+104], r9 // c5 + adc r10, rdx + adc r8, 0 + + mov 
rax, [reg_p2+56] + mul r15 + add r10, rax + mov [rcx+112], r10 // c6 + adc r8, rdx + mov [rcx+120], r8 // c7 + + // [r8-r15] <- (AH+AL)*(BH+BL) - AL*BL + mov r8, [rsp] + sub r8, [rcx] + mov r9, [rsp+8] + sbb r9, [rcx+8] + mov r10, [rsp+16] + sbb r10, [rcx+16] + mov r11, [rsp+24] + sbb r11, [rcx+24] + mov r12, [rsp+32] + sbb r12, [rcx+32] + mov r13, [rsp+40] + sbb r13, [rcx+40] + mov r14, [rsp+48] + sbb r14, [rcx+48] + mov r15, [rsp+56] + sbb r15, [rcx+56] + + // [r8-r15] <- (AH+AL)*(BH+BL) - AL*BL - AH*BH + mov rax, [rcx+64] + sub r8, rax + mov rax, [rcx+72] + sbb r9, rax + mov rax, [rcx+80] + sbb r10, rax + mov rax, [rcx+88] + sbb r11, rax + mov rax, [rcx+96] + sbb r12, rax + mov rdx, [rcx+104] + sbb r13, rdx + mov rdi, [rcx+112] + sbb r14, rdi + mov rsi, [rcx+120] + sbb r15, rsi + + // Final result + add r8, [rcx+32] + mov [rcx+32], r8 + adc r9, [rcx+40] + mov [rcx+40], r9 + adc r10, [rcx+48] + mov [rcx+48], r10 + adc r11, [rcx+56] + mov [rcx+56], r11 + adc r12, [rcx+64] + mov [rcx+64], r12 + adc r13, [rcx+72] + mov [rcx+72], r13 + adc r14, [rcx+80] + mov [rcx+80], r14 + adc r15, [rcx+88] + mov [rcx+88], r15 + adc rax, 0 + mov [rcx+96], rax + adc rdx, 0 + mov [rcx+104], rdx + adc rdi, 0 + mov [rcx+112], rdi + adc rsi, 0 + mov [rcx+120], rsi + + add rsp, 80 // Restoring space in stack + pop r15 + pop r14 + pop r13 + pop r12 + ret + +#endif + + +#ifdef _MULX_ + +///////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: regs I0 and I1, and memory pointer M1 +// Outputs: regs T0:T5 +// Temps: regs T0:T5 +///////////////////////////////////////////////////////////////// + +#ifdef _ADX_ +.macro MUL128x256_SCHOOL I0, I1, M1, T0, T1, T2, T3, T4, T5 + mulx \T2, \T4, 8\M1 + xor rax, rax + mulx \T3, \T5, 16\M1 + ADD1 \T1, \T4 + ADC1 \T2, \T5 + mulx \T4, \T5, 24\M1 + ADC1 \T3, \T5 + ADC1 \T4, rax + + xor rax, rax + mov rdx, \I1 + mulx \I1, \T5, \M1 + ADD2 \T1, \T5 // T1 <- C1_final + ADC2 \T2, \I1 + mulx \T5, 
\I1, 8\M1 + ADC2 \T3, \T5 + ADD1 \T2, \I1 + mulx \T5, \I1, 16\M1 + ADC2 \T4, \T5 + ADC1 \T3, \I1 + mulx \T5, \I1, 24\M1 + ADC2 \T5, rax + ADC1 \T4, \I1 + ADC1 \T5, rax +.endm + +#else + +.macro MUL128x256_SCHOOL I0, I1, M1, T0, T1, T2, T3, T4, T5 + mulx \T2, \T4, 8\M1 + mulx \T3, \T5, 16\M1 + add \T1, \T4 + adc \T2, \T5 + mulx \T4, \T5, 24\M1 + adc \T3, \T5 + adc \T4, 0 + + mov rdx, \I1 + mulx \I1, \T5, \M1 + add \T1, \T5 // T1 <- C1_final + adc \T2, \I1 + mulx \T5, \I1, 8\M1 + adc \T3, \T5 + mulx \T5, rax, 16\M1 + adc \T4, \T5 + mulx \T5, rdx, 24\M1 + adc \T5, 0 + add \T2, \I1 + adc \T3, rax + adc \T4, rdx + adc \T5, 0 +.endm +#endif + + +//************************************************************************************** +// Montgomery reduction +// Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015 +// Operation: c [reg_p2] = a [reg_p1] +//************************************************************************************** +.global fmt(rdc503_asm) +fmt(rdc503_asm): + + // a[0-1] x 64xp503p1_nz --> result: r8:r13 + mov rdx, [reg_p1] + mov rcx, [reg_p1+8] + mulx r9, r8, [rip+fmt(p503p1x64)] // result r8 + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + MUL128x256_SCHOOL rdx, rcx, [rip+fmt(p503p1x64)], r8, r9, r10, r11, r12, r13 + + xor r15, r15 + shrd r15, r8, 6 + shrd r8, r9, 6 + shrd r9, r10, 6 + shrd r10, r11, 6 + shrd r11, r12, 6 + shrd r12, r13, 6 + shr r13, 6 + mov rdx, [reg_p1+16] + mov r14, [reg_p1+80] + add r15, [reg_p1+24] + adc r8, [reg_p1+32] + adc r9, [reg_p1+40] + adc r10, [reg_p1+48] + adc r11, [reg_p1+56] + adc r12, [reg_p1+64] + adc r13, [reg_p1+72] + mulx rbx, rcx, [rip+fmt(p503p1x64)] // result rcx + adc r14, 0 + mov [reg_p2], r8 + mov [reg_p2+8], r9 + mov [reg_p2+16], r10 + mov [reg_p2+24], r11 + mov [reg_p2+32], r12 + mov [reg_p2+40], r13 + mov [reg_p2+48], r14 + mov r9, [reg_p1+88] + mov r10, [reg_p1+96] + mov r11, [reg_p1+104] + mov r12, [reg_p1+112] + mov rdi, [reg_p1+120] + adc r9, 0 
+ adc r10, 0 + adc r11, 0 + adc r12, 0 + adc rdi, 0 + + // a[2-3] x 64xp503p1_nz --> result: rcx, rbx, rbp, r14, r8, r13 + MUL128x256_SCHOOL rdx, r15, [rip+fmt(p503p1x64)], rcx, rbx, rbp, r14, r8, r13 + + xor r15, r15 + shrd r15, rcx, 6 + shrd rcx, rbx, 6 + shrd rbx, rbp, 6 + shrd rbp, r14, 6 + shrd r14, r8, 6 + shrd r8, r13, 6 + shr r13, 6 + mov rdx, [reg_p2] + add r15, [reg_p2+8] + adc rcx, [reg_p2+16] + adc rbx, [reg_p2+24] + adc rbp, [reg_p2+32] + adc r14, [reg_p2+40] + adc r8, [reg_p2+48] + mov [reg_p2+16], rcx + mov [reg_p2+24], rbx + mov [reg_p2+32], rbp + mov [reg_p2+40], r14 + mov [reg_p2+48], r8 + mulx rbp, rbx, [rip+fmt(p503p1x64)] // result rbx + adc r9, r13 + adc r10, 0 + adc r11, 0 + adc r12, 0 + adc rdi, 0 + + // a[4-5] x 64xp503p1_nz --> result: rbx, rbp, r14, r8, r13, rcx + MUL128x256_SCHOOL rdx, r15, [rip+fmt(p503p1x64)], rbx, rbp, r14, r8, r13, rcx + + xor r15, r15 + shrd r15, rbx, 6 + shrd rbx, rbp, 6 + shrd rbp, r14, 6 + shrd r14, r8, 6 + shrd r8, r13, 6 + shrd r13, rcx, 6 + shr rcx, 6 + mov rdx, [reg_p2+16] + add r15, [reg_p2+24] + adc rbx, [reg_p2+32] + adc rbp, [reg_p2+40] + adc r14, [reg_p2+48] + mov [reg_p2], rbx // Final result c0 + mov [reg_p2+8], rbp + mov [reg_p2+16], r14 + adc r9, r8 + adc r10, r13 + mulx r14, rbp, [rip+fmt(p503p1x64)] // result rbp + adc r11, rcx + adc r12, 0 + adc rdi, 0 + + // a[6-7] x 64xp503p1_nz --> result: rbp, r14, r8, r13, rcx, rbx + MUL128x256_SCHOOL rdx, r15, [rip+fmt(p503p1x64)], rbp, r14, r8, r13, rcx, rbx + + xor r15, r15 + shrd r15, rbp, 6 + shrd rbp, r14, 6 + shrd r14, r8, 6 + shrd r8, r13, 6 + shrd r13, rcx, 6 + shrd rcx, rbx, 6 + shr rbx, 6 + add r15, [reg_p2+8] + adc rbp, [reg_p2+16] + mov [reg_p2+8], r15 // Final result c1-c7 + mov [reg_p2+16], rbp + adc r9, r14 + adc r10, r8 + adc r11, r13 + adc r12, rcx + adc rdi, rbx + mov [reg_p2+24], r9 + mov [reg_p2+32], r10 + mov [reg_p2+40], r11 + mov [reg_p2+48], r12 + mov [reg_p2+56], rdi + + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + ret 
+ + #else + +//*********************************************************************** +// Montgomery reduction +// Based on comba method +// Operation: c [reg_p2] = a [reg_p1] +// NOTE: a=c is not allowed +//*********************************************************************** +.global fmt(rdc503_asm) +fmt(rdc503_asm): + push r12 + push r13 + push r14 + push r15 + + mov r11, [reg_p1] + mov rax, [rip+fmt(p503p1)+24] + mul r11 + xor r8, r8 + add rax, [reg_p1+24] + mov [reg_p2+24], rax // z3 + adc r8, rdx + + xor r9, r9 + mov rax, [rip+fmt(p503p1)+32] + mul r11 + xor r10, r10 + add r8, rax + adc r9, rdx + + mov r12, [reg_p1+8] + mov rax, [rip+fmt(p503p1)+24] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+32] + mov [reg_p2+32], r8 // z4 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + mov rax, [rip+fmt(p503p1)+40] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p503p1)+32] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p1+16] + mov rax, [rip+fmt(p503p1)+24] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+40] + mov [reg_p2+40], r9 // z5 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + mov rax, [rip+fmt(p503p1)+48] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p503p1)+40] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p503p1)+32] + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r14, [reg_p2+24] + mov rax, [rip+fmt(p503p1)+24] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+48] + mov [reg_p2+48], r10 // z6 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + mov rax, [rip+fmt(p503p1)+56] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p503p1)+48] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p503p1)+40] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p503p1)+32] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r15, [reg_p2+32] + mov 
rax, [rip+fmt(p503p1)+24] + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+56] + mov [reg_p2+56], r8 // z7 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + mov rax, [rip+fmt(p503p1)+56] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p503p1)+48] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p503p1)+40] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p503p1)+32] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rcx, [reg_p2+40] + mov rax, [rip+fmt(p503p1)+24] + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+64] + mov [reg_p2], r9 // z0 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + mov rax, [rip+fmt(p503p1)+56] + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p503p1)+48] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p503p1)+40] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p503p1)+32] + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r13, [reg_p2+48] + mov rax, [rip+fmt(p503p1)+24] + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+72] + mov [reg_p2+8], r10 // z1 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + mov rax, [rip+fmt(p503p1)+56] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p503p1)+48] + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p503p1)+40] + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p503p1)+32] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r14, [reg_p2+56] + mov rax, [rip+fmt(p503p1)+24] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+80] + mov [reg_p2+16], r8 // z2 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + mov rax, [rip+fmt(p503p1)+56] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p503p1)+48] + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p503p1)+40] + mul r13 + add r9, rax 
+ adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p503p1)+32] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+88] + mov [reg_p2+24], r9 // z3 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + mov rax, [rip+fmt(p503p1)+56] + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p503p1)+48] + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p503p1)+40] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+96] + mov [reg_p2+32], r10 // z4 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + mov rax, [rip+fmt(p503p1)+56] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p503p1)+48] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+104] // z5 + mov [reg_p2+40], r8 // z5 + adc r9, 0 + adc r10, 0 + + mov rax, [rip+fmt(p503p1)+56] + mul r14 + add r9, rax + adc r10, rdx + add r9, [reg_p1+112] // z6 + mov [reg_p2+48], r9 // z6 + adc r10, 0 + add r10, [reg_p1+120] // z7 + mov [reg_p2+56], r10 // z7 + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + #endif + + +//*********************************************************************** +// 503-bit multiprecision addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.global fmt(mp_add503_asm) +fmt(mp_add503_asm): + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + + mov r8, [reg_p1+32] + mov r9, [reg_p1+40] + mov r10, [reg_p1+48] + mov r11, [reg_p1+56] + adc r8, [reg_p2+32] + adc r9, [reg_p2+40] + adc r10, [reg_p2+48] + adc r11, [reg_p2+56] + mov [reg_p3+32], r8 + mov [reg_p3+40], r9 + mov [reg_p3+48], r10 + mov [reg_p3+56], r11 + ret + + +//*********************************************************************** +// 2x503-bit multiprecision 
subtraction/addition +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. If c < 0, add p503*2^512 +//*********************************************************************** +.global fmt(mp_subadd503x2_asm) +fmt(mp_subadd503x2_asm): + push r12 + push r13 + push r14 + push r15 + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + + mov r8, [reg_p1+32] + mov r9, [reg_p1+40] + mov r10, [reg_p1+48] + mov r11, [reg_p1+56] + sbb r8, [reg_p2+32] + sbb r9, [reg_p2+40] + sbb r10, [reg_p2+48] + sbb r11, [reg_p2+56] + mov [reg_p3+32], r8 + mov [reg_p3+40], r9 + mov [reg_p3+48], r10 + mov [reg_p3+56], r11 + + mov r8, [reg_p1+64] + mov r9, [reg_p1+72] + mov r10, [reg_p1+80] + mov r11, [reg_p1+88] + sbb r8, [reg_p2+64] + sbb r9, [reg_p2+72] + sbb r10, [reg_p2+80] + sbb r11, [reg_p2+88] + mov [reg_p3+64], r8 + mov [reg_p3+72], r9 + mov [reg_p3+80], r10 + mov [reg_p3+88], r11 + + mov r12, [reg_p1+96] + mov r13, [reg_p1+104] + mov r14, [reg_p1+112] + mov r15, [reg_p1+120] + sbb r12, [reg_p2+96] + sbb r13, [reg_p2+104] + sbb r14, [reg_p2+112] + sbb r15, [reg_p2+120] + sbb rax, 0 + + // Add p503 anded with the mask in rax + mov r8, [rip+fmt(p503)] + mov r9, [rip+fmt(p503)+24] + mov r10, [rip+fmt(p503)+32] + mov r11, [rip+fmt(p503)+40] + mov rdi, [rip+fmt(p503)+48] + mov rsi, [rip+fmt(p503)+56] + and r8, rax + and r9, rax + and r10, rax + and r11, rax + and rdi, rax + and rsi, rax + mov rax, [reg_p3+64] + add rax, r8 + mov [reg_p3+64], rax + mov rax, [reg_p3+72] + adc rax, r8 + mov [reg_p3+72], rax + mov rax, [reg_p3+80] + adc rax, r8 + mov [reg_p3+80], rax + mov rax, [reg_p3+88] + adc rax, r9 + mov [reg_p3+88], rax + adc r12, r10 + adc r13, r11 + adc r14, rdi + adc r15, rsi + + mov [reg_p3+96], r12 + mov [reg_p3+104], r13 + mov [reg_p3+112], r14 + mov [reg_p3+120], r15 + pop r15
+ pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Double 2x503-bit multiprecision subtraction +// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2] +//*********************************************************************** +.global fmt(mp_dblsub503x2_asm) +fmt(mp_dblsub503x2_asm): + push r12 + push r13 + + mov r8, [reg_p3] + mov r9, [reg_p3+8] + mov r10, [reg_p3+16] + mov r11, [reg_p3+24] + mov r12, [reg_p3+32] + mov r13, [reg_p3+40] + sub r8, [reg_p1] + sbb r9, [reg_p1+8] + sbb r10, [reg_p1+16] + sbb r11, [reg_p1+24] + sbb r12, [reg_p1+32] + sbb r13, [reg_p1+40] + setc al + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + setc cl + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + + mov r8, [reg_p3+48] + mov r9, [reg_p3+56] + mov r10, [reg_p3+64] + mov r11, [reg_p3+72] + mov r12, [reg_p3+80] + mov r13, [reg_p3+88] + bt rax, 0 + sbb r8, [reg_p1+48] + sbb r9, [reg_p1+56] + sbb r10, [reg_p1+64] + sbb r11, [reg_p1+72] + sbb r12, [reg_p1+80] + sbb r13, [reg_p1+88] + setc al + bt rcx, 0 + sbb r8, [reg_p2+48] + sbb r9, [reg_p2+56] + sbb r10, [reg_p2+64] + sbb r11, [reg_p2+72] + sbb r12, [reg_p2+80] + sbb r13, [reg_p2+88] + setc cl + mov [reg_p3+48], r8 + mov [reg_p3+56], r9 + mov [reg_p3+64], r10 + mov [reg_p3+72], r11 + mov [reg_p3+80], r12 + mov [reg_p3+88], r13 + + mov r8, [reg_p3+96] + mov r9, [reg_p3+104] + mov r10, [reg_p3+112] + mov r11, [reg_p3+120] + bt rax, 0 + sbb r8, [reg_p1+96] + sbb r9, [reg_p1+104] + sbb r10, [reg_p1+112] + sbb r11, [reg_p1+120] + bt rcx, 0 + sbb r8, [reg_p2+96] + sbb r9, [reg_p2+104] + sbb r10, [reg_p2+112] + sbb r11, [reg_p2+120] + mov [reg_p3+96], r8 + mov [reg_p3+104], r9 + mov [reg_p3+112], r10 + mov [reg_p3+120], r11 + + pop r13 + pop r12 + ret \ No newline at end of file diff --git 
a/SIKE_sw/src/P503/P503.c b/SIKE_sw/src/P503/P503.c new file mode 100644 index 0000000..dd30b4b --- /dev/null +++ b/SIKE_sw/src/P503/P503.c @@ -0,0 +1,138 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: supersingular isogeny parameters and generation of functions for P503 +*********************************************************************************************/ + +#include "P503_api.h" +#include "P503_internal.h" +#include "../internal.h" + + +// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points: +// -------------------------------------------------------------------------------------------------- +// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). +// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position. +// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. +// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32. +// For example, a 503-bit field element is represented with Ceil(503 / 64) = 8 64-bit digits or Ceil(503 / 32) = 16 32-bit digits. + +// +// Curve isogeny system "SIDHp503". 
Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p503^2), where A=6, B=1, C=1 and p503 = 2^250*3^159-1 +// + +const uint64_t p503[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xABFFFFFFFFFFFFFF, + 0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E }; +const uint64_t p503p1[NWORDS64_FIELD] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xAC00000000000000, + 0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E }; +const uint64_t p503x2[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x57FFFFFFFFFFFFFF, + 0x2610B7B44423CF41, 0x3737ED90F6FCFB5E, 0xC08B8D7BB4EF49A0, 0x0080CDEA83023C3C }; +const uint64_t p503x4[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xAFFFFFFFFFFFFFFF, + 0x4C216F6888479E82, 0x6E6FDB21EDF9F6BC, 0x81171AF769DE9340, 0x01019BD506047879 }; +const uint64_t p503p1x64[NWORDS64_FIELD/2] = { 0xC216F6888479E82B, 0xE6FDB21EDF9F6BC4, 0x1171AF769DE93406, 0x1019BD5060478798 }; +const uint64_t p503x16p[2*NWORDS64_FIELD] = { 0x0000000000000010, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, + 0x9EF484BBBDC30BEA, 0x8C8126F090304A1D, 0xF7472844B10B65FC, 0x30F32157CFDC3C33, + 0x1463AB4329A333F7, 0xDFC933977C47D3A4, 0x338A3767F6F2520B, 0x4F8CB7565CCC13FA, + 0xDE43B73AACD2189B, 0xBCF845CAC5405FBD, 0x516D02A09E684B7A, 0x0001033A4091BB86 }; +// Order of Alice's subgroup +const uint64_t Alice_order[NWORDS64_ORDER] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0400000000000000 }; +// Order of Bob's subgroup +const uint64_t Bob_order[NWORDS64_ORDER] = { 0xC216F6888479E82B, 0xE6FDB21EDF9F6BC4, 0x1171AF769DE93406, 0x1019BD5060478798 }; +// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i} in GF(p503^2), expressed in Montgomery representation +const uint64_t A_gen[6*NWORDS64_FIELD] = { 0x5D083011589AD893, 0xADFD8D2CB67D0637, 
0x330C9AC34FFB6361, 0xF0D47489A2E805A2, + 0x27E2789259C6B8DC, 0x63866A2C121931B9, 0x8D4C65A7137DCF44, 0x003A183AE5967B3F, // XPA0 + 0x7E3541B8C96D1519, 0xD3ADAEEC0D61A26C, 0xC0A2219CE7703DD9, 0xFF3E46658FCDBC52, + 0xD5B38DEAE6E196FF, 0x1AAC826364956D58, 0xEC9F4875B9A5F27A, 0x001B0B475AB99843, // XPA1 + 0x4D83695107D03BAD, 0x221F3299005E2FCF, 0x78E6AE22F30DECF2, 0x6D982DB5111253E4, + 0x504C80A8AB4526A8, 0xEFD0C3AA210BB024, 0xCB77483501DC6FCF, 0x001052544A96BDF3, // XQA0 + 0x0D74FE3402BCAE47, 0xDF5B8CDA832D8AED, 0xB86BCF06E4BD837E, 0x892A2933A0FA1F63, + 0x9F88FC67B6CCB461, 0x822926EA9DDA3AC8, 0xEAC8DDE5855425ED, 0x000618FE6DA37A80, // XQA1 + 0x1D9D32D2DC877C17, 0x5517CD8F71D5B02B, 0x395AFB8F6B60C117, 0x3AE31AC85F9098C8, + 0x5F5341C198450848, 0xF8C609DBEA435C6A, 0xD832BC7EDC7BA5E4, 0x002AD98AA6968BF5, // XRA0 + 0xC466CAB0F73C2E5B, 0x7B1817148FB2CF9C, 0x873E87C099E470A0, 0xBB17AC6D17A7BAC1, + 0xA146FDCD0F2E2A58, 0x88B311E9CEAB6201, 0x37604CF5C7951757, 0x0006804071C74BF9 }; // XRA1 +// Bob's generator values {XPB0, XQB0, XRB0 + XRB1*i} in GF(p503^2), expressed in Montgomery representation +const uint64_t B_gen[6*NWORDS64_FIELD] = { 0xDF630FC5FB2468DB, 0xC30C5541C102040E, 0x3CDC9987B76511FC, 0xF54B5A09353D0CDD, + 0x3ADBA8E00703C42F, 0x8253F9303DDC95D0, 0x62D30778763ABFD7, 0x001CD00FB581CD55, // XPB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XPB1 + 0x2E3457A12B429261, 0x311F94E89627DCF8, 0x5B71C98FD1DB73F6, 0x3671DB7DCFC21541, + 0xB6D1484C9FE0CF4F, 0x19CD110717356E35, 0xF4F9FB00AC9919DF, 0x0035BC124D38A70B, // XQB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XQB1 + 0x2E08BB99413D2952, 0xD3021467CD088D72, 0x21017AF859752245, 0x26314ED8FFD9DE5C, + 0x4AF43C73344B6686, 0xCFA1F91149DF0993, 0xF327A95365587A89, 0x000DBF54E03D3906, // 
XRB0 + 0x03E03FF342F5F304, 0x993D604D7B4B6E56, 0x80412F4D9280E71F, 0x0FFDC9EF990B3982, + 0xE584E64C51604931, 0x1374F42AC8B0BBD7, 0x07D5BC37DFA41A5F, 0x00396CCFD61FD34C }; // XRB1 +// Montgomery constant Montgomery_R2 = (2^512)^2 mod p503 +const uint64_t Montgomery_R2[NWORDS64_FIELD] = { 0x5289A0CF641D011F, 0x9B88257189FED2B9, 0xA3B365D58DC8F17A, 0x5BC57AB6EFF168EC, + 0x9E51998BD84D4423, 0xBF8999CBAC3B5695, 0x46E9127BCE14CDB6, 0x003F6CFCE8B81771 }; +// Value one in Montgomery representation +const uint64_t Montgomery_one[NWORDS64_FIELD] = { 0x00000000000003F9, 0x0000000000000000, 0x0000000000000000, 0xB400000000000000, + 0x63CB1A6EA6DED2B4, 0x51689D8D667EB37D, 0x8ACD77C71AB24142, 0x0026FBAEC60F5953 }; + + +// Fixed parameters for isogeny tree computation +const unsigned int strat_Alice[MAX_Alice-1] = { +61, 32, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, +4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, +1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 29, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, +1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 13, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, +1, 1, 2, 1, 1, 5, 4, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1 }; + +const unsigned int strat_Bob[MAX_Bob-1] = { +71, 38, 21, 13, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 5, 4, 2, 1, 1, 2, 1, +1, 2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 17, 9, +5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, +1, 4, 2, 1, 1, 2, 1, 1, 33, 17, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, +2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, 2, +1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 }; + +// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions +#define fpcopy fpcopy503 +#define fpzero fpzero503 +#define fpadd fpadd503 +#define fpsub fpsub503 +#define fpneg fpneg503 +#define fpdiv2 fpdiv2_503 +#define 
fpcorrection fpcorrection503 +#define fpmul_mont fpmul503_mont +#define fpsqr_mont fpsqr503_mont +#define fpinv_mont fpinv503_mont +#define fpinv_chain_mont fpinv503_chain_mont +#define fp2copy fp2copy503 +#define fp2zero fp2zero503 +#define fp2add fp2add503 +#define fp2sub fp2sub503 +#define mp_sub_p2 mp_sub503_p2 +#define mp_sub_p4 mp_sub503_p4 +#define sub_p4 mp_sub_p4 +#define fp2neg fp2neg503 +#define fp2div2 fp2div2_503 +#define fp2correction fp2correction503 +#define fp2mul_mont fp2mul503_mont +#define fp2sqr_mont fp2sqr503_mont +#define fp2inv_mont fp2inv503_mont +#define fp2inv_mont_ct fp2inv503_mont_ct +#define fp2inv_mont_bingcd fp2inv503_mont_bingcd +#define mp_add_asm mp_add503_asm +#define mp_subaddx2_asm mp_subadd503x2_asm +#define mp_dblsubx2_asm mp_dblsub503x2_asm +#define crypto_kem_keypair crypto_kem_keypair_SIKEp503 +#define crypto_kem_enc crypto_kem_enc_SIKEp503 +#define crypto_kem_dec crypto_kem_dec_SIKEp503 +#define random_mod_order_A random_mod_order_A_SIDHp503 +#define random_mod_order_B random_mod_order_B_SIDHp503 +#define EphemeralKeyGeneration_A EphemeralKeyGeneration_A_SIDHp503 +#define EphemeralKeyGeneration_B EphemeralKeyGeneration_B_SIDHp503 +#define EphemeralSecretAgreement_A EphemeralSecretAgreement_A_SIDHp503 +#define EphemeralSecretAgreement_B EphemeralSecretAgreement_B_SIDHp503 + +#include "../fpx.c" +#include "../ec_isogeny.c" +#include "../sidh.c" +#include "../sike.c" \ No newline at end of file diff --git a/SIKE_sw/src/P503/P503_api.h b/SIKE_sw/src/P503/P503_api.h new file mode 100644 index 0000000..a4bc296 --- /dev/null +++ b/SIKE_sw/src/P503/P503_api.h @@ -0,0 +1,112 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: API header file for P503 
+*********************************************************************************************/ + +#ifndef P503_API_H +#define P503_API_H + + +/*********************** Key encapsulation mechanism API ***********************/ + +#define CRYPTO_SECRETKEYBYTES 434 // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes +#define CRYPTO_PUBLICKEYBYTES 378 +#define CRYPTO_BYTES 24 +#define CRYPTO_CIPHERTEXTBYTES 402 // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes + +// Algorithm name +#define CRYPTO_ALGNAME "SIKEp503" + +// SIKE's key generation +// It produces a private key sk and computes the public key pk. +// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 434 bytes) +// public key pk (CRYPTO_PUBLICKEYBYTES = 378 bytes) +int crypto_kem_keypair_SIKEp503(unsigned char *pk, unsigned char *sk); + +// SIKE's encapsulation +// Input: public key pk (CRYPTO_PUBLICKEYBYTES = 378 bytes) +// Outputs: shared secret ss (CRYPTO_BYTES = 24 bytes) +// ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 402 bytes) +int crypto_kem_enc_SIKEp503(unsigned char *ct, unsigned char *ss, const unsigned char *pk); + +// SIKE's decapsulation +// Input: secret key sk (CRYPTO_SECRETKEYBYTES = 434 bytes) +// ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 402 bytes) +// Outputs: shared secret ss (CRYPTO_BYTES = 24 bytes) +int crypto_kem_dec_SIKEp503(unsigned char *ss, const unsigned char *ct, const unsigned char *sk); + + +// Encoding of keys for KEM-based isogeny system "SIKEp503" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p503) are encoded in 63 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p503^2), where a and b are defined over GF(p503), are encoded as {a, b}, with a in the lowest memory portion. 
+// +// Private keys sk consist of the concatenation of a 24-byte random value, a value in the range [0, 2^Floor(Log(2,3^159))-1] and the public key pk. In the SIKE API, +// private keys are encoded in 434 octets in little endian format. +// Public keys pk consist of 3 elements in GF(p503^2). In the SIKE API, pk is encoded in 378 octets. +// Ciphertexts ct consist of the concatenation of a public key value and a 24-byte value. In the SIKE API, ct is encoded in 378 + 24 = 402 octets. +// Shared keys ss consist of a value of 24 octets. + + +/*********************** Ephemeral key exchange API ***********************/ + +#define SIDH_SECRETKEYBYTES_A 32 +#define SIDH_SECRETKEYBYTES_B 32 +#define SIDH_PUBLICKEYBYTES 378 +#define SIDH_BYTES 126 + +// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys. +// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016. +// Extended version available at: http://eprint.iacr.org/2016/859 + +// Generation of Alice's secret key +// Outputs random value in [0, 2^250 - 1] to be used as Alice's private key +void random_mod_order_A_SIDHp503(unsigned char* random_digits); + +// Generation of Bob's secret key +// Outputs random value in [0, 2^Floor(Log(2,3^159)) - 1] to be used as Bob's private key +void random_mod_order_B_SIDHp503(unsigned char* random_digits); + +// Alice's ephemeral public key generation +// Input: a private key PrivateKeyA in the range [0, 2^250 - 1], stored in 32 bytes. +// Output: the public key PublicKeyA consisting of 3 GF(p503^2) elements encoded in 378 bytes. +int EphemeralKeyGeneration_A_SIDHp503(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA); + +// Bob's ephemeral public key generation +// It computes the public key PublicKeyB from the input private key PrivateKeyB. +// The private key is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes.
+// The public key consists of 3 GF(p503^2) elements encoded in 378 bytes. +int EphemeralKeyGeneration_B_SIDHp503(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB); + +// Alice's ephemeral shared secret computation +// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB +// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^250 - 1], stored in 32 bytes. +// Bob's PublicKeyB consists of 3 GF(p503^2) elements encoded in 378 bytes. +// Output: a shared secret SharedSecretA that consists of one element in GF(p503^2) encoded in 126 bytes. +int EphemeralSecretAgreement_A_SIDHp503(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA); + +// Bob's ephemeral shared secret computation +// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA +// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. +// Alice's PublicKeyA consists of 3 GF(p503^2) elements encoded in 378 bytes. +// Output: a shared secret SharedSecretB that consists of one element in GF(p503^2) encoded in 126 bytes. +int EphemeralSecretAgreement_B_SIDHp503(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB); + + +// Encoding of keys for KEX-based isogeny system "SIDHp503" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p503) are encoded in 63 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p503^2), where a and b are defined over GF(p503), are encoded as {a, b}, with a in the lowest memory portion. +// +// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^250-1] and [0, 2^Floor(Log(2,3^159)) - 1], resp. 
In the SIDH API, +// Alice's and Bob's private keys are encoded in 32 octets in little endian format. +// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p503^2). In the SIDH API, they are encoded in 378 octets. +// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p503^2). In the SIDH API, they are encoded in 126 octets. + + +#endif \ No newline at end of file diff --git a/SIKE_sw/src/P503/P503_internal.h b/SIKE_sw/src/P503/P503_internal.h new file mode 100644 index 0000000..5a42c0f --- /dev/null +++ b/SIKE_sw/src/P503/P503_internal.h @@ -0,0 +1,175 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: internal header file for P503 +*********************************************************************************************/ + +#ifndef P503_INTERNAL_H +#define P503_INTERNAL_H + +#include "../config.h" + + +#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_ARM64) + #define NWORDS_FIELD 8 // Number of words of a 503-bit field element + #define p503_ZERO_WORDS 3 // Number of "0" digits in the least significant part of p503 + 1 +#elif (TARGET == TARGET_x86) + #define NWORDS_FIELD 16 + #define p503_ZERO_WORDS 7 +#endif + + +// Basic constants + +#define NBITS_FIELD 503 +#define MAXBITS_FIELD 512 +#define MAXWORDS_FIELD ((MAXBITS_FIELD+RADIX-1)/RADIX) // Max. number of words to represent field elements +#define NWORDS64_FIELD ((NBITS_FIELD+63)/64) // Number of 64-bit words of a 503-bit field element +#define NBITS_ORDER 256 +#define NWORDS_ORDER ((NBITS_ORDER+RADIX-1)/RADIX) // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp. 
+#define NWORDS64_ORDER ((NBITS_ORDER+63)/64) // Number of 64-bit words of a 256-bit element +#define MAXBITS_ORDER NBITS_ORDER +#define ALICE 0 +#define BOB 1 +#define OALICE_BITS 250 +#define OBOB_BITS 253 +#define OBOB_EXPON 159 +#define MASK_ALICE 0x03 +#define MASK_BOB 0x0F +#define PRIME p503 +#define PARAM_A 6 +#define PARAM_C 1 +// Fixed parameters for isogeny tree computation +#define MAX_INT_POINTS_ALICE 7 +#define MAX_INT_POINTS_BOB 8 +#define MAX_Alice 125 +#define MAX_Bob 159 +#define MSG_BYTES 24 +#define SECRETKEY_A_BYTES ((OALICE_BITS + 7) / 8) +#define SECRETKEY_B_BYTES ((OBOB_BITS - 1 + 7) / 8) +#define FP2_ENCODED_BYTES 2*((NBITS_FIELD + 7) / 8) + + +// SIDH's basic element definitions and point representations + +typedef digit_t felm_t[NWORDS_FIELD]; // Datatype for representing 503-bit field elements (512-bit max.) +typedef digit_t dfelm_t[2*NWORDS_FIELD]; // Datatype for representing double-precision 2x503-bit field elements (2x512-bit max.) +typedef felm_t f2elm_t[2]; // Datatype for representing quadratic extension field elements GF(p503^2) + +typedef struct { f2elm_t X; f2elm_t Z; } point_proj; // Point representation in projective XZ Montgomery coordinates. +typedef point_proj point_proj_t[1]; + +#ifdef COMPRESS + typedef struct { f2elm_t X; f2elm_t Y; f2elm_t Z; } point_full_proj; // Point representation in full projective XYZ Montgomery coordinates + typedef point_full_proj point_full_proj_t[1]; + + typedef struct { f2elm_t x; f2elm_t y; } point_affine; // Point representation in affine coordinates. 
+ typedef point_affine point_t[1]; + + typedef f2elm_t publickey_t[3]; +#endif + + + +/**************** Function prototypes ****************/ +/************* Multiprecision functions **************/ + +// 503-bit multiprecision addition, c = a+b +void mp_add503(const digit_t* a, const digit_t* b, digit_t* c); +void mp_add503_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// 503-bit multiprecision subtraction, c = a-b+2p or c = a-b+4p +extern void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c); +extern void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c); +void mp_sub503_p2_asm(const digit_t* a, const digit_t* b, digit_t* c); +void mp_sub503_p4_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// 2x503-bit multiprecision subtraction followed by addition with p503*2^512, c = a-b+(p503*2^512) if a-b < 0, otherwise c=a-b +void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c); +void mp_subadd503x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Double 2x503-bit multiprecision subtraction, c = c-a-b, where c > a and c > b +void mp_dblsub503x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +/************ Field arithmetic functions *************/ + +// Copy of a field element, c = a +void fpcopy503(const digit_t* a, digit_t* c); + +// Zeroing a field element, a = 0 +void fpzero503(digit_t* a); + +// Non constant-time comparison of two field elements. 
If a = b return TRUE, otherwise, return FALSE +bool fpequal503_non_constant_time(const digit_t* a, const digit_t* b); + +// Modular addition, c = a+b mod p503 +extern void fpadd503(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpadd503_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular subtraction, c = a-b mod p503 +extern void fpsub503(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpsub503_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular negation, a = -a mod p503 +extern void fpneg503(digit_t* a); + +// Modular division by two, c = a/2 mod p503. +void fpdiv2_503(const digit_t* a, digit_t* c); + +// Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1]. +void fpcorrection503(digit_t* a); + +// 503-bit Montgomery reduction, mc = ma*R^-1 mod p503, where R=2^512 +void rdc503_asm(digit_t* ma, digit_t* mc); + +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^512 +void fpmul503_mont(const digit_t* a, const digit_t* b, digit_t* c); +void mul503_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Field squaring using Montgomery arithmetic, mc = ma^2*R^-1 mod p503, where R=2^512 +void fpsqr503_mont(const digit_t* ma, digit_t* mc); + +// Field inversion, a = a^-1 in GF(p503) +void fpinv503_mont(digit_t* a); + +// Chain to compute (p503-3)/4 using Montgomery arithmetic +void fpinv503_chain_mont(digit_t* a); + +/************ GF(p^2) arithmetic functions *************/ + +// Copy of a GF(p503^2) element, c = a +void fp2copy503(const f2elm_t a, f2elm_t c); + +// Zeroing a GF(p503^2) element, a = 0 +void fp2zero503(f2elm_t a); + +// GF(p503^2) negation, a = -a in GF(p503^2) +void fp2neg503(f2elm_t a); + +// GF(p503^2) addition, c = a+b in GF(p503^2) +extern void fp2add503(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p503^2) subtraction, c = a-b in GF(p503^2) +extern void fp2sub503(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p503^2) division by two, c =
a/2 in GF(p503^2) +void fp2div2_503(const f2elm_t a, f2elm_t c); + +// Modular correction, a = a in GF(p503^2) +void fp2correction503(f2elm_t a); + +// GF(p503^2) squaring using Montgomery arithmetic, c = a^2 in GF(p503^2) +void fp2sqr503_mont(const f2elm_t a, f2elm_t c); + +// GF(p503^2) multiplication using Montgomery arithmetic, c = a*b in GF(p503^2) +void fp2mul503_mont(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p503^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) +void fp2inv503_mont(f2elm_t a); + + +#endif diff --git a/SIKE_sw/src/P503/generic/fp_generic.c b/SIKE_sw/src/P503/generic/fp_generic.c new file mode 100644 index 0000000..1215ed7 --- /dev/null +++ b/SIKE_sw/src/P503/generic/fp_generic.c @@ -0,0 +1,259 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: portable modular arithmetic for P503 +*********************************************************************************************/ + +#include "../P503_internal.h" +#include "../../internal.h" + +// Global constants +extern const uint64_t p503[NWORDS64_FIELD]; +extern const uint64_t p503p1[NWORDS64_FIELD]; +extern const uint64_t p503x2[NWORDS64_FIELD]; +extern const uint64_t p503x4[NWORDS64_FIELD]; + + +__inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. 
+ unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p503x2)[i], borrow, c[i]); + } +} + + +__inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p503x4)[i], borrow, c[i]); + } +} + + +__inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p503. + // Inputs: a, b in [0, 2*p503-1] + // Output: c in [0, 2*p503-1] + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p503x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p503x2)[i] & mask, carry, c[i]); + } +} + + +__inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p503. + // Inputs: a, b in [0, 2*p503-1] + // Output: c in [0, 2*p503-1] + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p503x2)[i] & mask, borrow, c[i]); + } +} + + +__inline void fpneg503(digit_t* a) +{ // Modular negation, a = -a mod p503. 
+ // Input/output: a in [0, 2*p503-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p503x2)[i], a[i], borrow, a[i]); + } +} + + +void fpdiv2_503(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p503. + // Input : a in [0, 2*p503-1] + // Output: c in [0, 2*p503-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p503 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p503)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection503(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p503)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p503)[i] & mask, borrow, a[i]); + } +} + + +void digit_x_digit(const digit_t a, const digit_t b, digit_t* c) +{ // Digit multiplication, digit * digit -> 2-digit result + register digit_t al, ah, bl, bh, temp; + digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4); + + al = a & mask_low; // Low part + ah = a >> (sizeof(digit_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(digit_t) * 4); + + albl = al*bl; + albh = al*bh; + ahbl = ah*bl; + ahbh = ah*bh; + c[0] = albl & mask_low; // C00 + + res1 = albl >> (sizeof(digit_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(digit_t) * 4); + c[0] ^= temp << (sizeof(digit_t) * 4); // C01 + + res1 = ahbl >> (sizeof(digit_t) * 4); + res2 = albh >> (sizeof(digit_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + c[1] = temp & mask_low; // C10 + carry = temp 
& mask_high; + c[1] ^= (ahbh & mask_high) + carry; // C11 +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. + unsigned int i, j; + digit_t t = 0, u = 0, v = 0, UV[2]; + unsigned int carry = 0; + + for (i = 0; i < nwords; i++) { + for (j = 0; j <= i; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = nwords; i < 2*nwords-1; i++) { + for (j = i-nwords+1; j < nwords; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + c[2*nwords-1] = v; +} + + +void rdc_mont(digit_t* ma, digit_t* mc) +{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p503. + // mc = ma*R^-1 mod p503x2, where R = 2^512. + // If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1]. + // ma is assumed to be in Montgomery representation. 
+ unsigned int i, j, carry, count = p503_ZERO_WORDS; + digit_t UV[2], t = 0, u = 0, v = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + mc[i] = 0; + } + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j < i; j++) { + if (j < (i-p503_ZERO_WORDS+1)) { + MUL(mc[j], ((digit_t*)p503p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { + if (count > 0) { + count -= 1; + } + for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { + if (j < (NWORDS_FIELD-count)) { + MUL(mc[j], ((digit_t*)p503p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i-NWORDS_FIELD] = v; + v = u; + u = t; + t = 0; + } + ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); + mc[NWORDS_FIELD-1] = v; +} \ No newline at end of file diff --git a/SIKE_sw/src/P546/AMD64/fp_x64.c b/SIKE_sw/src/P546/AMD64/fp_x64.c new file mode 100644 index 0000000..f8fe479 --- /dev/null +++ b/SIKE_sw/src/P546/AMD64/fp_x64.c @@ -0,0 +1,634 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: modular arithmetic optimized for x64 platforms for P546 +*********************************************************************************************/ + +#include "../P546_internal.h" +#include "../../internal.h" + +// Global constants +extern const uint64_t p546[NWORDS_FIELD]; +extern const uint64_t p546p1[NWORDS_FIELD]; +extern const uint64_t p546x2[NWORDS_FIELD]; +extern const uint64_t p546x4[NWORDS_FIELD]; + + +__inline void 
mp_sub546_p2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p546x2)[i], borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + mp_sub546_p2_asm(a, b, c); + +#endif +} + + +__inline void mp_sub546_p4(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p546x4)[i], borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + mp_sub546_p4_asm(a, b, c); + +#endif +} + + +__inline void fpadd546(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p546. + // Inputs: a, b in [0, 2*p546-1] + // Output: c in [0, 2*p546-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p546x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p546x2)[i] & mask, carry, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + fpadd546_asm(a, b, c); + +#endif +} + + +__inline void fpsub546(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p546. 
+ // Inputs: a, b in [0, 2*p546-1] + // Output: c in [0, 2*p546-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p546x2)[i] & mask, borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + fpsub546_asm(a, b, c); + +#endif +} + + +__inline void fpneg546(digit_t* a) +{ // Modular negation, a = -a mod p546. + // Input/output: a in [0, 2*p546-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p546x2)[i], a[i], borrow, a[i]); + } +} + + +void fpdiv2_546(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p546. + // Input : a in [0, 2*p546-1] + // Output: c in [0, 2*p546-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p546 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p546)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection546(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p546-1] to [0, p546-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p546)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p546)[i] & mask, borrow, a[i]); + } +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords. 
+ + (void)nwords; + +#if (OS_TARGET == OS_WIN) + digit_t t = 0; + uint128_t uv = {0}; + unsigned int carry = 0; + + MULADD128(a[0], b[0], uv, carry, uv); + t += carry; + c[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[0], uv, carry, uv); + t += carry; + c[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[0], uv, carry, uv); + t += carry; + c[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[0], uv, carry, uv); + t += carry; + c[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[0], uv, carry, uv); + t += carry; + c[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[5], uv, carry, uv); + t += carry; + MULADD128(a[4], b[1], uv, carry, uv); + t += carry; + MULADD128(a[3], b[2], uv, carry, uv); + t += carry; + MULADD128(a[2], b[3], uv, carry, uv); + t += carry; + MULADD128(a[1], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[0], uv, carry, uv); + t += carry; + c[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[1], uv, carry, uv); + t += carry; + MULADD128(a[4], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[4], uv, carry, uv); + t += carry; + MULADD128(a[1], b[5], uv, carry, uv); + t += carry; + MULADD128(a[6], b[0], uv, carry, uv); + t += carry; + c[6] = uv[0]; + uv[0] = 
uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[7], uv, carry, uv); + t += carry; + MULADD128(a[6], b[1], uv, carry, uv); + t += carry; + MULADD128(a[5], b[2], uv, carry, uv); + t += carry; + MULADD128(a[4], b[3], uv, carry, uv); + t += carry; + MULADD128(a[3], b[4], uv, carry, uv); + t += carry; + MULADD128(a[2], b[5], uv, carry, uv); + t += carry; + MULADD128(a[1], b[6], uv, carry, uv); + t += carry; + MULADD128(a[7], b[0], uv, carry, uv); + t += carry; + c[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[8], uv, carry, uv); + t += carry; + MULADD128(a[7], b[1], uv, carry, uv); + t += carry; + MULADD128(a[6], b[2], uv, carry, uv); + t += carry; + MULADD128(a[5], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[5], uv, carry, uv); + t += carry; + MULADD128(a[2], b[6], uv, carry, uv); + t += carry; + MULADD128(a[1], b[7], uv, carry, uv); + t += carry; + MULADD128(a[8], b[0], uv, carry, uv); + t += carry; + c[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[8], b[1], uv, carry, uv); + t += carry; + MULADD128(a[7], b[2], uv, carry, uv); + t += carry; + MULADD128(a[6], b[3], uv, carry, uv); + t += carry; + MULADD128(a[5], b[4], uv, carry, uv); + t += carry; + MULADD128(a[4], b[5], uv, carry, uv); + t += carry; + MULADD128(a[3], b[6], uv, carry, uv); + t += carry; + MULADD128(a[2], b[7], uv, carry, uv); + t += carry; + MULADD128(a[1], b[8], uv, carry, uv); + t += carry; + c[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[8], b[2], uv, carry, uv); + t += carry; + MULADD128(a[7], b[3], uv, carry, uv); + t += carry; + MULADD128(a[6], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[5], uv, carry, uv); + t += carry; + MULADD128(a[4], b[6], uv, carry, uv); + t += carry; + MULADD128(a[3], b[7], uv, carry, uv); + t += carry; + MULADD128(a[2], b[8], uv, carry, uv); + t += carry; + c[10] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + 
MULADD128(a[8], b[3], uv, carry, uv); + t += carry; + MULADD128(a[7], b[4], uv, carry, uv); + t += carry; + MULADD128(a[6], b[5], uv, carry, uv); + t += carry; + MULADD128(a[5], b[6], uv, carry, uv); + t += carry; + MULADD128(a[4], b[7], uv, carry, uv); + t += carry; + MULADD128(a[3], b[8], uv, carry, uv); + t += carry; + c[11] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[8], b[4], uv, carry, uv); + t += carry; + MULADD128(a[7], b[5], uv, carry, uv); + t += carry; + MULADD128(a[6], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[7], uv, carry, uv); + t += carry; + MULADD128(a[4], b[8], uv, carry, uv); + t += carry; + c[12] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[8], b[5], uv, carry, uv); + t += carry; + MULADD128(a[7], b[6], uv, carry, uv); + t += carry; + MULADD128(a[6], b[7], uv, carry, uv); + t += carry; + MULADD128(a[5], b[8], uv, carry, uv); + t += carry; + c[13] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[8], b[6], uv, carry, uv); + t += carry; + MULADD128(a[7], b[7], uv, carry, uv); + t += carry; + MULADD128(a[6], b[8], uv, carry, uv); + t += carry; + c[14] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[8], b[7], uv, carry, uv); + t += carry; + MULADD128(a[7], b[8], uv, carry, uv); + t += carry; + c[15] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + + MULADD128(a[8], b[8], uv, carry, uv); + c[16] = uv[0]; + c[17] = uv[1]; + +#elif (OS_TARGET == OS_LINUX) + + mul546_asm(a, b, c); + +#endif +} + + +void rdc_mont(digit_t* ma, digit_t* mc) +{ // Montgomery reduction exploiting special form of the prime. + // mc = ma*R^-1 mod p546x2, where R = 2^576. + // If ma < 2^576*p546, the output mc is in the range [0, 2*p546-1]. + // ma is assumed to be in Montgomery representation. 
+ +#if (OS_TARGET == OS_WIN) + unsigned int carry; + digit_t t = 0; + uint128_t uv = {0}; + + mc[0] = ma[0]; + mc[1] = ma[1]; + mc[2] = ma[2]; + mc[3] = ma[3]; + MUL128(mc[0], ((digit_t*)p546p1)[4], uv); + ADDC(0, uv[0], ma[4], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + mc[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = 0; + + MULADD128(mc[0], ((digit_t*)p546p1)[5], uv, carry, uv); + MULADD128(mc[1], ((digit_t*)p546p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[5], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p546p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p546p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p546p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[6], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[6] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p546p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p546p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p546p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p546p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[7], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p546p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p546p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p546p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p546p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p546p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[8], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[1], ((digit_t*)p546p1)[8], uv, carry, uv); + t += carry; + 
MULADD128(mc[2], ((digit_t*)p546p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p546p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p546p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p546p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[9], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[2], ((digit_t*)p546p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p546p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p546p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p546p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p546p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[10], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[3], ((digit_t*)p546p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p546p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p546p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p546p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p546p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[11], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[4], ((digit_t*)p546p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p546p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p546p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p546p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p546p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[12], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[5], 
((digit_t*)p546p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p546p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p546p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p546p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[13], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[6], ((digit_t*)p546p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p546p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p546p1)[6], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[14], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[7], ((digit_t*)p546p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p546p1)[7], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[15], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[6] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[8], ((digit_t*)p546p1)[8], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[16], carry, mc[7]); + ADDC(carry, uv[1], ma[17], carry, mc[8]); + +#elif (OS_TARGET == OS_LINUX) + + rdc546_asm(ma, mc); + +#endif +} \ No newline at end of file diff --git a/SIKE_sw/src/P546/AMD64/fp_x64_asm.S b/SIKE_sw/src/P546/AMD64/fp_x64_asm.S new file mode 100644 index 0000000..26ea865 --- /dev/null +++ b/SIKE_sw/src/P546/AMD64/fp_x64_asm.S @@ -0,0 +1,1353 @@ +//******************************************************************************************* +// SIDH: an efficient supersingular isogeny cryptography library +// Copyright (c) Microsoft Corporation +// +// Website: https://github.com/microsoft/PQCrypto-SIDH +// Released under MIT license +// +// Abstract: field arithmetic in x64 assembly for P546 on Linux +//******************************************************************************************* + 
+.intel_syntax noprefix + +// Format function and variable names for Mac OS X +#if defined(__APPLE__) + #define fmt(f) _##f +#else + #define fmt(f) f +#endif + +// Registers that are used for parameter passing: +#define reg_p1 rdi +#define reg_p2 rsi +#define reg_p3 rdx + + +.text +//*********************************************************************** +// Field addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.global fmt(fpadd546_asm) +fmt(fpadd546_asm): + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + mov rcx, [reg_p1+64] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + adc r14, [reg_p2+48] + adc r15, [reg_p2+56] + adc rcx, [reg_p2+64] + + mov rax, [rip+fmt(p546x2)] + sub r8, rax + mov rax, [rip+fmt(p546x2)+8] + sbb r9, rax + sbb r10, rax + sbb r11, rax + mov rax, [rip+fmt(p546x2)+32] + sbb r12, rax + mov rax, [rip+fmt(p546x2)+40] + sbb r13, rax + mov rax, [rip+fmt(p546x2)+48] + sbb r14, rax + mov rax, [rip+fmt(p546x2)+56] + sbb r15, rax + mov rax, [rip+fmt(p546x2)+64] + sbb rcx, rax + mov [reg_p3+64], rcx + mov rax, 0 + sbb rax, 0 + + mov rsi, [rip+fmt(p546x2)] + and rsi, rax + mov rdi, [rip+fmt(p546x2)+8] + and rdi, rax + + add r8, rsi + adc r9, rdi + adc r10, rdi + adc r11, rdi + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + setc cl + + mov rdi, [rip+fmt(p546x2)+32] + and rdi, rax + mov rsi, [rip+fmt(p546x2)+40] + and rsi, rax + mov r8, [rip+fmt(p546x2)+48] + and r8, rax + mov r9, [rip+fmt(p546x2)+56] + and r9, rax + mov r10, [rip+fmt(p546x2)+64] + and r10, rax + + bt rcx, 0 + adc r12, rdi + adc r13, rsi + adc r14, r8 + adc r15, r9 + mov rsi, [reg_p3+64] + adc rsi, r10 + mov 
[reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + mov [reg_p3+64], rsi + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Field subtraction +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] +//*********************************************************************** +.global fmt(fpsub546_asm) +fmt(fpsub546_asm): + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + mov rcx, [reg_p1+64] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb r14, [reg_p2+48] + sbb r15, [reg_p2+56] + sbb rcx, [reg_p2+64] + mov [reg_p3+64], rcx + mov rax, 0 + sbb rax, 0 + + mov rsi, [rip+fmt(p546x2)] + and rsi, rax + mov rdi, [rip+fmt(p546x2)+8] + and rdi, rax + + add r8, rsi + adc r9, rdi + adc r10, rdi + adc r11, rdi + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + setc cl + + mov rdi, [rip+fmt(p546x2)+32] + and rdi, rax + mov rsi, [rip+fmt(p546x2)+40] + and rsi, rax + mov r8, [rip+fmt(p546x2)+48] + and r8, rax + mov r9, [rip+fmt(p546x2)+56] + and r9, rax + mov r10, [rip+fmt(p546x2)+64] + and r10, rax + + bt rcx, 0 + adc r12, rdi + adc r13, rsi + adc r14, r8 + adc r15, r9 + mov rsi, [reg_p3+64] + adc rsi, r10 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + mov [reg_p3+64], rsi + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +///////////////////////////////////////////////////////////////// MACRO +.macro SUB546_PX P0 + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, 
[reg_p1+56] + mov rax, [reg_p1+64] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb r14, [reg_p2+48] + sbb r15, [reg_p2+56] + sbb rax, [reg_p2+64] + + mov rdi, [rip+\P0] + mov rsi, [rip+\P0+8] + add r8, rdi + adc r9, rsi + adc r10, rsi + adc r11, rsi + mov rcx, [rip+\P0+32] + mov rdi, [rip+\P0+40] + mov rsi, [rip+\P0+48] + adc r12, rcx + adc r13, rdi + adc r14, rsi + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov rdi, [rip+\P0+56] + mov rsi, [rip+\P0+64] + adc r15, rdi + adc rax, rsi + mov [reg_p3+56], r15 + mov [reg_p3+64], rax + + pop r15 + pop r14 + pop r13 + pop r12 + .endm + + +//*********************************************************************** +// Multiprecision subtraction with correction with 2*p546 +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p546 +//*********************************************************************** +.global fmt(mp_sub546_p2_asm) +fmt(mp_sub546_p2_asm): + + SUB546_PX fmt(p546x2) + ret + + +//*********************************************************************** +// Multiprecision subtraction with correction with 4*p546 +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p546 +//*********************************************************************** +.global fmt(mp_sub546_p4_asm) +fmt(mp_sub546_p4_asm): + + SUB546_PX fmt(p546x4) + ret + + +#ifdef _MULX_ + +/////////////////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: memory pointers M0 and M1 +// Outputs: memory pointer C +// Temps: regs T0:T7 +/////////////////////////////////////////////////////////////////////////// + +#ifdef _ADX_ +.macro MUL320_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7 + mov rdx, \M0 + mulx \T0, \T1, \M1 + mulx \T2, \T3, 8\M1 + mov \C, \T1 // C0_final + xor rax, rax + 
mulx \T4, \T5, 16\M1 + adox \T0, \T3 + adox \T2, \T5 + mulx \T1, \T3, 24\M1 + adox \T4, \T3 + mulx \T5, \T6, 32\M1 + adox \T1, \T6 + adox \T5, rax + + mov rdx, 8\M0 + mulx \T6, \T7, \M1 + xor rax, rax + adcx \T0, \T7 + mov 8\C, \T0 // C1_final + adcx \T2, \T6 + mulx \T6, \T7, 8\M1 + adox \T2, \T7 + adcx \T4, \T6 + mulx \T0, \T6, 16\M1 + adox \T4, \T6 + adcx \T0, \T1 + mulx \T1, \T7, 24\M1 + adcx \T1, \T5 + adox \T0, \T7 + mulx \T5, \T6, 32\M1 + adcx \T5, rax + adox \T1, \T6 + adox \T5, rax + + mov rdx, 16\M0 + mulx \T6, \T7, \M1 + xor rax, rax + adcx \T2, \T7 + mov 16\C, \T2 // C2_final + adcx \T4, \T6 + mulx \T6, \T7, 8\M1 + adox \T4, \T7 + adcx \T0, \T6 + mulx \T2, \T6, 16\M1 + adox \T0, \T6 + adcx \T1, \T2 + mulx \T2, \T7, 24\M1 + adcx \T5, \T2 + adox \T1, \T7 + mulx \T2, \T6, 32\M1 + adcx \T2, rax + adox \T5, \T6 + adox \T2, rax + + mov rdx, 24\M0 + mulx \T6, \T7, \M1 + xor rax, rax + adcx \T4, \T7 + mov 24\C, \T4 // C3_final + adcx \T0, \T6 + mulx \T6, \T7, 8\M1 + adox \T0, \T7 + adcx \T1, \T6 + mulx \T4, \T6, 16\M1 + adox \T1, \T6 + adcx \T5, \T4 + mulx \T4, \T7, 24\M1 + adcx \T2, \T4 + adox \T5, \T7 + mulx \T4, \T6, 32\M1 + adcx \T4, rax + adox \T2, \T6 + adox \T4, rax + + mov rdx, 32\M0 + mulx \T6, \T7, \M1 + xor rax, rax + adcx \T0, \T7 + mov 32\C, \T0 // C4_final + adcx \T1, \T6 + mulx \T6, \T7, 8\M1 + adox \T1, \T7 + adcx \T5, \T6 + mulx \T0, \T6, 16\M1 + adox \T5, \T6 + adcx \T2, \T0 + mulx \T0, \T7, 24\M1 + adcx \T4, \T0 + adox \T2, \T7 + mulx \T0, \T6, 32\M1 + adcx \T0, rax + adox \T4, \T6 + adox \T0, rax + + mov 40\C, \T1 + mov 48\C, \T5 + mov 56\C, \T2 + mov 64\C, \T4 + mov 72\C, \T0 +.endm + +.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 + mov rdx, \M0 + mulx \T0, \T1, \M1 // T0:T1 = A0*B0 + mov \C, \T1 // C0_final + mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 + xor rax, rax + adox \T0, \T2 + mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 + adox \T1, \T3 + mulx \T3, \T4, 24\M1 // T3:T4 = A0*B3 + adox \T2, \T4 + + mov rdx, 8\M0 + mulx \T5, 
\T4, \M1 // T5:T4 = A1*B0 + adox \T3, rax + xor rax, rax + mulx \T6, \T7, 8\M1 // T6:T7 = A1*B1 + adox \T4, \T0 + mov 8\C, \T4 // C1_final + adcx \T5, \T7 + mulx \T7, \T8, 16\M1 // T7:T8 = A1*B2 + adcx \T6, \T8 + adox \T5, \T1 + mulx \T8, \T9, 24\M1 // T8:T9 = A1*B3 + adcx \T7, \T9 + adcx \T8, rax + adox \T6, \T2 + + mov rdx, 16\M0 + mulx \T1, \T0, \M1 // T1:T0 = A2*B0 + adox \T7, \T3 + adox \T8, rax + xor rax, rax + mulx \T2, \T3, 8\M1 // T2:T3 = A2*B1 + adox \T0, \T5 + mov 16\C, \T0 // C2_final + adcx \T1, \T3 + mulx \T3, \T4, 16\M1 // T3:T4 = A2*B2 + adcx \T2, \T4 + adox \T1, \T6 + mulx \T4,\T9, 24\M1 // T4:T9 = A2*B3 + adcx \T3, \T9 + mov rdx, 24\M0 + adcx \T4, rax + + adox \T2, \T7 + adox \T3, \T8 + adox \T4, rax + + mulx \T5, \T0, \M1 // T5:T0 = A3*B0 + xor rax, rax + mulx \T6, \T7, 8\M1 // T6:T7 = A3*B1 + adcx \T5, \T7 + adox \T1, \T0 + mulx \T7, \T8, 16\M1 // T7:T8 = A3*B2 + adcx \T6, \T8 + adox \T2, \T5 + mulx \T8, \T9, 24\M1 // T8:T9 = A3*B3 + adcx \T7, \T9 + adcx \T8, rax + + adox \T3, \T6 + adox \T4, \T7 + adox \T8, rax + mov 24\C, \T1 // C3_final + mov 32\C, \T2 // C4_final + mov 40\C, \T3 // C5_final + mov 48\C, \T4 // C6_final + mov 56\C, \T8 // C7_final +.endm + +#else + +.macro MUL320_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7 + mov rdx, \M0 + mulx \T0, \T1, \M1 + mulx \T2, \T3, 8\M1 + mov \C, \T1 // C0_final + mulx \T4, \T5, 16\M1 + add \T0, \T3 + adc \T2, \T5 + mulx \T1, \T3, 24\M1 + adc \T3, \T4 + mulx \T5, \T6, 32\M1 + adc \T1, \T6 + adc \T5, 0 + + mov rdx, 8\M0 + mulx \T6, \T7, \M1 + add \T0, \T7 + mov 8\C, \T0 // C1_final + adc \T2, \T6 + mulx \T6, \T7, 8\M1 + adc \T3, \T6 + mulx \T0, \T4, 16\M1 + adc \T0, \T1 + mulx \T1, \T6, 24\M1 + adc \T5, \T1 + mulx \T1, rax, 32\M1 + adc \T1, 0 + + add \T2, \T7 + adc \T3, \T4 + adc \T0, \T6 + adc \T5, rax + adc \T1, 0 + + mov rdx, 16\M0 + mulx \T4, \T6, \M1 + add \T2, \T6 + mov 16\C, \T2 // C2_final + adc \T3, \T4 + mulx \T6, \T7, 8\M1 + adc \T0, \T6 + mulx \T2, \T4, 16\M1 + adc \T2, \T5 + mulx 
\T5, \T6, 24\M1 + adc \T1, \T5 + mulx \T5, rax, 32\M1 + adc \T5, 0 + + add \T3, \T7 + adc \T0, \T4 + adc \T2, \T6 + adc \T1, rax + adc \T5, 0 + + mov rdx, 24\M0 + mulx \T4, \T6, \M1 + add \T3, \T6 + mov 24\C, \T3 // C3_final + adc \T0, \T4 + mulx \T6, \T7, 8\M1 + adc \T2, \T6 + mulx \T3, \T4, 16\M1 + adc \T1, \T3 + mulx \T3, \T6, 24\M1 + adc \T3, \T5 + mulx \T5, rax, 32\M1 + adc \T5, 0 + + add \T0, \T7 + adc \T2, \T4 + adc \T1, \T6 + adc \T3, rax + adc \T5, 0 + + mov rdx, 32\M0 + mulx \T4, \T6, \M1 + add \T0, \T6 + mov 32\C, \T0 // C4_final + adc \T2, \T4 + mulx \T6, \T7, 8\M1 + adc \T1, \T6 + mulx \T0, \T4, 16\M1 + adc \T3, \T0 + mulx \T0, \T6, 24\M1 + adc \T0, \T5 + mulx \T5, rax, 32\M1 + adc \T5, 0 + + add \T2, \T7 + adc \T1, \T4 + adc \T3, \T6 + adc \T0, rax + adc \T5, 0 + mov 40\C, \T2 + mov 48\C, \T1 + mov 56\C, \T3 + mov 64\C, \T0 + mov 72\C, \T5 +.endm + +.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 + mov rdx, \M0 + mulx \T0, \T1, \M1 // T0:T1 = A0*B0 + mov \C, \T1 // C0_final + mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 + add \T0, \T2 + mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 + adc \T1, \T3 + mulx \T3, \T4, 24\M1 // T3:T4 = A0*B3 + adc \T2, \T4 + mov rdx, 8\M0 + adc \T3, 0 + + mulx \T5, \T4, \M1 // T5:T4 = A1*B0 + mulx \T6, \T7, 8\M1 // T6:T7 = A1*B1 + add \T5, \T7 + mulx \T7, \T8, 16\M1 // T7:T8 = A1*B2 + adc \T6, \T8 + mulx \T8, \T9, 24\M1 // T8:T9 = A1*B3 + adc \T7, \T9 + adc \T8, 0 + + add \T4, \T0 + mov 8\C, \T4 // C1_final + adc \T5, \T1 + adc \T6, \T2 + adc \T7, \T3 + mov rdx, 16\M0 + adc \T8, 0 + + mulx \T1, \T0, \M1 // T1:T0 = A2*B0 + mulx \T2, \T3, 8\M1 // T2:T3 = A2*B1 + add \T1, \T3 + mulx \T3, \T4, 16\M1 // T3:T4 = A2*B2 + adc \T2, \T4 + mulx \T4,\T9, 24\M1 // T4:T9 = A2*B3 + adc \T3, \T9 + mov rdx, 24\M0 + adc \T4, 0 + + add \T0, \T5 + mov 16\C, \T0 // C2_final + adc \T1, \T6 + adc \T2, \T7 + adc \T3, \T8 + adc \T4, 0 + + mulx \T5, \T0, \M1 // T5:T0 = A3*B0 + mulx \T6, \T7, 8\M1 // T6:T7 = A3*B1 + add \T5, \T7 + mulx \T7, 
\T8, 16\M1 // T7:T8 = A3*B2 + adc \T6, \T8 + mulx \T8, \T9, 24\M1 // T8:T9 = A3*B3 + adc \T7, \T9 + adc \T8, 0 + + add \T1, \T0 + mov 24\C, \T1 // C3_final + adc \T2, \T5 + mov 32\C, \T2 // C4_final + adc \T3, \T6 + mov 40\C, \T3 // C5_final + adc \T4, \T7 + mov 48\C, \T4 // C6_final + adc \T8, 0 + mov 56\C, \T8 // C7_final +.endm +#endif + + +//***************************************************************************** +// 546-bit multiplication using Karatsuba (one level), schoolbook (two levels) +//***************************************************************************** +.global fmt(mul546_asm) +fmt(mul546_asm): + push r12 + push r13 + push r14 + push r15 + mov rcx, reg_p3 + + // [rsp] <- AH + AL, rax <- mask + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + push rbx + push rbp + sub rsp, 160 + add r8, [reg_p1+40] + adc r9, [reg_p1+48] + adc r10, [reg_p1+56] + adc r11, [reg_p1+64] + adc r12, 0 + sbb rax, 0 + mov [rsp], r8 + mov [rsp+8], r9 + mov [rsp+16], r10 + mov [rsp+24], r11 + mov [rsp+32], r12 + + // [rsp+40] <- BH + BL, rdx <- mask + xor rdx, rdx + mov r8, [reg_p2] + mov r15, [reg_p2+8] + mov rbx, [reg_p2+16] + mov r13, [reg_p2+24] + mov r14, [reg_p2+32] + add r8, [reg_p2+40] + adc r15, [reg_p2+48] + adc rbx, [reg_p2+56] + adc r13, [reg_p2+64] + adc r14, 0 + sbb rdx, 0 + mov [rsp+40], r8 + mov [rsp+48], r15 + mov [rsp+56], rbx + mov [rsp+64], r13 + mov [rsp+72], r14 + + // [rcx] <- masked (BH + BL) + and r8, rax + and r15, rax + and rbx, rax + and r13, rax + and r14, rax + mov [rcx], r8 + + // r8-r12 <- masked (AH + AL) + mov r8, [rsp] + and r8, rdx + and r9, rdx + and r10, rdx + and r11, rdx + and r12, rdx + + // [rcx+80] <- masked (AH + AL) + masked (BH + BL) + mov rax, [rcx] + add r8, rax + adc r9, r15 + adc r10, rbx + adc r11, r13 + adc r12, r14 + mov [rcx+80], r8 + mov [rcx+88], r9 + mov [rcx+96], r10 + mov [rcx+104], r11 + mov [rcx+112], r12 + + // [rcx] <- AL x BL + 
MUL320_SCHOOL [reg_p1], [reg_p2], [rcx], r8, r9, r10, r11, r12, r13, r14, r15 // Result C0-C4 + + // [rsp+80] <- (AH+AL) x (BH+BL), low part + MUL320_SCHOOL [rsp], [rsp+40], [rsp+80], r8, r9, r10, r11, r12, r13, r14, r15 + + // [rsp] <- AH x BH + MUL256_SCHOOL [reg_p1+40], [reg_p2+40], [rsp], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rbp + + // r8-r12 <- (AH+AL) x (BH+BL), final step + mov r8, [rcx+80] + mov r9, [rcx+88] + mov r10, [rcx+96] + mov r11, [rcx+104] + mov r12, [rcx+112] + mov rax, [rsp+120] + add r8, rax + mov rax, [rsp+128] + adc r9, rax + mov rax, [rsp+136] + adc r10, rax + mov rax, [rsp+144] + adc r11, rax + mov rax, [rsp+152] + adc r12, rax + + // rdi,rdx,rbx,r13,r14,r8-r12 <- (AH+AL) x (BH+BL) - ALxBL + mov rdi, [rsp+80] + sub rdi, [rcx] + mov rdx, [rsp+88] + sbb rdx, [rcx+8] + mov rbx, [rsp+96] + sbb rbx, [rcx+16] + mov r13, [rsp+104] + sbb r13, [rcx+24] + mov r14, [rsp+112] + sbb r14, [rcx+32] + sbb r8, [rcx+40] + sbb r9, [rcx+48] + sbb r10, [rcx+56] + sbb r11, [rcx+64] + sbb r12, [rcx+72] + + // rdi,rdx,rbx,r13,r14,r8-r12 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + sub rdi, [rsp] + sbb rdx, [rsp+8] + sbb rbx, [rsp+16] + sbb r13, [rsp+24] + sbb r14, [rsp+32] + sbb r8, [rsp+40] + sbb r9, [rsp+48] + sbb r10, [rsp+56] + sbb r11, 0 + sbb r12, 0 + + mov rax, [rcx+40] + add rax, rdi + mov [rcx+40], rax // Result C5-C9 + mov rax, [rcx+48] + adc rax, rdx + mov [rcx+48], rax + mov rax, [rcx+56] + adc rax, rbx + mov [rcx+56], rax + mov rax, [rcx+64] + adc rax, r13 + mov [rcx+64], rax + mov rax, [rcx+72] + adc rax, r14 + mov [rcx+72], rax + mov rax, [rsp] + adc r8, rax + mov [rcx+80], r8 // Result C10-C19 + mov rax, [rsp+8] + adc r9, rax + mov [rcx+88], r9 + mov rax, [rsp+16] + adc r10, rax + mov [rcx+96], r10 + mov rax, [rsp+24] + adc r11, rax + mov [rcx+104], r11 + mov rax, [rsp+32] + adc r12, rax + mov [rcx+112], r12 + mov r8, [rsp+40] + mov r9, [rsp+48] + mov r10, [rsp+56] + adc r8, 0 + adc r9, 0 + adc r10, 0 + add rsp, 160 + mov [rcx+120], r8 + mov 
[rcx+128], r9 + mov [rcx+136], r10 + + pop rbp + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret + +#else + +//*********************************************************************** +// Integer multiplication +// Based on Karatsuba method +// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2] +// NOTE: a=c or b=c are not allowed +//*********************************************************************** +.global fmt(mul546_asm) +fmt(mul546_asm): + + ret + +# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE" + +#endif + +#ifdef _MULX_ + +///////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: memory pointers M0 and M1 +// Outputs: regs T0:T7 +// Temps: regs T8 +///////////////////////////////////////////////////////////////// + +#ifdef _ADX_ +.macro MUL128x320_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7, TT + mov rdx, \M0 + mulx \T1, \T0, \M1 // T0 <- C0_final + mulx \T2, \T4, 8\M1 + xor rax, rax + mulx \T3, \T5, 16\M1 + adox \T1, \T4 + adox \T2, \T5 + mulx \T4, \T7, 24\M1 + adox \T3, \T7 + mulx \T5, \T6, 32\M1 + adox \T4, \T6 + adox \T5, \TT + + mov rdx, 8\M0 + mulx \T7, \T6, \M1 + adcx \T1, \T6 // T1 <- C1_final + adcx \T2, \T7 + mulx \T6, \T7, 8\M1 + adox \T2, \T7 + adcx \T3, \T6 + mulx \T6, \T7, 16\M1 + adox \T3, \T7 + adcx \T4, \T6 + mulx \T6, \T7, 24\M1 + adox \T4, \T7 + adcx \T5, \T6 + mulx \T6, \T7, 32\M1 + adox \T5, \T7 + adox \T6, rax +.endm + +.macro MUL64x320_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7 + mov rdx, \M0 + mulx \T1, \T0, \M1 // T0 <- C0_final + mulx \T2, \T4, 8\M1 + xor rax, rax + mulx \T3, \T5, 16\M1 + adox \T1, \T4 + adox \T2, \T5 + mulx \T4, \T7, 24\M1 + adox \T3, \T7 + mulx \T5, \T6, 32\M1 + adox \T4, \T6 + adox \T5, rax +.endm + +#else + +.macro MUL128x320_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7, TT + mov rdx, \M0 + mulx \T1, \T0, \M1 // T0 <- C0_final + mulx \T2, \T3, 8\M1 + add \T1, \T3 + adc \T2, 0 + + mov rdx, 8\M0 + xor \T5, \T5 + mulx \T3, \T4, \M1 
+ add \T1, \T4 + adc \T2, \T3 + adc \T5, 0 + + xor \T6, \T6 + mulx \T3, \T4, 8\M1 + add \T2, \T4 + adc \T3, \T5 + adc \T6, 0 + + mov rdx, \M0 + mulx \T4, \T5, 16\M1 + add \T2, \T5 + adc \T3, \T4 + adc \T6, 0 + + xor \T7, \T7 + mulx \T4, \T5, 24\M1 + add \T3, \T5 + adc \T4, \T6 + adc \T7, 0 + + mov rdx, 8\M0 + mulx \T5, \T6, 16\M1 + add \T3, \T6 + adc \T4, \T5 + adc \T7, 0 + + xor \T6, \T6 + mulx \T5, rax, 24\M1 + add \T4, rax + adc \T5, \T7 + adc \T6, 0 + + mov rdx, 32\M1 + mulx \T7, rax, \M0 + add \T4, rax + adc \T5, \T7 + adc \T6, 0 + + mulx rax, \T7, 8\M0 + add \T5, \T7 + adc \T6, rax + + add \T5, \TT + adc \T6, 0 +.endm + +.macro MUL64x320_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7 + mov rdx, \M0 + mulx \T1, \T0, \M1 // T0 <- C0_final + mulx \T2, \T3, 8\M1 + add \T1, \T3 + adc \T2, 0 + + mulx \T3, \T4, 16\M1 + add \T2, \T4 + adc \T3, 0 + + mulx \T4, \T5, 24\M1 + add \T3, \T5 + adc \T4, 0 + + mulx \T5, \T6, 32\M1 + add \T4, \T6 + adc \T5, 0 +.endm +#endif + + +//************************************************************************************** +// Montgomery reduction +// Based on method described in Faz-Hernandez et al. 
https://eprint.iacr.org/2017/1015 +// Operation: c [reg_p2] = a [reg_p1] +// NOTE: a=c is not allowed +//************************************************************************************** +.global fmt(rdc546_asm) +fmt(rdc546_asm): + push r12 + push r13 + push r14 + push r15 + xor rcx, rcx + + // a[0-1] x p546p1_nz --> result: r8:r14 + MUL128x320_SCHOOL [reg_p1], [rip+fmt(p546p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx + + xor rcx, rcx + add r8, [reg_p1+32] + adc r9, [reg_p1+40] + adc r10, [reg_p1+48] + adc r11, [reg_p1+56] + adc r12, [reg_p1+64] + adc r13, [reg_p1+72] + adc r14, [reg_p1+80] + adc rcx, 0 + mov [reg_p1+32], r8 + mov [reg_p1+40], r9 + mov [reg_p1+48], r10 + mov [reg_p1+56], r11 + mov [reg_p1+64], r12 + mov [reg_p1+72], r13 + mov [reg_p1+80], r14 + + // a[2-3] x p546p1_nz --> result: r8:r14 + MUL128x320_SCHOOL [reg_p1+16], [rip+fmt(p546p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx + + xor rcx, rcx + add r8, [reg_p1+48] + adc r9, [reg_p1+56] + adc r10, [reg_p1+64] + adc r11, [reg_p1+72] + adc r12, [reg_p1+80] + adc r13, [reg_p1+88] + adc r14, [reg_p1+96] + adc rcx, 0 + mov [reg_p1+48], r8 + mov [reg_p1+56], r9 + mov [reg_p1+64], r10 + mov [reg_p1+72], r11 + mov [reg_p1+80], r12 + mov [reg_p1+88], r13 + mov [reg_p1+96], r14 + + // a[4-5] x p546p1_nz --> result: r8:r14 + MUL128x320_SCHOOL [reg_p1+32], [rip+fmt(p546p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx + + xor rcx, rcx + add r8, [reg_p1+64] + adc r9, [reg_p1+72] + adc r10, [reg_p1+80] + adc r11, [reg_p1+88] + adc r12, [reg_p1+96] + adc r13, [reg_p1+104] + adc r14, [reg_p1+112] + adc rcx, 0 + mov [reg_p1+64], r8 + mov [reg_p2], r9 // C0_final + mov [reg_p1+80], r10 + mov [reg_p1+88], r11 + mov [reg_p1+96], r12 + mov [reg_p1+104], r13 + mov [reg_p1+112], r14 + + // a[6-7] x p546p1_nz --> result: r8:r14 + MUL128x320_SCHOOL [reg_p1+48], [rip+fmt(p546p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx + + xor rcx, rcx + add r8, [reg_p1+80] + adc r9, [reg_p1+88] + adc r10, [reg_p1+96] 
+ adc r11, [reg_p1+104] + adc r12, [reg_p1+112] + adc r13, [reg_p1+120] + adc r14, [reg_p1+128] + adc rcx, [reg_p1+136] + mov [reg_p2+8], r8 // C1_final + mov [reg_p2+16], r9 // C2_final + mov [reg_p1+96], r10 + mov [reg_p1+104], r11 + mov [reg_p1+112], r12 + mov [reg_p1+120], r13 + mov [reg_p1+128], r14 + + // a[8-9] x p546p1_nz --> result: r8:r13 + MUL64x320_SCHOOL [reg_p1+64], [rip+fmt(p546p1)+32], r8, r9, r10, r11, r12, r13, r14, r15 + + // Final result C3:C8 + add r8, [reg_p1+96] + adc r9, [reg_p1+104] + adc r10, [reg_p1+112] + adc r11, [reg_p1+120] + adc r12, [reg_p1+128] + adc r13, rcx + mov [reg_p2+24], r8 + mov [reg_p2+32], r9 + mov [reg_p2+40], r10 + mov [reg_p2+48], r11 + mov [reg_p2+56], r12 + mov [reg_p2+64], r13 + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + #else + +//*********************************************************************** +// Montgomery reduction +// Based on comba method +// Operation: c [reg_p2] = a [reg_p1] +// NOTE: a=c is not allowed +//*********************************************************************** +.global fmt(rdc546_asm) +fmt(rdc546_asm): + + ret + +# error "CONFIGURATION NOT SUPPORTED. 
TRY USE_MULX=TRUE" + + #endif + +//*********************************************************************** +// 546-bit multiprecision addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.global fmt(mp_add546_asm) +fmt(mp_add546_asm): + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov rax, [reg_p1+32] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc rax, [reg_p2+32] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], rax + + mov r8, [reg_p1+40] + mov r9, [reg_p1+48] + mov r10, [reg_p1+56] + mov r11, [reg_p1+64] + // Operands are 9 words (offsets 0..64): nothing is read at offset 72. CF from the low five words is still live here, since mov leaves the flags untouched + adc r8, [reg_p2+40] + adc r9, [reg_p2+48] + adc r10, [reg_p2+56] + adc r11, [reg_p2+64] + mov [reg_p3+40], r8 + mov [reg_p3+48], r9 + mov [reg_p3+56], r10 + mov [reg_p3+64], r11 + ret + + +//*********************************************************************** +// 2x546-bit multiprecision subtraction/addition +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. 
If c < 0, add p546*2^576 +//*********************************************************************** +.global fmt(mp_subadd546x2_asm) +fmt(mp_subadd546x2_asm): + push r12 + push r13 + push r14 + push r15 + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov rcx, [reg_p1+32] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb rcx, [reg_p2+32] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], rcx + + mov r8, [reg_p1+40] + mov r9, [reg_p1+48] + mov r10, [reg_p1+56] + mov r11, [reg_p1+64] + sbb r8, [reg_p2+40] + sbb r9, [reg_p2+48] + sbb r10, [reg_p2+56] + sbb r11, [reg_p2+64] + mov [reg_p3+40], r8 + mov [reg_p3+48], r9 + mov [reg_p3+56], r10 + mov [reg_p3+64], r11 + + mov r8, [reg_p1+72] + mov r9, [reg_p1+80] + mov r10, [reg_p1+88] + mov r11, [reg_p1+96] + mov rcx, [reg_p1+104] + sbb r8, [reg_p2+72] + sbb r9, [reg_p2+80] + sbb r10, [reg_p2+88] + sbb r11, [reg_p2+96] + sbb rcx, [reg_p2+104] + mov [reg_p3+72], r8 + mov [reg_p3+80], r9 + mov [reg_p3+88], r10 + mov [reg_p3+96], r11 + mov [reg_p3+104], rcx + + mov r8, [reg_p1+112] + mov r9, [reg_p1+120] + mov r10, [reg_p1+128] + mov r11, [reg_p1+136] + sbb r8, [reg_p2+112] + sbb r9, [reg_p2+120] + sbb r10, [reg_p2+128] + sbb r11, [reg_p2+136] + sbb rax, 0 + + // Add p546 anded with the mask in rax + mov r12, [rip+fmt(p546)] + mov r13, [rip+fmt(p546)+32] + mov r14, [rip+fmt(p546)+40] + mov r15, [rip+fmt(p546)+48] + mov rdi, [rip+fmt(p546)+56] + mov rsi, [rip+fmt(p546)+64] + and r12, rax + and r13, rax + and r14, rax + and r15, rax + and rdi, rax + and rsi, rax + mov rax, [reg_p3+72] + add rax, r12 + mov [reg_p3+72], rax + mov rax, [reg_p3+80] + adc rax, r12 + mov [reg_p3+80], rax + mov rax, [reg_p3+88] + adc rax, r12 + mov [reg_p3+88], rax + adc r12, [reg_p3+96] + adc r13, [reg_p3+104] + mov [reg_p3+96], r12 + mov [reg_p3+104], r13 + adc r8, r14 + adc r9, r15 + adc r10, rdi + adc 
r11, rsi + + mov [reg_p3+112], r8 + mov [reg_p3+120], r9 + mov [reg_p3+128], r10 + mov [reg_p3+136], r11 + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Double 2x546-bit multiprecision subtraction +// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2] +//*********************************************************************** +.global fmt(mp_dblsub546x2_asm) +fmt(mp_dblsub546x2_asm): + push r12 + push r13 + + mov r8, [reg_p3] + mov r9, [reg_p3+8] + mov r10, [reg_p3+16] + mov r11, [reg_p3+24] + mov r12, [reg_p3+32] + mov r13, [reg_p3+40] + sub r8, [reg_p1] + sbb r9, [reg_p1+8] + sbb r10, [reg_p1+16] + sbb r11, [reg_p1+24] + sbb r12, [reg_p1+32] + sbb r13, [reg_p1+40] + setc al + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + setc cl + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + + mov r8, [reg_p3+48] + mov r9, [reg_p3+56] + mov r10, [reg_p3+64] + mov r11, [reg_p3+72] + mov r12, [reg_p3+80] + mov r13, [reg_p3+88] + bt rax, 0 + sbb r8, [reg_p1+48] + sbb r9, [reg_p1+56] + sbb r10, [reg_p1+64] + sbb r11, [reg_p1+72] + sbb r12, [reg_p1+80] + sbb r13, [reg_p1+88] + setc al + bt rcx, 0 + sbb r8, [reg_p2+48] + sbb r9, [reg_p2+56] + sbb r10, [reg_p2+64] + sbb r11, [reg_p2+72] + sbb r12, [reg_p2+80] + sbb r13, [reg_p2+88] + setc cl + mov [reg_p3+48], r8 + mov [reg_p3+56], r9 + mov [reg_p3+64], r10 + mov [reg_p3+72], r11 + mov [reg_p3+80], r12 + mov [reg_p3+88], r13 + + mov r8, [reg_p3+96] + mov r9, [reg_p3+104] + mov r10, [reg_p3+112] + mov r11, [reg_p3+120] + mov r12, [reg_p3+128] + mov r13, [reg_p3+136] + bt rax, 0 + sbb r8, [reg_p1+96] + sbb r9, [reg_p1+104] + sbb r10, [reg_p1+112] + sbb r11, [reg_p1+120] + sbb r12, [reg_p1+128] + sbb r13, [reg_p1+136] + bt rcx, 0 + sbb r8, [reg_p2+96] + sbb r9, [reg_p2+104] + sbb 
r10, [reg_p2+112] + sbb r11, [reg_p2+120] + sbb r12, [reg_p2+128] + sbb r13, [reg_p2+136] + mov [reg_p3+96], r8 + mov [reg_p3+104], r9 + mov [reg_p3+112], r10 + mov [reg_p3+120], r11 + mov [reg_p3+128], r12 + mov [reg_p3+136], r13 + + pop r13 + pop r12 + ret \ No newline at end of file diff --git a/SIKE_sw/src/P546/P546.c b/SIKE_sw/src/P546/P546.c new file mode 100644 index 0000000..953afe7 --- /dev/null +++ b/SIKE_sw/src/P546/P546.c @@ -0,0 +1,135 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: supersingular isogeny parameters and generation of functions for P546 +*********************************************************************************************/ + +#include "P546_api.h" +#include "P546_internal.h" +#include "../internal.h" + + +// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points: +// -------------------------------------------------------------------------------------------------- +// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). +// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position. +// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. +// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32. +// For example, a 546-bit field element is represented with Ceil(546 / 64) = 9 64-bit digits or Ceil(546 / 32) = 18 32-bit digits. + +// +// Curve isogeny system "SIDHp546". 
Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p546^2), where A=6, B=1, C=1 and p546 = 2^273*3^172-1 +// + +const uint64_t p546[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xC1CCF59098E1FFFF, + 0x91CA3591A0810F4F, 0xC3A747738CBAAD7D, 0x3E568459654D5F6B, 0x000000030F5EBA42 }; +const uint64_t p546p1[NWORDS64_FIELD] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xC1CCF59098E20000, + 0x91CA3591A0810F4F, 0xC3A747738CBAAD7D, 0x3E568459654D5F6B, 0x000000030F5EBA42 }; +const uint64_t p546x2[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x8399EB2131C3FFFF, + 0x23946B2341021E9F, 0x874E8EE719755AFB, 0x7CAD08B2CA9ABED7, 0x000000061EBD7484 }; +const uint64_t p546x4[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0733D6426387FFFF, + 0x4728D64682043D3F, 0x0E9D1DCE32EAB5F6, 0xF95A116595357DAF, 0x0000000C3D7AE908 }; +// Order of Alice's subgroup +const uint64_t Alice_order[NWORDS64_ORDER] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000020000 }; +// Order of Bob's subgroup +const uint64_t Bob_order[NWORDS64_ORDER] = { 0x87A7E0E67AC84C71, 0x56BEC8E51AC8D040, 0xAFB5E1D3A3B9C65D, 0x5D211F2B422CB2A6, 0x00000000000187AF }; +// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i} in GF(p546^2), expressed in Montgomery representation +const uint64_t A_gen[6*NWORDS64_FIELD] = { 0x8BF8B5CDA3529A11, 0x920F7AF8D8EDA1CE, 0x6A4FD6F4E65D2601, 0xAA5FDD88E6C8C053, 0x2DDFECC4564DD092, + 0xB5AE8E8B63CDD2EB, 0xF5530B1581D37EFC, 0xBB69799BE0974397, 0x000000029E924174, // XPA0 + 0x02BAA3F5AA08FBA0, 0xDF5E66F9718B1DB3, 0x7AAD305C4C16B9B5, 0xEFC538F7C899EC44, 0xB2B7A11B88589305, + 0xF4C2FE11D652F55A, 0x45F5A4010B37F36F, 0x68C0BE35B4414691, 0x00000002974A76B9, // XPA1 + 0x6655849EE4AD62B0, 0xA7B09BDA24F18E3D, 
0xD9DC9DF1EFE6D4E3, 0x5618AE214D22122F, 0x35CE7CD8878AB07, + 0xDFBE3687D874F305, 0x0FFAC636361A0289, 0x732304C3E314E9F3, 0x00000002D4829F4D, // XQA0 + 0xD433C9386F41F07B, 0x591D74E6B6E16886, 0x1E91924E4D82BEA1, 0xE9ED0654FE5D746F, 0x95029EF76C0961D9, + 0x9C5798078846CCA8, 0xB8AD7EC5421DCE49, 0xEBEF3DD3098146F8, 0x000000010E9A2BCA, // XQA1 + 0xC218DF11E1FCA67A, 0x8C622C3530976AAF, 0xC5A558DA88A028C1, 0x5B0E218835EB3EEA, 0x63B412D6B77F6E5F, + 0x44265EEA17A1F58C, 0xD7A6BD5FE291AA13, 0xC0918F65ED8D3D23, 0x000000005562DBCD, // XRA0 + 0x071F4177BDD2E021, 0xDC8F3873504C93E7, 0x77038B491A006DB7, 0x9E205A8C15B8F717, 0x701734570E79CC07, + 0x0790455A85462B3D, 0x19AC9F7FC32A9F20, 0x04B599768492F2D5, 0x0000000248379BC7 }; // XRA1 +// Bob's generator values {XPB0, XQB0, XRB0 + XRB1*i} in GF(p546^2), expressed in Montgomery representation +const uint64_t B_gen[6*NWORDS64_FIELD] = { 0xC60DC8B9DD8A126C, 0x2841B16BD9C550AB, 0x33EB13E27326D027, 0xB4E345D7318893D7, 0x4F7BD19633EAA269, + 0xA93049DB038741F4, 0x93222D9F331C2848, 0x15FFBA19339361F0, 0x0000000089E90060, // XPB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XPB1 + 0xA07EEF3334ACF340, 0x417F1E66A839DFCB, 0x45C32C88DAA25A10, 0x563B27FA6991C6BF, 0x4BE0CC5C10D513A9, + 0xE4E1756C009BD03E, 0xAFDFBF640F2717AC, 0xDC5EE9B702D8E56C, 0x0000000182A09EB0, // XQB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XQB1 + 0x74937ACDD796D6EE, 0x7C5E906509CE108B, 0xDA57EBEF8BA73940, 0x1E5CB85A8E1C9A4C, 0xD4EBE9C3A955BB62, + 0xBA4C02A05B39742F, 0x21A4B5BCACC33156, 0xE96E8BD54B98A20C, 0x0000000104B99E73, // XRB0 + 0xEEFADB5C4965D7A8, 0xE653CE9D2DB5CD75, 0xB511FF5416DEAB7C, 0xA5D5B131D1112DEF, 0x72D33ED20BB3EB46, + 0x96809017849D85DF, 0x00BA691C5F526CFF, 
0x9B384D1CF1873823, 0x0000000152691238 }; // XRB1 +// Montgomery constant Montgomery_R2 = (2^576)^2 mod p546 +const uint64_t Montgomery_R2[NWORDS64_FIELD] = { 0x52EB0249395B3348, 0x984F8851AEFDB3F3, 0x913744158E52803C, 0x1EC818C9E0CA0DA3, 0x4C2396C7E7350E87, + 0x75D4E9F73AC13B39, 0x1640A26835D93C44, 0x5D441830B61AD042, 0x00000001357E298F }; +// Value one in Montgomery representation +const uint64_t Montgomery_one[NWORDS64_FIELD] = { 0x0000000053A8B821, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xAB9ED029DADE0000, + 0x7FD34034A42F114D, 0x319FDC331E9125F5, 0xF1361EF3C5499C8A, 0x00000001393B6AF7 }; + + +// Fixed parameters for isogeny tree computation +const unsigned int strat_Alice[MAX_Alice-1] = { +65, 33, 17, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, +2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, +1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 32, 16, 8, 4, 2, 1, +1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, +16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, +1, 2, 1, 1 }; + +const unsigned int strat_Bob[MAX_Bob-1] = { +71, 43, 27, 15, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, +1, 3, 2, 1, 1, 1, 1, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, +1, 1, 2, 1, 1, 1, 17, 11, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 4, 3, 2, 1, 1, +1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 33, 17, 9, 5, 3, 2, +1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, +1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, +1, 4, 2, 1, 1, 2, 1, 1 }; + +// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions +#define fpcopy fpcopy546 +#define fpzero fpzero546 +#define fpadd fpadd546 +#define fpsub fpsub546 +#define fpneg fpneg546 +#define fpdiv2 fpdiv2_546 +#define fpcorrection 
fpcorrection546 +#define fpmul_mont fpmul546_mont +#define fpsqr_mont fpsqr546_mont +#define fpinv_mont fpinv546_mont +#define fpinv_chain_mont fpinv546_chain_mont +#define fp2copy fp2copy546 +#define fp2zero fp2zero546 +#define fp2add fp2add546 +#define fp2sub fp2sub546 +#define mp_sub_p2 mp_sub546_p2 +#define mp_sub_p4 mp_sub546_p4 +#define sub_p4 mp_sub_p4 +#define fp2neg fp2neg546 +#define fp2div2 fp2div2_546 +#define fp2correction fp2correction546 +#define fp2mul_mont fp2mul546_mont +#define fp2sqr_mont fp2sqr546_mont +#define fp2inv_mont fp2inv546_mont +#define fp2inv_mont_ct fp2inv546_mont_ct +#define fp2inv_mont_bingcd fp2inv546_mont_bingcd +#define mp_add_asm mp_add546_asm +#define mp_subaddx2_asm mp_subadd546x2_asm +#define mp_dblsubx2_asm mp_dblsub546x2_asm +#define crypto_kem_keypair crypto_kem_keypair_SIKEp546 +#define crypto_kem_enc crypto_kem_enc_SIKEp546 +#define crypto_kem_dec crypto_kem_dec_SIKEp546 +#define random_mod_order_A random_mod_order_A_SIDHp546 +#define random_mod_order_B random_mod_order_B_SIDHp546 +#define EphemeralKeyGeneration_A EphemeralKeyGeneration_A_SIDHp546 +#define EphemeralKeyGeneration_B EphemeralKeyGeneration_B_SIDHp546 +#define EphemeralSecretAgreement_A EphemeralSecretAgreement_A_SIDHp546 +#define EphemeralSecretAgreement_B EphemeralSecretAgreement_B_SIDHp546 + +#include "../fpx.c" +#include "../ec_isogeny.c" +#include "../sidh.c" +#include "../sike.c" \ No newline at end of file diff --git a/SIKE_sw/src/P546/P546_api.h b/SIKE_sw/src/P546/P546_api.h new file mode 100644 index 0000000..af461af --- /dev/null +++ b/SIKE_sw/src/P546/P546_api.h @@ -0,0 +1,112 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: API header file for P546 
+*********************************************************************************************/ + +#ifndef P546_API_H +#define P546_API_H + + +/*********************** Key encapsulation mechanism API ***********************/ + +#define CRYPTO_SECRETKEYBYTES 472 // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes +#define CRYPTO_PUBLICKEYBYTES 414 +#define CRYPTO_BYTES 24 +#define CRYPTO_CIPHERTEXTBYTES 438 // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes + +// Algorithm name +#define CRYPTO_ALGNAME "SIKEp546" + +// SIKE's key generation +// It produces a private key sk and computes the public key pk. +// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 472 bytes) +// public key pk (CRYPTO_PUBLICKEYBYTES = 414 bytes) +int crypto_kem_keypair_SIKEp546(unsigned char *pk, unsigned char *sk); + +// SIKE's encapsulation +// Input: public key pk (CRYPTO_PUBLICKEYBYTES = 414 bytes) +// Outputs: shared secret ss (CRYPTO_BYTES = 24 bytes) +// ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 438 bytes) +int crypto_kem_enc_SIKEp546(unsigned char *ct, unsigned char *ss, const unsigned char *pk); + +// SIKE's decapsulation +// Input: secret key sk (CRYPTO_SECRETKEYBYTES = 472 bytes) +// ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 438 bytes) +// Outputs: shared secret ss (CRYPTO_BYTES = 24 bytes) +int crypto_kem_dec_SIKEp546(unsigned char *ss, const unsigned char *ct, const unsigned char *sk); + + +// Encoding of keys for KEM-based isogeny system "SIKEp546" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p546) are encoded in 69 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p546^2), where a and b are defined over GF(p546), are encoded as {a, b}, with a in the lowest memory portion. 
+// +// Private keys sk consist of the concatenation of a 24-byte random value, a value in the range [0, 2^Floor(Log(2,3^172))-1] and the public key pk. In the SIKE API, +// private keys are encoded in 472 octets in little endian format. +// Public keys pk consist of 3 elements in GF(p546^2). In the SIKE API, pk is encoded in 414 octets. +// Ciphertexts ct consist of the concatenation of a public key value and a 24-byte value. In the SIKE API, ct is encoded in 414 + 24 = 438 octets. +// Shared keys ss consist of a value of 24 octets. + + +/*********************** Ephemeral key exchange API ***********************/ + +#define SIDH_SECRETKEYBYTES_A 35 +#define SIDH_SECRETKEYBYTES_B 34 +#define SIDH_PUBLICKEYBYTES 414 +#define SIDH_BYTES 138 + +// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys. +// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016. +// Extended version available at: http://eprint.iacr.org/2016/859 + +// Generation of Alice's secret key +// Outputs random value in [0, 2^273 - 1] to be used as Alice's private key +void random_mod_order_A_SIDHp546(unsigned char* random_digits); + +// Generation of Bob's secret key +// Outputs random value in [0, 2^Floor(Log(2,3^172)) - 1] to be used as Bob's private key +void random_mod_order_B_SIDHp546(unsigned char* random_digits); + +// Alice's ephemeral public key generation +// Input: a private key PrivateKeyA in the range [0, 2^273 - 1], stored in 35 bytes. +// Output: the public key PublicKeyA consisting of 3 GF(p546^2) elements encoded in 414 bytes. +int EphemeralKeyGeneration_A_SIDHp546(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA); + +// Bob's ephemeral key-pair generation +// It produces a private key PrivateKeyB and computes the public key PublicKeyB. +// The private key is an integer in the range [0, 2^Floor(Log(2,3^172)) - 1], stored in 34 bytes. 
+// The public key consists of 3 GF(p546^2) elements encoded in 414 bytes. +int EphemeralKeyGeneration_B_SIDHp546(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB); + +// Alice's ephemeral shared secret computation +// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB +// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^273 - 1], stored in 35 bytes. +// Bob's PublicKeyB consists of 3 GF(p546^2) elements encoded in 414 bytes. +// Output: a shared secret SharedSecretA that consists of one element in GF(p546^2) encoded in 138 bytes. +int EphemeralSecretAgreement_A_SIDHp546(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA); + +// Bob's ephemeral shared secret computation +// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA +// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^172)) - 1], stored in 34 bytes. +// Alice's PublicKeyA consists of 3 GF(p546^2) elements encoded in 414 bytes. +// Output: a shared secret SharedSecretB that consists of one element in GF(p546^2) encoded in 138 bytes. +int EphemeralSecretAgreement_B_SIDHp546(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB); + + +// Encoding of keys for KEX-based isogeny system "SIDHp546" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p546) are encoded in 69 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p546^2), where a and b are defined over GF(p546), are encoded as {a, b}, with a in the lowest memory portion. +// +// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^273-1] and [0, 2^Floor(Log(2,3^172)) - 1], resp. 
In the SIDH API, +// Alice's and Bob's private keys are encoded in 35 and 34 octets, resp., in little endian format. +// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p546^2). In the SIDH API, they are encoded in 414 octets. +// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p546^2). In the SIDH API, they are encoded in 138 octets. + + +#endif \ No newline at end of file diff --git a/SIKE_sw/src/P546/P546_internal.h b/SIKE_sw/src/P546/P546_internal.h new file mode 100644 index 0000000..6888b9a --- /dev/null +++ b/SIKE_sw/src/P546/P546_internal.h @@ -0,0 +1,175 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: internal header file for P546 +*********************************************************************************************/ + +#ifndef P546_INTERNAL_H +#define P546_INTERNAL_H + +#include "../config.h" + + +#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_ARM64) + #define NWORDS_FIELD 9 // Number of words of a 546-bit field element + #define p546_ZERO_WORDS 4 // Number of "0" digits in the least significant part of p546 + 1 +#elif (TARGET == TARGET_x86) + #define NWORDS_FIELD 18 + #define p546_ZERO_WORDS 8 +#endif + + +// Basic constants + +#define NBITS_FIELD 546 +#define MAXBITS_FIELD 576 +#define MAXWORDS_FIELD ((MAXBITS_FIELD+RADIX-1)/RADIX) // Max. number of words to represent field elements +#define NWORDS64_FIELD ((NBITS_FIELD+63)/64) // Number of 64-bit words of a 546-bit field element +#define NBITS_ORDER 320 +#define NWORDS_ORDER ((NBITS_ORDER+RADIX-1)/RADIX) // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp. 
+#define NWORDS64_ORDER ((NBITS_ORDER+63)/64) // Number of 64-bit words of a 320-bit element +#define MAXBITS_ORDER NBITS_ORDER +#define ALICE 0 +#define BOB 1 +#define OALICE_BITS 273 +#define OBOB_BITS 273 +#define OBOB_EXPON 172 +#define MASK_ALICE 0x01 +#define MASK_BOB 0xFF +#define PRIME p546 +#define PARAM_A 6 +#define PARAM_C 1 +// Fixed parameters for isogeny tree computation +#define MAX_INT_POINTS_ALICE 8 +#define MAX_INT_POINTS_BOB 8 +#define MAX_Alice 136 +#define MAX_Bob 172 +#define MSG_BYTES 24 +#define SECRETKEY_A_BYTES ((OALICE_BITS + 7) / 8) +#define SECRETKEY_B_BYTES ((OBOB_BITS - 1 + 7) / 8) +#define FP2_ENCODED_BYTES (2*((NBITS_FIELD + 7) / 8)) + + +// SIDH's basic element definitions and point representations + +typedef digit_t felm_t[NWORDS_FIELD]; // Datatype for representing 546-bit field elements (576-bit max.) +typedef digit_t dfelm_t[2*NWORDS_FIELD]; // Datatype for representing double-precision 2x546-bit field elements (2x576-bit max.) +typedef felm_t f2elm_t[2]; // Datatype for representing quadratic extension field elements GF(p546^2) + +typedef struct { f2elm_t X; f2elm_t Z; } point_proj; // Point representation in projective XZ Montgomery coordinates. +typedef point_proj point_proj_t[1]; + +#ifdef COMPRESS + typedef struct { f2elm_t X; f2elm_t Y; f2elm_t Z; } point_full_proj; // Point representation in full projective XYZ Montgomery coordinates + typedef point_full_proj point_full_proj_t[1]; + + typedef struct { f2elm_t x; f2elm_t y; } point_affine; // Point representation in affine coordinates. 
+ typedef point_affine point_t[1]; + + typedef f2elm_t publickey_t[3]; +#endif + + + +/**************** Function prototypes ****************/ +/************* Multiprecision functions **************/ + +// 546-bit multiprecision addition, c = a+b +void mp_add546(const digit_t* a, const digit_t* b, digit_t* c); +void mp_add546_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// 546-bit multiprecision subtraction, c = a-b+2p or c = a-b+4p +extern void mp_sub546_p2(const digit_t* a, const digit_t* b, digit_t* c); +extern void mp_sub546_p4(const digit_t* a, const digit_t* b, digit_t* c); +void mp_sub546_p2_asm(const digit_t* a, const digit_t* b, digit_t* c); +void mp_sub546_p4_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// 2x546-bit multiprecision subtraction followed by addition with p546*2^576, c = a-b+(p546*2^576) if a-b < 0, otherwise c=a-b +void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c); +void mp_subadd546x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Double 2x546-bit multiprecision subtraction, c = c-a-b, where c > a and c > b +void mp_dblsub546x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +/************ Field arithmetic functions *************/ + +// Copy of a field element, c = a +void fpcopy546(const digit_t* a, digit_t* c); + +// Zeroing a field element, a = 0 +void fpzero546(digit_t* a); + +// Non constant-time comparison of two field elements. 
If a = b return TRUE, otherwise, return FALSE +bool fpequal546_non_constant_time(const digit_t* a, const digit_t* b); + +// Modular addition, c = a+b mod p546 +extern void fpadd546(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpadd546_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular subtraction, c = a-b mod p546 +extern void fpsub546(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpsub546_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular negation, a = -a mod p546 +extern void fpneg546(digit_t* a); + +// Modular division by two, c = a/2 mod p546. +void fpdiv2_546(const digit_t* a, digit_t* c); + +// Modular correction to reduce field element a in [0, 2*p546-1] to [0, p546-1]. +void fpcorrection546(digit_t* a); + +// 546-bit Montgomery reduction, c = a mod p +void rdc546_asm(digit_t* ma, digit_t* mc); + +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p546, where R=2^576 +void fpmul546_mont(const digit_t* a, const digit_t* b, digit_t* c); +void mul546_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Field squaring using Montgomery arithmetic, mc = ma^2*R^-1 mod p546, where R=2^576 +void fpsqr546_mont(const digit_t* ma, digit_t* mc); + +// Field inversion, a = a^-1 in GF(p546) +void fpinv546_mont(digit_t* a); + +// Chain to compute (p546-3)/4 using Montgomery arithmetic +void fpinv546_chain_mont(digit_t* a); + +/************ GF(p^2) arithmetic functions *************/ + +// Copy of a GF(p546^2) element, c = a +void fp2copy546(const f2elm_t a, f2elm_t c); + +// Zeroing a GF(p546^2) element, a = 0 +void fp2zero546(f2elm_t a); + +// GF(p546^2) negation, a = -a in GF(p546^2) +void fp2neg546(f2elm_t a); + +// GF(p546^2) addition, c = a+b in GF(p546^2) +extern void fp2add546(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p546^2) subtraction, c = a-b in GF(p546^2) +extern void fp2sub546(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p546^2) division by two, c = 
a/2 in GF(p546^2) +void fp2div2_546(const f2elm_t a, f2elm_t c); + +// Modular correction, a = a in GF(p546^2) +void fp2correction546(f2elm_t a); + +// GF(p546^2) squaring using Montgomery arithmetic, c = a^2 in GF(p546^2) +void fp2sqr546_mont(const f2elm_t a, f2elm_t c); + +// GF(p546^2) multiplication using Montgomery arithmetic, c = a*b in GF(p546^2) +void fp2mul546_mont(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p546^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) +void fp2inv546_mont(f2elm_t a); + + +#endif diff --git a/SIKE_sw/src/P546/generic/fp_generic.c b/SIKE_sw/src/P546/generic/fp_generic.c new file mode 100644 index 0000000..2cffafc --- /dev/null +++ b/SIKE_sw/src/P546/generic/fp_generic.c @@ -0,0 +1,259 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: portable modular arithmetic for P546 +*********************************************************************************************/ + +#include "../P546_internal.h" +#include "../../internal.h" + +// Global constants +extern const uint64_t p546[NWORDS64_FIELD]; +extern const uint64_t p546p1[NWORDS64_FIELD]; +extern const uint64_t p546x2[NWORDS64_FIELD]; +extern const uint64_t p546x4[NWORDS64_FIELD]; + + +__inline void mp_sub546_p2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. 
+ unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p546x2)[i], borrow, c[i]); + } +} + + +__inline void mp_sub546_p4(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p546x4)[i], borrow, c[i]); + } +} + + +__inline void fpadd546(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p546. + // Inputs: a, b in [0, 2*p546-1] + // Output: c in [0, 2*p546-1] + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p546x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p546x2)[i] & mask, carry, c[i]); + } +} + + +__inline void fpsub546(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p546. + // Inputs: a, b in [0, 2*p546-1] + // Output: c in [0, 2*p546-1] + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p546x2)[i] & mask, borrow, c[i]); + } +} + + +__inline void fpneg546(digit_t* a) +{ // Modular negation, a = -a mod p546. 
+ // Input/output: a in [0, 2*p546-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p546x2)[i], a[i], borrow, a[i]); + } +} + + +void fpdiv2_546(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p546. + // Input : a in [0, 2*p546-1] + // Output: c in [0, 2*p546-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p546 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p546)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection546(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p546-1] to [0, p546-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p546)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p546)[i] & mask, borrow, a[i]); + } +} + + +void digit_x_digit(const digit_t a, const digit_t b, digit_t* c) +{ // Digit multiplication, digit * digit -> 2-digit result + register digit_t al, ah, bl, bh, temp; + digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4); + + al = a & mask_low; // Low part + ah = a >> (sizeof(digit_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(digit_t) * 4); + + albl = al*bl; + albh = al*bh; + ahbl = ah*bl; + ahbh = ah*bh; + c[0] = albl & mask_low; // C00 + + res1 = albl >> (sizeof(digit_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(digit_t) * 4); + c[0] ^= temp << (sizeof(digit_t) * 4); // C01 + + res1 = ahbl >> (sizeof(digit_t) * 4); + res2 = albh >> (sizeof(digit_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + c[1] = temp & mask_low; // C10 + carry = temp 
& mask_high; + c[1] ^= (ahbh & mask_high) + carry; // C11 +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. + unsigned int i, j; + digit_t t = 0, u = 0, v = 0, UV[2]; + unsigned int carry = 0; + + for (i = 0; i < nwords; i++) { + for (j = 0; j <= i; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = nwords; i < 2*nwords-1; i++) { + for (j = i-nwords+1; j < nwords; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + c[2*nwords-1] = v; +} + + +void rdc_mont(digit_t* ma, digit_t* mc) +{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p546. + // mc = ma*R^-1 mod p546x2, where R = 2^576. + // If ma < 2^576*p546, the output mc is in the range [0, 2*p546-1]. + // ma is assumed to be in Montgomery representation. 
+ unsigned int i, j, carry, count = p546_ZERO_WORDS; + digit_t UV[2], t = 0, u = 0, v = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + mc[i] = 0; + } + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j < i; j++) { + if (j < (i-p546_ZERO_WORDS+1)) { + MUL(mc[j], ((digit_t*)p546p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { + if (count > 0) { + count -= 1; + } + for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { + if (j < (NWORDS_FIELD-count)) { + MUL(mc[j], ((digit_t*)p546p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i-NWORDS_FIELD] = v; + v = u; + u = t; + t = 0; + } + ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); + mc[NWORDS_FIELD-1] = v; +} \ No newline at end of file diff --git a/SIKE_sw/src/P610/AMD64/fp_x64.c b/SIKE_sw/src/P610/AMD64/fp_x64.c new file mode 100644 index 0000000..f53ff76 --- /dev/null +++ b/SIKE_sw/src/P610/AMD64/fp_x64.c @@ -0,0 +1,729 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: modular arithmetic optimized for x64 platforms for P610 +*********************************************************************************************/ + +#include "../P610_internal.h" +#include "../../internal.h" + +// Global constants +extern const uint64_t p610[NWORDS_FIELD]; +extern const uint64_t p610p1[NWORDS_FIELD]; +extern const uint64_t p610x2[NWORDS_FIELD]; +extern const uint64_t p610x4[NWORDS_FIELD]; + + +__inline void 
mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p610x2)[i], borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + mp_sub610_p2_asm(a, b, c); + +#endif +} + + +__inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p610x4)[i], borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + mp_sub610_p4_asm(a, b, c); + +#endif +} + + +__inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p610. + // Inputs: a, b in [0, 2*p610-1] + // Output: c in [0, 2*p610-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p610x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p610x2)[i] & mask, carry, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + fpadd610_asm(a, b, c); + +#endif +} + + +__inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p610. 
+ // Inputs: a, b in [0, 2*p610-1] + // Output: c in [0, 2*p610-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p610x2)[i] & mask, borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + fpsub610_asm(a, b, c); + +#endif +} + + +__inline void fpneg610(digit_t* a) +{ // Modular negation, a = -a mod p610. + // Input/output: a in [0, 2*p610-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p610x2)[i], a[i], borrow, a[i]); + } +} + + +void fpdiv2_610(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p610. + // Input : a in [0, 2*p610-1] + // Output: c in [0, 2*p610-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p610 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p610)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection610(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p610-1] to [0, p610-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p610)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p610)[i] & mask, borrow, a[i]); + } +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords. NOTE: nwords is unused here ((void)nwords); the unrolled OS_WIN path is hard-wired to 10-word operands and a 20-word result. 
+ + (void)nwords; + +#if (OS_TARGET == OS_WIN) + digit_t t = 0; + uint128_t uv = {0}; + unsigned int carry = 0; + + MULADD128(a[0], b[0], uv, carry, uv); + t += carry; + c[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[0], uv, carry, uv); + t += carry; + c[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[0], uv, carry, uv); + t += carry; + c[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[0], uv, carry, uv); + t += carry; + c[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[0], uv, carry, uv); + t += carry; + c[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[5], uv, carry, uv); + t += carry; + MULADD128(a[4], b[1], uv, carry, uv); + t += carry; + MULADD128(a[3], b[2], uv, carry, uv); + t += carry; + MULADD128(a[2], b[3], uv, carry, uv); + t += carry; + MULADD128(a[1], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[0], uv, carry, uv); + t += carry; + c[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[1], uv, carry, uv); + t += carry; + MULADD128(a[4], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[4], uv, carry, uv); + t += carry; + MULADD128(a[1], b[5], uv, carry, uv); + t += carry; + MULADD128(a[6], b[0], uv, carry, uv); + t += carry; + c[6] = uv[0]; + uv[0] = 
uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[7], uv, carry, uv); + t += carry; + MULADD128(a[6], b[1], uv, carry, uv); + t += carry; + MULADD128(a[5], b[2], uv, carry, uv); + t += carry; + MULADD128(a[4], b[3], uv, carry, uv); + t += carry; + MULADD128(a[3], b[4], uv, carry, uv); + t += carry; + MULADD128(a[2], b[5], uv, carry, uv); + t += carry; + MULADD128(a[1], b[6], uv, carry, uv); + t += carry; + MULADD128(a[7], b[0], uv, carry, uv); + t += carry; + c[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[8], uv, carry, uv); + t += carry; + MULADD128(a[7], b[1], uv, carry, uv); + t += carry; + MULADD128(a[6], b[2], uv, carry, uv); + t += carry; + MULADD128(a[5], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[5], uv, carry, uv); + t += carry; + MULADD128(a[2], b[6], uv, carry, uv); + t += carry; + MULADD128(a[1], b[7], uv, carry, uv); + t += carry; + MULADD128(a[8], b[0], uv, carry, uv); + t += carry; + c[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[9], uv, carry, uv); + t += carry; + MULADD128(a[8], b[1], uv, carry, uv); + t += carry; + MULADD128(a[7], b[2], uv, carry, uv); + t += carry; + MULADD128(a[6], b[3], uv, carry, uv); + t += carry; + MULADD128(a[5], b[4], uv, carry, uv); + t += carry; + MULADD128(a[4], b[5], uv, carry, uv); + t += carry; + MULADD128(a[3], b[6], uv, carry, uv); + t += carry; + MULADD128(a[2], b[7], uv, carry, uv); + t += carry; + MULADD128(a[1], b[8], uv, carry, uv); + t += carry; + MULADD128(a[9], b[0], uv, carry, uv); + t += carry; + c[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[9], b[1], uv, carry, uv); + t += carry; + MULADD128(a[8], b[2], uv, carry, uv); + t += carry; + MULADD128(a[7], b[3], uv, carry, uv); + t += carry; + MULADD128(a[6], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[5], uv, carry, uv); + t += carry; + MULADD128(a[4], b[6], uv, carry, uv); + t += carry; + 
MULADD128(a[3], b[7], uv, carry, uv); + t += carry; + MULADD128(a[2], b[8], uv, carry, uv); + t += carry; + MULADD128(a[1], b[9], uv, carry, uv); + t += carry; + c[10] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[9], b[2], uv, carry, uv); + t += carry; + MULADD128(a[8], b[3], uv, carry, uv); + t += carry; + MULADD128(a[7], b[4], uv, carry, uv); + t += carry; + MULADD128(a[6], b[5], uv, carry, uv); + t += carry; + MULADD128(a[5], b[6], uv, carry, uv); + t += carry; + MULADD128(a[4], b[7], uv, carry, uv); + t += carry; + MULADD128(a[3], b[8], uv, carry, uv); + t += carry; + MULADD128(a[2], b[9], uv, carry, uv); + t += carry; + c[11] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[9], b[3], uv, carry, uv); + t += carry; + MULADD128(a[8], b[4], uv, carry, uv); + t += carry; + MULADD128(a[7], b[5], uv, carry, uv); + t += carry; + MULADD128(a[6], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[7], uv, carry, uv); + t += carry; + MULADD128(a[4], b[8], uv, carry, uv); + t += carry; + MULADD128(a[3], b[9], uv, carry, uv); + t += carry; + c[12] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[9], b[4], uv, carry, uv); + t += carry; + MULADD128(a[8], b[5], uv, carry, uv); + t += carry; + MULADD128(a[7], b[6], uv, carry, uv); + t += carry; + MULADD128(a[6], b[7], uv, carry, uv); + t += carry; + MULADD128(a[5], b[8], uv, carry, uv); + t += carry; + MULADD128(a[4], b[9], uv, carry, uv); + t += carry; + c[13] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[9], b[5], uv, carry, uv); + t += carry; + MULADD128(a[8], b[6], uv, carry, uv); + t += carry; + MULADD128(a[7], b[7], uv, carry, uv); + t += carry; + MULADD128(a[6], b[8], uv, carry, uv); + t += carry; + MULADD128(a[5], b[9], uv, carry, uv); + t += carry; + c[14] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[9], b[6], uv, carry, uv); + t += carry; + MULADD128(a[8], b[7], uv, carry, uv); + t += carry; + MULADD128(a[7], b[8], uv, carry, 
uv); + t += carry; + MULADD128(a[6], b[9], uv, carry, uv); + t += carry; + c[15] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[9], b[7], uv, carry, uv); + t += carry; + MULADD128(a[8], b[8], uv, carry, uv); + t += carry; + MULADD128(a[7], b[9], uv, carry, uv); + t += carry; + c[16] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[9], b[8], uv, carry, uv); + t += carry; + MULADD128(a[8], b[9], uv, carry, uv); + t += carry; + c[17] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + + MULADD128(a[9], b[9], uv, carry, uv); + c[18] = uv[0]; + c[19] = uv[1]; + +#elif (OS_TARGET == OS_LINUX) + + mul610_asm(a, b, c); + +#endif +} + + +void rdc_mont(digit_t* ma, digit_t* mc) +{ // Montgomery reduction exploiting special form of the prime. + // mc = ma*R^-1 mod p610x2, where R = 2^640. + // If ma < 2^640*p610, the output mc is in the range [0, 2*p610-1]. + // ma is assumed to be in Montgomery representation. + +#if (OS_TARGET == OS_WIN) + unsigned int carry; + digit_t t = 0; + uint128_t uv = {0}; + + mc[0] = ma[0]; + mc[1] = ma[1]; + mc[2] = ma[2]; + mc[3] = ma[3]; + MUL128(mc[0], ((digit_t*)p610p1)[4], uv); + ADDC(0, uv[0], ma[4], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + mc[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = 0; + + MULADD128(mc[0], ((digit_t*)p610p1)[5], uv, carry, uv); + MULADD128(mc[1], ((digit_t*)p610p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[5], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p610p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p610p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p610p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[6], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[6] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p610p1)[7], uv, carry, uv); + t += carry; + 
MULADD128(mc[1], ((digit_t*)p610p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p610p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p610p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[7], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p610p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p610p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p610p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p610p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p610p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[8], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p610p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p610p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p610p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p610p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p610p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p610p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[9], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[1], ((digit_t*)p610p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p610p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p610p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p610p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p610p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p610p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[10], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[0] = uv[0]; + 
uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[2], ((digit_t*)p610p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p610p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p610p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p610p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p610p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p610p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[11], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[3], ((digit_t*)p610p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p610p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p610p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p610p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p610p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p610p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[12], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[4], ((digit_t*)p610p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p610p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p610p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p610p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p610p1)[5], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p610p1)[4], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[13], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[5], ((digit_t*)p610p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p610p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p610p1)[7], uv, carry, uv); + t += 
carry; + MULADD128(mc[8], ((digit_t*)p610p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p610p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[14], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[6], ((digit_t*)p610p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p610p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p610p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p610p1)[6], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[15], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[7], ((digit_t*)p610p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p610p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p610p1)[7], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[16], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[6] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[8], ((digit_t*)p610p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p610p1)[8], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[17], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[9], ((digit_t*)p610p1)[9], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[18], carry, mc[8]); + ADDC(carry, uv[1], 0, carry, uv[1]); + ADDC(0, uv[1], ma[19], carry, mc[9]); + +#elif (OS_TARGET == OS_LINUX) + + rdc610_asm(ma, mc); + +#endif +} \ No newline at end of file diff --git a/SIKE_sw/src/P610/AMD64/fp_x64_asm.S b/SIKE_sw/src/P610/AMD64/fp_x64_asm.S new file mode 100644 index 0000000..a799504 --- /dev/null +++ b/SIKE_sw/src/P610/AMD64/fp_x64_asm.S @@ -0,0 +1,1314 @@ +//******************************************************************************************* +// 
SIDH: an efficient supersingular isogeny cryptography library +// Copyright (c) Microsoft Corporation +// +// Website: https://github.com/microsoft/PQCrypto-SIDH +// Released under MIT license +// +// Abstract: field arithmetic in x64 assembly for P610 on Linux +//******************************************************************************************* + +.intel_syntax noprefix + +// Format function and variable names for Mac OS X +#if defined(__APPLE__) + #define fmt(f) _##f +#else + #define fmt(f) f +#endif + +// Registers that are used for parameter passing: +#define reg_p1 rdi +#define reg_p2 rsi +#define reg_p3 rdx + + +.text +//*********************************************************************** +// Field addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.global fmt(fpadd610_asm) +fmt(fpadd610_asm): + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + mov rcx, [reg_p1+64] + mov rdi, [reg_p1+72] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + adc r14, [reg_p2+48] + adc r15, [reg_p2+56] + adc rcx, [reg_p2+64] + adc rdi, [reg_p2+72] + + mov rax, [rip+fmt(p610x2)] + sub r8, rax + mov rax, [rip+fmt(p610x2)+8] + sbb r9, rax + sbb r10, rax + sbb r11, rax + mov rax, [rip+fmt(p610x2)+32] + sbb r12, rax + mov rax, [rip+fmt(p610x2)+40] + sbb r13, rax + mov rax, [rip+fmt(p610x2)+48] + sbb r14, rax + mov rax, [rip+fmt(p610x2)+56] + sbb r15, rax + mov rax, [rip+fmt(p610x2)+64] + sbb rcx, rax + mov rax, [rip+fmt(p610x2)+72] + sbb rdi, rax + mov [reg_p3+64], rcx + mov [reg_p3+72], rdi + mov rax, 0 + sbb rax, 0 + + mov rsi, [rip+fmt(p610x2)] + and rsi, rax + mov rdi, [rip+fmt(p610x2)+8] + and rdi, rax + + add r8, rsi + adc r9, rdi 
+ adc r10, rdi + adc r11, rdi + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + setc cl + + mov rdi, [rip+fmt(p610x2)+32] + and rdi, rax + mov rsi, [rip+fmt(p610x2)+40] + and rsi, rax + mov r8, [rip+fmt(p610x2)+48] + and r8, rax + mov r9, [rip+fmt(p610x2)+56] + and r9, rax + mov r10, [rip+fmt(p610x2)+64] + and r10, rax + mov r11, [rip+fmt(p610x2)+72] + and r11, rax + + bt rcx, 0 + adc r12, rdi + adc r13, rsi + adc r14, r8 + adc r15, r9 + mov rsi, [reg_p3+64] + mov rdi, [reg_p3+72] + adc rsi, r10 + adc rdi, r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + mov [reg_p3+64], rsi + mov [reg_p3+72], rdi + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Field subtraction +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] +//*********************************************************************** +.global fmt(fpsub610_asm) +fmt(fpsub610_asm): + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + mov rcx, [reg_p1+64] + mov rdi, [reg_p1+72] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb r14, [reg_p2+48] + sbb r15, [reg_p2+56] + sbb rcx, [reg_p2+64] + sbb rdi, [reg_p2+72] + mov [reg_p3+64], rcx + mov [reg_p3+72], rdi + mov rax, 0 + sbb rax, 0 + + mov rsi, [rip+fmt(p610x2)] + and rsi, rax + mov rdi, [rip+fmt(p610x2)+8] + and rdi, rax + + add r8, rsi + adc r9, rdi + adc r10, rdi + adc r11, rdi + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + setc cl + + mov rdi, [rip+fmt(p610x2)+32] + and rdi, rax + mov rsi, [rip+fmt(p610x2)+40] + and rsi, rax + mov r8, [rip+fmt(p610x2)+48] + and r8, rax + mov r9, 
[rip+fmt(p610x2)+56] + and r9, rax + mov r10, [rip+fmt(p610x2)+64] + and r10, rax + mov r11, [rip+fmt(p610x2)+72] + and r11, rax + + bt rcx, 0 + adc r12, rdi + adc r13, rsi + adc r14, r8 + adc r15, r9 + mov rsi, [reg_p3+64] + mov rdi, [reg_p3+72] + adc rsi, r10 + adc rdi, r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + mov [reg_p3+64], rsi + mov [reg_p3+72], rdi + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +///////////////////////////////////////////////////////////////// MACRO +.macro SUB610_PX P0 + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + mov rax, [reg_p1+64] + mov rcx, [reg_p1+72] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb r14, [reg_p2+48] + sbb r15, [reg_p2+56] + sbb rax, [reg_p2+64] + sbb rcx, [reg_p2+72] + + mov rdi, [rip+\P0] + mov rsi, [rip+\P0+8] + add r8, rdi + adc r9, rsi + adc r10, rsi + adc r11, rsi + mov rdi, [rip+\P0+32] + mov rsi, [rip+\P0+40] + adc r12, rdi + adc r13, rsi + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov rdi, [rip+\P0+48] + mov rsi, [rip+\P0+56] + adc r14, rdi + adc r15, rsi + mov rdi, [rip+\P0+64] + mov rsi, [rip+\P0+72] + adc rax, rdi + adc rcx, rsi + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + mov [reg_p3+64], rax + mov [reg_p3+72], rcx + + pop r15 + pop r14 + pop r13 + pop r12 + .endm + + +//*********************************************************************** +// Multiprecision subtraction with correction with 2*p610 +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p610 +//*********************************************************************** +.global fmt(mp_sub610_p2_asm) +fmt(mp_sub610_p2_asm): + + 
SUB610_PX fmt(p610x2) + ret + + +//*********************************************************************** +// Multiprecision subtraction with correction with 4*p610 +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p610 +//*********************************************************************** +.global fmt(mp_sub610_p4_asm) +fmt(mp_sub610_p4_asm): + + SUB610_PX fmt(p610x4) + ret + + +#ifdef _MULX_ + +/////////////////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: memory pointers M0 and M1 +// Outputs: memory pointer C +// Temps: regs T0:T7 +/////////////////////////////////////////////////////////////////////////// +#ifdef _ADX_ + +.macro MUL320_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7 + mov rdx, \M0 + mulx \T0, \T1, \M1 + mulx \T2, \T3, 8\M1 + mov \C, \T1 // C0_final + xor rax, rax + mulx \T4, \T5, 16\M1 + adox \T0, \T3 + adox \T2, \T5 + mulx \T1, \T3, 24\M1 + adox \T4, \T3 + mulx \T5, \T6, 32\M1 + adox \T1, \T6 + adox \T5, rax + + mov rdx, 8\M0 + mulx \T6, \T7, \M1 + xor rax, rax + adcx \T0, \T7 + mov 8\C, \T0 // C1_final + adcx \T2, \T6 + mulx \T6, \T7, 8\M1 + adox \T2, \T7 + adcx \T4, \T6 + mulx \T0, \T6, 16\M1 + adox \T4, \T6 + adcx \T0, \T1 + mulx \T1, \T7, 24\M1 + adcx \T1, \T5 + adox \T0, \T7 + mulx \T5, \T6, 32\M1 + adcx \T5, rax + adox \T1, \T6 + adox \T5, rax + + mov rdx, 16\M0 + mulx \T6, \T7, \M1 + xor rax, rax + adcx \T2, \T7 + mov 16\C, \T2 // C2_final + adcx \T4, \T6 + mulx \T6, \T7, 8\M1 + adox \T4, \T7 + adcx \T0, \T6 + mulx \T2, \T6, 16\M1 + adox \T0, \T6 + adcx \T1, \T2 + mulx \T2, \T7, 24\M1 + adcx \T5, \T2 + adox \T1, \T7 + mulx \T2, \T6, 32\M1 + adcx \T2, rax + adox \T5, \T6 + adox \T2, rax + + mov rdx, 24\M0 + mulx \T6, \T7, \M1 + xor rax, rax + adcx \T4, \T7 + mov 24\C, \T4 // C3_final + adcx \T0, \T6 + mulx \T6, \T7, 8\M1 + adox \T0, \T7 + adcx \T1, \T6 + mulx \T4, \T6, 16\M1 + adox \T1, \T6 + adcx \T5, \T4 + mulx \T4, \T7, 24\M1 + adcx \T2, \T4 + adox \T5, 
\T7 + mulx \T4, \T6, 32\M1 + adcx \T4, rax + adox \T2, \T6 + adox \T4, rax + + mov rdx, 32\M0 + mulx \T6, \T7, \M1 + xor rax, rax + adcx \T0, \T7 + mov 32\C, \T0 // C4_final + adcx \T1, \T6 + mulx \T6, \T7, 8\M1 + adox \T1, \T7 + adcx \T5, \T6 + mulx \T0, \T6, 16\M1 + adox \T5, \T6 + adcx \T2, \T0 + mulx \T0, \T7, 24\M1 + adcx \T4, \T0 + adox \T2, \T7 + mulx \T0, \T6, 32\M1 + adcx \T0, rax + adox \T4, \T6 + adox \T0, rax + + mov 40\C, \T1 + mov 48\C, \T5 + mov 56\C, \T2 + mov 64\C, \T4 + mov 72\C, \T0 +.endm + +#else + +.macro MUL320_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7 + mov rdx, \M0 + mulx \T0, \T1, \M1 + mulx \T2, \T3, 8\M1 + mov \C, \T1 // C0_final + mulx \T4, \T5, 16\M1 + add \T0, \T3 + adc \T2, \T5 + mulx \T1, \T3, 24\M1 + adc \T3, \T4 + mulx \T5, \T6, 32\M1 + adc \T1, \T6 + adc \T5, 0 + + mov rdx, 8\M0 + mulx \T6, \T7, \M1 + add \T0, \T7 + mov 8\C, \T0 // C1_final + adc \T2, \T6 + mulx \T6, \T7, 8\M1 + adc \T3, \T6 + mulx \T0, \T4, 16\M1 + adc \T0, \T1 + mulx \T1, \T6, 24\M1 + adc \T5, \T1 + mulx \T1, rax, 32\M1 + adc \T1, 0 + + add \T2, \T7 + adc \T3, \T4 + adc \T0, \T6 + adc \T5, rax + adc \T1, 0 + + mov rdx, 16\M0 + mulx \T4, \T6, \M1 + add \T2, \T6 + mov 16\C, \T2 // C2_final + adc \T3, \T4 + mulx \T6, \T7, 8\M1 + adc \T0, \T6 + mulx \T2, \T4, 16\M1 + adc \T2, \T5 + mulx \T5, \T6, 24\M1 + adc \T1, \T5 + mulx \T5, rax, 32\M1 + adc \T5, 0 + + add \T3, \T7 + adc \T0, \T4 + adc \T2, \T6 + adc \T1, rax + adc \T5, 0 + + mov rdx, 24\M0 + mulx \T4, \T6, \M1 + add \T3, \T6 + mov 24\C, \T3 // C3_final + adc \T0, \T4 + mulx \T6, \T7, 8\M1 + adc \T2, \T6 + mulx \T3, \T4, 16\M1 + adc \T1, \T3 + mulx \T3, \T6, 24\M1 + adc \T3, \T5 + mulx \T5, rax, 32\M1 + adc \T5, 0 + + add \T0, \T7 + adc \T2, \T4 + adc \T1, \T6 + adc \T3, rax + adc \T5, 0 + + mov rdx, 32\M0 + mulx \T4, \T6, \M1 + add \T0, \T6 + mov 32\C, \T0 // C4_final + adc \T2, \T4 + mulx \T6, \T7, 8\M1 + adc \T1, \T6 + mulx \T0, \T4, 16\M1 + adc \T3, \T0 + mulx \T0, \T6, 24\M1 + adc \T0, \T5 + mulx 
\T5, rax, 32\M1 + adc \T5, 0 + + add \T2, \T7 + adc \T1, \T4 + adc \T3, \T6 + adc \T0, rax + adc \T5, 0 + mov 40\C, \T2 + mov 48\C, \T1 + mov 56\C, \T3 + mov 64\C, \T0 + mov 72\C, \T5 +.endm + +#endif + + +//***************************************************************************** +// 610-bit multiplication using Karatsuba (one level), schoolbook (two levels) +//***************************************************************************** +.global fmt(mul610_asm) +fmt(mul610_asm): + push r12 + push r13 + push r14 + push r15 + mov rcx, reg_p3 + + // [rsp] <- AH + AL, rax <- mask + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + push rbx + sub rsp, 112 + add r8, [reg_p1+40] + adc r9, [reg_p1+48] + adc r10, [reg_p1+56] + adc r11, [reg_p1+64] + adc r12, [reg_p1+72] + sbb rax, 0 + mov [rsp], r8 + mov [rsp+8], r9 + mov [rsp+16], r10 + mov [rsp+24], r11 + mov [rsp+32], r12 + + // [rsp+40] <- BH + BL, rdx <- mask + xor rdx, rdx + mov r8, [reg_p2] + mov r9, [reg_p2+8] + mov rbx, [reg_p2+16] + mov r13, [reg_p2+24] + mov r14, [reg_p2+32] + add r8, [reg_p2+40] + adc r9, [reg_p2+48] + adc rbx, [reg_p2+56] + adc r13, [reg_p2+64] + adc r14, [reg_p2+72] + sbb rdx, 0 + mov [rsp+40], r8 + mov [rsp+48], r9 + mov [rsp+56], rbx + mov [rsp+64], r13 + mov [rsp+72], r14 + + // [rcx] <- masked (BH + BL) + and r8, rax + and r9, rax + and rbx, rax + and r13, rax + and r14, rax + mov [rcx], r8 + mov [rcx+8], r9 + + // r8-r12 <- masked (AH + AL) + mov r8, [rsp] + mov r9, [rsp+8] + and r8, rdx + and r9, rdx + and r10, rdx + and r11, rdx + and r12, rdx + + // [rsp+80] <- masked (AH + AL) + masked (BH + BL) + mov rax, [rcx] + mov rdx, [rcx+8] + add r8, rax + adc r9, rdx + adc r10, rbx + adc r11, r13 + adc r12, r14 + mov [rsp+80], r8 + mov [rsp+88], r9 + mov [rsp+96], r10 + mov [rsp+104], r11 + + // [rcx] <- AL x BL + MUL320_SCHOOL [reg_p1], [reg_p2], [rcx], r8, r9, r10, r11, rbx, r13, r14, r15 // Result C0-C4 + + // 
[rcx+80] <- (AH+AL) x (BH+BL), low part + MUL320_SCHOOL [rsp], [rsp+40], [rcx+80], r8, r9, r10, r11, rbx, r13, r14, r15 + + // [rsp] <- AH x BH + MUL320_SCHOOL [reg_p1+40], [reg_p2+40], [rsp], r8, r9, r10, r11, rbx, r13, r14, r15 + + // r8-r12 <- (AH+AL) x (BH+BL), final step + mov r8, [rsp+80] + mov r9, [rsp+88] + mov r10, [rsp+96] + mov r11, [rsp+104] + mov rax, [rcx+120] + add r8, rax + mov rax, [rcx+128] + adc r9, rax + mov rax, [rcx+136] + adc r10, rax + mov rax, [rcx+144] + adc r11, rax + mov rax, [rcx+152] + adc r12, rax + + // rdi,rdx,rbx,r13,r14,r8-r12 <- (AH+AL) x (BH+BL) - ALxBL + mov rdi, [rcx+80] + sub rdi, [rcx] + mov rdx, [rcx+88] + sbb rdx, [rcx+8] + mov rbx, [rcx+96] + sbb rbx, [rcx+16] + mov r13, [rcx+104] + sbb r13, [rcx+24] + mov r14, [rcx+112] + sbb r14, [rcx+32] + sbb r8, [rcx+40] + sbb r9, [rcx+48] + sbb r10, [rcx+56] + sbb r11, [rcx+64] + sbb r12, [rcx+72] + + // rdi,rdx,rbx,r13,r14,r8-r12 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + sub rdi, [rsp] + sbb rdx, [rsp+8] + sbb rbx, [rsp+16] + sbb r13, [rsp+24] + sbb r14, [rsp+32] + sbb r8, [rsp+40] + sbb r9, [rsp+48] + sbb r10, [rsp+56] + sbb r11, [rsp+64] + sbb r12, [rsp+72] + + mov rax, [rcx+40] + add rax, rdi + mov [rcx+40], rax // Result C5-C9 + mov rax, [rcx+48] + adc rax, rdx + mov [rcx+48], rax + mov rax, [rcx+56] + adc rax, rbx + mov [rcx+56], rax + mov rax, [rcx+64] + adc rax, r13 + mov [rcx+64], rax + mov rax, [rcx+72] + adc rax, r14 + mov [rcx+72], rax + mov rax, [rsp] + adc r8, rax + mov [rcx+80], r8 // Result C10-C19 + mov rax, [rsp+8] + adc r9, rax + mov [rcx+88], r9 + mov rax, [rsp+16] + adc r10, rax + mov [rcx+96], r10 + mov rax, [rsp+24] + adc r11, rax + mov [rcx+104], r11 + mov rax, [rsp+32] + adc r12, rax + mov [rcx+112], r12 + mov r8, [rsp+40] + mov r9, [rsp+48] + mov r10, [rsp+56] + mov r11, [rsp+64] + mov r12, [rsp+72] + adc r8, 0 + adc r9, 0 + adc r10, 0 + adc r11, 0 + adc r12, 0 + add rsp, 112 + mov [rcx+120], r8 + mov [rcx+128], r9 + mov [rcx+136], r10 + mov [rcx+144], r11 + 
mov [rcx+152], r12 + + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret + +#else + +//*********************************************************************** +// Integer multiplication +// Based on Karatsuba method +// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2] +// NOTE: a=c or b=c are not allowed +//*********************************************************************** +.global fmt(mul610_asm) +fmt(mul610_asm): + + ret + +# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE" + +#endif + + +#ifdef _MULX_ + +///////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: memory pointers M0 and M1 +// Outputs: regs T0:T7 +// Temps: regs T8 +///////////////////////////////////////////////////////////////// + +#ifdef _ADX_ +.macro MUL128x384_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7, T8 + mov rdx, \M0 + mulx \T1, \T0, \M1 // T0 <- C0_final + mulx \T2, \T4, 8\M1 + xor rax, rax + mulx \T3, \T5, 16\M1 + adox \T1, \T4 + adox \T2, \T5 + mulx \T4, \T7, 24\M1 + adox \T3, \T7 + mulx \T5, \T6, 32\M1 + adox \T4, \T6 + mulx \T7, \T8, 40\M1 + adox \T5, \T8 + adox \T7, rax + + mov rdx, 8\M0 + mulx \T8, \T6, \M1 + adcx \T1, \T6 // T1 <- C1_final + adcx \T2, \T8 + mulx \T6, \T8, 8\M1 + adox \T2, \T8 + adcx \T3, \T6 + mulx \T6, \T8, 16\M1 + adox \T3, \T8 + adcx \T4, \T6 + mulx \T6, \T8, 24\M1 + adox \T4, \T8 + adcx \T5, \T6 + mulx \T6, \T8, 32\M1 + adox \T5, \T8 + adcx \T6, \T7 + mulx \T7, \T8, 40\M1 + adcx \T7, rax + adox \T6, \T8 + adox \T7, rax +.endm + +#else + +.macro MUL128x384_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7, T8 + mov rdx, \M0 + mulx \T1, \T0, \M1 // T0 <- C0_final + mulx \T2, \T3, 8\M1 + add \T1, \T3 + adc \T2, 0 + + mov rdx, 8\M0 + xor \T5, \T5 + mulx \T3, \T4, \M1 + add \T1, \T4 + adc \T2, \T3 + adc \T5, 0 + + xor \T6, \T6 + mulx \T3, \T4, 8\M1 + add \T2, \T4 + adc \T3, \T5 + adc \T6, 0 + + mov rdx, \M0 + mulx \T4, \T5, 16\M1 + add \T2, \T5 + adc \T3, \T4 + adc \T6, 0 + + xor \T7, \T7 + 
mulx \T4, \T5, 24\M1 + add \T3, \T5 + adc \T4, \T6 + adc \T7, 0 + + mov rdx, 8\M0 + mulx \T5, \T6, 16\M1 + add \T3, \T6 + adc \T4, \T5 + adc \T7, 0 + + xor \T6, \T6 + mulx \T5, \T8, 24\M1 + add \T4, \T8 + adc \T5, \T7 + adc \T6, 0 + + mov rdx, \M0 + mulx \T7, \T8, 32\M1 + add \T4, \T8 + adc \T5, \T7 + adc \T6, 0 + + xor \T7, \T7 + mulx \T8, rax, 40\M1 + add \T5, rax + adc \T6, \T8 + adc \T7, 0 + + mov rdx, 8\M0 + mulx \T8, rax, 32\M1 + add \T5, rax + adc \T6, \T8 + adc \T7, 0 + + mov rdx, 8\M0 + mulx \T8, rax, 40\M1 + add \T6, rax + adc \T7, \T8 +.endm +#endif + + +//************************************************************************************** +// Montgomery reduction +// Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015 +// Operation: c [reg_p2] = a [reg_p1] +// NOTE: a=c is not allowed; the input a [reg_p1] is overwritten with intermediate results +//************************************************************************************** +.global fmt(rdc610_asm) +fmt(rdc610_asm): + push r12 + push r13 + push r14 + push r15 + + // a[0-1] x p610p1_nz --> result: r8:r15 + MUL128x384_SCHOOL [reg_p1], [rip+fmt(p610p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx + + xor rcx, rcx + add r8, [reg_p1+32] + adc r9, [reg_p1+40] + adc r10, [reg_p1+48] + adc r11, [reg_p1+56] + adc r12, [reg_p1+64] + adc r13, [reg_p1+72] + adc r14, [reg_p1+80] + adc r15, [reg_p1+88] + adc rcx, [reg_p1+96] + mov [reg_p1+32], r8 + mov [reg_p1+40], r9 + mov [reg_p1+48], r10 + mov [reg_p1+56], r11 + mov [reg_p1+64], r12 + mov [reg_p1+72], r13 + mov [reg_p1+80], r14 + mov [reg_p1+88], r15 + mov [reg_p1+96], rcx + mov r8, [reg_p1+104] + mov r9, [reg_p1+112] + mov r10, [reg_p1+120] + mov r11, [reg_p1+128] + mov r12, [reg_p1+136] + mov r13, [reg_p1+144] + mov r14, [reg_p1+152] + adc r8, 0 + adc r9, 0 + adc r10, 0 + adc r11, 0 + adc r12, 0 + adc r13, 0 + adc r14, 0 + mov [reg_p1+104], r8 + mov [reg_p1+112], r9 + mov [reg_p1+120], r10 + mov [reg_p1+128], r11 + mov [reg_p1+136], r12 + mov [reg_p1+144], r13 + mov
[reg_p1+152], r14 + + // a[2-3] x p610p1_nz --> result: r8:r15 + MUL128x384_SCHOOL [reg_p1+16], [rip+fmt(p610p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx + + xor rcx, rcx + add r8, [reg_p1+48] + adc r9, [reg_p1+56] + adc r10, [reg_p1+64] + adc r11, [reg_p1+72] + adc r12, [reg_p1+80] + adc r13, [reg_p1+88] + adc r14, [reg_p1+96] + adc r15, [reg_p1+104] + adc rcx, [reg_p1+112] + mov [reg_p1+48], r8 + mov [reg_p1+56], r9 + mov [reg_p1+64], r10 + mov [reg_p1+72], r11 + mov [reg_p1+80], r12 + mov [reg_p1+88], r13 + mov [reg_p1+96], r14 + mov [reg_p1+104], r15 + mov [reg_p1+112], rcx + mov r8, [reg_p1+120] + mov r9, [reg_p1+128] + mov r10, [reg_p1+136] + mov r11, [reg_p1+144] + mov r12, [reg_p1+152] + adc r8, 0 + adc r9, 0 + adc r10, 0 + adc r11, 0 + adc r12, 0 + mov [reg_p1+120], r8 + mov [reg_p1+128], r9 + mov [reg_p1+136], r10 + mov [reg_p1+144], r11 + mov [reg_p1+152], r12 + + // a[4-5] x p610p1_nz --> result: r8:r15 + MUL128x384_SCHOOL [reg_p1+32], [rip+fmt(p610p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx + + xor rcx, rcx + add r8, [reg_p1+64] + adc r9, [reg_p1+72] + adc r10, [reg_p1+80] + adc r11, [reg_p1+88] + adc r12, [reg_p1+96] + adc r13, [reg_p1+104] + adc r14, [reg_p1+112] + adc r15, [reg_p1+120] + adc rcx, [reg_p1+128] + mov [reg_p1+64], r8 + mov [reg_p1+72], r9 + mov [reg_p1+80], r10 + mov [reg_p1+88], r11 + mov [reg_p1+96], r12 + mov [reg_p1+104], r13 + mov [reg_p1+112], r14 + mov [reg_p1+120], r15 + mov [reg_p1+128], rcx + mov r8, [reg_p1+136] + mov r9, [reg_p1+144] + mov r10, [reg_p1+152] + adc r8, 0 + adc r9, 0 + adc r10, 0 + mov [reg_p1+136], r8 + mov [reg_p1+144], r9 + mov [reg_p1+152], r10 + + // a[6-7] x p610p1_nz --> result: r8:r15 + MUL128x384_SCHOOL [reg_p1+48], [rip+fmt(p610p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx + + xor rcx, rcx + add r8, [reg_p1+80] + adc r9, [reg_p1+88] + adc r10, [reg_p1+96] + adc r11, [reg_p1+104] + adc r12, [reg_p1+112] + adc r13, [reg_p1+120] + adc r14, [reg_p1+128] + adc r15, [reg_p1+136] + adc 
rcx, [reg_p1+144] + mov [reg_p2], r8 // C0_final + mov [reg_p2+8], r9 // C1_final + mov [reg_p1+96], r10 + mov [reg_p1+104], r11 + mov [reg_p1+112], r12 + mov [reg_p1+120], r13 + mov [reg_p1+128], r14 + mov [reg_p1+136], r15 + mov [reg_p1+144], rcx + mov r8, [reg_p1+152] + adc r8, 0 + mov [reg_p1+152], r8 + + // a[8-9] x p610p1_nz --> result: r8:r15 + MUL128x384_SCHOOL [reg_p1+64], [rip+fmt(p610p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx + + // Final result C2:C9 + add r8, [reg_p1+96] + adc r9, [reg_p1+104] + adc r10, [reg_p1+112] + adc r11, [reg_p1+120] + adc r12, [reg_p1+128] + adc r13, [reg_p1+136] + adc r14, [reg_p1+144] + adc r15, [reg_p1+152] + mov [reg_p2+16], r8 + mov [reg_p2+24], r9 + mov [reg_p2+32], r10 + mov [reg_p2+40], r11 + mov [reg_p2+48], r12 + mov [reg_p2+56], r13 + mov [reg_p2+64], r14 + mov [reg_p2+72], r15 + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + #else + +//*********************************************************************** +// Montgomery reduction +// Based on comba method +// Operation: c [reg_p2] = a [reg_p1] +// NOTE: a=c is not allowed +//*********************************************************************** +.global fmt(rdc610_asm) +fmt(rdc610_asm): + + ret + +# error "CONFIGURATION NOT SUPPORTED. 
TRY USE_MULX=TRUE" + + #endif + + +//*********************************************************************** +// 610-bit multiprecision addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.global fmt(mp_add610_asm) +fmt(mp_add610_asm): + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov rax, [reg_p1+32] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc rax, [reg_p2+32] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], rax + + mov r8, [reg_p1+40] + mov r9, [reg_p1+48] + mov r10, [reg_p1+56] + mov r11, [reg_p1+64] + mov rax, [reg_p1+72] + adc r8, [reg_p2+40] + adc r9, [reg_p2+48] + adc r10, [reg_p2+56] + adc r11, [reg_p2+64] + adc rax, [reg_p2+72] + mov [reg_p3+40], r8 + mov [reg_p3+48], r9 + mov [reg_p3+56], r10 + mov [reg_p3+64], r11 + mov [reg_p3+72], rax + ret + + +//*********************************************************************** +// 2x610-bit multiprecision subtraction/addition +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2].
If c < 0, add p610*2^640 +//*********************************************************************** +.global fmt(mp_subadd610x2_asm) +fmt(mp_subadd610x2_asm): + push r12 + push r13 + push r14 + push r15 + push rbx + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov rcx, [reg_p1+32] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb rcx, [reg_p2+32] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], rcx + + mov r8, [reg_p1+40] + mov r9, [reg_p1+48] + mov r10, [reg_p1+56] + mov r11, [reg_p1+64] + mov rcx, [reg_p1+72] + sbb r8, [reg_p2+40] + sbb r9, [reg_p2+48] + sbb r10, [reg_p2+56] + sbb r11, [reg_p2+64] + sbb rcx, [reg_p2+72] + mov [reg_p3+40], r8 + mov [reg_p3+48], r9 + mov [reg_p3+56], r10 + mov [reg_p3+64], r11 + mov [reg_p3+72], rcx + + mov r8, [reg_p1+80] + mov r9, [reg_p1+88] + mov r10, [reg_p1+96] + mov r11, [reg_p1+104] + mov rcx, [reg_p1+112] + sbb r8, [reg_p2+80] + sbb r9, [reg_p2+88] + sbb r10, [reg_p2+96] + sbb r11, [reg_p2+104] + sbb rcx, [reg_p2+112] + mov [reg_p3+80], r8 + mov [reg_p3+88], r9 + mov [reg_p3+96], r10 + mov [reg_p3+104], r11 + mov [reg_p3+112], rcx + + mov r8, [reg_p1+120] + mov r9, [reg_p1+128] + mov r10, [reg_p1+136] + mov r11, [reg_p1+144] + mov rcx, [reg_p1+152] + sbb r8, [reg_p2+120] + sbb r9, [reg_p2+128] + sbb r10, [reg_p2+136] + sbb r11, [reg_p2+144] + sbb rcx, [reg_p2+152] + sbb rax, 0 + + // Add p610 anded with the mask in rax + mov r12, [rip+fmt(p610)] + mov r13, [rip+fmt(p610)+32] + mov r14, [rip+fmt(p610)+40] + mov r15, [rip+fmt(p610)+48] + mov rdi, [rip+fmt(p610)+56] + mov rsi, [rip+fmt(p610)+64] + mov rbx, [rip+fmt(p610)+72] + and r12, rax + and r13, rax + and r14, rax + and r15, rax + and rdi, rax + and rsi, rax + and rbx, rax + mov rax, [reg_p3+80] + add rax, r12 + mov [reg_p3+80], rax + mov rax, [reg_p3+88] + adc rax, r12 + mov [reg_p3+88], rax + mov rax, [reg_p3+96] + 
adc rax, r12 + mov [reg_p3+96], rax + adc r12, [reg_p3+104] + adc r13, [reg_p3+112] + mov [reg_p3+104], r12 + mov [reg_p3+112], r13 + adc r8, r14 + adc r9, r15 + adc r10, rdi + adc r11, rsi + adc rcx, rbx + + mov [reg_p3+120], r8 + mov [reg_p3+128], r9 + mov [reg_p3+136], r10 + mov [reg_p3+144], r11 + mov [reg_p3+152], rcx + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Double 2x610-bit multiprecision subtraction +// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2] +//*********************************************************************** +.global fmt(mp_dblsub610x2_asm) +fmt(mp_dblsub610x2_asm): + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p3] + mov r9, [reg_p3+8] + mov r10, [reg_p3+16] + mov r11, [reg_p3+24] + mov r12, [reg_p3+32] + mov r13, [reg_p3+40] + mov r14, [reg_p3+48] + mov r15, [reg_p3+56] + sub r8, [reg_p1] + sbb r9, [reg_p1+8] + sbb r10, [reg_p1+16] + sbb r11, [reg_p1+24] + sbb r12, [reg_p1+32] + sbb r13, [reg_p1+40] + sbb r14, [reg_p1+48] + sbb r15, [reg_p1+56] + setc al + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb r14, [reg_p2+48] + sbb r15, [reg_p2+56] + setc cl + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + + mov r8, [reg_p3+64] + mov r9, [reg_p3+72] + mov r10, [reg_p3+80] + mov r11, [reg_p3+88] + mov r12, [reg_p3+96] + mov r13, [reg_p3+104] + mov r14, [reg_p3+112] + mov r15, [reg_p3+120] + bt rax, 0 + sbb r8, [reg_p1+64] + sbb r9, [reg_p1+72] + sbb r10, [reg_p1+80] + sbb r11, [reg_p1+88] + sbb r12, [reg_p1+96] + sbb r13, [reg_p1+104] + sbb r14, [reg_p1+112] + sbb r15, [reg_p1+120] + setc al + bt rcx, 0 + sbb r8, [reg_p2+64] + sbb r9, [reg_p2+72] + sbb r10, [reg_p2+80] + sbb r11, [reg_p2+88] + sbb r12, 
[reg_p2+96] + sbb r13, [reg_p2+104] + sbb r14, [reg_p2+112] + sbb r15, [reg_p2+120] + setc cl + mov [reg_p3+64], r8 + mov [reg_p3+72], r9 + mov [reg_p3+80], r10 + mov [reg_p3+88], r11 + mov [reg_p3+96], r12 + mov [reg_p3+104], r13 + mov [reg_p3+112], r14 + mov [reg_p3+120], r15 + + mov r8, [reg_p3+128] + mov r9, [reg_p3+136] + mov r10, [reg_p3+144] + mov r11, [reg_p3+152] + bt rax, 0 + sbb r8, [reg_p1+128] + sbb r9, [reg_p1+136] + sbb r10, [reg_p1+144] + sbb r11, [reg_p1+152] + bt rcx, 0 + sbb r8, [reg_p2+128] + sbb r9, [reg_p2+136] + sbb r10, [reg_p2+144] + sbb r11, [reg_p2+152] + mov [reg_p3+128], r8 + mov [reg_p3+136], r9 + mov [reg_p3+144], r10 + mov [reg_p3+152], r11 + + pop r15 + pop r14 + pop r13 + pop r12 + ret \ No newline at end of file diff --git a/SIKE_sw/src/P610/P610.c b/SIKE_sw/src/P610/P610.c new file mode 100644 index 0000000..cec3d9b --- /dev/null +++ b/SIKE_sw/src/P610/P610.c @@ -0,0 +1,140 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: supersingular isogeny parameters and generation of functions for P610 +*********************************************************************************************/ + +#include "P610_api.h" +#include "P610_internal.h" +#include "../internal.h" + + +// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points: +// -------------------------------------------------------------------------------------------------- +// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). +// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position. 
+// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. +// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32. +// For example, a 610-bit field element is represented with Ceil(610 / 64) = 10 64-bit digits or Ceil(610 / 32) = 20 32-bit digits. + +// +// Curve isogeny system "SIDHp610". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p610^2), where A=6, B=1, C=1 and p610 = 2^305*3^192-1 +// + +const uint64_t p610[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x6E01FFFFFFFFFFFF, + 0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768 }; +const uint64_t p610x2[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDC03FFFFFFFFFFFF, + 0x62F09BD154B5605C, 0x35CF7E8A091FF357, 0x64AB65F421884A55, 0x03202184A3CFB119, 0x00000004F7ED4ED1 }; +const uint64_t p610x4[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xB807FFFFFFFFFFFF, + 0xC5E137A2A96AC0B9, 0x6B9EFD14123FE6AE, 0xC956CBE8431094AA, 0x06404309479F6232, 0x00000009EFDA9DA2 }; +const uint64_t p610p1[NWORDS64_FIELD] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x6E02000000000000, + 0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768 }; +const uint64_t p610x16p[2*NWORDS64_FIELD] = { 0x0000000000000010, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x3FC0000000000000, + 0xD0F642EAB4A9FA32, 0xA308175F6E00CA89, 0xB549A0BDE77B5AAC, 0xCDFDE7B5C304EE69, 0x7FDB7FF0812B12EF, + 0xE09BA529B9FE1167, 0xD249C196DAB8CD7F, 0xD4E22754A3F20928, 0x97825638B19A7CCE, 0x05E04550FC4CCE0D, + 0x8FB5DA1152CDE50C, 0xF9649BA3EA408644, 0x4473C93E6441063D, 0xBE190269D1337B7B, 
0x0000000000000062 }; +// Order of Alice's subgroup +const uint64_t Alice_order[NWORDS64_ORDER] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0002000000000000 }; +// Order of Bob's subgroup +const uint64_t Bob_order[NWORDS64_ORDER] = { 0x26F4552D58173701, 0xDFA28247FCD5D8BC, 0xD97D086212954D73, 0x086128F3EC46592A, 0x00013DFB53B440C8 }; +// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i} in GF(p610^2), expressed in Montgomery representation +const uint64_t A_gen[6 * NWORDS64_FIELD] = { 0x5019EC96A75AC57A, 0x8AEA0E717712C6F1, 0x03C067C819D29E5E, 0x59F454425FE307D9, 0x6D29215D9AD5E6D4, + 0xD8C5A27CDC9DD34A, 0x972DC274DAB435B3, 0x82A597C70A80E10F, 0x48175986EFED547F, 0x00000000671A3592, // XPA0 + 0xE4BA9CC3EEEC53F4, 0xBD34E4FEDB0132D3, 0x1B7125C87BEE960C, 0x25D615BF3CFAA355, 0xFC8EC20DC367D66A, + 0xB44F3FD1CC73289C, 0xD84BF51195C2E012, 0x38D7C756EB370F48, 0xBBC236249F94F72A, 0x000000013020CC63, // XPA1 + 0x1D7C945D3DBCC38C, 0x9A5F7C12CA8BA5B9, 0x1E8F87985B01CBE3, 0xD2CABF82F5BC5235, 0x3BDE474ECCA9FAA2, + 0xB98CD975DF9FB0A8, 0x444E4464B9C67790, 0xCB2E888565CE6AD9, 0xDB64FFE2A1C350E2, 0x00000001D7532756, // XQA0 + 0x1E8B3AA2382C9079, 0x28CB31E08A943C00, 0xE04D02266E8A63E1, 0x84A2D260214EF65F, 0xD5933DA25018E226, + 0xBC8BF038928C4BA9, 0x91E9D0CB7EAF58A9, 0x04A4627B75E008E1, 0x58CEF27583E50C2E, 0x00000002170DDF44, // XQA1 + 0x261DD0782CEC958D, 0xC25B3AE64BBC0311, 0x9F21B8A8981B15FE, 0xA3C0B52CD5FFC45B, 0x5D2E65A016702C6A, + 0x8C5586CA98722EDE, 0x61490A967A6B4B1A, 0xFA64E30231F719AF, 0x9CEAB8B6301BB2DF, 0x00000000CF5AEA7D, // XRA0 + 0xB980435A77B912C0, 0x2B4A97F70E0FC873, 0x415C7FA4DE96F43C, 0xE5EED95643E443FD, 0xCBE18DB57C51B354, + 0x51C96C3FFABD2D46, 0x5C14637B9A5765D6, 0x45D2369C4D0199A5, 0x25A1F9C5BBF1E683, 0x000000025AD7A11B }; // XRA1 +// Bob's generator values {XPB0, XQB0, XRB0 + XRB1*i} in GF(p610^2), expressed in Montgomery representation +const uint64_t B_gen[6 * NWORDS64_FIELD] = { 0xC6C8E180E41884BA,
0x2161D2F4FBC32B95, 0xCBF83091BDB34092, 0xD742CC0AD4CC7E38, 0x61A1FA7E1B14FBD7, + 0xF0E5FC70137597C4, 0x1F0C8F2585E20B1F, 0xC68E44A1C032A4C2, 0xE3C65FB8AF155A0D, 0x00000001409EE8D5, // XPB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XPB1 + 0xF586DB4A16BE1880, 0x712F10D95E6C65A9, 0x9D5AAC3B83584B87, 0x4ECDAA98182C8261, 0xAD7D4C15588FD230, + 0x4197C54E96B7D926, 0xED15BB13E8C588ED, 0x3E299AEAD5AAD7C7, 0xF36B25F1BD579F79, 0x000000021CE65B5B, // XQB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XQB1 + 0x7A87897A0C4C3FD7, 0x3C1879ECD4D33D76, 0x595C28A36FFBA1A0, 0xF53FF66A2A7FD0FB, 0xB39F5A91230E56FA, + 0x81F21610DA3EA8B5, 0xEBB3B9A627428A90, 0x8661123B35748010, 0xE196173B9C48781D, 0x00000002198166AC, // XRB0 + 0x5E3CC79B37006D6A, 0xE0358A9AB2EA7923, 0x3B725CB595180951, 0x0724637F1DD0C191, 0x7BB031B67DAB9D19, + 0x53CCB8BECEDD3435, 0xEE5DF7FFEBFA7A0A, 0x899EDB7D8B9694C4, 0x0CA38EB4AE5506B6, 0x00000001489DE1CD }; // XRB1 +// Montgomery constant Montgomery_R2 = (2^640)^2 mod p610 +const uint64_t Montgomery_R2[NWORDS64_FIELD] = { 0xE75F5D201A197727, 0xE0B85963B627392E, 0x6BC1707818DE493D, 0xDC7F419940D1A0C5, 0x7358030979EDE54A, + 0x84F4BEBDEED75A5C, 0x7ECCA66E13427B47, 0xC5BB4E65280080B3, 0x7019950F516DA19A, 0x000000008E290FF3 }; +// Value one in Montgomery representation +const uint64_t Montgomery_one[NWORDS64_FIELD] = { 0x00000000670CC8E6, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x9A34000000000000, + 0x4D99C2BD28717A3F, 0x0A4A1839A323D41C, 0xD2B62215D06AD1E2, 0x1369026E862CAF3D, 0x000000010894E964 }; + + +// Fixed parameters for isogeny tree computation +const unsigned int strat_Alice[MAX_Alice-1] = { +67, 37, 21, 12, 7, 4, 2, 1, 1, 
2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, +2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 16, 9, +5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, +1, 4, 2, 1, 1, 2, 1, 1, 33, 16, 8, 5, 2, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 2, 1, +1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, 2, 1, 1, +4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 }; + +const unsigned int strat_Bob[MAX_Bob-1] = { +86, 48, 27, 15, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, +1, 3, 2, 1, 1, 1, 1, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, +1, 1, 2, 1, 1, 1, 21, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, +1, 1, 2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 38, +21, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, +9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 17, 9, 5, 3, 2, 1, 1, +1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, +1, 1 }; + +// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions +#define fpcopy fpcopy610 +#define fpzero fpzero610 +#define fpadd fpadd610 +#define fpsub fpsub610 +#define fpneg fpneg610 +#define fpdiv2 fpdiv2_610 +#define fpcorrection fpcorrection610 +#define fpmul_mont fpmul610_mont +#define fpsqr_mont fpsqr610_mont +#define fpinv_mont fpinv610_mont +#define fpinv_chain_mont fpinv610_chain_mont +#define fp2copy fp2copy610 +#define fp2zero fp2zero610 +#define fp2add fp2add610 +#define fp2sub fp2sub610 +#define mp_sub_p2 mp_sub610_p2 +#define mp_sub_p4 mp_sub610_p4 +#define sub_p4 mp_sub_p4 +#define fp2neg fp2neg610 +#define fp2div2 fp2div2_610 +#define fp2correction fp2correction610 +#define fp2mul_mont fp2mul610_mont +#define fp2sqr_mont fp2sqr610_mont +#define fp2inv_mont fp2inv610_mont +#define fp2inv_mont_bingcd fp2inv610_mont_bingcd +#define fpequal_non_constant_time 
fpequal610_non_constant_time +#define mp_add_asm mp_add610_asm +#define mp_subaddx2_asm mp_subadd610x2_asm +#define mp_dblsubx2_asm mp_dblsub610x2_asm +#define crypto_kem_keypair crypto_kem_keypair_SIKEp610 +#define crypto_kem_enc crypto_kem_enc_SIKEp610 +#define crypto_kem_dec crypto_kem_dec_SIKEp610 +#define random_mod_order_A random_mod_order_A_SIDHp610 +#define random_mod_order_B random_mod_order_B_SIDHp610 +#define EphemeralKeyGeneration_A EphemeralKeyGeneration_A_SIDHp610 +#define EphemeralKeyGeneration_B EphemeralKeyGeneration_B_SIDHp610 +#define EphemeralSecretAgreement_A EphemeralSecretAgreement_A_SIDHp610 +#define EphemeralSecretAgreement_B EphemeralSecretAgreement_B_SIDHp610 + +#include "../fpx.c" +#include "../ec_isogeny.c" +#include "../sidh.c" +#include "../sike.c" \ No newline at end of file diff --git a/SIKE_sw/src/P610/P610_api.h b/SIKE_sw/src/P610/P610_api.h new file mode 100644 index 0000000..40bec58 --- /dev/null +++ b/SIKE_sw/src/P610/P610_api.h @@ -0,0 +1,112 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: API header file for P610 +*********************************************************************************************/ + +#ifndef P610_API_H +#define P610_API_H + + +/*********************** Key encapsulation mechanism API ***********************/ + +#define CRYPTO_SECRETKEYBYTES 524 // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes +#define CRYPTO_PUBLICKEYBYTES 462 +#define CRYPTO_BYTES 24 +#define CRYPTO_CIPHERTEXTBYTES 486 // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes + +// Algorithm name +#define CRYPTO_ALGNAME "SIKEp610" + +// SIKE's key generation +// It produces a private key sk and computes the public key pk. 
+// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 524 bytes) +// public key pk (CRYPTO_PUBLICKEYBYTES = 462 bytes) +int crypto_kem_keypair_SIKEp610(unsigned char *pk, unsigned char *sk); + +// SIKE's encapsulation +// Input: public key pk (CRYPTO_PUBLICKEYBYTES = 462 bytes) +// Outputs: shared secret ss (CRYPTO_BYTES = 24 bytes) +// ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 486 bytes) +int crypto_kem_enc_SIKEp610(unsigned char *ct, unsigned char *ss, const unsigned char *pk); + +// SIKE's decapsulation +// Input: secret key sk (CRYPTO_SECRETKEYBYTES = 524 bytes) +// ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 486 bytes) +// Outputs: shared secret ss (CRYPTO_BYTES = 24 bytes) +int crypto_kem_dec_SIKEp610(unsigned char *ss, const unsigned char *ct, const unsigned char *sk); + + +// Encoding of keys for KEM-based isogeny system "SIKEp610" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p610) are encoded in 77 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p610^2), where a and b are defined over GF(p610), are encoded as {a, b}, with a in the lowest memory portion. +// +// Private keys sk consist of the concatenation of a 24-byte random value, a value in the range [0, 2^Floor(Log(2,3^192))-1] and the public key pk. In the SIKE API, +// private keys are encoded in 524 octets in little endian format. +// Public keys pk consist of 3 elements in GF(p610^2). In the SIKE API, pk is encoded in 462 octets. +// Ciphertexts ct consist of the concatenation of a public key value and a 24-byte value. In the SIKE API, ct is encoded in 462 + 24 = 486 octets. +// Shared keys ss consist of a value of 24 octets. 
+ + +/*********************** Ephemeral key exchange API ***********************/ + +#define SIDH_SECRETKEYBYTES_A 39 +#define SIDH_SECRETKEYBYTES_B 38 +#define SIDH_PUBLICKEYBYTES 462 +#define SIDH_BYTES 154 + +// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys. +// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016. +// Extended version available at: http://eprint.iacr.org/2016/859 + +// Generation of Alice's secret key +// Outputs random value in [0, 2^305 - 1] to be used as Alice's private key +void random_mod_order_A_SIDHp610(unsigned char* random_digits); + +// Generation of Bob's secret key +// Outputs random value in [0, 2^Floor(Log(2,3^192)) - 1] to be used as Bob's private key +void random_mod_order_B_SIDHp610(unsigned char* random_digits); + +// Alice's ephemeral public key generation +// Input: a private key PrivateKeyA in the range [0, 2^305 - 1], stored in 39 bytes. +// Output: the public key PublicKeyA consisting of 3 GF(p610^2) elements encoded in 462 bytes. +int EphemeralKeyGeneration_A_SIDHp610(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA); + +// Bob's ephemeral public key generation +// It computes the public key PublicKeyB from the private key PrivateKeyB. +// The private key is an integer in the range [0, 2^Floor(Log(2,3^192)) - 1], stored in 38 bytes. +// The public key consists of 3 GF(p610^2) elements encoded in 462 bytes. +int EphemeralKeyGeneration_B_SIDHp610(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB); + +// Alice's ephemeral shared secret computation +// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB +// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^305 - 1], stored in 39 bytes. +// Bob's PublicKeyB consists of 3 GF(p610^2) elements encoded in 462 bytes. 
+// Output: a shared secret SharedSecretA that consists of one element in GF(p610^2) encoded in 154 bytes. +int EphemeralSecretAgreement_A_SIDHp610(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA); + +// Bob's ephemeral shared secret computation +// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA +// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^192)) - 1], stored in 38 bytes. +// Alice's PublicKeyA consists of 3 GF(p610^2) elements encoded in 462 bytes. +// Output: a shared secret SharedSecretB that consists of one element in GF(p610^2) encoded in 154 bytes. +int EphemeralSecretAgreement_B_SIDHp610(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB); + + +// Encoding of keys for KEX-based isogeny system "SIDHp610" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p610) are encoded in 77 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p610^2), where a and b are defined over GF(p610), are encoded as {a, b}, with a in the lowest memory portion. +// +// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^305-1] and [0, 2^Floor(Log(2,3^192)) - 1], resp. In the SIDH API, +// Alice's and Bob's private keys are encoded in 39 and 38 octets, resp., in little endian format. +// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p610^2). In the SIDH API, they are encoded in 462 octets. +// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p610^2). In the SIDH API, they are encoded in 154 octets. 
+ + +#endif \ No newline at end of file diff --git a/SIKE_sw/src/P610/P610_internal.h b/SIKE_sw/src/P610/P610_internal.h new file mode 100644 index 0000000..bf99702 --- /dev/null +++ b/SIKE_sw/src/P610/P610_internal.h @@ -0,0 +1,174 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: internal header file for P610 +*********************************************************************************************/ + +#ifndef P610_INTERNAL_H +#define P610_INTERNAL_H + +#include "../config.h" + + +#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_ARM64) + #define NWORDS_FIELD 10 // Number of words of a 610-bit field element + #define p610_ZERO_WORDS 4 // Number of "0" digits in the least significant part of p610 + 1 +#elif (TARGET == TARGET_x86) + #define NWORDS_FIELD 20 + #define p610_ZERO_WORDS 9 +#endif + + +// Basic constants + +#define NBITS_FIELD 610 +#define MAXBITS_FIELD 640 +#define MAXWORDS_FIELD ((MAXBITS_FIELD+RADIX-1)/RADIX) // Max. number of words to represent field elements +#define NWORDS64_FIELD ((NBITS_FIELD+63)/64) // Number of 64-bit words of a 610-bit field element +#define NBITS_ORDER 320 +#define NWORDS_ORDER ((NBITS_ORDER+RADIX-1)/RADIX) // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp. 
+#define NWORDS64_ORDER ((NBITS_ORDER+63)/64) // Number of 64-bit words of a 320-bit element +#define MAXBITS_ORDER NBITS_ORDER +#define ALICE 0 +#define BOB 1 +#define OALICE_BITS 305 +#define OBOB_BITS 305 +#define OBOB_EXPON 192 +#define MASK_ALICE 0x01 +#define MASK_BOB 0xFF +#define PRIME p610 +#define PARAM_A 6 +#define PARAM_C 1 +// Fixed parameters for isogeny tree computation +#define MAX_INT_POINTS_ALICE 8 +#define MAX_INT_POINTS_BOB 10 +#define MAX_Alice 152 +#define MAX_Bob 192 +#define MSG_BYTES 24 +#define SECRETKEY_A_BYTES ((OALICE_BITS + 7) / 8) +#define SECRETKEY_B_BYTES ((OBOB_BITS - 1 + 7) / 8) +#define FP2_ENCODED_BYTES 2*((NBITS_FIELD + 7) / 8) + + +// SIDH's basic element definitions and point representations + +typedef digit_t felm_t[NWORDS_FIELD]; // Datatype for representing 610-bit field elements (640-bit max.) +typedef digit_t dfelm_t[2*NWORDS_FIELD]; // Datatype for representing double-precision 2x610-bit field elements (2x640-bit max.) +typedef felm_t f2elm_t[2]; // Datatype for representing quadratic extension field elements GF(p610^2) + +typedef struct { f2elm_t X; f2elm_t Z; } point_proj; // Point representation in projective XZ Montgomery coordinates. +typedef point_proj point_proj_t[1]; + +#ifdef COMPRESS + typedef struct { f2elm_t X; f2elm_t Y; f2elm_t Z; } point_full_proj; // Point representation in full projective XYZ Montgomery coordinates + typedef point_full_proj point_full_proj_t[1]; + + typedef struct { f2elm_t x; f2elm_t y; } point_affine; // Point representation in affine coordinates. 
+ typedef point_affine point_t[1]; + + typedef f2elm_t publickey_t[3]; +#endif + + +/**************** Function prototypes ****************/ +/************* Multiprecision functions **************/ + +// 610-bit multiprecision addition, c = a+b +void mp_add610(const digit_t* a, const digit_t* b, digit_t* c); +void mp_add610_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// 610-bit multiprecision subtraction, c = a-b+2p or c = a-b+4p +extern void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c); +extern void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c); +void mp_sub610_p2_asm(const digit_t* a, const digit_t* b, digit_t* c); +void mp_sub610_p4_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// 2x610-bit multiprecision subtraction followed by addition with p610*2^640, c = a-b+(p610*2^640) if a-b < 0, otherwise c=a-b +void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c); +void mp_subadd610x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Double 2x610-bit multiprecision subtraction, c = c-a-b, where c > a and c > b +void mp_dblsub610x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +/************ Field arithmetic functions *************/ + +// Copy of a field element, c = a +void fpcopy610(const digit_t* a, digit_t* c); + +// Zeroing a field element, a = 0 +void fpzero610(digit_t* a); + +// Non constant-time comparison of two field elements. 
If a = b return TRUE, otherwise, return FALSE +bool fpequal610_non_constant_time(const digit_t* a, const digit_t* b); + +// Modular addition, c = a+b mod p610 +extern void fpadd610(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpadd610_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular subtraction, c = a-b mod p610 +extern void fpsub610(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpsub610_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular negation, a = -a mod p610 +extern void fpneg610(digit_t* a); + +// Modular division by two, c = a/2 mod p610. +void fpdiv2_610(const digit_t* a, digit_t* c); + +// Modular correction to reduce field element a in [0, 2*p610-1] to [0, p610-1]. +void fpcorrection610(digit_t* a); + +// 610-bit Montgomery reduction, c = a mod p +void rdc610_asm(digit_t* ma, digit_t* mc); + +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p610, where R=2^640 +void fpmul610_mont(const digit_t* a, const digit_t* b, digit_t* c); +void mul610_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Field squaring using Montgomery arithmetic, mc = ma^2*R^-1 mod p610, where R=2^640 +void fpsqr610_mont(const digit_t* ma, digit_t* mc); + +// Field inversion, a = a^-1 in GF(p610) +void fpinv610_mont(digit_t* a); + +// Chain to compute (p610-3)/4 using Montgomery arithmetic +void fpinv610_chain_mont(digit_t* a); + +/************ GF(p^2) arithmetic functions *************/ + +// Copy of a GF(p610^2) element, c = a +void fp2copy610(const f2elm_t a, f2elm_t c); + +// Zeroing a GF(p610^2) element, a = 0 +void fp2zero610(f2elm_t a); + +// GF(p610^2) negation, a = -a in GF(p610^2) +void fp2neg610(f2elm_t a); + +// GF(p610^2) addition, c = a+b in GF(p610^2) +extern void fp2add610(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p610^2) subtraction, c = a-b in GF(p610^2) +extern void fp2sub610(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p610^2) division by two, c = 
a/2 in GF(p610^2) +void fp2div2_610(const f2elm_t a, f2elm_t c); + +// Modular correction, a = a in GF(p610^2) +void fp2correction610(f2elm_t a); + +// GF(p610^2) squaring using Montgomery arithmetic, c = a^2 in GF(p610^2) +void fp2sqr610_mont(const f2elm_t a, f2elm_t c); + +// GF(p610^2) multiplication using Montgomery arithmetic, c = a*b in GF(p610^2) +void fp2mul610_mont(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p610^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) +void fp2inv610_mont(f2elm_t a); + + +#endif diff --git a/SIKE_sw/src/P610/generic/fp_generic.c b/SIKE_sw/src/P610/generic/fp_generic.c new file mode 100644 index 0000000..f6bb529 --- /dev/null +++ b/SIKE_sw/src/P610/generic/fp_generic.c @@ -0,0 +1,259 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: portable modular arithmetic for P610 +*********************************************************************************************/ + +#include "../P610_internal.h" +#include "../../internal.h" + +// Global constants +extern const uint64_t p610[NWORDS64_FIELD]; +extern const uint64_t p610p1[NWORDS64_FIELD]; +extern const uint64_t p610x2[NWORDS64_FIELD]; +extern const uint64_t p610x4[NWORDS64_FIELD]; + + +__inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. 
+ unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p610x2)[i], borrow, c[i]); + } +} + + +__inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p610x4)[i], borrow, c[i]); + } +} + + +__inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p610. + // Inputs: a, b in [0, 2*p610-1] + // Output: c in [0, 2*p610-1] + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p610x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p610x2)[i] & mask, carry, c[i]); + } +} + + +__inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p610. + // Inputs: a, b in [0, 2*p610-1] + // Output: c in [0, 2*p610-1] + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p610x2)[i] & mask, borrow, c[i]); + } +} + + +__inline void fpneg610(digit_t* a) +{ // Modular negation, a = -a mod p610. 
+ // Input/output: a in [0, 2*p610-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p610x2)[i], a[i], borrow, a[i]); + } +} + + +void fpdiv2_610(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p610. + // Input : a in [0, 2*p610-1] + // Output: c in [0, 2*p610-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p610 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p610)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection610(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p610-1] to [0, p610-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p610)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p610)[i] & mask, borrow, a[i]); + } +} + + +void digit_x_digit(const digit_t a, const digit_t b, digit_t* c) +{ // Digit multiplication, digit * digit -> 2-digit result + register digit_t al, ah, bl, bh, temp; + digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4); + + al = a & mask_low; // Low part + ah = a >> (sizeof(digit_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(digit_t) * 4); + + albl = al*bl; + albh = al*bh; + ahbl = ah*bl; + ahbh = ah*bh; + c[0] = albl & mask_low; // C00 + + res1 = albl >> (sizeof(digit_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(digit_t) * 4); + c[0] ^= temp << (sizeof(digit_t) * 4); // C01 + + res1 = ahbl >> (sizeof(digit_t) * 4); + res2 = albh >> (sizeof(digit_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + c[1] = temp & mask_low; // C10 + carry = temp 
& mask_high; + c[1] ^= (ahbh & mask_high) + carry; // C11 +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. + unsigned int i, j; + digit_t t = 0, u = 0, v = 0, UV[2]; + unsigned int carry = 0; + + for (i = 0; i < nwords; i++) { + for (j = 0; j <= i; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = nwords; i < 2*nwords-1; i++) { + for (j = i-nwords+1; j < nwords; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + c[2*nwords-1] = v; +} + + +void rdc_mont(digit_t* ma, digit_t* mc) +{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p610. + // mc = ma*R^-1 mod p610x2, where R = 2^640. + // If ma < 2^640*p610, the output mc is in the range [0, 2*p610-1]. + // ma is assumed to be in Montgomery representation. 
+ unsigned int i, j, carry, count = p610_ZERO_WORDS; + digit_t UV[2], t = 0, u = 0, v = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + mc[i] = 0; + } + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j < i; j++) { + if (j < (i-p610_ZERO_WORDS+1)) { + MUL(mc[j], ((digit_t*)p610p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { + if (count > 0) { + count -= 1; + } + for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { + if (j < (NWORDS_FIELD-count)) { + MUL(mc[j], ((digit_t*)p610p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i-NWORDS_FIELD] = v; + v = u; + u = t; + t = 0; + } + ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); + mc[NWORDS_FIELD-1] = v; +} \ No newline at end of file diff --git a/SIKE_sw/src/P697/AMD64/fp_x64.c b/SIKE_sw/src/P697/AMD64/fp_x64.c new file mode 100644 index 0000000..a0545a3 --- /dev/null +++ b/SIKE_sw/src/P697/AMD64/fp_x64.c @@ -0,0 +1,802 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: modular arithmetic optimized for x64 platforms for P697 +*********************************************************************************************/ + +#include "../P697_internal.h" +#include "../../internal.h" + +// Global constants +extern const uint64_t p697[NWORDS_FIELD]; +extern const uint64_t p697p1[NWORDS_FIELD]; +extern const uint64_t p697x2[NWORDS_FIELD]; +extern const uint64_t p697x4[NWORDS_FIELD]; + + +__inline void 
mp_sub697_p2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p697x2)[i], borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + mp_sub697_p2_asm(a, b, c); + +#endif +} + + +__inline void mp_sub697_p4(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p697x4)[i], borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + mp_sub697_p4_asm(a, b, c); + +#endif +} + + +__inline void fpadd697(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p697. + // Inputs: a, b in [0, 2*p697-1] + // Output: c in [0, 2*p697-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p697x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p697x2)[i] & mask, carry, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + fpadd697_asm(a, b, c); + +#endif +} + + +__inline void fpsub697(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p697. 
+ // Inputs: a, b in [0, 2*p697-1] + // Output: c in [0, 2*p697-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p697x2)[i] & mask, borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + fpsub697_asm(a, b, c); + +#endif +} + + +__inline void fpneg697(digit_t* a) +{ // Modular negation, a = -a mod p697. + // Input/output: a in [0, 2*p697-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p697x2)[i], a[i], borrow, a[i]); + } +} + + +void fpdiv2_697(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p697. + // Input : a in [0, 2*p697-1] + // Output: c in [0, 2*p697-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p697 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p697)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection697(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p697-1] to [0, p697-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p697)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p697)[i] & mask, borrow, a[i]); + } +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords. 
+ + (void)nwords; + +#if (OS_TARGET == OS_WIN) + digit_t t = 0; + uint128_t uv = {0}; + unsigned int carry = 0; + + MULADD128(a[0], b[0], uv, carry, uv); + t += carry; + c[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[0], uv, carry, uv); + t += carry; + c[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[0], uv, carry, uv); + t += carry; + c[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[0], uv, carry, uv); + t += carry; + c[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[0], uv, carry, uv); + t += carry; + c[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[5], uv, carry, uv); + t += carry; + MULADD128(a[4], b[1], uv, carry, uv); + t += carry; + MULADD128(a[3], b[2], uv, carry, uv); + t += carry; + MULADD128(a[2], b[3], uv, carry, uv); + t += carry; + MULADD128(a[1], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[0], uv, carry, uv); + t += carry; + c[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[1], uv, carry, uv); + t += carry; + MULADD128(a[4], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[4], uv, carry, uv); + t += carry; + MULADD128(a[1], b[5], uv, carry, uv); + t += carry; + MULADD128(a[6], b[0], uv, carry, uv); + t += carry; + c[6] = uv[0]; + uv[0] = 
uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[7], uv, carry, uv); + t += carry; + MULADD128(a[6], b[1], uv, carry, uv); + t += carry; + MULADD128(a[5], b[2], uv, carry, uv); + t += carry; + MULADD128(a[4], b[3], uv, carry, uv); + t += carry; + MULADD128(a[3], b[4], uv, carry, uv); + t += carry; + MULADD128(a[2], b[5], uv, carry, uv); + t += carry; + MULADD128(a[1], b[6], uv, carry, uv); + t += carry; + MULADD128(a[7], b[0], uv, carry, uv); + t += carry; + c[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[8], uv, carry, uv); + t += carry; + MULADD128(a[7], b[1], uv, carry, uv); + t += carry; + MULADD128(a[6], b[2], uv, carry, uv); + t += carry; + MULADD128(a[5], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[5], uv, carry, uv); + t += carry; + MULADD128(a[2], b[6], uv, carry, uv); + t += carry; + MULADD128(a[1], b[7], uv, carry, uv); + t += carry; + MULADD128(a[8], b[0], uv, carry, uv); + t += carry; + c[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[9], uv, carry, uv); + t += carry; + MULADD128(a[8], b[1], uv, carry, uv); + t += carry; + MULADD128(a[7], b[2], uv, carry, uv); + t += carry; + MULADD128(a[6], b[3], uv, carry, uv); + t += carry; + MULADD128(a[5], b[4], uv, carry, uv); + t += carry; + MULADD128(a[4], b[5], uv, carry, uv); + t += carry; + MULADD128(a[3], b[6], uv, carry, uv); + t += carry; + MULADD128(a[2], b[7], uv, carry, uv); + t += carry; + MULADD128(a[1], b[8], uv, carry, uv); + t += carry; + MULADD128(a[9], b[0], uv, carry, uv); + t += carry; + c[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[10], uv, carry, uv); + t += carry; + MULADD128(a[9], b[1], uv, carry, uv); + t += carry; + MULADD128(a[8], b[2], uv, carry, uv); + t += carry; + MULADD128(a[7], b[3], uv, carry, uv); + t += carry; + MULADD128(a[6], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[5], uv, carry, uv); + t += carry; + 
MULADD128(a[4], b[6], uv, carry, uv); + t += carry; + MULADD128(a[3], b[7], uv, carry, uv); + t += carry; + MULADD128(a[2], b[8], uv, carry, uv); + t += carry; + MULADD128(a[1], b[9], uv, carry, uv); + t += carry; + MULADD128(a[10], b[0], uv, carry, uv); + t += carry; + c[10] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[10], b[1], uv, carry, uv); + t += carry; + MULADD128(a[9], b[2], uv, carry, uv); + t += carry; + MULADD128(a[8], b[3], uv, carry, uv); + t += carry; + MULADD128(a[7], b[4], uv, carry, uv); + t += carry; + MULADD128(a[6], b[5], uv, carry, uv); + t += carry; + MULADD128(a[5], b[6], uv, carry, uv); + t += carry; + MULADD128(a[4], b[7], uv, carry, uv); + t += carry; + MULADD128(a[3], b[8], uv, carry, uv); + t += carry; + MULADD128(a[2], b[9], uv, carry, uv); + t += carry; + MULADD128(a[1], b[10], uv, carry, uv); + t += carry; + c[11] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[10], b[2], uv, carry, uv); + t += carry; + MULADD128(a[9], b[3], uv, carry, uv); + t += carry; + MULADD128(a[8], b[4], uv, carry, uv); + t += carry; + MULADD128(a[7], b[5], uv, carry, uv); + t += carry; + MULADD128(a[6], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[7], uv, carry, uv); + t += carry; + MULADD128(a[4], b[8], uv, carry, uv); + t += carry; + MULADD128(a[3], b[9], uv, carry, uv); + t += carry; + MULADD128(a[2], b[10], uv, carry, uv); + t += carry; + c[12] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[10], b[3], uv, carry, uv); + t += carry; + MULADD128(a[9], b[4], uv, carry, uv); + t += carry; + MULADD128(a[8], b[5], uv, carry, uv); + t += carry; + MULADD128(a[7], b[6], uv, carry, uv); + t += carry; + MULADD128(a[6], b[7], uv, carry, uv); + t += carry; + MULADD128(a[5], b[8], uv, carry, uv); + t += carry; + MULADD128(a[4], b[9], uv, carry, uv); + t += carry; + MULADD128(a[3], b[10], uv, carry, uv); + t += carry; + c[13] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[10], b[4], uv, 
carry, uv);
+    t += carry;
+    MULADD128(a[9], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[10], uv, carry, uv);
+    t += carry;
+    c[14] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(a[10], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[10], uv, carry, uv);
+    t += carry;
+    c[15] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(a[10], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[10], uv, carry, uv);
+    t += carry;
+    c[16] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(a[10], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[10], uv, carry, uv);
+    t += carry;
+    c[17] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(a[10], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[10], uv, carry, uv);
+    t += carry;
+    c[18] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(a[10], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[10], uv, carry, uv);
+    t += carry;
+    c[19] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+
+    MULADD128(a[10], b[10], uv, carry, uv);
+    c[20] = uv[0];
+    c[21] = uv[1];
+
+#elif (OS_TARGET == OS_LINUX)
+
+    mul697_asm(a, b, c);
+
+#endif
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Montgomery reduction exploiting special form of the prime.
+  // mc = ma*R^-1 mod p697x2, where R = 2^704.
+  // If ma < 2^704*p697, the output mc is in the range [0, 2*p697-1].
+  // ma is assumed to be in Montgomery representation.
+  // Reads the 22 digits ma[0..21] and writes the 11 digits mc[0..10]; mc is also used as working storage.
+#if (OS_TARGET == OS_WIN)
+    unsigned int carry;
+    digit_t t = 0;                      // overflow word above uv for the current column
+    uint128_t uv = {0};                 // uv[0]/uv[1]: low/high word of the running column sum
+
+    mc[0] = ma[0];                      // digits 0-4 are copied unchanged:
+    mc[1] = ma[1];                      // p697p1 is only indexed from digit 5 upward in this routine
+    mc[2] = ma[2];
+    mc[3] = ma[3];
+    mc[4] = ma[4];
+    MUL128(mc[0], ((digit_t*)p697p1)[5], uv);
+    ADDC(0, uv[0], ma[5], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    mc[5] = uv[0];                      // column 5 result; reused as a multiplier in later columns
+    uv[0] = uv[1];                      // shift the accumulator down by one digit
+    uv[1] = 0;
+
+    MULADD128(mc[0], ((digit_t*)p697p1)[6], uv, carry, uv);    // carry not accumulated here: uv[1] was just cleared
+    MULADD128(mc[1], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[6], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    t += carry;
+    mc[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[7], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    t += carry;
+    mc[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[8], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    t += carry;
+    mc[8] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[9], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    t += carry;
+    mc[9] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[10], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    t += carry;
+    mc[10] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[1], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[11], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    t += carry;
+    mc[0] = uv[0];                      // mc[0] is not read as a multiplier again; from here each column overwrites the lowest digit no longer needed
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[2], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[12], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    t += carry;
+    mc[1] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[3], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[13], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    t += carry;
+    mc[2] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[4], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[14], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    t += carry;
+    mc[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[5], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[15], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    t += carry;
+    mc[4] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[6], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[16], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    t += carry;
+    mc[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[7], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[17], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    t += carry;
+    mc[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[8], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[18], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    t += carry;
+    mc[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[9], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[19], carry, uv[0]);
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    t += carry;
+    mc[8] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[10], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[20], carry, mc[9]);       // last column writes straight into mc[9]
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    ADDC(0, uv[1], ma[21], carry, mc[10]);      // top digit; the carry out of this addition is not used
+
+#elif (OS_TARGET == OS_LINUX)
+
+    rdc697_asm(ma, mc);                 // x64 assembly implementation of the reduction
+
+#endif
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/P697/AMD64/fp_x64_asm.S b/SIKE_sw/src/P697/AMD64/fp_x64_asm.S
new file mode 100644
index 0000000..e00fab4
--- /dev/null
+++ b/SIKE_sw/src/P697/AMD64/fp_x64_asm.S
@@ -0,0 +1,1681 @@
+//*******************************************************************************************
+// SIDH: an efficient supersingular isogeny cryptography library
+// Copyright (c) Microsoft
Corporation
+//
+// Website: https://github.com/microsoft/PQCrypto-SIDH
+// Released under MIT license
+//
+// Abstract: field arithmetic in x64 assembly for P697 on Linux
+//*******************************************************************************************
+
+.intel_syntax noprefix
+
+// Format function and variable names for Mac OS X
+#if defined(__APPLE__)
+  #define fmt(f) _##f
+#else
+  #define fmt(f) f
+#endif
+
+// Registers that are used for parameter passing:
+#define reg_p1 rdi
+#define reg_p2 rsi
+#define reg_p3 rdx
+
+
+.text
+//***********************************************************************
+// Field addition
+// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
+//***********************************************************************
+.global fmt(fpadd697_asm)
+fmt(fpadd697_asm):
+  push r12 // save r12-r15, used as working registers and restored before ret
+  push r13
+  push r14
+  push r15
+
+  mov r8, [reg_p1] // load the 11 words of a into r8-r15, rcx, rax, rdi
+  mov r9, [reg_p1+8]
+  mov r10, [reg_p1+16]
+  mov r11, [reg_p1+24]
+  mov r12, [reg_p1+32]
+  mov r13, [reg_p1+40]
+  mov r14, [reg_p1+48]
+  mov r15, [reg_p1+56]
+  mov rcx, [reg_p1+64]
+  mov rax, [reg_p1+72]
+  mov rdi, [reg_p1+80] // last load: reg_p1 (rdi) itself now holds word 10 of a
+  add r8, [reg_p2] // a + b, carry chained through all 11 words
+  adc r9, [reg_p2+8]
+  adc r10, [reg_p2+16]
+  adc r11, [reg_p2+24]
+  adc r12, [reg_p2+32]
+  adc r13, [reg_p2+40]
+  adc r14, [reg_p2+48]
+  adc r15, [reg_p2+56]
+  adc rcx, [reg_p2+64]
+  adc rax, [reg_p2+72]
+  adc rdi, [reg_p2+80]
+  mov [reg_p3+72], rax // park word 9 of the sum in c so that rax can be reused
+
+  mov rax, [rip+fmt(p697x2)] // subtract p697x2 from the sum, word by word
+  sub r8, rax
+  mov rax, [rip+fmt(p697x2)+8] // this one word is used for the subtraction in words 1-4
+  sbb r9, rax
+  sbb r10, rax
+  sbb r11, rax
+  sbb r12, rax
+  mov rax, [rip+fmt(p697x2)+40]
+  sbb r13, rax
+  mov rax, [rip+fmt(p697x2)+48]
+  sbb r14, rax
+  mov rax, [rip+fmt(p697x2)+56]
+  sbb r15, rax
+  mov rax, [rip+fmt(p697x2)+64]
+  sbb rcx, rax
+  mov rsi, [reg_p3+72] // reload parked word 9 - reg_p2 (rsi) is no longer needed as a pointer
+  sbb rsi, [rip+fmt(p697x2)+72]
+  mov rax, [rip+fmt(p697x2)+80]
+  sbb rdi, rax
+  mov [reg_p3+64], rcx // park words 8 and 9 of the difference in c
+  mov [reg_p3+72], rsi
+  mov rax, 0
+  sbb rax, 0 // rax = 0 - borrow: all ones if the subtraction borrowed, else 0
+
+  mov rcx, [rip+fmt(p697x2)] // build (p697x2 AND mask) for the conditional add-back
+  and rcx, rax
+  mov rsi, [rip+fmt(p697x2)+8]
+  and rsi, rax
+
+  add r8, rcx // add back the masked value, words 0-4
+  adc r9, rsi
+  adc r10, rsi
+  adc r11, rsi
+  adc r12, rsi
+  mov [reg_p3], r8
+  mov [reg_p3+8], r9
+  mov [reg_p3+16], r10
+  mov [reg_p3+24], r11
+  mov [reg_p3+32], r12
+  setc cl // keep the carry in bit 0 of rcx, the and instructions below overwrite CF
+
+  mov rsi, [rip+fmt(p697x2)+40]
+  and rsi, rax
+  mov r8, [rip+fmt(p697x2)+48]
+  and r8, rax
+  mov r9, [rip+fmt(p697x2)+56]
+  and r9, rax
+  mov r10, [rip+fmt(p697x2)+64]
+  and r10, rax
+  mov r11, [rip+fmt(p697x2)+72]
+  and r11, rax
+  mov r12, [rip+fmt(p697x2)+80]
+  and r12, rax
+
+  bt rcx, 0 // CF <- saved carry
+  adc r13, rsi // add back the masked value, words 5-10
+  adc r14, r8
+  adc r15, r9
+  mov rax, [reg_p3+64] // reload parked words 8 and 9
+  mov rcx, [reg_p3+72]
+  adc r10, rax
+  adc r11, rcx
+  adc r12, rdi
+  mov [reg_p3+40], r13
+  mov [reg_p3+48], r14
+  mov [reg_p3+56], r15
+  mov [reg_p3+64], r10
+  mov [reg_p3+72], r11
+  mov [reg_p3+80], r12
+
+  pop r15
+  pop r14
+  pop r13
+  pop r12
+  ret
+
+
+//***********************************************************************
+// Field subtraction
+// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
+//***********************************************************************
+.global fmt(fpsub697_asm)
+fmt(fpsub697_asm):
+  push r12 // save r12-r15, used as working registers and restored before ret
+  push r13
+  push r14
+  push r15
+
+  mov r8, [reg_p1] // load the 11 words of a into r8-r15, rcx, rax, rdi
+  mov r9, [reg_p1+8]
+  mov r10, [reg_p1+16]
+  mov r11, [reg_p1+24]
+  mov r12, [reg_p1+32]
+  mov r13, [reg_p1+40]
+  mov r14, [reg_p1+48]
+  mov r15, [reg_p1+56]
+  mov rcx, [reg_p1+64]
+  mov rax, [reg_p1+72]
+  mov rdi, [reg_p1+80]
+  sub r8, [reg_p2] // a - b, borrow chained through all 11 words
+  sbb r9, [reg_p2+8]
+  sbb r10, [reg_p2+16]
+  sbb r11, [reg_p2+24]
+  sbb r12, [reg_p2+32]
+  sbb r13, [reg_p2+40]
+  sbb r14, [reg_p2+48]
+  sbb r15, [reg_p2+56]
+  sbb rcx, [reg_p2+64]
+  sbb rax, [reg_p2+72]
+  sbb rdi, [reg_p2+80]
+  mov [reg_p3+64], rcx // park words 8 and 9 of the difference in c
+  mov [reg_p3+72], rax
+  mov rax, 0
+  sbb rax, 0 // rax = 0 - borrow: all ones if a - b borrowed, else 0
+
+  mov rcx, [rip+fmt(p697x2)] // build (p697x2 AND mask) for the conditional add-back
+  and rcx, rax
+  mov rsi, [rip+fmt(p697x2)+8] // this one word is used for the add-back in words 1-4
+  and rsi, rax
+
+  add r8, rcx
+  adc r9, rsi
+  adc r10, rsi
+  adc r11, rsi
+  adc r12, rsi
+  mov [reg_p3], r8
+  mov [reg_p3+8], r9
+  mov [reg_p3+16], r10
+  mov [reg_p3+24], r11
+  mov [reg_p3+32], r12
+  setc cl // keep the carry in bit 0 of rcx, the and instructions below overwrite CF
+
+  mov rsi, [rip+fmt(p697x2)+40]
+  and rsi, rax
+  mov r8, [rip+fmt(p697x2)+48]
+  and r8, rax
+  mov r9, [rip+fmt(p697x2)+56]
+  and r9, rax
+  mov r10, [rip+fmt(p697x2)+64]
+  and r10, rax
+  mov r11, [rip+fmt(p697x2)+72]
+  and r11, rax
+  mov r12, [rip+fmt(p697x2)+80]
+  and r12, rax
+
+  bt rcx, 0 // CF <- saved carry
+  adc r13, rsi
+  adc r14, r8
+  adc r15, r9
+  mov rax, [reg_p3+64] // reload parked words 8 and 9
+  mov rcx, [reg_p3+72]
+  adc r10, rax
+  adc r11, rcx
+  adc r12, rdi
+  mov [reg_p3+40], r13
+  mov [reg_p3+48], r14
+  mov [reg_p3+56], r15
+  mov [reg_p3+64], r10
+  mov [reg_p3+72], r11
+  mov [reg_p3+80], r12
+
+  pop r15
+  pop r14
+  pop r13
+  pop r12
+  ret
+
+
+///////////////////////////////////////////////////////////////// MACRO
+.macro SUB697_PX P0
+  push r12 // macro body: c [reg_p3] = a [reg_p1] - b [reg_p2] + constant at label P0, 11 words, no conditional correction
+  push r13
+  push r14
+  push r15
+
+  mov r8, [reg_p1]
+  mov r9, [reg_p1+8]
+  mov r10, [reg_p1+16]
+  mov r11, [reg_p1+24]
+  mov r12, [reg_p1+32]
+  mov r13, [reg_p1+40]
+  mov r14, [reg_p1+48]
+  mov r15, [reg_p1+56]
+  mov rax, [reg_p1+64]
+  mov rcx, [reg_p1+72]
+  mov rdi, [reg_p1+80]
+  sub r8, [reg_p2] // a - b, the final borrow is not used (the add below overwrites CF)
+  sbb r9, [reg_p2+8]
+  sbb r10, [reg_p2+16]
+  sbb r11, [reg_p2+24]
+  sbb r12, [reg_p2+32]
+  sbb r13, [reg_p2+40]
+  sbb r14, [reg_p2+48]
+  sbb r15, [reg_p2+56]
+  sbb rax, [reg_p2+64]
+  sbb rcx, [reg_p2+72]
+  sbb rdi, [reg_p2+80]
+
+  mov rsi, [rip+\P0] // add the constant, word 0
+  add r8, rsi
+  mov rsi, [rip+\P0+8] // this one word is used for words 1-4
+  adc r9, rsi
+  adc r10, rsi
+  adc r11, rsi
+  adc r12, rsi
+  mov [reg_p3], r8
+  mov [reg_p3+8], r9
+  mov [reg_p3+16], r10
+  mov [reg_p3+24], r11
+  mov [reg_p3+32], r12
+  mov r8, [rip+\P0+40] // mov does not change CF, so the carry chain continues below
+  mov r9, [rip+\P0+48]
+  mov r10, [rip+\P0+56]
+  adc r13, r8
+  adc r14, r9
+  adc r15, r10
+  mov r8, [rip+\P0+64]
+  mov r9, [rip+\P0+72]
+  mov r10, [rip+\P0+80]
+  adc r8, rax
+  adc r9, rcx
+  adc r10, rdi
+  mov [reg_p3+40], r13
+  mov [reg_p3+48], r14
+  mov [reg_p3+56], r15
+  mov [reg_p3+64], r8
+  mov [reg_p3+72], r9
+  mov [reg_p3+80], r10
+
+  pop r15
+  pop r14
+  pop r13
+  pop r12
+  .endm
+
+
+//***********************************************************************
+// Multiprecision subtraction with correction with 2*p697
+// Operation:
c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p697
+//***********************************************************************
+.global fmt(mp_sub697_p2_asm)
+fmt(mp_sub697_p2_asm):
+
+  SUB697_PX fmt(p697x2)
+  ret
+
+
+//***********************************************************************
+// Multiprecision subtraction with correction with 4*p697
+// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p697
+//***********************************************************************
+.global fmt(mp_sub697_p4_asm)
+fmt(mp_sub697_p4_asm):
+
+  SUB697_PX fmt(p697x4)
+  ret
+
+
+#ifdef _MULX_
+
+/////////////////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication (MUL384_SCHOOL: 6x6 words, MUL320_SCHOOL: 5x5 words)
+// Inputs: memory pointers M0 and M1
+// Outputs: memory pointer C (12 words for MUL384_SCHOOL, 10 words for MUL320_SCHOOL)
+// Temps: memory pointer S for two 64-bit values (MUL384_SCHOOL w/o _ADX_ only, unused with _ADX_), regs T0:T7, rax, rdx
+///////////////////////////////////////////////////////////////////////////
+#ifdef _ADX_
+
+.macro MUL384_SCHOOL M0, M1, C, S, T0, T1, T2, T3, T4, T5, T6, T7
+  mov rdx, \M0 // rdx is the implicit mulx multiplicand: word 0 of M0
+  mulx \T0, \T1, \M1
+  mulx \T2, \T3, 8\M1
+  mov \C, \T1 // C0_final
+  xor rax, rax // rax = 0, also clears CF and OF before the adox chain
+  mulx \T4, \T5, 16\M1
+  adox \T0, \T3
+  adox \T2, \T5
+  mulx \T1, \T3, 24\M1
+  adox \T4, \T3
+  mulx \T5, \T6, 32\M1
+  adox \T1, \T6
+  mulx \T3, \T7, 40\M1
+  adox \T5, \T7
+  adox \T3, rax
+
+  mov rdx, 8\M0 // word 1 of M0
+  mulx \T6, \T7, \M1
+  xor rax, rax // restart both carry chains (CF for adcx, OF for adox)
+  adcx \T0, \T7
+  mov 8\C, \T0 // C1_final
+  adcx \T2, \T6
+  mulx \T6, \T7, 8\M1
+  adox \T2, \T7
+  adcx \T4, \T6
+  mulx \T0, \T6, 16\M1
+  adox \T4, \T6
+  adcx \T0, \T1
+  mulx \T1, \T7, 24\M1
+  adcx \T1, \T5
+  mulx \T5, \T6, 32\M1
+  adcx \T3, \T5
+  mulx \T5, rdx, 40\M1 // low half goes to rdx, the multiplicand is not needed again in this row
+  adcx \T5, rax
+
+  adox \T0, \T7
+  adox \T1, \T6
+  adox \T3, rdx
+  adox \T5, rax
+
+  mov rdx, 16\M0 // word 2 of M0
+  mulx \T6, \T7, \M1
+  xor rax, rax
+  adcx \T2, \T7
+  mov 16\C, \T2 // C2_final
+  adcx \T4, \T6
+  mulx \T6, \T7, 8\M1
+  adox \T4, \T7
+  adcx \T0, \T6
+  mulx \T2, \T6, 16\M1
+  adox \T0, \T6
+  adcx \T1, \T2
+  mulx \T2, \T7, 24\M1
+  adcx \T3, \T2
+  mulx \T2, \T6, 32\M1
+  adcx \T5, \T2
+  mulx \T2, rdx, 40\M1
+  adcx \T2, rax
+
+  adox \T1, \T7
+  adox \T3, \T6
+  adox \T5, rdx
+  adox \T2, rax
+
+  mov rdx, 24\M0 // word 3 of M0
+  mulx \T6, \T7, \M1
+  xor rax, rax
+  adcx \T4, \T7
+  mov 24\C, \T4 // C3_final
+  adcx \T0, \T6
+  mulx \T6, \T7, 8\M1
+  adox \T0, \T7
+  adcx \T1, \T6
+  mulx \T4, \T6, 16\M1
+  adox \T1, \T6
+  adcx \T3, \T4
+  mulx \T4, \T7, 24\M1
+  adcx \T5, \T4
+  mulx \T4, \T6, 32\M1
+  adcx \T2, \T4
+  mulx \T4, rdx, 40\M1
+  adcx \T4, rax
+
+  adox \T3, \T7
+  adox \T5, \T6
+  adox \T2, rdx
+  adox \T4, rax
+
+  mov rdx, 32\M0 // word 4 of M0
+  mulx \T6, \T7, \M1
+  xor rax, rax
+  adcx \T0, \T7
+  mov 32\C, \T0 // C4_final
+  adcx \T1, \T6
+  mulx \T6, \T7, 8\M1
+  adox \T1, \T7
+  adcx \T3, \T6
+  mulx \T0, \T6, 16\M1
+  adox \T3, \T6
+  adcx \T5, \T0
+  mulx \T0, \T7, 24\M1
+  adcx \T2, \T0
+  mulx \T0, \T6, 32\M1
+  adcx \T4, \T0
+  mulx \T0, rdx, 40\M1
+  adcx \T0, rax
+
+  adox \T5, \T7
+  adox \T2, \T6
+  adox \T4, rdx
+  adox \T0, rax
+
+  mov rdx, 40\M0 // word 5 of M0
+  mulx \T6, \T7, \M1
+  xor rax, rax
+  adcx \T1, \T7
+  mov 40\C, \T1 // C5_final
+  adcx \T3, \T6
+  mulx \T6, \T7, 8\M1
+  adox \T3, \T7
+  adcx \T5, \T6
+  mulx \T1, \T6, 16\M1
+  adox \T5, \T6
+  adcx \T2, \T1
+  mulx \T1, \T7, 24\M1
+  adcx \T4, \T1
+  mulx \T1, \T6, 32\M1
+  adcx \T0, \T1
+  mulx \T1, rdx, 40\M1
+  adcx \T1, rax
+
+  adox \T2, \T7
+  adox \T4, \T6
+  adox \T0, rdx
+  adox \T1, rax
+  mov 48\C, \T3 // C6-C11: upper half of the product
+  mov 56\C, \T5
+  mov 64\C, \T2
+  mov 72\C, \T4
+  mov 80\C, \T0
+  mov 88\C, \T1
+.endm
+
+.macro MUL320_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7
+  mov rdx, \M0 // rdx is the implicit mulx multiplicand: word 0 of M0
+  mulx \T0, \T1, \M1
+  mulx \T2, \T3, 8\M1
+  mov \C, \T1 // C0_final
+  xor rax, rax // rax = 0, also clears CF and OF before the adox chain
+  mulx \T4, \T5, 16\M1
+  adox \T0, \T3
+  adox \T2, \T5
+  mulx \T1, \T3, 24\M1
+  adox \T4, \T3
+  mulx \T5, \T6, 32\M1
+  adox \T1, \T6
+  adox \T5, rax
+
+  mov rdx, 8\M0 // word 1 of M0
+  mulx \T6, \T7, \M1
+  xor rax, rax
+  adcx \T0, \T7
+  mov 8\C, \T0 // C1_final
+  adcx \T2, \T6
+  mulx \T6, \T7, 8\M1
+  adox \T2, \T7
+  adcx \T4, \T6
+  mulx \T0, \T6, 16\M1
+  adox \T4, \T6
+  adcx \T0, \T1
+  mulx \T1, \T7, 24\M1
+  adcx \T1, \T5
+  adox \T0, \T7
+  mulx \T5, \T6, 32\M1
+  adcx \T5, rax
+  adox \T1, \T6
+  adox \T5, rax
+
+  mov rdx, 16\M0 // word 2 of M0
+  mulx \T6, \T7, \M1
+  xor rax, rax
+  adcx \T2, \T7
+  mov 16\C, \T2 // C2_final
+  adcx \T4, \T6
+  mulx \T6, \T7, 8\M1
+  adox \T4, \T7
+  adcx \T0, \T6
+  mulx \T2, \T6, 16\M1
+  adox \T0, \T6
+  adcx \T1, \T2
+  mulx \T2, \T7, 24\M1
+  adcx \T5, \T2
+  adox \T1, \T7
+  mulx \T2, \T6, 32\M1
+  adcx \T2, rax
+  adox \T5, \T6
+  adox \T2, rax
+
+  mov rdx, 24\M0 // word 3 of M0
+  mulx \T6, \T7, \M1
+  xor rax, rax
+  adcx \T4, \T7
+  mov 24\C, \T4 // C3_final
+  adcx \T0, \T6
+  mulx \T6, \T7, 8\M1
+  adox \T0, \T7
+  adcx \T1, \T6
+  mulx \T4, \T6, 16\M1
+  adox \T1, \T6
+  adcx \T5, \T4
+  mulx \T4, \T7, 24\M1
+  adcx \T2, \T4
+  adox \T5, \T7
+  mulx \T4, \T6, 32\M1
+  adcx \T4, rax
+  adox \T2, \T6
+  adox \T4, rax
+
+  mov rdx, 32\M0 // word 4 of M0
+  mulx \T6, \T7, \M1
+  xor rax, rax
+  adcx \T0, \T7
+  mov 32\C, \T0 // C4_final
+  adcx \T1, \T6
+  mulx \T6, \T7, 8\M1
+  adox \T1, \T7
+  adcx \T5, \T6
+  mulx \T0, \T6, 16\M1
+  adox \T5, \T6
+  adcx \T2, \T0
+  mulx \T0, \T7, 24\M1
+  adcx \T4, \T0
+  adox \T2, \T7
+  mulx \T0, \T6, 32\M1
+  adcx \T0, rax
+  adox \T4, \T6
+  adox \T0, rax
+
+  mov 40\C, \T1 // C5-C9: upper half of the product
+  mov 48\C, \T5
+  mov 56\C, \T2
+  mov 64\C, \T4
+  mov 72\C, \T0
+.endm
+
+#else
+
+.macro MUL384_SCHOOL M0, M1, C, S, T0, T1, T2, T3, T4, T5, T6, T7
+  mov rdx, \M0 // rdx is the implicit mulx multiplicand: word 0 of M0
+  mulx \T0, \T1, \M1
+  mulx \T2, \T3, 8\M1
+  mov \C, \T1 // C0_final
+  xor rax, rax // rax = 0, used as the zero operand of the trailing adc
+  mulx \T4, \T5, 16\M1
+  add \T0, \T3
+  adc \T2, \T5
+  mulx \T1, \T3, 24\M1
+  adc \T4, \T3
+  mulx \T5, \T6, 32\M1
+  adc \T1, \T6
+  mulx \T3, \T7, 40\M1
+  adc \T5, \T7
+  adc \T3, rax
+
+  mov rdx, 8\M0 // word 1 of M0
+  mulx \T6, \T7, \M1
+  add \T0, \T7
+  mov 8\C, \T0 // C1_final
+  adc \T2, \T6
+  mulx \T6, \T7, 8\M1
+  mov \S, \T7 // store T7 (low half of this product) in scratch S
+  adc \T4, \T6
+  mulx \T0, \T6, 16\M1
+  mov 8\S, \T6 // store T6 (low half of this product) in scratch S+8
+  adc \T0, \T1
+  mulx \T1, \T7, 24\M1
+  adc \T1, \T5
+  mulx \T5, \T6, 32\M1
+  adc \T3, \T5
+  mulx \T5, rdx, 40\M1 // low half goes to rdx, the multiplicand is not needed again in this row
+  adc \T5, rax
+
+  xor rax, rax // second pass of the row: add the low halves, two of them from scratch S
+  add \T2, \S
+  adc \T4, 8\S
+  adc \T0, \T7
+  adc \T1, \T6
+  adc \T3, rdx
+  adc \T5, rax
+
+  mov rdx, 16\M0 // word 2 of M0
+  mulx \T6, \T7, \M1
+  add \T2, \T7
+  mov 16\C, \T2 // C2_final
+  adc \T4, \T6
+  mulx \T6, \T7, 8\M1
+  mov \S, \T7 // store T7
+  adc \T0, \T6
+  mulx \T2, \T6, 16\M1
+  mov 8\S, \T6 // store T6
+  adc \T1, \T2
+  mulx \T2, \T7, 24\M1
+  adc \T3, \T2
+  mulx \T2, \T6, 32\M1
+  adc \T5, \T2
+  mulx \T2, rdx, 40\M1
+  adc \T2, rax
+
+  xor rax, rax
+  add \T4, \S
+  adc \T0, 8\S
+  adc \T1, \T7
+  adc \T3, \T6
+  adc \T5, rdx
+  adc \T2, rax
+
+  mov rdx, 24\M0 // word 3 of M0
+  mulx \T6, \T7, \M1
+  add \T4, \T7
+  mov 24\C, \T4 // C3_final
+  adc \T0, \T6
+  mulx \T6, \T7, 8\M1
+  mov \S, \T7 // store T7
+  adc \T1, \T6
+  mulx \T4, \T6, 16\M1
+  mov 8\S, \T6 // store T6
+  adc \T3, \T4
+  mulx \T4, \T7, 24\M1
+  adc \T5, \T4
+  mulx \T4, \T6, 32\M1
+  adc \T2, \T4
+  mulx \T4, rdx, 40\M1
+  adc \T4, rax
+
+  xor rax, rax
+  add \T0, \S
+  adc \T1, 8\S
+  adc \T3, \T7
+  adc \T5, \T6
+  adc \T2, rdx
+  adc \T4, rax
+
+  mov rdx, 32\M0 // word 4 of M0
+  mulx \T6, \T7, \M1
+  add \T0, \T7
+  mov 32\C, \T0 // C4_final
+  adc \T1, \T6
+  mulx \T6, \T7, 8\M1
+  mov \S, \T7 // store T7
+  adc \T3, \T6
+  mulx \T0, \T6, 16\M1
+  mov 8\S, \T6 // store T6
+  adc \T5, \T0
+  mulx \T0, \T7, 24\M1
+  adc \T2, \T0
+  mulx \T0, \T6, 32\M1
+  adc \T4, \T0
+  mulx \T0, rdx, 40\M1
+  adc \T0, rax
+
+  xor rax, rax
+  add \T1, \S
+  adc \T3, 8\S
+  adc \T5, \T7
+  adc \T2, \T6
+  adc \T4, rdx
+  adc \T0, rax
+
+  mov rdx, 40\M0 // word 5 of M0
+  mulx \T6, \T7, \M1
+  add \T1, \T7
+  mov 40\C, \T1 // C5_final
+  adc \T3, \T6
+  mulx \T6, \T7, 8\M1
+  mov \S, \T7 // store T7
+  adc \T5, \T6
+  mulx \T1, \T6, 16\M1
+  mov 8\S, \T6 // store T6
+  adc \T2, \T1
+  mulx \T1, \T7, 24\M1
+  adc \T4, \T1
+  mulx \T1, \T6, 32\M1
+  adc \T0, \T1
+  mulx \T1, rdx, 40\M1
+  adc \T1, rax
+
+  add \T3, \S
+  adc \T5, 8\S
+  adc \T2, \T7
+  adc \T4, \T6
+  adc \T0, rdx
+  adc \T1, 0
+  mov 48\C, \T3 // C6-C11: upper half of the product
+  mov 56\C, \T5
+  mov 64\C, \T2
+  mov 72\C, \T4
+  mov 80\C, \T0
+  mov 88\C, \T1
+.endm
+
+.macro MUL320_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7
+  mov rdx, \M0
+
mulx \T0, \T1, \M1
+  mulx \T2, \T3, 8\M1
+  mov \C, \T1 // C0_final
+  mulx \T4, \T5, 16\M1
+  add \T0, \T3
+  adc \T2, \T5
+  mulx \T1, \T3, 24\M1
+  adc \T3, \T4
+  mulx \T5, \T6, 32\M1
+  adc \T1, \T6
+  adc \T5, 0
+
+  mov rdx, 8\M0 // word 1 of M0
+  mulx \T6, \T7, \M1
+  add \T0, \T7
+  mov 8\C, \T0 // C1_final
+  adc \T2, \T6
+  mulx \T6, \T7, 8\M1
+  adc \T3, \T6
+  mulx \T0, \T4, 16\M1
+  adc \T0, \T1
+  mulx \T1, \T6, 24\M1
+  adc \T5, \T1
+  mulx \T1, rax, 32\M1 // rax serves as an extra temporary for the low half
+  adc \T1, 0
+
+  add \T2, \T7 // second pass of the row: add the low halves kept in registers
+  adc \T3, \T4
+  adc \T0, \T6
+  adc \T5, rax
+  adc \T1, 0
+
+  mov rdx, 16\M0 // word 2 of M0
+  mulx \T4, \T6, \M1
+  add \T2, \T6
+  mov 16\C, \T2 // C2_final
+  adc \T3, \T4
+  mulx \T6, \T7, 8\M1
+  adc \T0, \T6
+  mulx \T2, \T4, 16\M1
+  adc \T2, \T5
+  mulx \T5, \T6, 24\M1
+  adc \T1, \T5
+  mulx \T5, rax, 32\M1
+  adc \T5, 0
+
+  add \T3, \T7
+  adc \T0, \T4
+  adc \T2, \T6
+  adc \T1, rax
+  adc \T5, 0
+
+  mov rdx, 24\M0 // word 3 of M0
+  mulx \T4, \T6, \M1
+  add \T3, \T6
+  mov 24\C, \T3 // C3_final
+  adc \T0, \T4
+  mulx \T6, \T7, 8\M1
+  adc \T2, \T6
+  mulx \T3, \T4, 16\M1
+  adc \T1, \T3
+  mulx \T3, \T6, 24\M1
+  adc \T3, \T5
+  mulx \T5, rax, 32\M1
+  adc \T5, 0
+
+  add \T0, \T7
+  adc \T2, \T4
+  adc \T1, \T6
+  adc \T3, rax
+  adc \T5, 0
+
+  mov rdx, 32\M0 // word 4 of M0
+  mulx \T4, \T6, \M1
+  add \T0, \T6
+  mov 32\C, \T0 // C4_final
+  adc \T2, \T4
+  mulx \T6, \T7, 8\M1
+  adc \T1, \T6
+  mulx \T0, \T4, 16\M1
+  adc \T3, \T0
+  mulx \T0, \T6, 24\M1
+  adc \T0, \T5
+  mulx \T5, rax, 32\M1
+  adc \T5, 0
+
+  add \T2, \T7
+  adc \T1, \T4
+  adc \T3, \T6
+  adc \T0, rax
+  adc \T5, 0
+  mov 40\C, \T2 // C5-C9: upper half of the product
+  mov 48\C, \T1
+  mov 56\C, \T3
+  mov 64\C, \T0
+  mov 72\C, \T5
+.endm
+
+#endif
+
+
+//*****************************************************************************
+// 697-bit multiplication using Karatsuba (one level), schoolbook (two levels)
+//*****************************************************************************
+.global fmt(mul697_asm)
+fmt(mul697_asm):
+  push r12
+  push r13
+  push r14
+  push r15
+  mov rcx, reg_p3 // keep the pointer to c in rcx, rdx (reg_p3) is overwritten below
+
+  // [rsp] <- AH + AL, rax <- mask
+  xor rax, rax
+  mov r8, [reg_p1] // AL: words 0-5 of a
+  mov r9, [reg_p1+8]
+  mov r10, [reg_p1+16]
+  mov r11, [reg_p1+24]
+  mov r12, [reg_p1+32]
+  mov r13, [reg_p1+40]
+  push rbx
+  push rbp
+  sub rsp, 224 // 224 bytes of scratch, addressed below as [rsp] .. [rsp+216]
+  add r8, [reg_p1+48] // AH: words 6-10 of a (five words)
+  adc r9, [reg_p1+56]
+  adc r10, [reg_p1+64]
+  adc r11, [reg_p1+72]
+  adc r12, [reg_p1+80]
+  adc r13, 0
+  sbb rax, 0 // rax = all ones if AH + AL carried out of six words, else 0
+  mov [rsp], r8
+  mov [rsp+8], r9
+  mov [rsp+16], r10
+  mov [rsp+24], r11
+  mov [rsp+32], r12
+  mov [rsp+40], r13
+
+  // [rsp+48] <- BH + BL, rdx <- mask
+  xor rdx, rdx
+  mov r8, [reg_p2]
+  mov r9, [reg_p2+8]
+  mov rbx, [reg_p2+16]
+  mov rbp, [reg_p2+24]
+  mov r14, [reg_p2+32]
+  mov r15, [reg_p2+40]
+  add r8, [reg_p2+48]
+  adc r9, [reg_p2+56]
+  adc rbx, [reg_p2+64]
+  adc rbp, [reg_p2+72]
+  adc r14, [reg_p2+80]
+  adc r15, 0
+  sbb rdx, 0 // rdx = all ones if BH + BL carried out of six words, else 0
+  mov [rsp+48], r8
+  mov [rsp+56], r9
+  mov [rsp+64], rbx
+  mov [rsp+72], rbp
+  mov [rsp+80], r14
+  mov [rsp+88], r15
+
+  // [rcx],[rcx+8],rbx,rbp,r14,r15 <- (BH + BL) masked with rax, the carry mask of AH + AL
+  and r8, rax
+  and r9, rax
+  and rbx, rax
+  and rbp, rax
+  and r14, rax
+  and r15, rax
+  mov [rcx], r8
+  mov [rcx+8], r9
+
+  // r8-r13 <- (AH + AL) masked with rdx, the carry mask of BH + BL
+  mov r8, [rsp]
+  mov r9, [rsp+8]
+  and r8, rdx
+  and r9, rdx
+  and r10, rdx
+  and r11, rdx
+  and r12, rdx
+  and r13, rdx
+
+  // [rsp+96..120], r12, r13 <- masked (AH + AL) + masked (BH + BL)
+  mov rax, [rcx]
+  mov rdx, [rcx+8]
+  add r8, rax
+  adc r9, rdx
+  adc r10, rbx
+  adc r11, rbp
+  adc r12, r14
+  adc r13, r15
+  mov [rsp+96], r8 // only the low four words are stored, r12 and r13 are not among the registers passed to the macros below
+  mov [rsp+104], r9
+  mov [rsp+112], r10
+  mov [rsp+120], r11
+
+  // [rcx] <- AL x BL
+  MUL384_SCHOOL [reg_p1], [reg_p2], [rcx], [rsp+128], r8, r9, r10, r11, rbx, rbp, r14, r15 // Result C0-C5
+
+  // [rsp+128] <- (AH+AL) x (BH+BL), low part ([rcx+96] is passed as the scratch operand S)
+  MUL384_SCHOOL [rsp], [rsp+48], [rsp+128], [rcx+96], r8, r9, r10, r11, rbx, rbp, r14, r15
+
+  // [rsp] <- AH x BH
+  MUL320_SCHOOL [reg_p1+48], [reg_p2+48], [rsp], r8, r9, r10, r11, rbx, rbp, r14, r15
+
+  // r8-r13 <- (AH+AL) x (BH+BL), final step
+  mov r8, [rsp+96]
+  mov r9, [rsp+104]
+  mov r10, [rsp+112]
+  mov r11, [rsp+120]
+  mov rax, [rsp+176] // add words 6-11 of the product stored at [rsp+128]
+  add r8, rax
+  mov rax, [rsp+184]
+  adc r9, rax
+  mov rax, [rsp+192]
+  adc r10, rax
+  mov rax, [rsp+200]
+  adc r11, rax
+  mov rax, [rsp+208]
+  adc r12, rax
+  mov rax, [rsp+216]
+  adc r13, rax
+
+  // rdi,rdx,rbx,rbp,r14,r15,r8-r13 <- (AH+AL) x (BH+BL) - ALxBL
+  mov rdi, [rsp+128] // reg_p1 (rdi) is reused as a data register from here on
+  sub rdi, [rcx]
+  mov rdx, [rsp+136]
+  sbb rdx, [rcx+8]
+  mov rbx, [rsp+144]
+  sbb rbx, [rcx+16]
+  mov rbp, [rsp+152]
+  sbb rbp, [rcx+24]
+  mov r14, [rsp+160]
+  sbb r14, [rcx+32]
+  mov r15, [rsp+168]
+  sbb r15, [rcx+40]
+  sbb r8, [rcx+48]
+  sbb r9, [rcx+56]
+  sbb r10, [rcx+64]
+  sbb r11, [rcx+72]
+  sbb r12, [rcx+80]
+  sbb r13, [rcx+88]
+
+  // rdi,rdx,rbx,rbp,r14,r15,r8-r13 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
+  sub rdi, [rsp]
+  sbb rdx, [rsp+8]
+  sbb rbx, [rsp+16]
+  sbb rbp, [rsp+24]
+  sbb r14, [rsp+32]
+  sbb r15, [rsp+40]
+  sbb r8, [rsp+48]
+  sbb r9, [rsp+56]
+  sbb r10, [rsp+64]
+  sbb r11, [rsp+72]
+  sbb r12, 0 // AH x BH has only ten words
+  sbb r13, 0
+
+  mov rax, [rcx+48]
+  add rax, rdi
+  mov [rcx+48], rax // Result C6-C11
+  mov rax, [rcx+56]
+  adc rax, rdx
+  mov [rcx+56], rax
+  mov rax, [rcx+64]
+  adc rax, rbx
+  mov [rcx+64], rax
+  mov rax, [rcx+72]
+  adc rax, rbp
+  mov [rcx+72], rax
+  mov rax, [rcx+80]
+  adc rax, r14
+  mov [rcx+80], rax
+  mov rax, [rcx+88]
+  adc rax, r15
+  mov [rcx+88], rax
+  mov rax, [rsp]
+  adc r8, rax
+  mov [rcx+96], r8 // Result C12-C21
+  mov rax, [rsp+8]
+  adc r9, rax
+  mov [rcx+104], r9
+  mov rax, [rsp+16]
+  adc r10, rax
+  mov [rcx+112], r10
+  mov rax, [rsp+24]
+  adc r11, rax
+  mov [rcx+120], r11
+  mov rax, [rsp+32]
+  adc r12, rax
+  mov [rcx+128], r12
+  mov rax, [rsp+40]
+  adc r13, rax
+  mov [rcx+136], r13
+  mov r8, [rsp+48] // words 6-9 of AH x BH, plus the carry propagated from below
+  mov r9, [rsp+56]
+  mov r10, [rsp+64]
+  mov r11, [rsp+72]
+  adc r8, 0
+  adc r9, 0
+  adc r10, 0
+  adc r11, 0
+  add rsp, 224 // release the scratch area, the carry chain above is complete
+  mov [rcx+144], r8
+  mov [rcx+152], r9
+  mov [rcx+160], r10
+  mov [rcx+168], r11
+
+  pop rbp
+  pop rbx
+  pop r15
+  pop r14
+  pop r13
+  pop r12
+  ret
+
+#else
+
+//***********************************************************************
+// Integer multiplication
+// Based on Karatsuba method
+// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
+// NOTE: a=c or b=c are not allowed
+//***********************************************************************
+.global fmt(mul697_asm)
+fmt(mul697_asm):
+
+  ret
+
+# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE"
+
+#endif
+
+#ifdef _MULX_
+
+///////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication (MUL128x384_SCHOOL: 2x6 words, MUL64x384_SCHOOL: 1x6 words)
+// Inputs: memory pointers M0 and M1, reg TT (MUL128x384_SCHOOL only, added into result word 6)
+// Outputs: regs T0:T7 (T0:T6 for MUL64x384_SCHOOL, which uses T7 as a temporary)
+// Temps: regs T8, rax, rdx
+/////////////////////////////////////////////////////////////////
+
+#ifdef _ADX_
+.macro MUL128x384_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7, T8, TT
+  mov rdx, \M0 // rdx is the implicit mulx multiplicand: word 0 of M0
+  mulx \T1, \T0, \M1 // T0 <- C0_final
+  mulx \T2, \T4, 8\M1
+  xor rax, rax // rax = 0, also clears CF and OF before the adcx/adox chains
+  mulx \T3, \T5, 16\M1
+  adox \T1, \T4
+  adox \T2, \T5
+  mulx \T4, \T7, 24\M1
+  adox \T3, \T7
+  mulx \T5, \T6, 32\M1
+  adox \T4, \T6
+  mulx \T7, \T8, 40\M1
+  adox \T5, \T8
+  adox \T7, \TT // fold the carry-in TT into word 6
+
+  mov rdx, 8\M0 // word 1 of M0
+  mulx \T8, \T6, \M1
+  adcx \T1, \T6 // T1 <- C1_final
+  adcx \T2, \T8
+  mulx \T6, \T8, 8\M1
+  adox \T2, \T8
+  adcx \T3, \T6
+  mulx \T6, \T8, 16\M1
+  adox \T3, \T8
+  adcx \T4, \T6
+  mulx \T6, \T8, 24\M1
+  adox \T4, \T8
+  adcx \T5, \T6
+  mulx \T6, \T8, 32\M1
+  adox \T5, \T8
+  adcx \T6, \T7
+  mulx \T7, \T8, 40\M1
+  adcx \T7, rax
+  adox \T6, \T8
+  adox \T7, rax
+.endm
+
+.macro MUL64x384_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7
+  mov rdx, \M0
+  mulx \T1, \T0, \M1 // T0 <- C0_final
+  mulx \T2, \T4, 8\M1
+  xor rax, rax // rax = 0, also clears OF before the adox chain
+  mulx \T3, \T5, 16\M1
+  adox \T1, \T4
+  adox \T2, \T5
+  mulx \T4, \T7, 24\M1
+  adox \T3, \T7
+  mulx \T5, \T6, 32\M1
+  adox \T4, \T6
+  mulx \T6, \T7, 40\M1
+  adox \T5, \T7
+  adox \T6, rax
+.endm
+
+#else
+
+.macro MUL128x384_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7, T8, TT
+  mov rdx, \M0 // word 0 of M0
+  mulx \T1, \T0, \M1 // T0 <- C0_final
+  mulx \T2, \T3, 8\M1
+  add \T1, \T3
+  adc \T2, 0
+
+  mov rdx, 8\M0 // word 1 of M0, this variant alternates between the two words of M0
+  xor \T5, \T5
+  mulx \T3, \T4, \M1
+  add \T1, \T4
+  adc \T2, \T3
+  adc \T5, 0
+
+  xor \T6, \T6
+  mulx \T3, \T4, 8\M1
+ add \T2, \T4 + adc \T3, \T5 + adc \T6, 0 + + mov rdx, \M0 + mulx \T4, \T5, 16\M1 + add \T2, \T5 + adc \T3, \T4 + adc \T6, 0 + + xor \T7, \T7 + mulx \T4, \T5, 24\M1 + add \T3, \T5 + adc \T4, \T6 + adc \T7, 0 + + mov rdx, 8\M0 + mulx \T5, \T6, 16\M1 + add \T3, \T6 + adc \T4, \T5 + adc \T7, 0 + + xor \T6, \T6 + mulx \T5, \T8, 24\M1 + add \T4, \T8 + adc \T5, \T7 + adc \T6, 0 + + mov rdx, \M0 + mulx \T7, \T8, 32\M1 + add \T4, \T8 + adc \T5, \T7 + adc \T6, 0 + + xor \T7, \T7 + mulx \T8, rax, 40\M1 + add \T5, rax + adc \T6, \T8 + adc \T7, 0 + + mov rdx, 8\M0 + mulx \T8, rax, 32\M1 + add \T5, rax + adc \T6, \T8 + adc \T7, 0 + + mov rdx, 8\M0 + mulx \T8, rax, 40\M1 + add \T6, rax + adc \T7, \T8 + + add \T6, \TT + adc \T7, 0 +.endm + +.macro MUL64x384_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7 + mov rdx, \M0 + mulx \T1, \T0, \M1 // T0 <- C0_final + mulx \T2, \T3, 8\M1 + add \T1, \T3 + adc \T2, 0 + + mulx \T3, \T4, 16\M1 + add \T2, \T4 + adc \T3, 0 + + mulx \T4, \T5, 24\M1 + add \T3, \T5 + adc \T4, 0 + + mulx \T5, \T6, 32\M1 + add \T4, \T6 + adc \T5, 0 + + mulx \T6, \T7, 40\M1 + add \T5, \T7 + adc \T6, 0 +.endm +#endif + + +//************************************************************************************** +// Montgomery reduction +// Based on method described in Faz-Hernandez et al. 
https://eprint.iacr.org/2017/1015 +// Operation: c [reg_p2] = a [reg_p1] +// NOTE: a=c is not allowed +//************************************************************************************** +.global fmt(rdc697_asm) +fmt(rdc697_asm): + push r12 + push r13 + push r14 + push r15 + push rbx + xor rcx, rcx + + // a[0-1] x p697p1_nz --> result: r8:r15 + MUL128x384_SCHOOL [reg_p1], [rip+fmt(p697p1)+40], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rcx + + xor rcx, rcx + add r8, [reg_p1+40] + adc r9, [reg_p1+48] + adc r10, [reg_p1+56] + adc r11, [reg_p1+64] + adc r12, [reg_p1+72] + adc r13, [reg_p1+80] + adc r14, [reg_p1+88] + adc r15, [reg_p1+96] + adc rcx, 0 + mov [reg_p1+40], r8 + mov [reg_p1+48], r9 + mov [reg_p1+56], r10 + mov [reg_p1+64], r11 + mov [reg_p1+72], r12 + mov [reg_p1+80], r13 + mov [reg_p1+88], r14 + mov [reg_p1+96], r15 + + // a[2-3] x p697p1_nz --> result: r8:r15 + MUL128x384_SCHOOL [reg_p1+16], [rip+fmt(p697p1)+40], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rcx + + xor rcx, rcx + add r8, [reg_p1+56] + adc r9, [reg_p1+64] + adc r10, [reg_p1+72] + adc r11, [reg_p1+80] + adc r12, [reg_p1+88] + adc r13, [reg_p1+96] + adc r14, [reg_p1+104] + adc r15, [reg_p1+112] + adc rcx, 0 + mov [reg_p1+56], r8 + mov [reg_p1+64], r9 + mov [reg_p1+72], r10 + mov [reg_p1+80], r11 + mov [reg_p1+88], r12 + mov [reg_p1+96], r13 + mov [reg_p1+104], r14 + mov [reg_p1+112], r15 + + // a[4-5] x p697p1_nz --> result: r8:r15 + MUL128x384_SCHOOL [reg_p1+32], [rip+fmt(p697p1)+40], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rcx + + xor rcx, rcx + add r8, [reg_p1+72] + adc r9, [reg_p1+80] + adc r10, [reg_p1+88] + adc r11, [reg_p1+96] + adc r12, [reg_p1+104] + adc r13, [reg_p1+112] + adc r14, [reg_p1+120] + adc r15, [reg_p1+128] + adc rcx, 0 + mov [reg_p1+72], r8 + mov [reg_p1+80], r9 + mov [reg_p1+88], r10 + mov [reg_p1+96], r11 + mov [reg_p1+104], r12 + mov [reg_p1+112], r13 + mov [reg_p1+120], r14 + mov [reg_p1+128], r15 + + // a[6-7] x p697p1_nz --> result: r8:r15 + 
MUL128x384_SCHOOL [reg_p1+48], [rip+fmt(p697p1)+40], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rcx + + xor rcx, rcx + add r8, [reg_p1+88] + adc r9, [reg_p1+96] + adc r10, [reg_p1+104] + adc r11, [reg_p1+112] + adc r12, [reg_p1+120] + adc r13, [reg_p1+128] + adc r14, [reg_p1+136] + adc r15, [reg_p1+144] + adc rcx, 0 + mov [reg_p2], r8 // C0_final + mov [reg_p2+8], r9 // C1_final + mov [reg_p1+104], r10 + mov [reg_p1+112], r11 + mov [reg_p1+120], r12 + mov [reg_p1+128], r13 + mov [reg_p1+136], r14 + mov [reg_p1+144], r15 + + // a[8-9] x p697p1_nz --> result: r8:r15 + MUL128x384_SCHOOL [reg_p1+64], [rip+fmt(p697p1)+40], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rcx + + xor rcx, rcx + add r8, [reg_p1+104] + adc r9, [reg_p1+112] + adc r10, [reg_p1+120] + adc r11, [reg_p1+128] + adc r12, [reg_p1+136] + adc r13, [reg_p1+144] + adc r14, [reg_p1+152] + adc r15, [reg_p1+160] + adc rcx, [reg_p1+168] + mov [reg_p2+16], r8 // C2_final + mov [reg_p2+24], r9 // C3_final + mov [reg_p1+120], r10 + mov [reg_p1+128], r11 + mov [reg_p1+136], r12 + mov [reg_p1+144], r13 + mov [reg_p1+152], r14 + mov [reg_p1+160], r15 + + // a[10] x p697p1_nz --> result: r8:r14 + MUL64x384_SCHOOL [reg_p1+80], [rip+fmt(p697p1)+40], r8, r9, r10, r11, r12, r13, r14, r15 + + // Final result C4:C10 (rcx holds the top input word plus the carry of the previous step) + add r8, [reg_p1+120] + adc r9, [reg_p1+128] + adc r10, [reg_p1+136] + adc r11, [reg_p1+144] + adc r12, [reg_p1+152] + adc r13, [reg_p1+160] + adc r14, rcx + mov [reg_p2+32], r8 + mov [reg_p2+40], r9 + mov [reg_p2+48], r10 + mov [reg_p2+56], r11 + mov [reg_p2+64], r12 + mov [reg_p2+72], r13 + mov [reg_p2+80], r14 + + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret + + #else + +//*********************************************************************** +// Montgomery reduction +// Based on comba method +// Operation: c [reg_p2] = a [reg_p1] +// NOTE: a=c is not allowed +//*********************************************************************** +.global fmt(rdc697_asm) +fmt(rdc697_asm): + + ret + +# error 
"CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE" + + #endif + +//*********************************************************************** +// 697-bit multiprecision addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.global fmt(mp_add697_asm) +fmt(mp_add697_asm): + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov rax, [reg_p1+32] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc rax, [reg_p2+32] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], rax + + mov r8, [reg_p1+40] + mov r9, [reg_p1+48] + mov r10, [reg_p1+56] + mov r11, [reg_p1+64] + mov rax, [reg_p1+72] + mov rcx, [reg_p1+80] + adc r8, [reg_p2+40] + adc r9, [reg_p2+48] + adc r10, [reg_p2+56] + adc r11, [reg_p2+64] + adc rax, [reg_p2+72] + adc rcx, [reg_p2+80] + mov [reg_p3+40], r8 + mov [reg_p3+48], r9 + mov [reg_p3+56], r10 + mov [reg_p3+64], r11 + mov [reg_p3+72], rax + mov [reg_p3+80], rcx + ret + + +//*********************************************************************** +// 2x697-bit multiprecision subtraction/addition +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. 
If c < 0, add p697*2^704 +//*********************************************************************** +.global fmt(mp_subadd697x2_asm) +fmt(mp_subadd697x2_asm): + push r12 + push r13 + push r14 + push r15 + push rbx + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov rcx, [reg_p1+32] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb rcx, [reg_p2+32] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], rcx + + mov r8, [reg_p1+40] + mov r9, [reg_p1+48] + mov r10, [reg_p1+56] + mov r11, [reg_p1+64] + mov rcx, [reg_p1+72] + sbb r8, [reg_p2+40] + sbb r9, [reg_p2+48] + sbb r10, [reg_p2+56] + sbb r11, [reg_p2+64] + sbb rcx, [reg_p2+72] + mov [reg_p3+40], r8 + mov [reg_p3+48], r9 + mov [reg_p3+56], r10 + mov [reg_p3+64], r11 + mov [reg_p3+72], rcx + + mov r8, [reg_p1+80] + mov r9, [reg_p1+88] + mov r10, [reg_p1+96] + mov r11, [reg_p1+104] + mov rcx, [reg_p1+112] + sbb r8, [reg_p2+80] + sbb r9, [reg_p2+88] + sbb r10, [reg_p2+96] + sbb r11, [reg_p2+104] + sbb rcx, [reg_p2+112] + mov [reg_p3+80], r8 + mov [reg_p3+88], r9 + mov [reg_p3+96], r10 + mov [reg_p3+104], r11 + mov [reg_p3+112], rcx + + mov r8, [reg_p1+120] + mov r9, [reg_p1+128] + mov r10, [reg_p1+136] + mov r11, [reg_p1+144] + mov rcx, [reg_p1+152] + mov r14, [reg_p1+160] + mov r15, [reg_p1+168] + sbb r8, [reg_p2+120] + sbb r9, [reg_p2+128] + sbb r10, [reg_p2+136] + sbb r11, [reg_p2+144] + sbb rcx, [reg_p2+152] + sbb r14, [reg_p2+160] + sbb r15, [reg_p2+168] + mov [reg_p3+160], r14 + mov [reg_p3+168], r15 + sbb rax, 0 + + // Add p697 anded with the mask in rax + mov r12, [rip+fmt(p697)] + mov r13, [rip+fmt(p697)+40] + mov r14, [rip+fmt(p697)+48] + mov r15, [rip+fmt(p697)+56] + mov rdi, [rip+fmt(p697)+64] + mov rsi, [rip+fmt(p697)+72] + mov rbx, [rip+fmt(p697)+80] + and r12, rax + and r13, rax + and r14, rax + and r15, rax + and rdi, rax + and rsi, rax + and rbx, rax + mov 
rax, [reg_p3+88] + add rax, r12 + mov [reg_p3+88], rax + mov rax, [reg_p3+96] + adc rax, r12 + mov [reg_p3+96], rax + mov rax, [reg_p3+104] + adc rax, r12 + mov [reg_p3+104], rax + mov rax, [reg_p3+112] + adc rax, r12 + mov [reg_p3+112], rax + adc r8, r12 + adc r9, r13 + mov [reg_p3+120], r8 + mov [reg_p3+128], r9 + adc r10, r14 + adc r11, r15 + mov r8, [reg_p3+160] + mov r9, [reg_p3+168] + adc rcx, rdi + adc r8, rsi + adc r9, rbx + + mov [reg_p3+136], r10 + mov [reg_p3+144], r11 + mov [reg_p3+152], rcx + mov [reg_p3+160], r8 + mov [reg_p3+168], r9 + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Double 2x697-bit multiprecision subtraction +// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2] +//*********************************************************************** +.global fmt(mp_dblsub697x2_asm) +fmt(mp_dblsub697x2_asm): + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p3] + mov r9, [reg_p3+8] + mov r10, [reg_p3+16] + mov r11, [reg_p3+24] + mov r12, [reg_p3+32] + mov r13, [reg_p3+40] + mov r14, [reg_p3+48] + mov r15, [reg_p3+56] + sub r8, [reg_p1] + sbb r9, [reg_p1+8] + sbb r10, [reg_p1+16] + sbb r11, [reg_p1+24] + sbb r12, [reg_p1+32] + sbb r13, [reg_p1+40] + sbb r14, [reg_p1+48] + sbb r15, [reg_p1+56] + setc al + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb r14, [reg_p2+48] + sbb r15, [reg_p2+56] + setc cl + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + + mov r8, [reg_p3+64] + mov r9, [reg_p3+72] + mov r10, [reg_p3+80] + mov r11, [reg_p3+88] + mov r12, [reg_p3+96] + mov r13, [reg_p3+104] + mov r14, [reg_p3+112] + mov r15, [reg_p3+120] + bt rax, 0 + sbb r8, [reg_p1+64] + sbb r9, [reg_p1+72] + sbb r10, [reg_p1+80] + sbb r11, 
[reg_p1+88] + sbb r12, [reg_p1+96] + sbb r13, [reg_p1+104] + sbb r14, [reg_p1+112] + sbb r15, [reg_p1+120] + setc al + bt rcx, 0 + sbb r8, [reg_p2+64] + sbb r9, [reg_p2+72] + sbb r10, [reg_p2+80] + sbb r11, [reg_p2+88] + sbb r12, [reg_p2+96] + sbb r13, [reg_p2+104] + sbb r14, [reg_p2+112] + sbb r15, [reg_p2+120] + setc cl + mov [reg_p3+64], r8 + mov [reg_p3+72], r9 + mov [reg_p3+80], r10 + mov [reg_p3+88], r11 + mov [reg_p3+96], r12 + mov [reg_p3+104], r13 + mov [reg_p3+112], r14 + mov [reg_p3+120], r15 + + mov r8, [reg_p3+128] + mov r9, [reg_p3+136] + mov r10, [reg_p3+144] + mov r11, [reg_p3+152] + mov r12, [reg_p3+160] + mov r13, [reg_p3+168] + bt rax, 0 + sbb r8, [reg_p1+128] + sbb r9, [reg_p1+136] + sbb r10, [reg_p1+144] + sbb r11, [reg_p1+152] + sbb r12, [reg_p1+160] + sbb r13, [reg_p1+168] + bt rcx, 0 + sbb r8, [reg_p2+128] + sbb r9, [reg_p2+136] + sbb r10, [reg_p2+144] + sbb r11, [reg_p2+152] + sbb r12, [reg_p2+160] + sbb r13, [reg_p2+168] + mov [reg_p3+128], r8 + mov [reg_p3+136], r9 + mov [reg_p3+144], r10 + mov [reg_p3+152], r11 + mov [reg_p3+160], r12 + mov [reg_p3+168], r13 + + pop r15 + pop r14 + pop r13 + pop r12 + ret \ No newline at end of file diff --git a/SIKE_sw/src/P697/P697.c b/SIKE_sw/src/P697/P697.c new file mode 100644 index 0000000..aef7321 --- /dev/null +++ b/SIKE_sw/src/P697/P697.c @@ -0,0 +1,139 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: supersingular isogeny parameters and generation of functions for P697 +*********************************************************************************************/ + +#include "P697_api.h" +#include "P697_internal.h" +#include "../internal.h" + + +// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve 
points: +// -------------------------------------------------------------------------------------------------- +// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). +// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position. +// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. +// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32. +// For example, a 697-bit field element is represented with Ceil(697 / 64) = 11 64-bit digits or Ceil(697 / 32) = 22 32-bit digits. + +// +// Curve isogeny system "SIDHp697". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p697^2), where A=6, B=1, C=1 and p697 = 2^356*3^215-1 +// + +const uint64_t p697[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x604054AFFFFFFFFF, + 0xDF4970CF7313736F, 0x719AEC973BF54225, 0x40E474DA88B90FFE, 0x9A0E279D6CEB3C8E, 0x01B39F97671708CF }; +const uint64_t p697p1[NWORDS64_FIELD] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x604054B000000000, + 0xDF4970CF7313736F, 0x719AEC973BF54225, 0x40E474DA88B90FFE, 0x9A0E279D6CEB3C8E, 0x01B39F97671708CF }; +const uint64_t p697x2[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xC080A95FFFFFFFFF, + 0xBE92E19EE626E6DE, 0xE335D92E77EA844B, 0x81C8E9B511721FFC, 0x341C4F3AD9D6791C, 0x03673F2ECE2E119F }; +const uint64_t p697x4[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x810152BFFFFFFFFF, + 0x7D25C33DCC4DCDBD, 0xC66BB25CEFD50897, 0x0391D36A22E43FF9, 
0x68389E75B3ACF239, 0x06CE7E5D9C5C233E }; +// Order of Alice's subgroup +const uint64_t Alice_order[NWORDS64_ORDER] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000001000000000 }; +// Order of Bob's subgroup +const uint64_t Bob_order[NWORDS64_ORDER] = { 0xF7313736F604054B, 0x73BF54225DF4970C, 0xA88B90FFE719AEC9, 0xD6CEB3C8E40E474D, 0x7671708CF9A0E279, 0x00000000001B39F9 }; +// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i} in GF(p697^2), expressed in Montgomery representation +const uint64_t A_gen[6*NWORDS64_FIELD] = { 0xAED913E7D94626F9, 0x6F163E13CE243B16, 0x63211BC832B204DD, 0x35C03D027DA18195, 0x4AC8AE5E92D9A2E0, 0x7901C981FA69E5F6, + 0xDC074593C4951783, 0xE039A85DA8C4CCCB, 0x238709FB5A391A27, 0x81C303327E8FDA3A, 0x000F36173BC9782E, // XPA0 + 0x266F82DA9F627219, 0xC25C277AD1F10869, 0x947D3148A5C130AB, 0xFC5142FE8F622A88, 0xB5F69FFF2BA5CDB9, 0xA5B6DC9C5B5A65E9, + 0xD1B526E7169AC83E, 0x0DAD5BA3BDB5F30D, 0xAF70A90042BC2A5E, 0xE55389C1D5AC115F, 0x012EFF54E3702B19, // XPA1 + 0x4C987E2710131A53, 0xC85EBC0B6964FC4E, 0x01064AF42ED201FE, 0x6C7F56903B372893, 0x70D22E68DEE9FB6E, 0x41DBA2F20C3FF934, + 0x741E3BC447063D35, 0x830A5DA2BB4C3381, 0x1896BD7E957480D5, 0x5FF6ABE18016BD72, 0x015B3A13274C3A5E, // XQA0 + 0xAB9DA605058DB5BD, 0x676326751136B419, 0xA012ED1457E7A8FB, 0x4D2C99E2BCBDBCBF, 0x847DAAAB8AF49694, 0x57E4A8EBEE16077A, + 0x253098F5145E024F, 0x2834FA2027602D7E, 0x67370BF01ECA39F5, 0xFD1988310BD8B371, 0x006E1C1994AAE711, // XQA1 + 0x388557F6D513BA2E, 0x985FC6241AF2D870, 0xAB4A1A0CB162217E, 0xEFE329C716283B0C, 0x1B8A160873A72DF3, 0xE788A8E93CE9A2BF, + 0x9208D779576BE635, 0x9F01542376C9CF14, 0xB4C147E4C823B27B, 0x14EBA3D4E36220A2, 0x00B5E9F1B8C6EB1F, // XRA0 + 0x56DA90C58CF6CF46, 0x81618C6931E0A49F, 0xE85EDF7AAA8E245E, 0x3EFAADBA6C218FE5, 0x070BC4D671757F0A, 0x33E57D453747A238, + 0xA1DE9DC8B2194C11, 0xD5C01615A266F9F3, 0x1FD965E5FB51C6F5, 0x86EA60BF172F4F54, 0x1568A2478263BE4 }; // 
XRA1 +// Bob's generator values {XPB0, XQB0, XRB0 + XRB1*i} in GF(p697^2), expressed in Montgomery representation +const uint64_t B_gen[6*NWORDS64_FIELD] = { 0x17004B45D6CD5264, 0x2120CCAD6F2560B1, 0x2037B4FC92D82662, 0x64A1CA7B3198E4F9, 0xA049034AC1A0019A, 0xA78FDEEA1525EFC7, + 0x1235E926EB190D51, 0x20808D93DDDEB13D, 0x4EE5F74BFA19F9E7, 0xB6325316EE6D75DD, 0x016E69166BA0015E, // XPB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XPB1 + 0x6BE584AC7B4EB4F9, 0xF80F2AD8BBBEED51, 0x3681798875177782, 0x50D3F6C3774A2F09, 0xFF3C23A377640B8D, 0x6033D3DF5745A962, + 0x2FF24E14C9699274, 0x83DA36836A97EB83, 0x25C8EF44B73BD1CD, 0x712062DF86ADEF09, 0x004CF039055BDB65, // XQB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XQB1 + 0x94D7DFC81C1E72E5, 0xE43215CB25F12508, 0x05C2FA2D3F4AE2F9, 0xBB0752FE5CE1746B, 0xA780994FB878A14B, 0x15F6979E08C55016, + 0xE520E266C3B11912, 0xA857D0496B40DA30, 0xEBACFFF0FDFA0DD2, 0x4C84A4D2485B1E15, 0x00A4F1A9A018A254, // XRB0 + 0x49940C6C65957574, 0xC475B85CD816F0A5, 0x52F4C5971D1E4573, 0xE695F0CD74372CBD, 0x53BC43AA1AFA579E, 0xE02CD95D4A267AE6, + 0x7B96626EBA6A4ECB, 0xF5E38B098E29F8D0, 0xEAED32068F11ACB9, 0xAFF1F42532675E47, 0x0078655255FA5626 }; // XRB1 +// Montgomery constant Montgomery_R2 = (2^704)^2 mod p697 +const uint64_t Montgomery_R2[NWORDS64_FIELD] = { 0x90E8717898EB005C, 0x1DF9EB2CE3B0E597, 0x70EDDE1C2495B71C, 0x441E14E451B09CBC, 0x362ACF49015E62FF, 0x139D92FB72D960C4, + 0x7840FBE341B9CCE6, 0xFC3D2E62C11AEF2F, 0xE8053C8FF2621C9B, 0x7D2E06601F8D8373, 0x01634C22A8B7316F }; +// Value one in Montgomery representation +const uint64_t Montgomery_one[NWORDS64_FIELD] = { 0x0000000000000096, 
0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x9A4E60E000000000, + 0x2AF7E672929A5CBD, 0x6F395F62DE4B3DCF, 0xFA2387F3E390A0E9, 0xBBB4C9C22E2A84A5, 0x00C07D499880D65B }; + + +// Fixed parameters for isogeny tree computation +const unsigned int strat_Alice[MAX_Alice-1] = { +72, 48, 27, 15, 8, 4, 2, 1, 1, 2, 1, 1, +4, 2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 12, 7, 4, 2, 1, +1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 21, 12, 7, 4, 2, +1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 9, 5, 3, 2, 1, +1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 33, 17, 9, 5, 4, 2, 1, 1, 2, 1, 1, +2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, +1, 16, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, +4, 2, 1, 1, 2, 1, 1 }; + +const unsigned int strat_Bob[MAX_Bob-1] = { +109, 58, 27, 12, 5, 2, 1, 1, 3, 1, 2, 1, +7, 3, 1, 2, 1, 4, 2, 1, 2, 1, 1, 15, 7, 3, 1, 2, 1, 4, 2, 1, 2, 1, 1, 8, 4, 2, +1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 31, 15, 7, 3, 1, 2, 1, 4, 2, 1, 2, 1, 1, 8, 4, +2, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, +1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 58, 27, 12, 7, 3, 1, 2, 1, 4, 2, +1, 2, 1, 1, 7, 3, 1, 2, 1, 4, 2, 1, 2, 1, 1, 15, 7, 3, 1, 2, 1, 4, 2, 1, 2, 1, +1, 8, 4, 2, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 31, 15, 7, 3, 1, 2, 1, 4, 2, 1, 2, +1, 1, 8, 4, 2, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 2, 1, 1, 4, 2, +1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 }; + +// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions +#define fpcopy fpcopy697 +#define fpzero fpzero697 +#define fpadd fpadd697 +#define fpsub fpsub697 +#define fpneg fpneg697 +#define fpdiv2 fpdiv2_697 +#define fpcorrection fpcorrection697 +#define fpmul_mont fpmul697_mont +#define fpsqr_mont fpsqr697_mont +#define fpinv_mont fpinv697_mont +#define fpinv_chain_mont 
fpinv697_chain_mont +#define fp2copy fp2copy697 +#define fp2zero fp2zero697 +#define fp2add fp2add697 +#define fp2sub fp2sub697 +#define mp_sub_p2 mp_sub697_p2 +#define mp_sub_p4 mp_sub697_p4 +#define sub_p4 mp_sub_p4 +#define fp2neg fp2neg697 +#define fp2div2 fp2div2_697 +#define fp2correction fp2correction697 +#define fp2mul_mont fp2mul697_mont +#define fp2sqr_mont fp2sqr697_mont +#define fp2inv_mont fp2inv697_mont +#define fp2inv_mont_ct fp2inv697_mont_ct +#define fp2inv_mont_bingcd fp2inv697_mont_bingcd +#define mp_add_asm mp_add697_asm +#define mp_subaddx2_asm mp_subadd697x2_asm +#define mp_dblsubx2_asm mp_dblsub697x2_asm +#define crypto_kem_keypair crypto_kem_keypair_SIKEp697 +#define crypto_kem_enc crypto_kem_enc_SIKEp697 +#define crypto_kem_dec crypto_kem_dec_SIKEp697 +#define random_mod_order_A random_mod_order_A_SIDHp697 +#define random_mod_order_B random_mod_order_B_SIDHp697 +#define EphemeralKeyGeneration_A EphemeralKeyGeneration_A_SIDHp697 +#define EphemeralKeyGeneration_B EphemeralKeyGeneration_B_SIDHp697 +#define EphemeralSecretAgreement_A EphemeralSecretAgreement_A_SIDHp697 +#define EphemeralSecretAgreement_B EphemeralSecretAgreement_B_SIDHp697 + +#include "../fpx.c" +#include "../ec_isogeny.c" +#include "../sidh.c" +#include "../sike.c" \ No newline at end of file diff --git a/SIKE_sw/src/P697/P697_api.h b/SIKE_sw/src/P697/P697_api.h new file mode 100644 index 0000000..25c3782 --- /dev/null +++ b/SIKE_sw/src/P697/P697_api.h @@ -0,0 +1,112 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: API header file for P697 +*********************************************************************************************/ + +#ifndef P697_API_H +#define P697_API_H + + +/*********************** Key encapsulation 
mechanism API ***********************/ +#define CRYPTO_SECRETKEYBYTES 603 // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes +#define CRYPTO_PUBLICKEYBYTES 528 +#define CRYPTO_BYTES 32 +#define CRYPTO_CIPHERTEXTBYTES 560 // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes + +// Algorithm name +#define CRYPTO_ALGNAME "SIKEp697" + +// SIKE's key generation +// It produces a private key sk and computes the public key pk. +// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 603 bytes) +// public key pk (CRYPTO_PUBLICKEYBYTES = 528 bytes) +int crypto_kem_keypair_SIKEp697(unsigned char *pk, unsigned char *sk); + +// SIKE's encapsulation +// Input: public key pk (CRYPTO_PUBLICKEYBYTES = 528 bytes) +// Outputs: shared secret ss (CRYPTO_BYTES = 32 bytes) +// ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 560 bytes) +int crypto_kem_enc_SIKEp697(unsigned char *ct, unsigned char *ss, const unsigned char *pk); + +// SIKE's decapsulation +// Input: secret key sk (CRYPTO_SECRETKEYBYTES = 603 bytes) +// ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 560 bytes) +// Outputs: shared secret ss (CRYPTO_BYTES = 32 bytes) +int crypto_kem_dec_SIKEp697(unsigned char *ss, const unsigned char *ct, const unsigned char *sk); + + +// Encoding of keys for KEM-based isogeny system "SIKEp697" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p697) are encoded in 88 octets (Ceil(697 / 8)) in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p697^2), where a and b are defined over GF(p697), are encoded as {a, b}, with a in the lowest memory portion. +// +// Private keys sk consist of the concatenation of a 32-byte random value, a value in the range [0, 2^Floor(Log(2,3^215))-1] and the public key pk. In the SIKE API, +// private keys are encoded in 603 octets in little endian format. +// Public keys pk consist of 3 elements in GF(p697^2). 
In the SIKE API, pk is encoded in 528 octets. +// Ciphertexts ct consist of the concatenation of a public key value and a 32-byte value. In the SIKE API, ct is encoded in 528 + 32 = 560 octets. +// Shared keys ss consist of a value of 32 octets. + + +/*********************** Ephemeral key exchange API ***********************/ + +#define SIDH_SECRETKEYBYTES_A 45 +#define SIDH_SECRETKEYBYTES_B 43 +#define SIDH_PUBLICKEYBYTES 528 +#define SIDH_BYTES 176 + +// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys. +// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016. +// Extended version available at: http://eprint.iacr.org/2016/859 + +// Generation of Alice's secret key +// Outputs random value in [0, 2^356 - 1] to be used as Alice's private key +void random_mod_order_A_SIDHp697(unsigned char* random_digits); + +// Generation of Bob's secret key +// Outputs random value in [0, 2^Floor(Log(2,3^215)) - 1] to be used as Bob's private key +void random_mod_order_B_SIDHp697(unsigned char* random_digits); + +// Alice's ephemeral public key generation +// Input: a private key PrivateKeyA in the range [0, 2^356 - 1], stored in 45 bytes. +// Output: the public key PublicKeyA consisting of 3 GF(p697^2) elements encoded in 528 bytes. +int EphemeralKeyGeneration_A_SIDHp697(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA); + +// Bob's ephemeral public key generation +// It computes the public key PublicKeyB from the private key PrivateKeyB, which is an input (const) parameter. +// The private key is an integer in the range [0, 2^Floor(Log(2,3^215)) - 1], stored in 43 bytes. +// The public key consists of 3 GF(p697^2) elements encoded in 528 bytes. 
+int EphemeralKeyGeneration_B_SIDHp697(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB); + +// Alice's ephemeral shared secret computation +// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB +// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^356 - 1], stored in 45 bytes. +// Bob's PublicKeyB consists of 3 GF(p697^2) elements encoded in 528 bytes. +// Output: a shared secret SharedSecretA that consists of one element in GF(p697^2) encoded in 176 bytes. +int EphemeralSecretAgreement_A_SIDHp697(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA); + +// Bob's ephemeral shared secret computation +// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA +// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^215)) - 1], stored in 43 bytes. +// Alice's PublicKeyA consists of 3 GF(p697^2) elements encoded in 528 bytes. +// Output: a shared secret SharedSecretB that consists of one element in GF(p697^2) encoded in 176 bytes. +int EphemeralSecretAgreement_B_SIDHp697(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB); + + +// Encoding of keys for KEX-based isogeny system "SIDHp697" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p697) are encoded in 88 octets (Ceil(697 / 8)) in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p697^2), where a and b are defined over GF(p697), are encoded as {a, b}, with a in the lowest memory portion. +// +// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^356-1] and [0, 2^Floor(Log(2,3^215)) - 1], resp. In the SIDH API, +// Alice's and Bob's private keys are encoded in 45 and 43 octets, resp., in little endian format. 
+// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p697^2). In the SIDH API, they are encoded in 528 octets. +// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p697^2). In the SIDH API, they are encoded in 176 octets. + + +#endif \ No newline at end of file diff --git a/SIKE_sw/src/P697/P697_internal.h b/SIKE_sw/src/P697/P697_internal.h new file mode 100644 index 0000000..f01c560 --- /dev/null +++ b/SIKE_sw/src/P697/P697_internal.h @@ -0,0 +1,175 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: internal header file for P697 +*********************************************************************************************/ + +#ifndef P697_INTERNAL_H +#define P697_INTERNAL_H + +#include "../config.h" + + +#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_ARM64) + #define NWORDS_FIELD 11 // Number of words of a 697-bit field element + #define p697_ZERO_WORDS 5 // Number of "0" digits in the least significant part of p697 + 1 +#elif (TARGET == TARGET_x86) + #define NWORDS_FIELD 22 + #define p697_ZERO_WORDS 11 +#endif + + +// Basic constants + +#define NBITS_FIELD 697 +#define MAXBITS_FIELD 704 +#define MAXWORDS_FIELD ((MAXBITS_FIELD+RADIX-1)/RADIX) // Max. number of words to represent field elements +#define NWORDS64_FIELD ((NBITS_FIELD+63)/64) // Number of 64-bit words of a 697-bit field element +#define NBITS_ORDER 384 +#define NWORDS_ORDER ((NBITS_ORDER+RADIX-1)/RADIX) // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp. 
+#define NWORDS64_ORDER ((NBITS_ORDER+63)/64) // Number of 64-bit words of an NBITS_ORDER-bit (384-bit) element +#define MAXBITS_ORDER NBITS_ORDER +#define ALICE 0 +#define BOB 1 +#define OALICE_BITS 356 +#define OBOB_BITS 341 +#define OBOB_EXPON 215 +#define MASK_ALICE 0x0F +#define MASK_BOB 0x0F +#define PRIME p697 +#define PARAM_A 6 +#define PARAM_C 1 +// Fixed parameters for isogeny tree computation +#define MAX_INT_POINTS_ALICE 8 +#define MAX_INT_POINTS_BOB 9 +#define MAX_Alice 178 +#define MAX_Bob 215 +#define MSG_BYTES 32 +#define SECRETKEY_A_BYTES ((OALICE_BITS + 7) / 8) +#define SECRETKEY_B_BYTES ((OBOB_BITS - 1 + 7) / 8) +#define FP2_ENCODED_BYTES 2*((NBITS_FIELD + 7) / 8) // NOTE(review): expansion is not parenthesized; wrap in (...) before using it as an operand of / or % + + +// SIDH's basic element definitions and point representations + +typedef digit_t felm_t[NWORDS_FIELD]; // Datatype for representing 697-bit field elements (704-bit max.) +typedef digit_t dfelm_t[2*NWORDS_FIELD]; // Datatype for representing double-precision 2x697-bit field elements (2x704-bit max.) +typedef felm_t f2elm_t[2]; // Datatype for representing quadratic extension field elements GF(p697^2) + +typedef struct { f2elm_t X; f2elm_t Z; } point_proj; // Point representation in projective XZ Montgomery coordinates. +typedef point_proj point_proj_t[1]; + +#ifdef COMPRESS + typedef struct { f2elm_t X; f2elm_t Y; f2elm_t Z; } point_full_proj; // Point representation in full projective XYZ Montgomery coordinates + typedef point_full_proj point_full_proj_t[1]; + + typedef struct { f2elm_t x; f2elm_t y; } point_affine; // Point representation in affine coordinates. 
+ typedef point_affine point_t[1]; + + typedef f2elm_t publickey_t[3]; +#endif + + + +/**************** Function prototypes ****************/ +/************* Multiprecision functions **************/ + +// 697-bit multiprecision addition, c = a+b +void mp_add697(const digit_t* a, const digit_t* b, digit_t* c); +void mp_add697_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// 697-bit multiprecision subtraction, c = a-b+2p or c = a-b+4p +extern void mp_sub697_p2(const digit_t* a, const digit_t* b, digit_t* c); +extern void mp_sub697_p4(const digit_t* a, const digit_t* b, digit_t* c); +void mp_sub697_p2_asm(const digit_t* a, const digit_t* b, digit_t* c); +void mp_sub697_p4_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// 2x697-bit multiprecision subtraction followed by addition with p697*2^704, c = a-b+(p697*2^704) if a-b < 0, otherwise c=a-b +void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c); +void mp_subadd697x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Double 2x697-bit multiprecision subtraction, c = c-a-b, where c > a and c > b +void mp_dblsub697x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +/************ Field arithmetic functions *************/ + +// Copy of a field element, c = a +void fpcopy697(const digit_t* a, digit_t* c); + +// Zeroing a field element, a = 0 +void fpzero697(digit_t* a); + +// Non constant-time comparison of two field elements. 
If a = b return TRUE, otherwise, return FALSE +bool fpequal697_non_constant_time(const digit_t* a, const digit_t* b); + +// Modular addition, c = a+b mod p697 +extern void fpadd697(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpadd697_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular subtraction, c = a-b mod p697 +extern void fpsub697(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpsub697_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular negation, a = -a mod p697 +extern void fpneg697(digit_t* a); + +// Modular division by two, c = a/2 mod p697. +void fpdiv2_697(const digit_t* a, digit_t* c); + +// Modular correction to reduce field element a in [0, 2*p697-1] to [0, p697-1]. +void fpcorrection697(digit_t* a); + +// 697-bit Montgomery reduction, c = a mod p. NOTE: ma is not const; the x64 assembly implementation uses it as scratch and overwrites part of it +void rdc697_asm(digit_t* ma, digit_t* mc); + +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p697, where R=2^704 +void fpmul697_mont(const digit_t* a, const digit_t* b, digit_t* c); +void mul697_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p697, where R=2^704 +void fpsqr697_mont(const digit_t* ma, digit_t* mc); + +// Field inversion, a = a^-1 in GF(p697) +void fpinv697_mont(digit_t* a); + +// Chain to compute (p697-3)/4 using Montgomery arithmetic +void fpinv697_chain_mont(digit_t* a); + +/************ GF(p^2) arithmetic functions *************/ + +// Copy of a GF(p697^2) element, c = a +void fp2copy697(const f2elm_t a, f2elm_t c); + +// Zeroing a GF(p697^2) element, a = 0 +void fp2zero697(f2elm_t a); + +// GF(p697^2) negation, a = -a in GF(p697^2) +void fp2neg697(f2elm_t a); + +// GF(p697^2) addition, c = a+b in GF(p697^2) +extern void fp2add697(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p697^2) subtraction, c = a-b in GF(p697^2) +extern void fp2sub697(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p697^2) division by two, c = 
a/2 in GF(p697^2) +void fp2div2_697(const f2elm_t a, f2elm_t c); + +// Modular correction, a = a in GF(p697^2) +void fp2correction697(f2elm_t a); + +// GF(p697^2) squaring using Montgomery arithmetic, c = a^2 in GF(p697^2) +void fp2sqr697_mont(const f2elm_t a, f2elm_t c); + +// GF(p697^2) multiplication using Montgomery arithmetic, c = a*b in GF(p697^2) +void fp2mul697_mont(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p697^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) +void fp2inv697_mont(f2elm_t a); + + +#endif diff --git a/SIKE_sw/src/P697/generic/fp_generic.c b/SIKE_sw/src/P697/generic/fp_generic.c new file mode 100644 index 0000000..421dfa8 --- /dev/null +++ b/SIKE_sw/src/P697/generic/fp_generic.c @@ -0,0 +1,259 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: portable modular arithmetic for P697 +*********************************************************************************************/ + +#include "../P697_internal.h" +#include "../../internal.h" + +// Global constants +extern const uint64_t p697[NWORDS64_FIELD]; +extern const uint64_t p697p1[NWORDS64_FIELD]; +extern const uint64_t p697x2[NWORDS64_FIELD]; +extern const uint64_t p697x4[NWORDS64_FIELD]; + + +__inline void mp_sub697_p2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. 
+ unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p697x2)[i], borrow, c[i]); + } +} + + +__inline void mp_sub697_p4(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p697x4)[i], borrow, c[i]); + } +} + + +__inline void fpadd697(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p697. + // Inputs: a, b in [0, 2*p697-1] + // Output: c in [0, 2*p697-1] + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p697x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p697x2)[i] & mask, carry, c[i]); + } +} + + +__inline void fpsub697(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p697. + // Inputs: a, b in [0, 2*p697-1] + // Output: c in [0, 2*p697-1] + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p697x2)[i] & mask, borrow, c[i]); + } +} + + +__inline void fpneg697(digit_t* a) +{ // Modular negation, a = -a mod p697. 
+ // Input: a in [0, 2*p697-1]; output: a = 2*p697 - a, in [1, 2*p697] (a = 0 yields 2*p697) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p697x2)[i], a[i], borrow, a[i]); + } +} + + +void fpdiv2_697(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p697. + // Input : a in [0, 2*p697-1] + // Output: c in [0, 2*p697-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p697 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p697)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection697(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p697-1] to [0, p697-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p697)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; // Mask selects p697 for the add-back below only if the subtraction borrowed (a < p697) + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p697)[i] & mask, borrow, a[i]); + } +} + + +void digit_x_digit(const digit_t a, const digit_t b, digit_t* c) +{ // Digit multiplication, digit * digit -> 2-digit result + register digit_t al, ah, bl, bh, temp; + digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4); + + al = a & mask_low; // Low part + ah = a >> (sizeof(digit_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(digit_t) * 4); + + albl = al*bl; + albh = al*bh; + ahbl = ah*bl; + ahbh = ah*bh; + c[0] = albl & mask_low; // C00 + + res1 = albl >> (sizeof(digit_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(digit_t) * 4); + c[0] ^= temp << (sizeof(digit_t) * 4); // C01 + + res1 = ahbl >> (sizeof(digit_t) * 4); + res2 = albh >> (sizeof(digit_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + c[1] = temp & mask_low; // C10 + carry = temp 
& mask_high; + c[1] ^= (ahbh & mask_high) + carry; // C11 +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. + unsigned int i, j; + digit_t t = 0, u = 0, v = 0, UV[2]; + unsigned int carry = 0; + + for (i = 0; i < nwords; i++) { + for (j = 0; j <= i; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = nwords; i < 2*nwords-1; i++) { + for (j = i-nwords+1; j < nwords; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + c[2*nwords-1] = v; +} + + +void rdc_mont(digit_t* ma, digit_t* mc) +{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p697. + // mc = ma*R^-1 mod p697x2, where R = 2^704. + // If ma < 2^704*p697, the output mc is in the range [0, 2*p697-1]. + // ma is assumed to be in Montgomery representation. 
+ unsigned int i, j, carry, count = p697_ZERO_WORDS; + digit_t UV[2], t = 0, u = 0, v = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + mc[i] = 0; + } + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j < i; j++) { + if (j < (i-p697_ZERO_WORDS+1)) { + MUL(mc[j], ((digit_t*)p697p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { + if (count > 0) { + count -= 1; + } + for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { + if (j < (NWORDS_FIELD-count)) { + MUL(mc[j], ((digit_t*)p697p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i-NWORDS_FIELD] = v; + v = u; + u = t; + t = 0; + } + ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); + mc[NWORDS_FIELD-1] = v; +} \ No newline at end of file diff --git a/SIKE_sw/src/P751/AMD64/fp_x64.c b/SIKE_sw/src/P751/AMD64/fp_x64.c new file mode 100644 index 0000000..9dd2078 --- /dev/null +++ b/SIKE_sw/src/P751/AMD64/fp_x64.c @@ -0,0 +1,910 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: modular arithmetic optimized for x64 platforms for P751 +*********************************************************************************************/ + +#include "../P751_internal.h" +#include "../../internal.h" + +// Global constants +extern const uint64_t p751[NWORDS_FIELD]; +extern const uint64_t p751p1[NWORDS_FIELD]; +extern const uint64_t p751x2[NWORDS_FIELD]; +extern const uint64_t p751x4[NWORDS_FIELD]; + + +__inline void 
mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p751x2)[i], borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + mp_sub751_p2_asm(a, b, c); + +#endif +} + + +__inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p751x4)[i], borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + mp_sub751_p4_asm(a, b, c); + +#endif +} + + +__inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p751. + // Inputs: a, b in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p751x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p751x2)[i] & mask, carry, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + fpadd751_asm(a, b, c); + +#endif +} + + +__inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p751. 
+ // Inputs: a, b in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p751x2)[i] & mask, borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + fpsub751_asm(a, b, c); + +#endif +} + + +__inline void fpneg751(digit_t* a) +{ // Modular negation, a = -a mod p751. + // Input: a in [0, 2*p751-1]; output: a = 2*p751 - a, in [1, 2*p751] (a = 0 yields 2*p751) + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p751x2)[i], a[i], borrow, a[i]); + } +} + + +void fpdiv2_751(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p751. + // Input : a in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p751 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p751)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection751(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p751)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; // Mask selects p751 for the add-back below only if the subtraction borrowed (a < p751) + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p751)[i] & mask, borrow, a[i]); + } +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision multiply, c = a*b, for fixed-length operands: nwords is ignored, and the unrolled OS_WIN code reads a[0..11], b[0..11] and writes c[0..23]. 
+ + (void)nwords; + +#if (OS_TARGET == OS_WIN) + digit_t t = 0; + uint128_t uv = {0}; + unsigned int carry = 0; + + MULADD128(a[0], b[0], uv, carry, uv); + t += carry; + c[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[0], uv, carry, uv); + t += carry; + c[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[0], uv, carry, uv); + t += carry; + c[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[0], uv, carry, uv); + t += carry; + c[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[0], uv, carry, uv); + t += carry; + c[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[5], uv, carry, uv); + t += carry; + MULADD128(a[4], b[1], uv, carry, uv); + t += carry; + MULADD128(a[3], b[2], uv, carry, uv); + t += carry; + MULADD128(a[2], b[3], uv, carry, uv); + t += carry; + MULADD128(a[1], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[0], uv, carry, uv); + t += carry; + c[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[1], uv, carry, uv); + t += carry; + MULADD128(a[4], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[4], uv, carry, uv); + t += carry; + MULADD128(a[1], b[5], uv, carry, uv); + t += carry; + MULADD128(a[6], b[0], uv, carry, uv); + t += carry; + c[6] = uv[0]; + uv[0] = 
uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[7], uv, carry, uv); + t += carry; + MULADD128(a[6], b[1], uv, carry, uv); + t += carry; + MULADD128(a[5], b[2], uv, carry, uv); + t += carry; + MULADD128(a[4], b[3], uv, carry, uv); + t += carry; + MULADD128(a[3], b[4], uv, carry, uv); + t += carry; + MULADD128(a[2], b[5], uv, carry, uv); + t += carry; + MULADD128(a[1], b[6], uv, carry, uv); + t += carry; + MULADD128(a[7], b[0], uv, carry, uv); + t += carry; + c[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[8], uv, carry, uv); + t += carry; + MULADD128(a[7], b[1], uv, carry, uv); + t += carry; + MULADD128(a[6], b[2], uv, carry, uv); + t += carry; + MULADD128(a[5], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[5], uv, carry, uv); + t += carry; + MULADD128(a[2], b[6], uv, carry, uv); + t += carry; + MULADD128(a[1], b[7], uv, carry, uv); + t += carry; + MULADD128(a[8], b[0], uv, carry, uv); + t += carry; + c[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[9], uv, carry, uv); + t += carry; + MULADD128(a[8], b[1], uv, carry, uv); + t += carry; + MULADD128(a[7], b[2], uv, carry, uv); + t += carry; + MULADD128(a[6], b[3], uv, carry, uv); + t += carry; + MULADD128(a[5], b[4], uv, carry, uv); + t += carry; + MULADD128(a[4], b[5], uv, carry, uv); + t += carry; + MULADD128(a[3], b[6], uv, carry, uv); + t += carry; + MULADD128(a[2], b[7], uv, carry, uv); + t += carry; + MULADD128(a[1], b[8], uv, carry, uv); + t += carry; + MULADD128(a[9], b[0], uv, carry, uv); + t += carry; + c[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[10], uv, carry, uv); + t += carry; + MULADD128(a[9], b[1], uv, carry, uv); + t += carry; + MULADD128(a[8], b[2], uv, carry, uv); + t += carry; + MULADD128(a[7], b[3], uv, carry, uv); + t += carry; + MULADD128(a[6], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[5], uv, carry, uv); + t += carry; + 
MULADD128(a[4], b[6], uv, carry, uv); + t += carry; + MULADD128(a[3], b[7], uv, carry, uv); + t += carry; + MULADD128(a[2], b[8], uv, carry, uv); + t += carry; + MULADD128(a[1], b[9], uv, carry, uv); + t += carry; + MULADD128(a[10], b[0], uv, carry, uv); + t += carry; + c[10] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[11], uv, carry, uv); + t += carry; + MULADD128(a[10], b[1], uv, carry, uv); + t += carry; + MULADD128(a[9], b[2], uv, carry, uv); + t += carry; + MULADD128(a[8], b[3], uv, carry, uv); + t += carry; + MULADD128(a[7], b[4], uv, carry, uv); + t += carry; + MULADD128(a[6], b[5], uv, carry, uv); + t += carry; + MULADD128(a[5], b[6], uv, carry, uv); + t += carry; + MULADD128(a[4], b[7], uv, carry, uv); + t += carry; + MULADD128(a[3], b[8], uv, carry, uv); + t += carry; + MULADD128(a[2], b[9], uv, carry, uv); + t += carry; + MULADD128(a[1], b[10], uv, carry, uv); + t += carry; + MULADD128(a[11], b[0], uv, carry, uv); + t += carry; + c[11] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[1], b[11], uv, carry, uv); + t += carry; + MULADD128(a[10], b[2], uv, carry, uv); + t += carry; + MULADD128(a[9], b[3], uv, carry, uv); + t += carry; + MULADD128(a[8], b[4], uv, carry, uv); + t += carry; + MULADD128(a[7], b[5], uv, carry, uv); + t += carry; + MULADD128(a[6], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[7], uv, carry, uv); + t += carry; + MULADD128(a[4], b[8], uv, carry, uv); + t += carry; + MULADD128(a[3], b[9], uv, carry, uv); + t += carry; + MULADD128(a[2], b[10], uv, carry, uv); + t += carry; + MULADD128(a[11], b[1], uv, carry, uv); + t += carry; + c[12] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[2], uv, carry, uv); + t += carry; + MULADD128(a[10], b[3], uv, carry, uv); + t += carry; + MULADD128(a[9], b[4], uv, carry, uv); + t += carry; + MULADD128(a[8], b[5], uv, carry, uv); + t += carry; + MULADD128(a[7], b[6], uv, carry, uv); + t += carry; + MULADD128(a[6], b[7], uv, carry, 
uv); + t += carry; + MULADD128(a[5], b[8], uv, carry, uv); + t += carry; + MULADD128(a[4], b[9], uv, carry, uv); + t += carry; + MULADD128(a[3], b[10], uv, carry, uv); + t += carry; + MULADD128(a[2], b[11], uv, carry, uv); + t += carry; + c[13] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[3], uv, carry, uv); + t += carry; + MULADD128(a[10], b[4], uv, carry, uv); + t += carry; + MULADD128(a[9], b[5], uv, carry, uv); + t += carry; + MULADD128(a[8], b[6], uv, carry, uv); + t += carry; + MULADD128(a[7], b[7], uv, carry, uv); + t += carry; + MULADD128(a[6], b[8], uv, carry, uv); + t += carry; + MULADD128(a[5], b[9], uv, carry, uv); + t += carry; + MULADD128(a[4], b[10], uv, carry, uv); + t += carry; + MULADD128(a[3], b[11], uv, carry, uv); + t += carry; + c[14] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[4], uv, carry, uv); + t += carry; + MULADD128(a[10], b[5], uv, carry, uv); + t += carry; + MULADD128(a[9], b[6], uv, carry, uv); + t += carry; + MULADD128(a[8], b[7], uv, carry, uv); + t += carry; + MULADD128(a[7], b[8], uv, carry, uv); + t += carry; + MULADD128(a[6], b[9], uv, carry, uv); + t += carry; + MULADD128(a[5], b[10], uv, carry, uv); + t += carry; + MULADD128(a[4], b[11], uv, carry, uv); + t += carry; + c[15] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[5], uv, carry, uv); + t += carry; + MULADD128(a[10], b[6], uv, carry, uv); + t += carry; + MULADD128(a[9], b[7], uv, carry, uv); + t += carry; + MULADD128(a[8], b[8], uv, carry, uv); + t += carry; + MULADD128(a[7], b[9], uv, carry, uv); + t += carry; + MULADD128(a[6], b[10], uv, carry, uv); + t += carry; + MULADD128(a[5], b[11], uv, carry, uv); + t += carry; + c[16] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[6], uv, carry, uv); + t += carry; + MULADD128(a[10], b[7], uv, carry, uv); + t += carry; + MULADD128(a[9], b[8], uv, carry, uv); + t += carry; + MULADD128(a[8], b[9], uv, carry, uv); + t += carry; + 
MULADD128(a[7], b[10], uv, carry, uv); + t += carry; + MULADD128(a[6], b[11], uv, carry, uv); + t += carry; + c[17] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[7], uv, carry, uv); + t += carry; + MULADD128(a[10], b[8], uv, carry, uv); + t += carry; + MULADD128(a[9], b[9], uv, carry, uv); + t += carry; + MULADD128(a[8], b[10], uv, carry, uv); + t += carry; + MULADD128(a[7], b[11], uv, carry, uv); + t += carry; + c[18] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[8], uv, carry, uv); + t += carry; + MULADD128(a[10], b[9], uv, carry, uv); + t += carry; + MULADD128(a[9], b[10], uv, carry, uv); + t += carry; + MULADD128(a[8], b[11], uv, carry, uv); + t += carry; + c[19] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[9], uv, carry, uv); + t += carry; + MULADD128(a[10], b[10], uv, carry, uv); + t += carry; + MULADD128(a[9], b[11], uv, carry, uv); + t += carry; + c[20] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[10], uv, carry, uv); + t += carry; + MULADD128(a[10], b[11], uv, carry, uv); + t += carry; + c[21] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + + MULADD128(a[11], b[11], uv, carry, uv); + c[22] = uv[0]; + c[23] = uv[1]; + +#elif (OS_TARGET == OS_LINUX) + + mul751_asm(a, b, c); + +#endif +} + + +void rdc_mont(digit_t* ma, digit_t* mc) +{ // Montgomery reduction exploiting special form of the prime. + // mc = ma*R^-1 mod p751x2, where R = 2^768. + // If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1]. + // ma is assumed to be in Montgomery representation. 
+ +#if (OS_TARGET == OS_WIN) + unsigned int carry; + digit_t t = 0; + uint128_t uv = {0}; + + mc[0] = ma[0]; + mc[1] = ma[1]; + mc[2] = ma[2]; + mc[3] = ma[3]; + mc[4] = ma[4]; + MUL128(mc[0], ((digit_t*)p751p1)[5], uv); + ADDC(0, uv[0], ma[5], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + mc[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = 0; + + MULADD128(mc[0], ((digit_t*)p751p1)[6], uv, carry, uv); + MULADD128(mc[1], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[6], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[6] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[7], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[8], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[9], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p751p1)[10], uv, carry, 
uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[10], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[10] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[11], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[11] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[1], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[12], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[2], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + 
MULADD128(mc[4], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[13], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[3], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[14], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[4], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[15], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[5], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[7], 
((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[16], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[6], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[17], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[7], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[18], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[6] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[8], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[19], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[7] = uv[0]; + 
uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[9], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[20], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[10], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[21], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[11], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[22], carry, mc[10]); + ADDC(carry, uv[1], 0, carry, uv[1]); + ADDC(0, uv[1], ma[23], carry, mc[11]); + +#elif (OS_TARGET == OS_LINUX) + + rdc751_asm(ma, mc); + +#endif +} \ No newline at end of file diff --git a/SIKE_sw/src/P751/AMD64/fp_x64_asm.S b/SIKE_sw/src/P751/AMD64/fp_x64_asm.S new file mode 100644 index 0000000..cca6992 --- /dev/null +++ b/SIKE_sw/src/P751/AMD64/fp_x64_asm.S @@ -0,0 +1,3147 @@ +//******************************************************************************************* +// SIDH: an efficient supersingular isogeny cryptography library +// Copyright (c) Microsoft Corporation +// +// Website: https://github.com/microsoft/PQCrypto-SIDH +// Released under MIT license +// +// Abstract: field arithmetic in x64 assembly for P751 on Linux +//******************************************************************************************* + +.intel_syntax noprefix + +// Format function and variable names for Mac OS X +#if defined(__APPLE__) + #define fmt(f) _##f +#else + #define fmt(f) f +#endif + +// Registers that are used for parameter passing: +#define reg_p1 rdi +#define reg_p2 rsi +#define reg_p3 rdx + + +.text 
+//*********************************************************************** +// Field addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.global fmt(fpadd751_asm) +fmt(fpadd751_asm): + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + mov rcx, [reg_p1+64] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + adc r14, [reg_p2+48] + adc r15, [reg_p2+56] + adc rcx, [reg_p2+64] + mov rax, [reg_p1+72] + adc rax, [reg_p2+72] + mov [reg_p3+72], rax + mov rax, [reg_p1+80] + adc rax, [reg_p2+80] + mov [reg_p3+80], rax + mov rax, [reg_p1+88] + adc rax, [reg_p2+88] + mov [reg_p3+88], rax + + mov rax, [rip+fmt(p751x2)] + sub r8, rax + mov rax, [rip+fmt(p751x2)+8] + sbb r9, rax + sbb r10, rax + sbb r11, rax + sbb r12, rax + mov rax, [rip+fmt(p751x2)+40] + sbb r13, rax + mov rax, [rip+fmt(p751x2)+48] + sbb r14, rax + mov rax, [rip+fmt(p751x2)+56] + sbb r15, rax + mov rax, [rip+fmt(p751x2)+64] + sbb rcx, rax + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + mov [reg_p3+64], rcx + mov r8, [reg_p3+72] + mov r9, [reg_p3+80] + mov r10, [reg_p3+88] + mov rax, [rip+fmt(p751x2)+72] + sbb r8, rax + mov rax, [rip+fmt(p751x2)+80] + sbb r9, rax + mov rax, [rip+fmt(p751x2)+88] + sbb r10, rax + mov [reg_p3+72], r8 + mov [reg_p3+80], r9 + mov [reg_p3+88], r10 + mov rax, 0 + sbb rax, 0 + + mov rsi, [rip+fmt(p751x2)] + and rsi, rax + mov r8, [rip+fmt(p751x2)+8] + and r8, rax + mov r9, [rip+fmt(p751x2)+40] + and r9, rax + mov r10, [rip+fmt(p751x2)+48] + and r10, rax + mov r11, [rip+fmt(p751x2)+56] + and r11, rax + mov r12, 
[rip+fmt(p751x2)+64] + and r12, rax + mov r13, [rip+fmt(p751x2)+72] + and r13, rax + mov r14, [rip+fmt(p751x2)+80] + and r14, rax + mov r15, [rip+fmt(p751x2)+88] + and r15, rax + + add rsi, [reg_p3] + mov [reg_p3], rsi + mov rax, [reg_p3+8] + adc rax, r8 + mov [reg_p3+8], rax + mov rax, [reg_p3+16] + adc rax, r8 + mov [reg_p3+16], rax + mov rax, [reg_p3+24] + adc rax, r8 + mov [reg_p3+24], rax + mov rax, [reg_p3+32] + adc rax, r8 + mov [reg_p3+32], rax + adc r9, [reg_p3+40] + adc r10, [reg_p3+48] + adc r11, [reg_p3+56] + adc r12, [reg_p3+64] + adc r13, [reg_p3+72] + adc r14, [reg_p3+80] + adc r15, [reg_p3+88] + mov [reg_p3+40], r9 + mov [reg_p3+48], r10 + mov [reg_p3+56], r11 + mov [reg_p3+64], r12 + mov [reg_p3+72], r13 + mov [reg_p3+80], r14 + mov [reg_p3+88], r15 + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Field subtraction +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] +//*********************************************************************** +.global fmt(fpsub751_asm) +fmt(fpsub751_asm): + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + mov rcx, [reg_p1+64] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb r14, [reg_p2+48] + sbb r15, [reg_p2+56] + sbb rcx, [reg_p2+64] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + mov [reg_p3+64], rcx + mov rax, [reg_p1+72] + sbb rax, [reg_p2+72] + mov [reg_p3+72], rax + mov rax, [reg_p1+80] + sbb rax, [reg_p2+80] + mov [reg_p3+80], rax + mov rax, [reg_p1+88] + sbb rax, [reg_p2+88] + mov [reg_p3+88], rax + mov rax, 0 + sbb rax, 0 + + mov rsi, 
[rip+fmt(p751x2)] + and rsi, rax + mov r8, [rip+fmt(p751x2)+8] + and r8, rax + mov r9, [rip+fmt(p751x2)+40] + and r9, rax + mov r10, [rip+fmt(p751x2)+48] + and r10, rax + mov r11, [rip+fmt(p751x2)+56] + and r11, rax + mov r12, [rip+fmt(p751x2)+64] + and r12, rax + mov r13, [rip+fmt(p751x2)+72] + and r13, rax + mov r14, [rip+fmt(p751x2)+80] + and r14, rax + mov r15, [rip+fmt(p751x2)+88] + and r15, rax + + mov rax, [reg_p3] + add rax, rsi + mov [reg_p3], rax + mov rax, [reg_p3+8] + adc rax, r8 + mov [reg_p3+8], rax + mov rax, [reg_p3+16] + adc rax, r8 + mov [reg_p3+16], rax + mov rax, [reg_p3+24] + adc rax, r8 + mov [reg_p3+24], rax + mov rax, [reg_p3+32] + adc rax, r8 + mov [reg_p3+32], rax + adc r9, [reg_p3+40] + adc r10, [reg_p3+48] + adc r11, [reg_p3+56] + adc r12, [reg_p3+64] + adc r13, [reg_p3+72] + adc r14, [reg_p3+80] + adc r15, [reg_p3+88] + mov [reg_p3+40], r9 + mov [reg_p3+48], r10 + mov [reg_p3+56], r11 + mov [reg_p3+64], r12 + mov [reg_p3+72], r13 + mov [reg_p3+80], r14 + mov [reg_p3+88], r15 + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +///////////////////////////////////////////////////////////////// MACRO +.macro SUB751_PX P0 + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + setc al + + mov r14, [rip+\P0] + mov r15, [rip+\P0+8] + add r8, r14 + adc r9, r15 + adc r10, r15 + adc r11, r15 + adc r12, r15 + mov r14, [rip+\P0+40] + adc r13, r14 + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + setc cl + + bt rax, 0 + mov r8, [reg_p1+48] + mov r9, [reg_p1+56] + mov r10, [reg_p1+64] + mov r11, [reg_p1+72] + mov r12, [reg_p1+80] + mov r13, [reg_p1+88] + sbb r8, [reg_p2+48] + sbb r9, [reg_p2+56] + sbb r10, 
[reg_p2+64] + sbb r11, [reg_p2+72] + sbb r12, [reg_p2+80] + sbb r13, [reg_p2+88] + + bt rcx, 0 + mov r14, [rip+\P0+48] + mov r15, [rip+\P0+56] + adc r8, r14 + adc r9, r15 + mov r14, [rip+\P0+64] + mov r15, [rip+\P0+72] + adc r10, r14 + adc r11, r15 + mov r14, [rip+\P0+80] + mov r15, [rip+\P0+88] + adc r12, r14 + adc r13, r15 + mov [reg_p3+48], r8 + mov [reg_p3+56], r9 + mov [reg_p3+64], r10 + mov [reg_p3+72], r11 + mov [reg_p3+80], r12 + mov [reg_p3+88], r13 + + pop r15 + pop r14 + pop r13 + pop r12 + .endm + + +//*********************************************************************** +// Multiprecision subtraction with correction with 2*p751 +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p751 +//*********************************************************************** +.global fmt(mp_sub751_p2_asm) +fmt(mp_sub751_p2_asm): + + SUB751_PX fmt(p751x2) + ret + + +//*********************************************************************** +// Multiprecision subtraction with correction with 4*p751 +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p751 +//*********************************************************************** +.global fmt(mp_sub751_p4_asm) +fmt(mp_sub751_p4_asm): + + SUB751_PX fmt(p751x4) + ret + + +#ifdef _MULX_ + +/////////////////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: memory pointers M0 and M1 +// Outputs: memory pointer C +// Temps: stack space for two 64-bit values (case w/o _ADX_), regs T0:T7 +/////////////////////////////////////////////////////////////////////////// +#ifdef _ADX_ + +.macro MUL384_SCHOOL M0, M1, C, S, T0, T1, T2, T3, T4, T5, T6, T7 + mov rdx, \M0 + mulx \T0, \T1, \M1 + mulx \T2, \T3, 8\M1 + mov \C, \T1 // C0_final + xor rax, rax + mulx \T4, \T5, 16\M1 + adox \T0, \T3 + adox \T2, \T5 + mulx \T1, \T3, 24\M1 + adox \T4, \T3 + mulx \T5, \T6, 32\M1 + adox \T1, \T6 + mulx \T3, \T7, 40\M1 + adox \T5, \T7 + adox \T3, rax + + mov rdx, 8\M0 + mulx \T6, 
\T7, \M1 + xor rax, rax + adcx \T0, \T7 + mov 8\C, \T0 // C1_final + adcx \T2, \T6 + mulx \T6, \T7, 8\M1 + adox \T2, \T7 + adcx \T4, \T6 + mulx \T0, \T6, 16\M1 + adox \T4, \T6 + adcx \T0, \T1 + mulx \T1, \T7, 24\M1 + adcx \T1, \T5 + mulx \T5, \T6, 32\M1 + adcx \T3, \T5 + mulx \T5, rdx, 40\M1 + adcx \T5, rax + + adox \T0, \T7 + adox \T1, \T6 + adox \T3, rdx + adox \T5, rax + + mov rdx, 16\M0 + mulx \T6, \T7, \M1 + xor rax, rax + adcx \T2, \T7 + mov 16\C, \T2 // C2_final + adcx \T4, \T6 + mulx \T6, \T7, 8\M1 + adox \T4, \T7 + adcx \T0, \T6 + mulx \T2, \T6, 16\M1 + adox \T0, \T6 + adcx \T1, \T2 + mulx \T2, \T7, 24\M1 + adcx \T3, \T2 + mulx \T2, \T6, 32\M1 + adcx \T5, \T2 + mulx \T2, rdx, 40\M1 + adcx \T2, rax + + adox \T1, \T7 + adox \T3, \T6 + adox \T5, rdx + adox \T2, rax + + mov rdx, 24\M0 + mulx \T6, \T7, \M1 + xor rax, rax + adcx \T4, \T7 + mov 24\C, \T4 // C3_final + adcx \T0, \T6 + mulx \T6, \T7, 8\M1 + adox \T0, \T7 + adcx \T1, \T6 + mulx \T4, \T6, 16\M1 + adox \T1, \T6 + adcx \T3, \T4 + mulx \T4, \T7, 24\M1 + adcx \T5, \T4 + mulx \T4, \T6, 32\M1 + adcx \T2, \T4 + mulx \T4, rdx, 40\M1 + adcx \T4, rax + + adox \T3, \T7 + adox \T5, \T6 + adox \T2, rdx + adox \T4, rax + + mov rdx, 32\M0 + mulx \T6, \T7, \M1 + xor rax, rax + adcx \T0, \T7 + mov 32\C, \T0 // C4_final + adcx \T1, \T6 + mulx \T6, \T7, 8\M1 + adox \T1, \T7 + adcx \T3, \T6 + mulx \T0, \T6, 16\M1 + adox \T3, \T6 + adcx \T5, \T0 + mulx \T0, \T7, 24\M1 + adcx \T2, \T0 + mulx \T0, \T6, 32\M1 + adcx \T4, \T0 + mulx \T0, rdx, 40\M1 + adcx \T0, rax + + adox \T5, \T7 + adox \T2, \T6 + adox \T4, rdx + adox \T0, rax + + mov rdx, 40\M0 + mulx \T6, \T7, \M1 + xor rax, rax + adcx \T1, \T7 + mov 40\C, \T1 // C5_final + adcx \T3, \T6 + mulx \T6, \T7, 8\M1 + adox \T3, \T7 + adcx \T5, \T6 + mulx \T1, \T6, 16\M1 + adox \T5, \T6 + adcx \T2, \T1 + mulx \T1, \T7, 24\M1 + adcx \T4, \T1 + mulx \T1, \T6, 32\M1 + adcx \T0, \T1 + mulx \T1, rdx, 40\M1 + adcx \T1, rax + + adox \T2, \T7 + adox \T4, \T6 + adox \T0, rdx + adox \T1, 
rax + mov 48\C, \T3 + mov 56\C, \T5 + mov 64\C, \T2 + mov 72\C, \T4 + mov 80\C, \T0 + mov 88\C, \T1 +.endm + +#else + +.macro MUL384_SCHOOL M0, M1, C, S, T0, T1, T2, T3, T4, T5, T6, T7 + mov rdx, \M0 + mulx \T0, \T1, \M1 + mulx \T2, \T3, 8\M1 + mov \C, \T1 // C0_final + xor rax, rax + mulx \T4, \T5, 16\M1 + add \T0, \T3 + adc \T2, \T5 + mulx \T1, \T3, 24\M1 + adc \T4, \T3 + mulx \T5, \T6, 32\M1 + adc \T1, \T6 + mulx \T3, \T7, 40\M1 + adc \T5, \T7 + adc \T3, rax + + mov rdx, 8\M0 + mulx \T6, \T7, \M1 + add \T0, \T7 + mov 8\C, \T0 // C1_final + adc \T2, \T6 + mulx \T6, \T7, 8\M1 + mov \S, \T7 // store T7 + adc \T4, \T6 + mulx \T0, \T6, 16\M1 + mov 8\S, \T6 // store T6 + adc \T0, \T1 + mulx \T1, \T7, 24\M1 + adc \T1, \T5 + mulx \T5, \T6, 32\M1 + adc \T3, \T5 + mulx \T5, rdx, 40\M1 + adc \T5, rax + + xor rax, rax + add \T2, \S + adc \T4, 8\S + adc \T0, \T7 + adc \T1, \T6 + adc \T3, rdx + adc \T5, rax + + mov rdx, 16\M0 + mulx \T6, \T7, \M1 + add \T2, \T7 + mov 16\C, \T2 // C2_final + adc \T4, \T6 + mulx \T6, \T7, 8\M1 + mov \S, \T7 // store T7 + adc \T0, \T6 + mulx \T2, \T6, 16\M1 + mov 8\S, \T6 // store T6 + adc \T1, \T2 + mulx \T2, \T7, 24\M1 + adc \T3, \T2 + mulx \T2, \T6, 32\M1 + adc \T5, \T2 + mulx \T2, rdx, 40\M1 + adc \T2, rax + + xor rax, rax + add \T4, \S + adc \T0, 8\S + adc \T1, \T7 + adc \T3, \T6 + adc \T5, rdx + adc \T2, rax + + mov rdx, 24\M0 + mulx \T6, \T7, \M1 + add \T4, \T7 + mov 24\C, \T4 // C3_final + adc \T0, \T6 + mulx \T6, \T7, 8\M1 + mov \S, \T7 // store T7 + adc \T1, \T6 + mulx \T4, \T6, 16\M1 + mov 8\S, \T6 // store T6 + adc \T3, \T4 + mulx \T4, \T7, 24\M1 + adc \T5, \T4 + mulx \T4, \T6, 32\M1 + adc \T2, \T4 + mulx \T4, rdx, 40\M1 + adc \T4, rax + + xor rax, rax + add \T0, \S + adc \T1, 8\S + adc \T3, \T7 + adc \T5, \T6 + adc \T2, rdx + adc \T4, rax + + mov rdx, 32\M0 + mulx \T6, \T7, \M1 + add \T0, \T7 + mov 32\C, \T0 // C4_final + adc \T1, \T6 + mulx \T6, \T7, 8\M1 + mov \S, \T7 // store T7 + adc \T3, \T6 + mulx \T0, \T6, 16\M1 + mov 8\S, 
\T6 // store T6 + adc \T5, \T0 + mulx \T0, \T7, 24\M1 + adc \T2, \T0 + mulx \T0, \T6, 32\M1 + adc \T4, \T0 + mulx \T0, rdx, 40\M1 + adc \T0, rax + + xor rax, rax + add \T1, \S + adc \T3, 8\S + adc \T5, \T7 + adc \T2, \T6 + adc \T4, rdx + adc \T0, rax + + mov rdx, 40\M0 + mulx \T6, \T7, \M1 + add \T1, \T7 + mov 40\C, \T1 // C5_final + adc \T3, \T6 + mulx \T6, \T7, 8\M1 + mov \S, \T7 // store T7 + adc \T5, \T6 + mulx \T1, \T6, 16\M1 + mov 8\S, \T6 // store T6 + adc \T2, \T1 + mulx \T1, \T7, 24\M1 + adc \T4, \T1 + mulx \T1, \T6, 32\M1 + adc \T0, \T1 + mulx \T1, rdx, 40\M1 + adc \T1, rax + + add \T3, \S + adc \T5, 8\S + adc \T2, \T7 + adc \T4, \T6 + adc \T0, rdx + adc \T1, 0 + mov 48\C, \T3 + mov 56\C, \T5 + mov 64\C, \T2 + mov 72\C, \T4 + mov 80\C, \T0 + mov 88\C, \T1 +.endm + +#endif + + +//***************************************************************************** +// 751-bit multiplication using Karatsuba (one level), schoolbook (two levels) +//***************************************************************************** +.global fmt(mul751_asm) +fmt(mul751_asm): + push r12 + push r13 + push r14 + push r15 + mov rcx, reg_p3 + + // [rsp] <- AH + AL, rax <- mask + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + push rbx + push rbp + sub rsp, 152 + add r8, [reg_p1+48] + adc r9, [reg_p1+56] + adc r10, [reg_p1+64] + adc r11, [reg_p1+72] + adc r12, [reg_p1+80] + adc r13, [reg_p1+88] + sbb rax, 0 + mov [rsp], r8 + mov [rsp+8], r9 + mov [rsp+16], r10 + mov [rsp+24], r11 + mov [rsp+32], r12 + mov [rsp+40], r13 + + // [rsp+48] <- BH + BL, rdx <- mask + xor rdx, rdx + mov r8, [reg_p2] + mov r9, [reg_p2+8] + mov rbx, [reg_p2+16] + mov rbp, [reg_p2+24] + mov r14, [reg_p2+32] + mov r15, [reg_p2+40] + add r8, [reg_p2+48] + adc r9, [reg_p2+56] + adc rbx, [reg_p2+64] + adc rbp, [reg_p2+72] + adc r14, [reg_p2+80] + adc r15, [reg_p2+88] + sbb rdx, 0 + mov [rsp+48], r8 + mov [rsp+56], 
r9 + mov [rsp+64], rbx + mov [rsp+72], rbp + mov [rsp+80], r14 + mov [rsp+88], r15 + + // [rcx],[rcx+8],rbx,rbp,r14,r15 <- masked (BH + BL) + and r8, rax + and r9, rax + and rbx, rax + and rbp, rax + and r14, rax + and r15, rax + mov [rcx], r8 + mov [rcx+8], r9 + + // r8-r13 <- masked (AH + AL) + mov r8, [rsp] + mov r9, [rsp+8] + and r8, rdx + and r9, rdx + and r10, rdx + and r11, rdx + and r12, rdx + and r13, rdx + + // [rsp+96..120],r12,r13 <- masked (AH + AL) + masked (BH + BL) + mov rax, [rcx] + mov rdx, [rcx+8] + add r8, rax + adc r9, rdx + adc r10, rbx + adc r11, rbp + adc r12, r14 + adc r13, r15 + mov [rsp+96], r8 + mov [rsp+104], r9 + mov [rsp+112], r10 + mov [rsp+120], r11 + + // [rcx] <- AL x BL + MUL384_SCHOOL [reg_p1], [reg_p2], [rcx], [rsp+128], r8, r9, r10, r11, rbx, rbp, r14, r15 // Result C0-C5 + + // [rcx+96] <- (AH+AL) x (BH+BL), low part + MUL384_SCHOOL [rsp], [rsp+48], [rcx+96], [rsp+128], r8, r9, r10, r11, rbx, rbp, r14, r15 + + // [rsp] <- AH x BH + MUL384_SCHOOL [reg_p1+48], [reg_p2+48], [rsp], [rsp+128], r8, r9, r10, r11, rbx, rbp, r14, r15 + + // r8-r13 <- (AH+AL) x (BH+BL), final step + mov r8, [rsp+96] + mov r9, [rsp+104] + mov r10, [rsp+112] + mov r11, [rsp+120] + mov rax, [rcx+144] + add r8, rax + mov rax, [rcx+152] + adc r9, rax + mov rax, [rcx+160] + adc r10, rax + mov rax, [rcx+168] + adc r11, rax + mov rax, [rcx+176] + adc r12, rax + mov rax, [rcx+184] + adc r13, rax + + // rdi,rdx,rbx,rbp,r14,r15,r8-r13 <- (AH+AL) x (BH+BL) - ALxBL + mov rdi, [rcx+96] + sub rdi, [rcx] + mov rdx, [rcx+104] + sbb rdx, [rcx+8] + mov rbx, [rcx+112] + sbb rbx, [rcx+16] + mov rbp, [rcx+120] + sbb rbp, [rcx+24] + mov r14, [rcx+128] + sbb r14, [rcx+32] + mov r15, [rcx+136] + sbb r15, [rcx+40] + sbb r8, [rcx+48] + sbb r9, [rcx+56] + sbb r10, [rcx+64] + sbb r11, [rcx+72] + sbb r12, [rcx+80] + sbb r13, [rcx+88] + + // rdi,rdx,rbx,rbp,r14,r15,r8-r13 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + sub rdi, [rsp] + sbb rdx, [rsp+8] + sbb rbx, [rsp+16] + sbb rbp, [rsp+24] + sbb r14, [rsp+32] + sbb r15, 
[rsp+40] + sbb r8, [rsp+48] + sbb r9, [rsp+56] + sbb r10, [rsp+64] + sbb r11, [rsp+72] + sbb r12, [rsp+80] + sbb r13, [rsp+88] + + mov rax, [rcx+48] + add rax, rdi + mov [rcx+48], rax // Result C6-C11 + mov rax, [rcx+56] + adc rax, rdx + mov [rcx+56], rax + mov rax, [rcx+64] + adc rax, rbx + mov [rcx+64], rax + mov rax, [rcx+72] + adc rax, rbp + mov [rcx+72], rax + mov rax, [rcx+80] + adc rax, r14 + mov [rcx+80], rax + mov rax, [rcx+88] + adc rax, r15 + mov [rcx+88], rax + mov rax, [rsp] + adc r8, rax + mov [rcx+96], r8 // Result C12-C23 + mov rax, [rsp+8] + adc r9, rax + mov [rcx+104], r9 + mov rax, [rsp+16] + adc r10, rax + mov [rcx+112], r10 + mov rax, [rsp+24] + adc r11, rax + mov [rcx+120], r11 + mov rax, [rsp+32] + adc r12, rax + mov [rcx+128], r12 + mov rax, [rsp+40] + adc r13, rax + mov [rcx+136], r13 + mov r8, [rsp+48] + mov r9, [rsp+56] + mov r10, [rsp+64] + mov r11, [rsp+72] + mov r12, [rsp+80] + mov r13, [rsp+88] + adc r8, 0 + adc r9, 0 + adc r10, 0 + adc r11, 0 + adc r12, 0 + adc r13, 0 + add rsp, 152 + mov [rcx+144], r8 + mov [rcx+152], r9 + mov [rcx+160], r10 + mov [rcx+168], r11 + mov [rcx+176], r12 + mov [rcx+184], r13 + + pop rbp + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret + +#else + +//*********************************************************************** +// Integer multiplication +// Based on Karatsuba method +// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2] +// NOTE: a=c or b=c are not allowed +//*********************************************************************** +.global fmt(mul751_asm) +fmt(mul751_asm): + push r12 + push r13 + push r14 + mov rcx, reg_p3 + + // rcx[0-5] <- AH+AL + xor rax, rax + mov r8, [reg_p1+48] + mov r9, [reg_p1+56] + mov r10, [reg_p1+64] + mov r11, [reg_p1+72] + mov r12, [reg_p1+80] + mov r13, [reg_p1+88] + add r8, [reg_p1] + adc r9, [reg_p1+8] + adc r10, [reg_p1+16] + adc r11, [reg_p1+24] + adc r12, [reg_p1+32] + adc r13, [reg_p1+40] + push r15 + mov [rcx], r8 + mov [rcx+8], r9 + mov [rcx+16], r10 + 
mov [rcx+24], r11 + mov [rcx+32], r12 + mov [rcx+40], r13 + sbb rax, 0 + sub rsp, 96 // Allocating space in stack + + // rcx[6-11] <- BH+BL + xor rdx, rdx + mov r8, [reg_p2+48] + mov r9, [reg_p2+56] + mov r10, [reg_p2+64] + mov r11, [reg_p2+72] + mov r12, [reg_p2+80] + mov r13, [reg_p2+88] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + mov [rcx+48], r8 + mov [rcx+56], r9 + mov [rcx+64], r10 + mov [rcx+72], r11 + mov [rcx+80], r12 + mov [rcx+88], r13 + sbb rdx, 0 + mov [rsp+80], rax + mov [rsp+88], rdx + + // (rsp[0-8],r10,r8,r9) <- (AH+AL)*(BH+BL) + mov r11, [rcx] + mov rax, r8 + mul r11 + mov [rsp], rax // c0 + mov r14, rdx + + xor r15, r15 + mov rax, r9 + mul r11 + xor r9, r9 + add r14, rax + adc r9, rdx + + mov r12, [rcx+8] + mov rax, r8 + mul r12 + add r14, rax + mov [rsp+8], r14 // c1 + adc r9, rdx + adc r15, 0 + + xor r8, r8 + mov rax, r10 + mul r11 + add r9, rax + mov r13, [rcx+48] + adc r15, rdx + adc r8, 0 + + mov rax, [rcx+16] + mul r13 + add r9, rax + adc r15, rdx + mov rax, [rcx+56] + adc r8, 0 + + mul r12 + add r9, rax + mov [rsp+16], r9 // c2 + adc r15, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [rcx+72] + mul r11 + add r15, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+24] + mul r13 + add r15, rax + adc r8, rdx + adc r9, 0 + + mov rax, r10 + mul r12 + add r15, rax + adc r8, rdx + adc r9, 0 + + mov r14, [rcx+16] + mov rax, [rcx+56] + mul r14 + add r15, rax + mov [rsp+24], r15 // c3 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [rcx+80] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+64] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r15, [rcx+48] + mov rax, [rcx+32] + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+72] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r13, [rcx+24] + mov rax, [rcx+56] + mul r13 + add r8, rax + mov [rsp+32], r8 // c4 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov 
rax, [rcx+88] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+64] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+72] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+40] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+80] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r15, [rcx+32] + mov rax, [rcx+56] + mul r15 + add r9, rax + mov [rsp+40], r9 // c5 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [rcx+64] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+88] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+80] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r11, [rcx+40] + mov rax, [rcx+56] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+72] + mul r13 + add r10, rax + mov [rsp+48], r10 // c6 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [rcx+88] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+64] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+80] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+72] + mul r15 + add r8, rax + mov [rsp+56], r8 // c7 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [rcx+72] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+80] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+88] + mul r13 + add r9, rax + mov [rsp+64], r9 // c8 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [rcx+88] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+80] + mul r11 + add r10, rax // c9 + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+88] + mul r11 + add r8, rax // c10 + adc r9, rdx // c11 + + mov rax, [rsp+88] + mov rdx, [rcx] + and r12, rax + and r14, rax + and rdx, rax + and r13, rax + and r15, rax + and r11, rax + mov rax, [rsp+48] + add rdx, rax + mov rax, [rsp+56] + adc r12, rax + mov rax, [rsp+64] + adc r14, rax + adc r13, r10 + adc r15, r8 + adc 
r11, r9 + mov rax, [rsp+80] + mov [rsp+48], rdx + mov [rsp+56], r12 + mov [rsp+64], r14 + mov [rsp+72], r13 + mov [rsp+80], r15 + mov [rsp+88], r11 + + mov r8, [rcx+48] + mov r9, [rcx+56] + mov r10, [rcx+64] + mov r11, [rcx+72] + mov r12, [rcx+80] + mov r13, [rcx+88] + and r8, rax + and r9, rax + and r10, rax + and r11, rax + and r12, rax + and r13, rax + mov rax, [rsp+48] + add r8, rax + mov rax, [rsp+56] + adc r9, rax + mov rax, [rsp+64] + adc r10, rax + mov rax, [rsp+72] + adc r11, rax + mov rax, [rsp+80] + adc r12, rax + mov rax, [rsp+88] + adc r13, rax + mov [rsp+48], r8 + mov [rsp+56], r9 + mov [rsp+72], r11 + + // rcx[0-11] <- AL*BL + mov r11, [reg_p1] + mov rax, [reg_p2] + mul r11 + xor r9, r9 + mov [rcx], rax // c0 + mov [rsp+64], r10 + mov r8, rdx + + mov rax, [reg_p2+8] + mul r11 + xor r10, r10 + add r8, rax + mov [rsp+80], r12 + adc r9, rdx + + mov r12, [reg_p1+8] + mov rax, [reg_p2] + mul r12 + add r8, rax + mov [rcx+8], r8 // c1 + adc r9, rdx + mov [rsp+88], r13 + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+16] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p2] + mov rax, [reg_p1+16] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+8] + mul r12 + add r9, rax + mov [rcx+16], r9 // c2 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+24] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p1+24] + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+16] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r14, [reg_p1+16] + mov rax, [reg_p2+8] + mul r14 + add r10, rax + mov [rcx+24], r10 // c3 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [reg_p2+32] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+16] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p1+32] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+24] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r13, [reg_p1+24] + 
mov rax, [reg_p2+8] + mul r13 + add r8, rax + mov [rcx+32], r8 // c4 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+40] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+16] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+24] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r11, [reg_p1+40] + mov rax, [reg_p2] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+32] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r15, [reg_p1+32] + mov rax, [reg_p2+8] + mul r15 + add r9, rax + mov [rcx+40], r9 // c5 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+16] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+40] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+32] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+8] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+24] + mul r13 + add r10, rax + mov [rcx+48], r10 // c6 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [reg_p2+40] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+16] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+32] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+24] + mul r15 + add r8, rax + mov [rcx+56], r8 // c7 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+24] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+32] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+40] + mul r13 + add r9, rax + mov [rcx+64], r9 // c8 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+40] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+32] + mul r11 + add r10, rax + mov [rcx+72], r10 // c9 + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+40] + mul r11 + add r8, rax + mov [rcx+80], r8 // c10 + adc r9, rdx + mov [rcx+88], r9 // c11 + + // rcx[12-23] <- AH*BH + mov 
r11, [reg_p1+48] + mov rax, [reg_p2+48] + mul r11 + xor r9, r9 + mov [rcx+96], rax // c0 + mov r8, rdx + + mov rax, [reg_p2+56] + mul r11 + xor r10, r10 + add r8, rax + adc r9, rdx + + mov r12, [reg_p1+56] + mov rax, [reg_p2+48] + mul r12 + add r8, rax + mov [rcx+104], r8 // c1 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+64] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p2+48] + mov rax, [reg_p1+64] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+56] + mul r12 + add r9, rax + mov [rcx+112], r9 // c2 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+72] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p1+72] + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+64] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r14, [reg_p1+64] + mov rax, [reg_p2+56] + mul r14 + add r10, rax + mov [rcx+120], r10 // c3 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [reg_p2+80] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+64] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r15, [reg_p1+80] + mov rax, r13 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+72] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r13, [reg_p1+72] + mov rax, [reg_p2+56] + mul r13 + add r8, rax + mov [rcx+128], r8 // c4 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+88] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+64] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+72] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r11, [reg_p1+88] + mov rax, [reg_p2+48] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+80] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+56] + mul r15 + add r9, rax + mov [rcx+136], r9 // c5 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+64] + mul r15 + add r10, 
rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+88] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+80] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+56] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+72] + mul r13 + add r10, rax + mov [rcx+144], r10 // c6 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [reg_p2+88] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+64] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+80] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+72] + mul r15 + add r8, rax + mov [rcx+152], r8 // c7 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+72] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+80] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+88] + mul r13 + add r9, rax + mov [rcx+160], r9 // c8 + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+88] + mul r15 + add r10, rax + adc r8, rdx + + mov rax, [reg_p2+80] + mul r11 + add r10, rax + mov [rcx+168], r10 // c9 + adc r8, rdx + + mov rax, [reg_p2+88] + mul r11 + add r8, rax + mov [rcx+176], r8 // c10 + adc rdx, 0 + mov [rcx+184], rdx // c11 + + // [r8-r15,rax,rdx,rdi,[rsp]] <- (AH+AL)*(BH+BL) - AL*BL + mov r8, [rsp] + sub r8, [rcx] + mov r9, [rsp+8] + sbb r9, [rcx+8] + mov r10, [rsp+16] + sbb r10, [rcx+16] + mov r11, [rsp+24] + sbb r11, [rcx+24] + mov r12, [rsp+32] + sbb r12, [rcx+32] + mov r13, [rsp+40] + sbb r13, [rcx+40] + mov r14, [rsp+48] + sbb r14, [rcx+48] + mov r15, [rsp+56] + sbb r15, [rcx+56] + mov rax, [rsp+64] + sbb rax, [rcx+64] + mov rdx, [rsp+72] + sbb rdx, [rcx+72] + mov rdi, [rsp+80] + sbb rdi, [rcx+80] + mov rsi, [rsp+88] + sbb rsi, [rcx+88] + mov [rsp], rsi + + // [r8-r15,rax,rdx,rdi,[rsp]] <- (AH+AL)*(BH+BL) - AL*BL - AH*BH + mov rsi, [rcx+96] + sub r8, rsi + mov rsi, [rcx+104] + sbb r9, rsi + mov rsi, [rcx+112] + sbb r10, rsi + mov rsi, [rcx+120] + sbb r11, 
rsi + mov rsi, [rcx+128] + sbb r12, rsi + mov rsi, [rcx+136] + sbb r13, rsi + mov rsi, [rcx+144] + sbb r14, rsi + mov rsi, [rcx+152] + sbb r15, rsi + mov rsi, [rcx+160] + sbb rax, rsi + mov rsi, [rcx+168] + sbb rdx, rsi + mov rsi, [rcx+176] + sbb rdi, rsi + mov rsi, [rsp] + sbb rsi, [rcx+184] + + // Final result + add r8, [rcx+48] + mov [rcx+48], r8 + adc r9, [rcx+56] + mov [rcx+56], r9 + adc r10, [rcx+64] + mov [rcx+64], r10 + adc r11, [rcx+72] + mov [rcx+72], r11 + adc r12, [rcx+80] + mov [rcx+80], r12 + adc r13, [rcx+88] + mov [rcx+88], r13 + adc r14, [rcx+96] + mov [rcx+96], r14 + adc r15, [rcx+104] + mov [rcx+104], r15 + adc rax, [rcx+112] + mov [rcx+112], rax + adc rdx, [rcx+120] + mov [rcx+120], rdx + adc rdi, [rcx+128] + mov [rcx+128], rdi + adc rsi, [rcx+136] + mov [rcx+136], rsi + mov rax, [rcx+144] + adc rax, 0 + mov [rcx+144], rax + mov rax, [rcx+152] + adc rax, 0 + mov [rcx+152], rax + mov rax, [rcx+160] + adc rax, 0 + mov [rcx+160], rax + mov rax, [rcx+168] + adc rax, 0 + mov [rcx+168], rax + mov rax, [rcx+176] + adc rax, 0 + mov [rcx+176], rax + mov rax, [rcx+184] + adc rax, 0 + mov [rcx+184], rax + + add rsp, 96 // Restoring space in stack + pop r15 + pop r14 + pop r13 + pop r12 + ret + +#endif + + +#ifdef _MULX_ + +///////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: memory pointers M0 and M1 +// Outputs: memory locations C, C+8, C+16, and regs T0:T7 +// Temps: regs T8:T10, and memory locations C+32, C+40 (case w/o _ADX_) +///////////////////////////////////////////////////////////////// +#ifdef _ADX_ + +.macro MUL256x448_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10 + mov rdx, \M0 + mulx \T0, \T1, \M1 + mulx \T2, \T3, 8\M1 + mov \C, \T1 // C0_final + xor rax, rax + mulx \T4, \T5, 16\M1 + adox \T0, \T3 + adox \T2, \T5 + mulx \T1, \T3, 24\M1 + adox \T4, \T3 + mulx \T5, \T6, 32\M1 + adox \T1, \T6 + mulx \T3, \T7, 40\M1 + adox \T5, \T7 + mulx \T6, \T8, 48\M1 + adox \T3, \T8 + adox \T6, rax + + mov 
rdx, 8\M0 + mulx \T8, \T7, \M1 + xor rax, rax + adcx \T0, \T7 + mov 8\C, \T0 // C1_final + adcx \T2, \T8 + mulx \T7, \T8, 8\M1 + adox \T2, \T8 + adcx \T4, \T7 + mulx \T0, \T8, 16\M1 + adox \T4, \T8 + adcx \T0, \T1 + mulx \T1, \T7, 24\M1 + adcx \T1, \T5 + mulx \T5, \T8, 32\M1 + adcx \T3, \T5 + mulx \T5, \T9, 40\M1 + adcx \T6, \T5 + mulx \T5, rdx, 48\M1 + adcx \T5, rax + + adox \T0, \T7 + adox \T1, \T8 + adox \T3, \T9 + adox \T6, rdx + adox \T5, rax + + mov rdx, 16\M0 + mulx \T8, \T7, \M1 + xor rax, rax + adcx \T2, \T7 + mov 16\C, \T2 // C2_final + adcx \T4, \T8 + mulx \T8, \T7, 8\M1 + adox \T4, \T7 + adcx \T0, \T8 + mulx \T2, \T8, 16\M1 + adox \T0, \T8 + adcx \T1, \T2 + mulx \T2, \T7, 24\M1 + adcx \T3, \T2 + mulx \T2, \T8, 32\M1 + adcx \T6, \T2 + mulx \T2, \T9, 40\M1 + adcx \T5, \T2 + mulx \T2, rdx, 48\M1 + adcx \T2, rax + + adox \T1, \T7 + adox \T3, \T8 + adox \T6, \T9 + adox \T5, rdx + adox \T2, rax + + mov rdx, 24\M0 + mulx \T8, \T7, \M1 + xor rax, rax + adcx \T7, \T4 + adcx \T0, \T8 + mulx \T8, \T10, 8\M1 + adox \T0, \T10 + adcx \T1, \T8 + mulx \T4, \T8, 16\M1 + adox \T1, \T8 + adcx \T3, \T4 + mulx \T4, \T10, 24\M1 + adcx \T6, \T4 + mulx \T4, \T8, 32\M1 + adcx \T5, \T4 + mulx \T4, \T9, 40\M1 + adcx \T2, \T4 + mulx \T4, rdx, 48\M1 + adcx \T4, rax + + adox \T3, \T10 + adox \T6, \T8 + adox \T5, \T9 + adox \T2, rdx + adox \T4, rax +.endm + +#else + +.macro MUL256x448_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10 + mov rdx, \M0 + mulx \T0, \T1, \M1 + mulx \T2, \T3, 8\M1 + mov \C, \T1 // C0_final + xor rax, rax + mulx \T4, \T5, 16\M1 + add \T0, \T3 + adc \T2, \T5 + mulx \T1, \T3, 24\M1 + adc \T4, \T3 + mulx \T5, \T6, 32\M1 + adc \T1, \T6 + mulx \T3, \T7, 40\M1 + adc \T5, \T7 + mulx \T6, \T8, 48\M1 + adc \T3, \T8 + adc \T6, rax + + mov rdx, 8\M0 + mulx \T8, \T7, \M1 + add \T0, \T7 + mov 8\C, \T0 // C1_final + adc \T2, \T8 + mulx \T7, \T8, 8\M1 + mov 32\C, \T8 // store + adc \T4, \T7 + mulx \T0, \T8, 16\M1 + mov 40\C, \T8 // store + adc \T0, \T1 + mulx 
\T1, \T7, 24\M1 + adc \T1, \T5 + mulx \T5, \T8, 32\M1 + adc \T3, \T5 + mulx \T5, \T9, 40\M1 + adc \T6, \T5 + mulx \T5, rdx, 48\M1 + adc \T5, rax + + xor rax, rax + add \T2, 32\C + adc \T4, 40\C + adc \T0, \T7 + adc \T1, \T8 + adc \T3, \T9 + adc \T6, rdx + adc \T5, rax + + mov rdx, 16\M0 + mulx \T8, \T7, \M1 + add \T2, \T7 + mov 16\C, \T2 // C2_final + adc \T4, \T8 + mulx \T8, \T7, 8\M1 + mov 32\C, \T7 // store + adc \T0, \T8 + mulx \T2, \T8, 16\M1 + mov 40\C, \T8 // store + adc \T1, \T2 + mulx \T2, \T7, 24\M1 + adc \T3, \T2 + mulx \T2, \T8, 32\M1 + adc \T6, \T2 + mulx \T2, \T9, 40\M1 + adc \T5, \T2 + mulx \T2, rdx, 48\M1 + adc \T2, rax + + xor rax, rax + add \T4, 32\C + adc \T0, 40\C + adc \T1, \T7 + adc \T3, \T8 + adc \T6, \T9 + adc \T5, rdx + adc \T2, rax + + mov rdx, 24\M0 + mulx \T8, \T7, \M1 + add \T7, \T4 + adc \T0, \T8 + mulx \T8, \T10, 8\M1 + mov 32\C, \T10 // store + adc \T1, \T8 + mulx \T4, \T8, 16\M1 + mov 40\C, \T8 // store + adc \T3, \T4 + mulx \T4, \T10, 24\M1 + adc \T6, \T4 + mulx \T4, \T8, 32\M1 + adc \T5, \T4 + mulx \T4, \T9, 40\M1 + adc \T2, \T4 + mulx \T4, rdx, 48\M1 + adc \T4, rax + + xor rax, rax + add \T0, 32\C + adc \T1, 40\C + adc \T3, \T10 + adc \T6, \T8 + adc \T5, \T9 + adc \T2, rdx + adc \T4, rax +.endm + +#endif + + +//************************************************************************************** +// Montgomery reduction +// Based on method described in Faz-Hernandez et al. 
https://eprint.iacr.org/2017/1015 +// Operation: c [reg_p2] = a [reg_p1] +// NOTE: a=c is not allowed +//************************************************************************************** +.global fmt(rdc751_asm) +fmt(rdc751_asm): + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + // a[0-3] x p751p1_nz --> result: [reg_p2+48], [reg_p2+56], [reg_p2+64], and rbp, r8:r14 + MUL256x448_SCHOOL [reg_p1], [rip+fmt(p751p1)+40], [reg_p2+48], r8, r9, r13, r10, r14, r12, r11, rbp, rbx, rcx, r15 + + xor r15, r15 + mov rax, [reg_p2+48] + mov rdx, [reg_p2+56] + mov rbx, [reg_p2+64] + add rax, [reg_p1+40] + adc rdx, [reg_p1+48] + adc rbx, [reg_p1+56] + mov [reg_p1+40], rax + mov [reg_p1+48], rdx + mov [reg_p1+56], rbx + adc rbp, [reg_p1+64] + adc r8, [reg_p1+72] + adc r9, [reg_p1+80] + adc r10, [reg_p1+88] + adc r11, [reg_p1+96] + adc r12, [reg_p1+104] + adc r13, [reg_p1+112] + adc r14, [reg_p1+120] + adc r15, [reg_p1+128] + mov [reg_p1+64], rbp + mov [reg_p1+72], r8 + mov [reg_p1+80], r9 + mov [reg_p1+88], r10 + mov [reg_p1+96], r11 + mov [reg_p1+104], r12 + mov [reg_p1+112], r13 + mov [reg_p1+120], r14 + mov [reg_p1+128], r15 + mov r8, [reg_p1+136] + mov r9, [reg_p1+144] + mov r10, [reg_p1+152] + mov r11, [reg_p1+160] + mov r12, [reg_p1+168] + mov r13, [reg_p1+176] + mov r14, [reg_p1+184] + adc r8, 0 + adc r9, 0 + adc r10, 0 + adc r11, 0 + adc r12, 0 + adc r13, 0 + adc r14, 0 + mov [reg_p1+136], r8 + mov [reg_p1+144], r9 + mov [reg_p1+152], r10 + mov [reg_p1+160], r11 + mov [reg_p1+168], r12 + mov [reg_p1+176], r13 + mov [reg_p1+184], r14 + + // a[4-7] x p751p1_nz --> result: [reg_p2+48], [reg_p2+56], [reg_p2+64], and rbp, r8:r14 + MUL256x448_SCHOOL [reg_p1+32], [rip+fmt(p751p1)+40], [reg_p2+48], r8, r9, r13, r10, r14, r12, r11, rbp, rbx, rcx, r15 + + xor r15, r15 + mov rax, [reg_p2+48] + mov rdx, [reg_p2+56] + mov rbx, [reg_p2+64] + add rax, [reg_p1+72] + adc rdx, [reg_p1+80] + adc rbx, [reg_p1+88] + mov [reg_p1+72], rax + mov [reg_p1+80], rdx + mov 
[reg_p1+88], rbx + adc rbp, [reg_p1+96] + adc r8, [reg_p1+104] + adc r9, [reg_p1+112] + adc r10, [reg_p1+120] + adc r11, [reg_p1+128] + adc r12, [reg_p1+136] + adc r13, [reg_p1+144] + adc r14, [reg_p1+152] + adc r15, [reg_p1+160] + mov [reg_p2], rbp // Final result c0 + mov [reg_p1+104], r8 + mov [reg_p1+112], r9 + mov [reg_p1+120], r10 + mov [reg_p1+128], r11 + mov [reg_p1+136], r12 + mov [reg_p1+144], r13 + mov [reg_p1+152], r14 + mov [reg_p1+160], r15 + mov r12, [reg_p1+168] + mov r13, [reg_p1+176] + mov r14, [reg_p1+184] + adc r12, 0 + adc r13, 0 + adc r14, 0 + mov [reg_p1+168], r12 + mov [reg_p1+176], r13 + mov [reg_p1+184], r14 + + // a[8-11] x p751p1_nz --> result: [reg_p2+48], [reg_p2+56], [reg_p2+64], and rbp, r8:r14 + MUL256x448_SCHOOL [reg_p1+64], [rip+fmt(p751p1)+40], [reg_p2+48], r8, r9, r13, r10, r14, r12, r11, rbp, rbx, rcx, r15 + + // Final result c1:c11 + mov rax, [reg_p2+48] + mov rdx, [reg_p2+56] + mov rbx, [reg_p2+64] + add rax, [reg_p1+104] + adc rdx, [reg_p1+112] + adc rbx, [reg_p1+120] + mov [reg_p2+8], rax + mov [reg_p2+16], rdx + mov [reg_p2+24], rbx + adc rbp, [reg_p1+128] + adc r8, [reg_p1+136] + adc r9, [reg_p1+144] + adc r10, [reg_p1+152] + adc r11, [reg_p1+160] + adc r12, [reg_p1+168] + adc r13, [reg_p1+176] + adc r14, [reg_p1+184] + mov [reg_p2+32], rbp + mov [reg_p2+40], r8 + mov [reg_p2+48], r9 + mov [reg_p2+56], r10 + mov [reg_p2+64], r11 + mov [reg_p2+72], r12 + mov [reg_p2+80], r13 + mov [reg_p2+88], r14 + + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + ret + + #else + +//*********************************************************************** +// Montgomery reduction +// Based on comba method +// Operation: c [reg_p2] = a [reg_p1] +// NOTE: a=c is not allowed +//*********************************************************************** +.global fmt(rdc751_asm) +fmt(rdc751_asm): + push r12 + push r13 + push r14 + push r15 + + mov r11, [reg_p1] + mov rax, [rip+fmt(p751p1)+40] + mul r11 + xor r8, r8 + add rax, [reg_p1+40] 
+ mov [reg_p2+40], rax // z5 + adc r8, rdx + + xor r9, r9 + mov rax, [rip+fmt(p751p1)+48] + mul r11 + xor r10, r10 + add r8, rax + adc r9, rdx + + mov r12, [reg_p1+8] + mov rax, [rip+fmt(p751p1)+40] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+48] + mov [reg_p2+48], r8 // z6 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + mov rax, [rip+fmt(p751p1)+56] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+48] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p1+16] + mov rax, [rip+fmt(p751p1)+40] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+56] + mov [reg_p2+56], r9 // z7 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + mov rax, [rip+fmt(p751p1)+64] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+56] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+48] + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r14, [reg_p1+24] + mov rax, [rip+fmt(p751p1)+40] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+64] + mov [reg_p2+64], r10 // z8 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + mov rax, [rip+fmt(p751p1)+72] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+64] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+56] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+48] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r15, [reg_p1+32] + mov rax, [rip+fmt(p751p1)+40] + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+72] + mov [reg_p2+72], r8 // z9 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + mov rax, [rip+fmt(p751p1)+80] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+72] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+64] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+56] + mul r14 + add 
r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+48] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rcx, [reg_p2+40] + mov rax, [rip+fmt(p751p1)+40] + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+80] + mov [reg_p2+80], r9 // z10 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + mov rax, [rip+fmt(p751p1)+88] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+80] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+72] + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+64] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+56] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+48] + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r11, [reg_p2+48] + mov rax, [rip+fmt(p751p1)+40] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+88] + mov [reg_p2+88], r10 // z11 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + mov rax, [rip+fmt(p751p1)+88] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+80] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+72] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+64] + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+56] + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+48] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r12, [reg_p2+56] + mov rax, [rip+fmt(p751p1)+40] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+96] + mov [reg_p2], r8 // z0 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + mov rax, [rip+fmt(p751p1)+88] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+80] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+72] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, 
[rip+fmt(p751p1)+64] + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+56] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+48] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p2+64] + mov rax, [rip+fmt(p751p1)+40] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+104] + mov [reg_p2+8], r9 // z1 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + mov rax, [rip+fmt(p751p1)+88] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+80] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+72] + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+64] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+56] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+48] + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r14, [reg_p2+72] + mov rax, [rip+fmt(p751p1)+40] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+112] + mov [reg_p2+16], r10 // z2 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + mov rax, [rip+fmt(p751p1)+88] + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+80] + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+72] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+64] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+56] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+48] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r15, [reg_p2+80] + mov rax, [rip+fmt(p751p1)+40] + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+120] + mov [reg_p2+24], r8 // z3 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + mov rax, [rip+fmt(p751p1)+88] + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+80] + mul r11 + add r9, rax + 
adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+72] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+64] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+56] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+48] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rcx, [reg_p2+88] + mov rax, [rip+fmt(p751p1)+40] + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+128] + mov [reg_p2+32], r9 // z4 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + mov rax, [rip+fmt(p751p1)+88] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+80] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+72] + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+64] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+56] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+48] + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+136] + mov [reg_p2+40], r10 // z5 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + mov rax, [rip+fmt(p751p1)+88] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+80] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+72] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+64] + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+56] + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+144] + mov [reg_p2+48], r8 // z6 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + mov rax, [rip+fmt(p751p1)+88] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+80] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+72] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rip+fmt(p751p1)+64] + mul rcx + add r9, rax + adc 
r10, rdx + adc r8, 0 + add r9, [reg_p1+152] + mov [reg_p2+56], r9 // z7 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + mov rax, [rip+fmt(p751p1)+88] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+80] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rip+fmt(p751p1)+72] + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+160] + mov [reg_p2+64], r10 // z8 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + mov rax, [rip+fmt(p751p1)+88] + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+80] + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+168] // z9 + mov [reg_p2+72], r8 // z9 + adc r9, 0 + adc r10, 0 + + mov rax, [rip+fmt(p751p1)+88] + mul rcx + add r9, rax + adc r10, rdx + add r9, [reg_p1+176] // z10 + mov [reg_p2+80], r9 // z10 + adc r10, 0 + add r10, [reg_p1+184] // z11 + mov [reg_p2+88], r10 // z11 + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + #endif + + +//*********************************************************************** +// 751-bit multiprecision addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.global fmt(mp_add751_asm) +fmt(mp_add751_asm): + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov rax, [reg_p1+32] + mov rcx, [reg_p1+40] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc rax, [reg_p2+32] + adc rcx, [reg_p2+40] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], rax + mov [reg_p3+40], rcx + + mov r8, [reg_p1+48] + mov r9, [reg_p1+56] + mov r10, [reg_p1+64] + mov r11, [reg_p1+72] + mov rax, [reg_p1+80] + mov rcx, [reg_p1+88] + adc r8, [reg_p2+48] + adc r9, [reg_p2+56] + adc r10, [reg_p2+64] + adc r11, [reg_p2+72] + adc rax, [reg_p2+80] + adc rcx, [reg_p2+88] + mov [reg_p3+48], r8 + mov [reg_p3+56], r9 + mov [reg_p3+64], 
r10 + mov [reg_p3+72], r11 + mov [reg_p3+80], rax + mov [reg_p3+88], rcx + ret + + +//*********************************************************************** +// 2x751-bit multiprecision subtraction/addition +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. If c < 0, add p751*2^768 +//*********************************************************************** +.global fmt(mp_subadd751x2_asm) +fmt(mp_subadd751x2_asm): + push r12 + push r13 + push r14 + push r15 + push rbx + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov rcx, [reg_p1+32] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb rcx, [reg_p2+32] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], rcx + + mov r8, [reg_p1+40] + mov r9, [reg_p1+48] + mov r10, [reg_p1+56] + mov r11, [reg_p1+64] + mov rcx, [reg_p1+72] + sbb r8, [reg_p2+40] + sbb r9, [reg_p2+48] + sbb r10, [reg_p2+56] + sbb r11, [reg_p2+64] + sbb rcx, [reg_p2+72] + mov [reg_p3+40], r8 + mov [reg_p3+48], r9 + mov [reg_p3+56], r10 + mov [reg_p3+64], r11 + mov [reg_p3+72], rcx + + mov r8, [reg_p1+80] + mov r9, [reg_p1+88] + mov r10, [reg_p1+96] + mov r11, [reg_p1+104] + mov rcx, [reg_p1+112] + sbb r8, [reg_p2+80] + sbb r9, [reg_p2+88] + sbb r10, [reg_p2+96] + sbb r11, [reg_p2+104] + sbb rcx, [reg_p2+112] + mov [reg_p3+80], r8 + mov [reg_p3+88], r9 + mov [reg_p3+96], r10 + mov [reg_p3+104], r11 + mov [reg_p3+112], rcx + + mov r8, [reg_p1+120] + mov r9, [reg_p1+128] + mov r10, [reg_p1+136] + mov r11, [reg_p1+144] + mov rcx, [reg_p1+152] + sbb r8, [reg_p2+120] + sbb r9, [reg_p2+128] + sbb r10, [reg_p2+136] + sbb r11, [reg_p2+144] + sbb rcx, [reg_p2+152] + mov [reg_p3+120], r8 + mov [reg_p3+128], r9 + mov [reg_p3+136], r10 + mov [reg_p3+144], r11 + mov [reg_p3+152], rcx + + mov r8, [reg_p1+160] + mov r9, [reg_p1+168] + mov r10, [reg_p1+176] + mov r11, [reg_p1+184] + sbb r8, [reg_p2+160] + sbb r9, [reg_p2+168] + sbb 
r10, [reg_p2+176] + sbb r11, [reg_p2+184] + sbb rax, 0 + + // Add p751 anded with the mask in rax + mov r12, [rip+fmt(p751)] + mov r13, [rip+fmt(p751)+40] + mov r14, [rip+fmt(p751)+48] + mov r15, [rip+fmt(p751)+56] + mov rdi, [rip+fmt(p751)+64] + mov rsi, [rip+fmt(p751)+72] + mov rbx, [rip+fmt(p751)+80] + mov rcx, [rip+fmt(p751)+88] + and r12, rax + and r13, rax + and r14, rax + and r15, rax + and rdi, rax + and rsi, rax + and rbx, rax + and rcx, rax + mov rax, [reg_p3+96] + add rax, r12 + mov [reg_p3+96], rax + mov rax, [reg_p3+104] + adc rax, r12 + mov [reg_p3+104], rax + mov rax, [reg_p3+112] + adc rax, r12 + mov [reg_p3+112], rax + mov rax, [reg_p3+120] + adc rax, r12 + mov [reg_p3+120], rax + adc r12, [reg_p3+128] + adc r13, [reg_p3+136] + mov [reg_p3+128], r12 + mov [reg_p3+136], r13 + mov r12, [reg_p3+144] + mov r13, [reg_p3+152] + adc r12, r14 + adc r13, r15 + adc r8, rdi + adc r9, rsi + adc r10, rbx + adc r11, rcx + + mov [reg_p3+144], r12 + mov [reg_p3+152], r13 + mov [reg_p3+160], r8 + mov [reg_p3+168], r9 + mov [reg_p3+176], r10 + mov [reg_p3+184], r11 + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret + ret + + +//*********************************************************************** +// Double 2x751-bit multiprecision subtraction +// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2] +//*********************************************************************** +.global fmt(mp_dblsub751x2_asm) +fmt(mp_dblsub751x2_asm): + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p3] + mov r9, [reg_p3+8] + mov r10, [reg_p3+16] + mov r11, [reg_p3+24] + mov r12, [reg_p3+32] + mov r13, [reg_p3+40] + mov r14, [reg_p3+48] + mov r15, [reg_p3+56] + sub r8, [reg_p1] + sbb r9, [reg_p1+8] + sbb r10, [reg_p1+16] + sbb r11, [reg_p1+24] + sbb r12, [reg_p1+32] + sbb r13, [reg_p1+40] + sbb r14, [reg_p1+48] + sbb r15, [reg_p1+56] + setc al + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb 
r13, [reg_p2+40] + sbb r14, [reg_p2+48] + sbb r15, [reg_p2+56] + setc cl + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + + mov r8, [reg_p3+64] + mov r9, [reg_p3+72] + mov r10, [reg_p3+80] + mov r11, [reg_p3+88] + mov r12, [reg_p3+96] + mov r13, [reg_p3+104] + mov r14, [reg_p3+112] + mov r15, [reg_p3+120] + bt rax, 0 + sbb r8, [reg_p1+64] + sbb r9, [reg_p1+72] + sbb r10, [reg_p1+80] + sbb r11, [reg_p1+88] + sbb r12, [reg_p1+96] + sbb r13, [reg_p1+104] + sbb r14, [reg_p1+112] + sbb r15, [reg_p1+120] + setc al + bt rcx, 0 + sbb r8, [reg_p2+64] + sbb r9, [reg_p2+72] + sbb r10, [reg_p2+80] + sbb r11, [reg_p2+88] + sbb r12, [reg_p2+96] + sbb r13, [reg_p2+104] + sbb r14, [reg_p2+112] + sbb r15, [reg_p2+120] + setc cl + mov [reg_p3+64], r8 + mov [reg_p3+72], r9 + mov [reg_p3+80], r10 + mov [reg_p3+88], r11 + mov [reg_p3+96], r12 + mov [reg_p3+104], r13 + mov [reg_p3+112], r14 + mov [reg_p3+120], r15 + + mov r8, [reg_p3+128] + mov r9, [reg_p3+136] + mov r10, [reg_p3+144] + mov r11, [reg_p3+152] + mov r12, [reg_p3+160] + mov r13, [reg_p3+168] + mov r14, [reg_p3+176] + mov r15, [reg_p3+184] + bt rax, 0 + sbb r8, [reg_p1+128] + sbb r9, [reg_p1+136] + sbb r10, [reg_p1+144] + sbb r11, [reg_p1+152] + sbb r12, [reg_p1+160] + sbb r13, [reg_p1+168] + sbb r14, [reg_p1+176] + sbb r15, [reg_p1+184] + bt rcx, 0 + sbb r8, [reg_p2+128] + sbb r9, [reg_p2+136] + sbb r10, [reg_p2+144] + sbb r11, [reg_p2+152] + sbb r12, [reg_p2+160] + sbb r13, [reg_p2+168] + sbb r14, [reg_p2+176] + sbb r15, [reg_p2+184] + mov [reg_p3+128], r8 + mov [reg_p3+136], r9 + mov [reg_p3+144], r10 + mov [reg_p3+152], r11 + mov [reg_p3+160], r12 + mov [reg_p3+168], r13 + mov [reg_p3+176], r14 + mov [reg_p3+184], r15 + + pop r15 + pop r14 + pop r13 + pop r12 + ret \ No newline at end of file diff --git a/SIKE_sw/src/P751/P751.c b/SIKE_sw/src/P751/P751.c new file mode 100644 index 
0000000..7ece10c --- /dev/null +++ b/SIKE_sw/src/P751/P751.c @@ -0,0 +1,142 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: supersingular isogeny parameters and generation of functions for P751 +*********************************************************************************************/ + +#include "P751_api.h" +#include "P751_internal.h" +#include "../internal.h" + + +// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points: +// -------------------------------------------------------------------------------------------------- +// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). +// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position. +// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. +// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32. +// For example, a 751-bit field element is represented with Ceil(751 / 64) = 12 64-bit digits or Ceil(751 / 32) = 24 32-bit digits. + +// +// Curve isogeny system "SIDHp751". 
Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p751^2), where A=6, B=1, C=1 and p751 = 2^372*3^239-1 +// + +const uint64_t p751[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF, + 0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C }; +const uint64_t p751p1[NWORDS64_FIELD] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xEEB0000000000000, + 0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C }; +const uint64_t p751x2[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDD5FFFFFFFFFFFFF, + 0xC7D92D0A93F0F151, 0xB52B363427EF98ED, 0x109D30CFADD7D0ED, 0x0AC56A08B964AE90, 0x1C25213F2F75B8CD, 0x0000DFCBAA83EE38 }; +const uint64_t p751x4[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xBABFFFFFFFFFFFFF, + 0x8FB25A1527E1E2A3, 0x6A566C684FDF31DB, 0x213A619F5BAFA1DB, 0x158AD41172C95D20, 0x384A427E5EEB719A, 0x0001BF975507DC70 }; +const uint64_t p751x16p[2*NWORDS64_FIELD] = { 0x0000000000000010, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x2A00000000000000, + 0x826D2F56C0F0EAE2, 0xAD4C9CBD81067123, 0xF62CF3052282F124, 0x53A95F7469B516FE, 0x3DADEC0D08A4732F, 0x58AD934557C11C7E, + 0x7F731B89B2DA43F2, 0x51AE9F5F5F6AFF3B, 0xD74319A6C9BCA375, 0x5BAB790796CF84D4, 0xA421554FE2E49CA8, 0x20AD617C8DF437CF, + 0x3AB06E7A12F5FF7B, 0x70A25E037E40347E, 0x51F1D323FB4C1151, 0xAE0D99AA4835FED9, 0xDF5429960D2536B6, 0x000000030E91D466 }; +// Order of Alice's subgroup +const uint64_t Alice_order[NWORDS64_ORDER] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 
0x0010000000000000 }; +// Order of Bob's subgroup +const uint64_t Bob_order[NWORDS64_ORDER] = { 0xC968549F878A8EEB, 0x59B1A13F7CC76E3E, 0xE9867D6EBE876DA9, 0x2B5045CB25748084, 0x2909F97BADC66856, 0x06FE5D541F71C0E1 }; +// Alice's generator values {XPA0 + XPA1*i, XQA0 + xQA1*i, XRA0 + XRA1*i} in GF(p751^2), expressed in Montgomery representation +const uint64_t A_gen[6 * NWORDS64_FIELD] = { 0x884F46B74000BAA8, 0xBA52630F939DEC20, 0xC16FB97BA714A04D, 0x082536745B1AB3DB, 0x1117157F446F9E82, 0xD2F27D621A018490, + 0x6B24AB523D544BCD, 0x9307D6AA2EA85C94, 0xE1A096729528F20F, 0x896446F868F3255C, 0x2401D996B1BFF8A5, 0x00000EF8786A5C0A, // XPA0 + 0xAEB78B3B96F59394, 0xAB26681E29C90B74, 0xE520AC30FDC4ACF1, 0x870AAAE3A4B8111B, 0xF875BDB738D64EFF, 0x50109A7ECD7ED6BC, + 0x4CC64848FF0C56FB, 0xE617CB6C519102C9, 0x9C74B3835921E609, 0xC91DDAE4A35A7146, 0x7FC82A155C1B9129, 0x0000214FA6B980B3, // XPA1 + 0x0F93CC38680A8CA9, 0x762E733822E7FED7, 0xE549F005AC0ADB67, 0x94A71FDD2C43A4ED, 0xD48645C2B04721C5, 0x432DA1FE4D4CA4DC, + 0xBC99655FAA7A80E8, 0xB2C6D502BCFD4823, 0xEE92F40CA2EC8BDB, 0x7B074132EFB6D16C, 0x3340B46FA38A7633, 0x0000215749657F6C, // XQA0 + 0xECFF375BF3079F4C, 0xFBFE74B043E80EF3, 0x17376CBE3C5C7AD1, 0xC06327A7E29CDBF2, 0x2111649C438BF3D4, 0xC1F9298261BA2E97, + 0x1F9FECE869CFD1C2, 0x01A39B4FC9346D62, 0x147CD1D3E82A3C9F, 0xDE84E9D249E533EE, 0x1C48A5ADFB7C578D, 0x000061ACA0B82E1D, // XQA1 + 0x1600C525D41059F1, 0xA596899A0A1D83F7, 0x6BFDEED6D2B23F35, 0x5C7E707270C23910, 0x276CA1A4E8369411, 0xB193651A602925A0, + 0x243D239F1CA1F04A, 0x543DC6DA457860AD, 0xCDA590F325181DE9, 0xD3AB7ACFDA80B395, 0x6C97468580FDDF7B, 0x0000352A3E5C4C77, // XRA0 + 0x9B794F9FD1CC3EE8, 0xDB32E40A9B2FD23E, 0x26192A2542E42B67, 0xA18E94FCA045BCE7, 0x96DC1BC38E7CDA2D, 0x9A1D91B752487DE2, + 0xCC63763987436DA3, 0x1316717AACCC551D, 0xC4C368A4632AFE72, 0x4B6EA85C9CCD5710, 0x7A12CAD582C7BC9A, 0x00001C7E240149BF }; // XRA1 +// Bob's generator values {XPB0, XQB0, XRB0 + XRB1*i} in GF(p751^2), expressed in Montgomery 
representation +const uint64_t B_gen[6 * NWORDS64_FIELD] = { 0x85691AAF4015F88C, 0x7478C5B8C36E9631, 0x7EF2A185DE4DD6E2, 0x943BBEE46BEB9DC7, 0x1A3EC62798792D22, 0x791BC4B084B31D69, + 0x03DBE6522CEA17C4, 0x04749AA65D665D83, 0x3D52B5C45EF450F3, 0x0B4219848E36947D, 0xA4CF7070466BDE27, 0x0000334B1FA6D193, // XPB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XPB1 + 0x8E7CB3FA53211340, 0xD67CE54F7A05EEE0, 0xFDDC2C8BCE46FC38, 0x08587FAE3110DF1E, 0xD6B8246FA22B058B, 0x4DAC3ACC905A5DBD, + 0x51D0BF2FADCED3E8, 0xE5A2406DF6484425, 0x907F177584F671B8, 0x4738A2FFCCED051C, 0x2B0067B4177E4853, 0x00002806AC948D3D, // XQB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XQB1 + 0xB56457016D1D6D1C, 0x03DECCB38F39C491, 0xDFB910AC8A559452, 0xA9D0F17D1FF24883, 0x8562BBAF515C248C, 0x249B2A6DDB1CB67D, + 0x3131AF96FB46835C, 0xE10258398480C3E1, 0xEAB5E2B872D4FAB1, 0xB71E63875FAEB1DF, 0xF8384D4F13757CF6, 0x0000361EC9B09912, // XRB0 + 0x58C967899ED16EF4, 0x81998376DC622A4B, 0x3D1C1DCFE0B12681, 0x9347DEBB953E1730, 0x9ABB344D3A82C2D7, 0xE4881BD2820552B2, + 0x0037247923D90266, 0x2E3156EDB157E5A5, 0xF86A46A7506823F7, 0x8FE5523A7B7F1CFC, 0xFA3CFFA38372F67B, 0x0000692DCE85FFBD }; // XRB1 +// Montgomery constant Montgomery_R2 = (2^768)^2 mod p751 +const uint64_t Montgomery_R2[NWORDS64_FIELD] = { 0x233046449DAD4058, 0xDB010161A696452A, 0x5E36941472E3FD8E, 0xF40BFE2082A2E706, 0x4932CCA8904F8751 ,0x1F735F1F1EE7FC81, + 0xA24F4D80C1048E18, 0xB56C383CCDB607C5, 0x441DD47B735F9C90, 0x5673ED2C6A6AC82A, 0x06C905261132294B, 0x000041AD830F1F35 }; +// Value one in Montgomery representation +const uint64_t 
Montgomery_one[NWORDS64_FIELD] = { 0x00000000000249ad, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8310000000000000, + 0x5527b1e4375c6c66, 0x697797bf3f4f24d0, 0xc89db7b2ac5c4e2e, 0x4ca4b439d2076956, 0x10f7926c7512c7e9, 0x00002d5b24bce5e2 }; + + +// Fixed parameters for isogeny tree computation +const unsigned int strat_Alice[MAX_Alice-1] = { +80, 48, 27, 15, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, +1, 3, 2, 1, 1, 1, 1, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, +1, 1, 2, 1, 1, 1, 21, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, +1, 1, 1, 2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, +33, 20, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 2, 1, +1, 1, 8, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, +1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 }; + +const unsigned int strat_Bob[MAX_Bob-1] = { +112, 63, 32, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, +1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, +1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 31, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, +1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 15, 8, 4, 2, 1, 1, 2, 1, 1, 4, +2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 49, 31, 16, 8, 4, 2, +1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, +15, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, +1, 1, 1, 21, 12, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 5, 3, 2, 1, 1, 1, 1, +2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1 }; + +// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions +#define fpcopy fpcopy751 +#define fpzero fpzero751 +#define fpadd fpadd751 +#define fpsub fpsub751 +#define fpneg fpneg751 +#define fpdiv2 fpdiv2_751 +#define 
fpcorrection fpcorrection751 +#define fpmul_mont fpmul751_mont +#define fpsqr_mont fpsqr751_mont +#define fpinv_mont fpinv751_mont +#define fpinv_chain_mont fpinv751_chain_mont +#define fp2copy fp2copy751 +#define fp2zero fp2zero751 +#define fp2add fp2add751 +#define fp2sub fp2sub751 +#define mp_sub_p2 mp_sub751_p2 +#define mp_sub_p4 mp_sub751_p4 +#define sub_p4 mp_sub_p4 +#define fp2neg fp2neg751 +#define fp2div2 fp2div2_751 +#define fp2correction fp2correction751 +#define fp2mul_mont fp2mul751_mont +#define fp2sqr_mont fp2sqr751_mont +#define fp2inv_mont fp2inv751_mont +#define fp2inv_mont_ct fp2inv751_mont_ct +#define fp2inv_mont_bingcd fp2inv751_mont_bingcd +#define mp_add_asm mp_add751_asm +#define mp_subaddx2_asm mp_subadd751x2_asm +#define mp_dblsubx2_asm mp_dblsub751x2_asm +#define crypto_kem_keypair crypto_kem_keypair_SIKEp751 +#define crypto_kem_enc crypto_kem_enc_SIKEp751 +#define crypto_kem_dec crypto_kem_dec_SIKEp751 +#define random_mod_order_A random_mod_order_A_SIDHp751 +#define random_mod_order_B random_mod_order_B_SIDHp751 +#define EphemeralKeyGeneration_A EphemeralKeyGeneration_A_SIDHp751 +#define EphemeralKeyGeneration_B EphemeralKeyGeneration_B_SIDHp751 +#define EphemeralSecretAgreement_A EphemeralSecretAgreement_A_SIDHp751 +#define EphemeralSecretAgreement_B EphemeralSecretAgreement_B_SIDHp751 + +#include "../fpx.c" +#include "../ec_isogeny.c" +#include "../sidh.c" +#include "../sike.c" \ No newline at end of file diff --git a/SIKE_sw/src/P751/P751_api.h b/SIKE_sw/src/P751/P751_api.h new file mode 100644 index 0000000..94db7e8 --- /dev/null +++ b/SIKE_sw/src/P751/P751_api.h @@ -0,0 +1,112 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: API header file for P751 
+*********************************************************************************************/ + +#ifndef P751_API_H +#define P751_API_H + + +/*********************** Key encapsulation mechanism API ***********************/ + +#define CRYPTO_SECRETKEYBYTES 644 // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes +#define CRYPTO_PUBLICKEYBYTES 564 +#define CRYPTO_BYTES 32 +#define CRYPTO_CIPHERTEXTBYTES 596 // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes + +// Algorithm name +#define CRYPTO_ALGNAME "SIKEp751" + +// SIKE's key generation +// It produces a private key sk and computes the public key pk. +// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 644 bytes) +// public key pk (CRYPTO_PUBLICKEYBYTES = 564 bytes) +int crypto_kem_keypair_SIKEp751(unsigned char *pk, unsigned char *sk); + +// SIKE's encapsulation +// Input: public key pk (CRYPTO_PUBLICKEYBYTES = 564 bytes) +// Outputs: shared secret ss (CRYPTO_BYTES = 32 bytes) +// ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 596 bytes) +int crypto_kem_enc_SIKEp751(unsigned char *ct, unsigned char *ss, const unsigned char *pk); + +// SIKE's decapsulation +// Input: secret key sk (CRYPTO_SECRETKEYBYTES = 644 bytes) +// ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 596 bytes) +// Outputs: shared secret ss (CRYPTO_BYTES = 32 bytes) +int crypto_kem_dec_SIKEp751(unsigned char *ss, const unsigned char *ct, const unsigned char *sk); + + +// Encoding of keys for KEM-based isogeny system "SIKEp751" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p751) are encoded in 94 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p751^2), where a and b are defined over GF(p751), are encoded as {a, b}, with a in the lowest memory portion. 
+// +// Private keys sk consist of the concatenation of a 32-byte random value, a value in the range [0, 2^Floor(Log(2,3^239))-1] and the public key pk. In the SIKE API, +// private keys are encoded in 644 octets in little endian format. +// Public keys pk consist of 3 elements in GF(p751^2). In the SIKE API, pk is encoded in 564 octets. +// Ciphertexts ct consist of the concatenation of a public key value and a 32-byte value. In the SIKE API, ct is encoded in 564 + 32 = 596 octets. +// Shared keys ss consist of a value of 32 octets. + + +/*********************** Ephemeral key exchange API ***********************/ + +#define SIDH_SECRETKEYBYTES_A 47 +#define SIDH_SECRETKEYBYTES_B 48 +#define SIDH_PUBLICKEYBYTES 564 +#define SIDH_BYTES 188 + +// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys. +// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016. +// Extended version available at: http://eprint.iacr.org/2016/859 + +// Generation of Alice's secret key +// Outputs random value in [0, 2^372 - 1] to be used as Alice's private key +void random_mod_order_A_SIDHp751(unsigned char* random_digits); + +// Generation of Bob's secret key +// Outputs random value in [0, 2^Floor(Log(2,3^239)) - 1] to be used as Bob's private key +void random_mod_order_B_SIDHp751(unsigned char* random_digits); + +// Alice's ephemeral public key generation +// Input: a private key PrivateKeyA in the range [0, 2^372 - 1], stored in 47 bytes. +// Output: the public key PublicKeyA consisting of 3 GF(p751^2) elements encoded in 564 bytes. +int EphemeralKeyGeneration_A_SIDHp751(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA); + +// Bob's ephemeral public key generation +// It takes the private key PrivateKeyB as input and computes the public key PublicKeyB. +// The private key is an integer in the range [0, 2^Floor(Log(2,3^239)) - 1], stored in 48 bytes.
+// The public key consists of 3 GF(p751^2) elements encoded in 564 bytes. +int EphemeralKeyGeneration_B_SIDHp751(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB); + +// Alice's ephemeral shared secret computation +// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB +// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^372 - 1], stored in 47 bytes. +// Bob's PublicKeyB consists of 3 GF(p751^2) elements encoded in 564 bytes. +// Output: a shared secret SharedSecretA that consists of one element in GF(p751^2) encoded in 188 bytes. +int EphemeralSecretAgreement_A_SIDHp751(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA); + +// Bob's ephemeral shared secret computation +// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA +// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^239)) - 1], stored in 48 bytes. +// Alice's PublicKeyA consists of 3 GF(p751^2) elements encoded in 564 bytes. +// Output: a shared secret SharedSecretB that consists of one element in GF(p751^2) encoded in 188 bytes. +int EphemeralSecretAgreement_B_SIDHp751(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB); + + +// Encoding of keys for KEX-based isogeny system "SIDHp751" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p751) are encoded in 94 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p751^2), where a and b are defined over GF(p751), are encoded as {a, b}, with a in the lowest memory portion. +// +// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^372-1] and [0, 2^Floor(Log(2,3^239)) - 1], resp. 
In the SIDH API, +// Alice's and Bob's private keys are encoded in 47 and 48 octets, resp., in little endian format. +// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p751^2). In the SIDH API, they are encoded in 564 octets. +// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p751^2). In the SIDH API, they are encoded in 188 octets. + + +#endif \ No newline at end of file diff --git a/SIKE_sw/src/P751/P751_internal.h b/SIKE_sw/src/P751/P751_internal.h new file mode 100644 index 0000000..a4c7ebd --- /dev/null +++ b/SIKE_sw/src/P751/P751_internal.h @@ -0,0 +1,175 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: internal header file for P751 +*********************************************************************************************/ + +#ifndef P751_INTERNAL_H +#define P751_INTERNAL_H + +#include "../config.h" + + +#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_ARM64) + #define NWORDS_FIELD 12 // Number of words of a 751-bit field element + #define p751_ZERO_WORDS 5 // Number of "0" digits in the least significant part of p751 + 1 +#elif (TARGET == TARGET_x86) + #define NWORDS_FIELD 24 + #define p751_ZERO_WORDS 11 +#endif + + +// Basic constants + +#define NBITS_FIELD 751 +#define MAXBITS_FIELD 768 +#define MAXWORDS_FIELD ((MAXBITS_FIELD+RADIX-1)/RADIX) // Max. number of words to represent field elements +#define NWORDS64_FIELD ((NBITS_FIELD+63)/64) // Number of 64-bit words of a 751-bit field element +#define NBITS_ORDER 384 +#define NWORDS_ORDER ((NBITS_ORDER+RADIX-1)/RADIX) // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp. 
+#define NWORDS64_ORDER ((NBITS_ORDER+63)/64) // Number of 64-bit words of a 384-bit element +#define MAXBITS_ORDER NBITS_ORDER +#define ALICE 0 +#define BOB 1 +#define OALICE_BITS 372 +#define OBOB_BITS 379 +#define OBOB_EXPON 239 +#define MASK_ALICE 0x0F +#define MASK_BOB 0x03 +#define PRIME p751 +#define PARAM_A 6 +#define PARAM_C 1 +// Fixed parameters for isogeny tree computation +#define MAX_INT_POINTS_ALICE 8 +#define MAX_INT_POINTS_BOB 10 +#define MAX_Alice 186 +#define MAX_Bob 239 +#define MSG_BYTES 32 +#define SECRETKEY_A_BYTES ((OALICE_BITS + 7) / 8) +#define SECRETKEY_B_BYTES ((OBOB_BITS - 1 + 7) / 8) +#define FP2_ENCODED_BYTES (2*((NBITS_FIELD + 7) / 8)) // Two encoded field elements; fully parenthesized so the macro is safe inside any expression + + +// SIDH's basic element definitions and point representations + +typedef digit_t felm_t[NWORDS_FIELD]; // Datatype for representing 751-bit field elements (768-bit max.) +typedef digit_t dfelm_t[2*NWORDS_FIELD]; // Datatype for representing double-precision 2x751-bit field elements (2x768-bit max.) +typedef felm_t f2elm_t[2]; // Datatype for representing quadratic extension field elements GF(p751^2) + +typedef struct { f2elm_t X; f2elm_t Z; } point_proj; // Point representation in projective XZ Montgomery coordinates. +typedef point_proj point_proj_t[1]; + +#ifdef COMPRESS + typedef struct { f2elm_t X; f2elm_t Y; f2elm_t Z; } point_full_proj; // Point representation in full projective XYZ Montgomery coordinates + typedef point_full_proj point_full_proj_t[1]; + + typedef struct { f2elm_t x; f2elm_t y; } point_affine; // Point representation in affine coordinates.
+ typedef point_affine point_t[1]; + + typedef f2elm_t publickey_t[3]; +#endif + + + +/**************** Function prototypes ****************/ +/************* Multiprecision functions **************/ + +// 751-bit multiprecision addition, c = a+b +void mp_add751(const digit_t* a, const digit_t* b, digit_t* c); +void mp_add751_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// 751-bit multiprecision subtraction, c = a-b+2p or c = a-b+4p +extern void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c); +extern void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c); +void mp_sub751_p2_asm(const digit_t* a, const digit_t* b, digit_t* c); +void mp_sub751_p4_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// 2x751-bit multiprecision subtraction followed by addition with p751*2^768, c = a-b+(p751*2^768) if a-b < 0, otherwise c=a-b +void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c); +void mp_subadd751x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Double 2x751-bit multiprecision subtraction, c = c-a-b, where c > a and c > b +void mp_dblsub751x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +/************ Field arithmetic functions *************/ + +// Copy of a field element, c = a +void fpcopy751(const digit_t* a, digit_t* c); + +// Zeroing a field element, a = 0 +void fpzero751(digit_t* a); + +// Non constant-time comparison of two field elements. 
If a = b return TRUE, otherwise, return FALSE +bool fpequal751_non_constant_time(const digit_t* a, const digit_t* b); + +// Modular addition, c = a+b mod p751 +extern void fpadd751(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpadd751_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular subtraction, c = a-b mod p751 +extern void fpsub751(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpsub751_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular negation, a = -a mod p751 +extern void fpneg751(digit_t* a); + +// Modular division by two, c = a/2 mod p751. +void fpdiv2_751(const digit_t* a, digit_t* c); + +// Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1]. +void fpcorrection751(digit_t* a); + +// 751-bit Montgomery reduction, c = a mod p +void rdc751_asm(digit_t* ma, digit_t* mc); + +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p751, where R=2^768 +void fpmul751_mont(const digit_t* a, const digit_t* b, digit_t* c); +void mul751_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p751, where R=2^768 +void fpsqr751_mont(const digit_t* ma, digit_t* mc); + +// Field inversion, a = a^-1 in GF(p751) +void fpinv751_mont(digit_t* a); + +// Chain to compute (p751-3)/4 using Montgomery arithmetic +void fpinv751_chain_mont(digit_t* a); + +/************ GF(p^2) arithmetic functions *************/ + +// Copy of a GF(p751^2) element, c = a +void fp2copy751(const f2elm_t a, f2elm_t c); + +// Zeroing a GF(p751^2) element, a = 0 +void fp2zero751(f2elm_t a); + +// GF(p751^2) negation, a = -a in GF(p751^2) +void fp2neg751(f2elm_t a); + +// GF(p751^2) addition, c = a+b in GF(p751^2) +extern void fp2add751(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p751^2) subtraction, c = a-b in GF(p751^2) +extern void fp2sub751(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p751^2) division by two, c =
a/2 in GF(p751^2) +void fp2div2_751(const f2elm_t a, f2elm_t c); + +// Modular correction, a = a in GF(p751^2) +void fp2correction751(f2elm_t a); + +// GF(p751^2) squaring using Montgomery arithmetic, c = a^2 in GF(p751^2) +void fp2sqr751_mont(const f2elm_t a, f2elm_t c); + +// GF(p751^2) multiplication using Montgomery arithmetic, c = a*b in GF(p751^2) +void fp2mul751_mont(const f2elm_t a, const f2elm_t b, f2elm_t c); + +// GF(p751^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) +void fp2inv751_mont(f2elm_t a); + + +#endif diff --git a/SIKE_sw/src/P751/generic/fp_generic.c b/SIKE_sw/src/P751/generic/fp_generic.c new file mode 100644 index 0000000..9b73939 --- /dev/null +++ b/SIKE_sw/src/P751/generic/fp_generic.c @@ -0,0 +1,259 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: portable modular arithmetic for P751 +*********************************************************************************************/ + +#include "../P751_internal.h" +#include "../../internal.h" + +// Global constants +extern const uint64_t p751[NWORDS64_FIELD]; +extern const uint64_t p751p1[NWORDS64_FIELD]; +extern const uint64_t p751x2[NWORDS64_FIELD]; +extern const uint64_t p751x4[NWORDS64_FIELD]; + + +__inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. 
+ unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p751x2)[i], borrow, c[i]); + } +} + + +__inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p751x4)[i], borrow, c[i]); + } +} + + +__inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p751. + // Inputs: a, b in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p751x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p751x2)[i] & mask, carry, c[i]); + } +} + + +__inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p751. + // Inputs: a, b in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p751x2)[i] & mask, borrow, c[i]); + } +} + + +__inline void fpneg751(digit_t* a) +{ // Modular negation, a = -a mod p751. 
+ // Input/output: a in [0, 2*p751-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p751x2)[i], a[i], borrow, a[i]); + } +} + + +void fpdiv2_751(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p751. + // Input : a in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p751 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p751)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection751(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p751)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p751)[i] & mask, borrow, a[i]); + } +} + + +void digit_x_digit(const digit_t a, const digit_t b, digit_t* c) +{ // Digit multiplication, digit * digit -> 2-digit result + register digit_t al, ah, bl, bh, temp; + digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4); + + al = a & mask_low; // Low part + ah = a >> (sizeof(digit_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(digit_t) * 4); + + albl = al*bl; + albh = al*bh; + ahbl = ah*bl; + ahbh = ah*bh; + c[0] = albl & mask_low; // C00 + + res1 = albl >> (sizeof(digit_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(digit_t) * 4); + c[0] ^= temp << (sizeof(digit_t) * 4); // C01 + + res1 = ahbl >> (sizeof(digit_t) * 4); + res2 = albh >> (sizeof(digit_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + c[1] = temp & mask_low; // C10 + carry = temp 
& mask_high; + c[1] ^= (ahbh & mask_high) + carry; // C11 +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. + unsigned int i, j; + digit_t t = 0, u = 0, v = 0, UV[2]; + unsigned int carry = 0; + + for (i = 0; i < nwords; i++) { + for (j = 0; j <= i; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = nwords; i < 2*nwords-1; i++) { + for (j = i-nwords+1; j < nwords; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + c[2*nwords-1] = v; +} + + +void rdc_mont(digit_t* ma, digit_t* mc) +{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p751. + // mc = ma*R^-1 mod p751x2, where R = 2^768. + // If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1]. + // ma is assumed to be in Montgomery representation. 
+ unsigned int i, j, carry, count = p751_ZERO_WORDS; + digit_t UV[2], t = 0, u = 0, v = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + mc[i] = 0; + } + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j < i; j++) { + if (j < (i-p751_ZERO_WORDS+1)) { + MUL(mc[j], ((digit_t*)p751p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { + if (count > 0) { + count -= 1; + } + for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { + if (j < (NWORDS_FIELD-count)) { + MUL(mc[j], ((digit_t*)p751p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i-NWORDS_FIELD] = v; + v = u; + u = t; + t = 0; + } + ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); + mc[NWORDS_FIELD-1] = v; +} \ No newline at end of file diff --git a/SIKE_sw/src/config.h b/SIKE_sw/src/config.h new file mode 100644 index 0000000..4f8d368 --- /dev/null +++ b/SIKE_sw/src/config.h @@ -0,0 +1,271 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: configuration file and platform-dependent macros +*********************************************************************************************/ + +#ifndef CONFIG_H +#define CONFIG_H + +#include <stdint.h> +#include <stdbool.h> +#include <stddef.h> + +// Definition of operating system + +#define OS_WIN 1 +#define OS_LINUX 2 + +#if defined(__WINDOWS__) // Microsoft Windows OS +#define OS_TARGET OS_WIN +#elif defined(__LINUX__) // Linux OS +#define OS_TARGET OS_LINUX +#else +#error-- "Unsupported OS" +#endif + +#if (OS_TARGET == OS_LINUX) +#define ALIGN_FOOTER(N) __attribute__((aligned(N))) +#else +#define ALIGN_FOOTER(N)
+#endif + +// Definition of compiler + +#define COMPILER_VC 1 +#define COMPILER_GCC 2 +#define COMPILER_CLANG 3 + +#if defined(_MSC_VER) // Microsoft Visual C compiler +#define COMPILER COMPILER_VC +#elif defined(__GNUC__) // GNU GCC compiler +#define COMPILER COMPILER_GCC +#elif defined(__clang__) // Clang compiler +#define COMPILER COMPILER_CLANG +#else +#error-- "Unsupported COMPILER" +#endif + +// Definition of the targeted architecture and basic data types + +#define TARGET_AMD64 1 +#define TARGET_ARM64 2 +#define TARGET_x86 3 + +#if defined(_AMD64_) +#define TARGET TARGET_AMD64 +#define RADIX 64 +#define LOG2RADIX 6 +typedef uint64_t digit_t; // Unsigned 64-bit digit +#elif defined(_ARM64_) +#define TARGET TARGET_ARM64 +#define RADIX 64 +#define LOG2RADIX 6 +typedef uint64_t digit_t; // Unsigned 64-bit digit +#elif defined(_X86_) +#define TARGET TARGET_x86 +#define RADIX 32 +#define LOG2RADIX 5 +typedef uint32_t digit_t; // Unsigned 32-bit digit +#else +#error-- "Unsupported ARCHITECTURE" +#endif + +#define RADIX64 64 + +// Selection of generic, portable implementation + +#if defined(_GENERIC_) +#define GENERIC_IMPLEMENTATION +#elif defined(_FAST_) +#define FAST_IMPLEMENTATION +#endif + +// Extended datatype support + +#if defined(GENERIC_IMPLEMENTATION) +typedef uint64_t uint128_t[2]; +#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_LINUX) && (COMPILER == COMPILER_GCC || COMPILER == COMPILER_CLANG) +#define UINT128_SUPPORT +typedef unsigned uint128_t __attribute__((mode(TI))); +#elif (TARGET == TARGET_ARM64 && OS_TARGET == OS_LINUX) && (COMPILER == COMPILER_GCC || COMPILER == COMPILER_CLANG) +#define UINT128_SUPPORT +typedef unsigned uint128_t __attribute__((mode(TI))); +#elif (TARGET == TARGET_AMD64) && (OS_TARGET == OS_WIN && COMPILER == COMPILER_VC) +#define SCALAR_INTRIN_SUPPORT +typedef uint64_t uint128_t[2]; +#else +#error-- "Unsupported configuration" +#endif + +// Macro definitions + +#define NBITS_TO_NBYTES(nbits) (((nbits) + 7) / 8) // Conversion 
macro from number of bits to number of bytes +#define NBITS_TO_NWORDS(nbits) (((nbits) + (sizeof(digit_t) * 8) - 1) / (sizeof(digit_t) * 8)) // Conversion macro from number of bits to number of computer words +#define NBYTES_TO_NWORDS(nbytes) (((nbytes) + sizeof(digit_t) - 1) / sizeof(digit_t)) // Conversion macro from number of bytes to number of computer words + + +/********************** Constant-time unsigned comparisons ***********************/ + +// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise + +static __inline unsigned int is_digit_nonzero_ct(digit_t x) +{ // Is x != 0? + return (unsigned int)((x | (0 - x)) >> (RADIX - 1)); +} + +static __inline unsigned int is_digit_zero_ct(digit_t x) +{ // Is x = 0? + return (unsigned int)(1 ^ is_digit_nonzero_ct(x)); +} + +static __inline unsigned int is_digit_lessthan_ct(digit_t x, digit_t y) +{ // Is x < y? + return (unsigned int)((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX - 1)); +} + +/********************** Macros for platform-dependent operations **********************/ + +#if defined(GENERIC_IMPLEMENTATION) + +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) \ + digit_x_digit((multiplier), (multiplicand), &(lo)); + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ + { \ + digit_t tempReg = (addend1) + (digit_t)(carryIn); \ + (sumOut) = (addend2) + tempReg; \ + (carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); \ + } + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ + { \ + digit_t tempReg = (minuend) - (subtrahend); \ + unsigned int borrowReg = (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn)&is_digit_zero_ct(tempReg))); \ + (differenceOut) = tempReg - (digit_t)(borrowIn); \ + (borrowOut) = borrowReg; \ + } + +// Shift right with flexible datatype +#define SHIFTR(highIn, lowIn, shift, 
shiftOut, DigitSize) \ + (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (DigitSize - (shift))); + +// Shift left with flexible datatype +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (DigitSize - (shift))); + +// 64x64-bit multiplication +#define MUL128(multiplier, multiplicand, product) \ + mp_mul((digit_t *)&(multiplier), (digit_t *)&(multiplicand), (digit_t *)&(product), NWORDS_FIELD / 2); + +// 128-bit addition, inputs < 2^127 +#define ADD128(addend1, addend2, addition) \ + mp_add((digit_t *)(addend1), (digit_t *)(addend2), (digit_t *)(addition), NWORDS_FIELD); + +// 128-bit addition with output carry +#define ADC128(addend1, addend2, carry, addition) \ + (carry) = mp_add((digit_t *)(addend1), (digit_t *)(addend2), (digit_t *)(addition), NWORDS_FIELD); + +#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_WIN) + +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) \ + (lo) = _umul128((multiplier), (multiplicand), (hi)); + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ + (carryOut) = _addcarry_u64((carryIn), (addend1), (addend2), &(sumOut)); + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ + (borrowOut) = _subborrow_u64((borrowIn), (minuend), (subtrahend), &(differenceOut)); + +// Digit shift right +#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = __shiftright128((lowIn), (highIn), (shift)); + +// Digit shift left +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = __shiftleft128((lowIn), (highIn), (shift)); + +// 64x64-bit multiplication +#define MUL128(multiplier, multiplicand, product) \ + (product)[0] = _umul128((multiplier), (multiplicand), &(product)[1]); + +// 128-bit addition, inputs < 2^127 +#define ADD128(addend1, addend2, addition) \ + { \ + unsigned char carry = _addcarry_u64(0, (addend1)[0], (addend2)[0], 
&(addition)[0]); \ + _addcarry_u64(carry, (addend1)[1], (addend2)[1], &(addition)[1]); \ + } + +// 128-bit addition with output carry +#define ADC128(addend1, addend2, carry, addition) \ + (carry) = _addcarry_u64(0, (addend1)[0], (addend2)[0], &(addition)[0]); \ + (carry) = _addcarry_u64((carry), (addend1)[1], (addend2)[1], &(addition)[1]); + +// 128-bit subtraction, subtrahend < 2^127 +#define SUB128(minuend, subtrahend, difference) \ + { \ + unsigned char borrow = _subborrow_u64(0, (minuend)[0], (subtrahend)[0], &(difference)[0]); \ + _subborrow_u64(borrow, (minuend)[1], (subtrahend)[1], &(difference)[1]); \ + } + +// 128-bit right shift, max. shift value is 64 +#define SHIFTR128(Input, shift, shiftOut) \ + (shiftOut)[0] = __shiftright128((Input)[0], (Input)[1], (shift)); \ + (shiftOut)[1] = (Input)[1] >> (shift); + +// 128-bit left shift, max. shift value is 64 +#define SHIFTL128(Input, shift, shiftOut) \ + (shiftOut)[1] = __shiftleft128((Input)[0], (Input)[1], (shift)); \ + (shiftOut)[0] = (Input)[0] << (shift); + +#define MULADD128(multiplier, multiplicand, addend, carry, result) \ + /* 64x64-bit multiply-add with output carry; no leading ';' so the block is the whole statement */ \ + { \ + uint128_t product; \ + MUL128(multiplier, multiplicand, product); \ + ADC128(addend, product, carry, result); \ + } + +#elif ((TARGET == TARGET_AMD64 || TARGET == TARGET_ARM64) && OS_TARGET == OS_LINUX) + +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) \ + { \ + uint128_t tempReg = (uint128_t)(multiplier) * (uint128_t)(multiplicand); \ + *(hi) = (digit_t)(tempReg >> RADIX); \ + (lo) = (digit_t)tempReg; \ + } + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ + { \ + uint128_t tempReg = (uint128_t)(addend1) + (uint128_t)(addend2) + (uint128_t)(carryIn); \ + (carryOut) = (digit_t)(tempReg >> RADIX); \ + (sumOut) = (digit_t)tempReg; \ + } + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ + { \ + uint128_t tempReg = (uint128_t)(minuend) -
(uint128_t)(subtrahend) - (uint128_t)(borrowIn); \ + (borrowOut) = (digit_t)(tempReg >> (sizeof(uint128_t) * 8 - 1)); \ + (differenceOut) = (digit_t)tempReg; \ + } + +// Digit shift right +#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (RADIX - (shift))); + +// Digit shift left +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (RADIX - (shift))); + +#endif + +#endif diff --git a/SIKE_sw/src/ec_isogeny.c b/SIKE_sw/src/ec_isogeny.c new file mode 100644 index 0000000..7a0043e --- /dev/null +++ b/SIKE_sw/src/ec_isogeny.c @@ -0,0 +1,416 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: Elliptic curve and isogeny functions +*********************************************************************************************/ + + +void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24) +{ // Doubling of a Montgomery point in projective coordinates (X:Z). + // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A+2C and 4C. + // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2). 
+ f2elm_t t0, t1; + + mp2_sub_p2(P->X, P->Z, t0); // t0 = X1-Z1 + mp2_add(P->X, P->Z, t1); // t1 = X1+Z1 + fp2sqr_mont(t0, t0); // t0 = (X1-Z1)^2 + fp2sqr_mont(t1, t1); // t1 = (X1+Z1)^2 + fp2mul_mont(C24, t0, Q->Z); // Z2 = C24*(X1-Z1)^2 + fp2mul_mont(t1, Q->Z, Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2 + mp2_sub_p2(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2 + fp2mul_mont(A24plus, t1, t0); // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + mp2_add(Q->Z, t0, Q->Z); // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2 + fp2mul_mont(Q->Z, t1, Q->Z); // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2] +} + + +void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, const int e) +{ // Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. + // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A+2C and 4C. + // Output: projective Montgomery x-coordinates Q <- (2^e)*P. + int i; + + copy_words((digit_t*)P, (digit_t*)Q, 2*2*NWORDS_FIELD); + + for (i = 0; i < e; i++) { + xDBL(Q, Q, A24plus, C24); + } +} + +#if (OALICE_BITS % 2 == 1) + +void get_2_isog(const point_proj_t P, f2elm_t A, f2elm_t C) +{ // Computes the corresponding 2-isogeny of a projective Montgomery point (X2:Z2) of order 2. + // Input: projective point of order two P = (X2:Z2). + // Output: the 2-isogenous Montgomery curve with projective coefficients A/C. + + fp2sqr_mont(P->X, A); // A = X2^2 + fp2sqr_mont(P->Z, C); // C = Z2^2 + mp2_sub_p2(C, A, A); // A = Z2^2 - X2^2 +} + + +void eval_2_isog(point_proj_t P, point_proj_t Q) +{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 2-isogeny phi. + // Inputs: the projective point P = (X:Z) and the 2-isogeny kernel projective point Q = (X2:Z2). + // Output: the projective point P = phi(P) = (X:Z) in the codomain.
+ f2elm_t t0, t1, t2, t3; + + mp2_add(Q->X, Q->Z, t0); // t0 = X2+Z2 + mp2_sub_p2(Q->X, Q->Z, t1); // t1 = X2-Z2 + mp2_add(P->X, P->Z, t2); // t2 = X+Z + mp2_sub_p2(P->X, P->Z, t3); // t3 = X-Z + fp2mul_mont(t0, t3, t0); // t0 = (X2+Z2)*(X-Z) + fp2mul_mont(t1, t2, t1); // t1 = (X2-Z2)*(X+Z) + mp2_add(t0, t1, t2); // t2 = (X2+Z2)*(X-Z) + (X2-Z2)*(X+Z) + mp2_sub_p2(t0, t1, t3); // t3 = (X2+Z2)*(X-Z) - (X2-Z2)*(X+Z) + fp2mul_mont(P->X, t2, P->X); // Xfinal + fp2mul_mont(P->Z, t3, P->Z); // Zfinal +} + +#endif + +void get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff) +{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. + // Input: projective point of order four P = (X4:Z4). + // Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C and the 3 coefficients + // that are used to evaluate the isogeny at a point in eval_4_isog(). + mp2_sub_p2(P->X, P->Z, coeff[1]); // coeff[1] = X4-Z4 + mp2_add(P->X, P->Z, coeff[2]); // coeff[2] = X4+Z4 + fp2sqr_mont(P->Z, coeff[0]); // coeff[0] = Z4^2 + mp2_add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 2*Z4^2 + fp2sqr_mont(coeff[0], C24); // C24 = 4*Z4^4 + mp2_add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 4*Z4^2 + fp2sqr_mont(P->X, A24plus); // A24plus = X4^2 + mp2_add(A24plus, A24plus, A24plus); // A24plus = 2*X4^2 + fp2sqr_mont(A24plus, A24plus); // A24plus = 4*X4^4 +} + + +void eval_4_isog(point_proj_t P, f2elm_t* coeff) +{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined + // by the 3 coefficients in coeff (computed in the function get_4_isog()). + // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z). + // Output: the projective point P = phi(P) = (X:Z) in the codomain. 
+ f2elm_t t0, t1; + + mp2_add(P->X, P->Z, t0); // t0 = X+Z + mp2_sub_p2(P->X, P->Z, t1); // t1 = X-Z + fp2mul_mont(t0, coeff[1], P->X); // X = (X+Z)*coeff[1] + fp2mul_mont(t1, coeff[2], P->Z); // Z = (X-Z)*coeff[2] + fp2mul_mont(t0, t1, t0); // t0 = (X+Z)*(X-Z) + fp2mul_mont(coeff[0], t0, t0); // t0 = coeff[0]*(X+Z)*(X-Z) + mp2_add(P->X, P->Z, t1); // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1] + mp2_sub_p2(P->X, P->Z, P->Z); // Z = (X+Z)*coeff[1] - (X-Z)*coeff[2] (sign is immaterial, squared below) + fp2sqr_mont(t1, t1); // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2 + fp2sqr_mont(P->Z, P->Z); // Z = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 + mp2_add(t1, t0, P->X); // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2 + mp2_sub_p2(P->Z, t0, t0); // t0 = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - coeff[0]*(X+Z)*(X-Z) + fp2mul_mont(P->X, t1, P->X); // Xfinal + fp2mul_mont(P->Z, t0, P->Z); // Zfinal +} + + +void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus) +{ // Tripling of a Montgomery point in projective coordinates (X:Z). + // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C. + // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3). 
+ f2elm_t t0, t1, t2, t3, t4, t5, t6; + + mp2_sub_p2(P->X, P->Z, t0); // t0 = X-Z + fp2sqr_mont(t0, t2); // t2 = (X-Z)^2 + mp2_add(P->X, P->Z, t1); // t1 = X+Z + fp2sqr_mont(t1, t3); // t3 = (X+Z)^2 + mp2_add(P->X, P->X, t4); // t4 = 2*X + mp2_add(P->Z, P->Z, t0); // t0 = 2*Z + fp2sqr_mont(t4, t1); // t1 = 4*X^2 + mp2_sub_p2(t1, t3, t1); // t1 = 4*X^2 - (X+Z)^2 + mp2_sub_p2(t1, t2, t1); // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2 + fp2mul_mont(A24plus, t3, t5); // t5 = A24plus*(X+Z)^2 + fp2mul_mont(t3, t5, t3); // t3 = A24plus*(X+Z)^4 + fp2mul_mont(A24minus, t2, t6); // t6 = A24minus*(X-Z)^2 + fp2mul_mont(t2, t6, t2); // t2 = A24minus*(X-Z)^4 + mp2_sub_p2(t2, t3, t3); // t3 = A24minus*(X-Z)^4 - A24plus*(X+Z)^4 + mp2_sub_p2(t5, t6, t2); // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2 + fp2mul_mont(t1, t2, t1); // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + fp2add(t3, t1, t2); // t2 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + A24minus*(X-Z)^4 - A24plus*(X+Z)^4 + fp2sqr_mont(t2, t2); // t2 = t2^2 + fp2mul_mont(t4, t2, Q->X); // X3 = 2*X*t2 + fp2sub(t3, t1, t1); // t1 = A24minus*(X-Z)^4 - A24plus*(X+Z)^4 - [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + fp2sqr_mont(t1, t1); // t1 = t1^2 + fp2mul_mont(t0, t1, Q->Z); // Z3 = 2*Z*t1 +} + + +void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, const int e) +{ // Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. + // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A24plus = A+2C and A24minus = A-2C. + // Output: projective Montgomery x-coordinates Q <- (3^e)*P. 
+ int i; + + copy_words((digit_t*)P, (digit_t*)Q, 2*2*NWORDS_FIELD); + + for (i = 0; i < e; i++) { + xTPL(Q, Q, A24minus, A24plus); + } +} + + +void get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff) +{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. + // Input: projective point of order three P = (X3:Z3). + // Output: the constants A24minus and A24plus computed for the 3-isogenous curve, and coeff[0] = X3-Z3, coeff[1] = X3+Z3 for use in eval_3_isog(). + f2elm_t t0, t1, t2, t3, t4; + + mp2_sub_p2(P->X, P->Z, coeff[0]); // coeff0 = X-Z + fp2sqr_mont(coeff[0], t0); // t0 = (X-Z)^2 + mp2_add(P->X, P->Z, coeff[1]); // coeff1 = X+Z + fp2sqr_mont(coeff[1], t1); // t1 = (X+Z)^2 + mp2_add(P->X, P->X, t3); // t3 = 2*X + fp2sqr_mont(t3, t3); // t3 = 4*X^2 + fp2sub(t3, t0, t2); // t2 = 4*X^2 - (X-Z)^2 + fp2sub(t3, t1, t3); // t3 = 4*X^2 - (X+Z)^2 + mp2_add(t0, t3, t4); // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2 + mp2_add(t4, t4, t4); // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2) + mp2_add(t1, t4, t4); // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2 + fp2mul_mont(t2, t4, A24minus); // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2] + mp2_add(t1, t2, t4); // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2 + mp2_add(t4, t4, t4); // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2) + mp2_add(t0, t4, t4); // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2 + fp2mul_mont(t3, t4, A24plus); // A24plus = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] +} + + +void eval_3_isog(point_proj_t Q, const f2elm_t* coeff) +{ // Evaluates the 3-isogeny phi at the projective point Q = (X:Z), using the 2 coefficients in coeff + // (computed in the function get_3_isog()); the kernel point itself is not passed to this function. + // Inputs: the projective point Q = (X:Z) and the coefficients coeff[0], coeff[1]. + // Output: the projective point Q <- phi(Q), overwritten in place. 
+ f2elm_t t0, t1, t2; + + mp2_add(Q->X, Q->Z, t0); // t0 = X+Z + mp2_sub_p2(Q->X, Q->Z, t1); // t1 = X-Z + fp2mul_mont(coeff[0], t0, t0); // t0 = coeff0*(X+Z) + fp2mul_mont(coeff[1], t1, t1); // t1 = coeff1*(X-Z) + mp2_add(t0, t1, t2); // t2 = coeff0*(X+Z) + coeff1*(X-Z) + mp2_sub_p2(t1, t0, t0); // t0 = coeff1*(X-Z) - coeff0*(X+Z) + fp2sqr_mont(t2, t2); // t2 = [coeff0*(X+Z) + coeff1*(X-Z)]^2 + fp2sqr_mont(t0, t0); // t0 = [coeff1*(X-Z) - coeff0*(X+Z)]^2 + fp2mul_mont(Q->X, t2, Q->X); // X3final = X*[coeff0*(X+Z) + coeff1*(X-Z)]^2 + fp2mul_mont(Q->Z, t0, Q->Z); // Z3final = Z*[coeff1*(X-Z) - coeff0*(X+Z)]^2 +} + + +void inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3) +{ // 3-way simultaneous inversion + // Input: z1,z2,z3 + // Output: 1/z1,1/z2,1/z3 (overwrite inputs). + f2elm_t t0, t1, t2; + + fp2mul_mont(z1, z2, t0); // t0 = z1*z2 + fp2mul_mont(z3, t0, t1); // t1 = z1*z2*z3 + fp2inv_mont(t1); // t1 = 1/(z1*z2*z3) + fp2mul_mont(z3, t1, t2); // t2 = 1/(z1*z2) + fp2mul_mont(t0, t1, z3); // z3 = 1/z3 + fp2mul_mont(t2, z2, t0); // t0 = 1/z1 (kept in t0 because z1 is still needed for the next product) + fp2mul_mont(t2, z1, z2); // z2 = 1/z2 + fp2copy(t0, z1); // z1 = 1/z1 +} + + +void get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A) +{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. + // Input: the x-coordinates xP, xQ, and xR of the points P, Q and R. + // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x. 
+ f2elm_t t0, t1, one = {0}; + + fpcopy((digit_t*)&Montgomery_one, one[0]); + fp2add(xP, xQ, t1); // t1 = xP+xQ + fp2mul_mont(xP, xQ, t0); // t0 = xP*xQ + fp2mul_mont(xR, t1, A); // A = xR*t1 + fp2add(t0, A, A); // A = A+t0 + fp2mul_mont(t0, xR, t0); // t0 = t0*xR + fp2sub(A, one, A); // A = A-1 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2add(t1, xR, t1); // t1 = t1+xR + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2sqr_mont(A, A); // A = A^2 + fp2inv_mont(t0); // t0 = 1/t0 + fp2mul_mont(A, t0, A); // A = A*t0 + fp2sub(A, t1, A); // Afinal = A-t1 +} + + +void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv) +{ // Computes the j-invariant of a Montgomery curve with projective constant. + // Input: A,C in GF(p^2). + // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x. + f2elm_t t0, t1; + + fp2sqr_mont(A, jinv); // jinv = A^2 + fp2sqr_mont(C, t1); // t1 = C^2 + fp2add(t1, t1, t0); // t0 = t1+t1 + fp2sub(jinv, t0, t0); // t0 = jinv-t0 + fp2sub(t0, t1, t0); // t0 = t0-t1 + fp2sub(t0, t1, jinv); // jinv = t0-t1 + fp2sqr_mont(t1, t1); // t1 = t1^2 + fp2mul_mont(jinv, t1, jinv); // jinv = jinv*t1 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2sqr_mont(t0, t1); // t1 = t0^2 + fp2mul_mont(t0, t1, t0); // t0 = t0*t1 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2inv_mont(jinv); // jinv = 1/jinv + fp2mul_mont(jinv, t0, jinv); // jinv = t0*jinv +} + + +void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t XPQ, const f2elm_t ZPQ, const f2elm_t A24) +{ // Simultaneous doubling and differential addition. + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, projective difference (XPQ:ZPQ) such that x(P-Q)=XPQ/ZPQ and Montgomery curve constant A24=(A+2)/4. 
+ // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that x(Q+P)=XQP/ZQP. + f2elm_t t0, t1, t2; + + mp2_add(P->X, P->Z, t0); // t0 = XP+ZP + mp2_sub_p2(P->X, P->Z, t1); // t1 = XP-ZP + fp2sqr_mont(t0, P->X); // XP = (XP+ZP)^2 + mp2_sub_p2(Q->X, Q->Z, t2); // t2 = XQ-ZQ + mp2_add(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ + fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) + fp2sqr_mont(t1, P->Z); // ZP = (XP-ZP)^2 + fp2mul_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) + mp2_sub_p2(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 + fp2mul_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 + fp2mul_mont(A24, t2, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] + mp2_sub_p2(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) + mp2_add(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2 + mp2_add(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) + fp2mul_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] + fp2sqr_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + fp2sqr_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 + fp2mul_mont(Q->Z, XPQ, Q->Z); // ZQ = XPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + fp2mul_mont(Q->X, ZPQ, Q->X); // XQ = ZPQ*[(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 +} + + +static void swap_points(point_proj_t P, point_proj_t Q, const digit_t option) +{ // Swap points. 
+ // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P + digit_t temp; + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) { + temp = option & (P->X[0][i] ^ Q->X[0][i]); + P->X[0][i] = temp ^ P->X[0][i]; + Q->X[0][i] = temp ^ Q->X[0][i]; + temp = option & (P->X[1][i] ^ Q->X[1][i]); + P->X[1][i] = temp ^ P->X[1][i]; + Q->X[1][i] = temp ^ Q->X[1][i]; + temp = option & (P->Z[0][i] ^ Q->Z[0][i]); + P->Z[0][i] = temp ^ P->Z[0][i]; + Q->Z[0][i] = temp ^ Q->Z[0][i]; + temp = option & (P->Z[1][i] ^ Q->Z[1][i]); + P->Z[1][i] = temp ^ P->Z[1][i]; + Q->Z[1][i] = temp ^ Q->Z[1][i]; + } +} + + +static void LADDER3PT(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const digit_t* m, const unsigned int AliceOrBob, point_proj_t R, const f2elm_t A) +{ + point_proj_t R0 = {0}, R2 = {0}; + f2elm_t A24 = {0}; + digit_t mask; + int i, nbits, bit, swap, prevbit = 0; + + if (AliceOrBob == ALICE) { + nbits = OALICE_BITS; + } else { + nbits = OBOB_BITS - 1; + } + + // Initializing constant + fpcopy((digit_t*)&Montgomery_one, A24[0]); + mp2_add(A24, A24, A24); + mp2_add(A, A24, A24); + fp2div2(A24, A24); + fp2div2(A24, A24); // A24 = (A+2)/4 + + // Initializing points + fp2copy(xQ, R0->X); + fpcopy((digit_t*)&Montgomery_one, (digit_t*)R0->Z); + fp2copy(xPQ, R2->X); + fpcopy((digit_t*)&Montgomery_one, (digit_t*)R2->Z); + fp2copy(xP, R->X); + fpcopy((digit_t*)&Montgomery_one, (digit_t*)R->Z); + fpzero((digit_t*)(R->Z)[1]); + + // Main loop + for (i = 0; i < nbits; i++) { + bit = (m[i >> LOG2RADIX] >> (i & (RADIX-1))) & 1; + swap = bit ^ prevbit; + prevbit = bit; + mask = 0 - (digit_t)swap; + + swap_points(R, R2, mask); + xDBLADD(R0, R2, R->X, R->Z, A24); + } + swap = 0 ^ prevbit; + mask = 0 - (digit_t)swap; + swap_points(R, R2, mask); +} + + +void TraverseTree(f2elm_t jinv, point_proj_t R, f2elm_t A24plus, f2elm_t C24, const unsigned int *strat, unsigned int lenstrat, bool keygen, + point_proj_t phiP, point_proj_t phiQ, point_proj_t phiR) +{ // 
Isogeny tree traversal + point_proj_t pts[MAX_INT_POINTS_ALICE]; + f2elm_t coeff[3]; + unsigned int i, m, row, ii = 0, index = 0, npts = 0, pts_index[MAX_INT_POINTS_ALICE]; + + for (row = 1; row < lenstrat; row++) { + while (index < lenstrat - row) { + fp2copy(R->X, pts[npts]->X); + fp2copy(R->Z, pts[npts]->Z); + pts_index[npts++] = index; + m = strat[ii++]; + xDBLe(R, R, A24plus, C24, (int)(2*m)); + index += m; + } + get_4_isog(R, A24plus, C24, coeff); + if (keygen) { + eval_4_isog(phiP, coeff); + eval_4_isog(phiQ, coeff); + eval_4_isog(phiR, coeff); + } + + for (i = 0; i < npts; i++) { + eval_4_isog(pts[i], coeff); + } + + fp2copy(pts[npts-1]->X, R->X); + fp2copy(pts[npts-1]->Z, R->Z); + index = pts_index[npts-1]; + npts -= 1; + } + get_4_isog(R, A24plus, C24, coeff); + if (keygen) { + eval_4_isog(phiP, coeff); + eval_4_isog(phiQ, coeff); + eval_4_isog(phiR, coeff); + inv_3_way(phiP->Z, phiQ->Z, phiR->Z); + fp2mul_mont(phiP->X, phiP->Z, phiP->X); + fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X); + fp2mul_mont(phiR->X, phiR->Z, phiR->X); + } else { + fp2add(A24plus, A24plus, A24plus); + fp2sub(A24plus, C24, A24plus); + fp2add(A24plus, A24plus, A24plus); + j_inv(A24plus, C24, jinv); + } +} \ No newline at end of file diff --git a/SIKE_sw/src/fpx.c b/SIKE_sw/src/fpx.c new file mode 100644 index 0000000..1b8e070 --- /dev/null +++ b/SIKE_sw/src/fpx.c @@ -0,0 +1,1103 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: Core functions over GF(p) and GF(p^2) +*********************************************************************************************/ + + +int8_t ct_compare(const uint8_t *a, const uint8_t *b, unsigned int len) +{ // Compare two byte arrays in constant time. 
+ // Returns 0 if the byte arrays are equal, -1 otherwise. + uint8_t r = 0; + + for (unsigned int i = 0; i < len; i++) + r |= a[i] ^ b[i]; + + return (int8_t)((-(int32_t)r) >> (8*sizeof(uint32_t)-1)); +} + + +void ct_cmov(uint8_t *r, const uint8_t *a, unsigned int len, int8_t selector) +{ // Conditional move in constant time. + // If selector = -1 then load r with a, else if selector = 0 then keep r. + + for (unsigned int i = 0; i < len; i++) + r[i] ^= selector & (a[i] ^ r[i]); +} + + +__inline static void encode_to_bytes(const digit_t* x, unsigned char* enc, int nbytes) +{ // Encoding digits to bytes according to endianness +#ifdef _BIG_ENDIAN_ + int ndigits = nbytes / sizeof(digit_t); + int rem = nbytes % sizeof(digit_t); + + for (int i = 0; i < ndigits; i++) + ((digit_t*)enc)[i] = BSWAP_DIGIT(x[i]); + if (rem) { + digit_t ld = BSWAP_DIGIT(x[ndigits]); + memcpy(enc + ndigits*sizeof(digit_t), (unsigned char*)&ld, rem); + } +#else + memcpy(enc, (const unsigned char*)x, nbytes); +#endif +} + + +__inline static void decode_to_digits(const unsigned char* x, digit_t* dec, int nbytes, int ndigits) +{ // Decoding bytes to digits according to endianness + + dec[ndigits - 1] = 0; + memcpy((unsigned char*)dec, x, nbytes); +#ifdef _BIG_ENDIAN_ + for (int i = 0; i < ndigits; i++) + dec[i] = BSWAP_DIGIT(dec[i]); +#endif +} + + +static void fp2_encode(const f2elm_t x, unsigned char *enc) +{ // Conversion of GF(p^2) element from Montgomery to standard representation, and encoding by removing leading 0 bytes + f2elm_t t; + + from_fp2mont(x, t); + encode_to_bytes(t[0], enc, FP2_ENCODED_BYTES / 2); + encode_to_bytes(t[1], enc + FP2_ENCODED_BYTES / 2, FP2_ENCODED_BYTES / 2); +} + + +static void fp2_decode(const unsigned char *x, f2elm_t dec) +{ // Parse byte sequence back into GF(p^2) element, and conversion to Montgomery representation + + decode_to_digits(x, dec[0], FP2_ENCODED_BYTES / 2, NWORDS_FIELD); + decode_to_digits(x + FP2_ENCODED_BYTES / 2, dec[1], FP2_ENCODED_BYTES / 2, 
NWORDS_FIELD); + to_fp2mont(dec, dec); +} + + +__inline void fpcopy(const felm_t a, felm_t c) +{ // Copy a field element, c = a. + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) + c[i] = a[i]; +} + + +__inline void fpzero(felm_t a) +{ // Zero a field element, a = 0. + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) + a[i] = 0; +} + + +void to_mont(const felm_t a, felm_t mc) +{ // Conversion to Montgomery representation, + // mc = a*R^2*R^(-1) mod p = a*R mod p, where a in [0, p-1]. + // The Montgomery constant R^2 mod p is the global value "Montgomery_R2". + + fpmul_mont(a, (digit_t *)&Montgomery_R2, mc); +} + + +void from_mont(const felm_t ma, felm_t c) +{ // Conversion from Montgomery representation to standard representation, + // c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. + digit_t one[NWORDS_FIELD] = {0}; + + one[0] = 1; + fpmul_mont(ma, one, c); + fpcorrection(c); +} + + +void copy_words(const digit_t *a, digit_t *c, const unsigned int nwords) +{ // Copy wordsize digits, c = a, where lng(a) = nwords. + unsigned int i; + + for (i = 0; i < nwords; i++) + c[i] = a[i]; +} + + +void fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc) +{ // Multiprecision multiplication, c = a*b mod p. + dfelm_t temp = {0}; + + mp_mul(ma, mb, temp, NWORDS_FIELD); + rdc_mont(temp, mc); +} + + +void fpsqr_mont(const felm_t ma, felm_t mc) +{ // Multiprecision squaring, c = a^2 mod p. + dfelm_t temp = {0}; + + mp_mul(ma, ma, temp, NWORDS_FIELD); + rdc_mont(temp, mc); +} + + +void fpinv_mont(felm_t a) +{ // Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p. + felm_t tt; + + fpcopy(a, tt); + fpinv_chain_mont(tt); + fpsqr_mont(tt, tt); + fpsqr_mont(tt, tt); + fpmul_mont(a, tt, a); +} + + +void fp2copy(const f2elm_t a, f2elm_t c) +{ // Copy a GF(p^2) element, c = a. + fpcopy(a[0], c[0]); + fpcopy(a[1], c[1]); +} + + +void fp2zero(f2elm_t a) +{ // Zero a GF(p^2) element, a = 0. 
+ fpzero(a[0]); + fpzero(a[1]); +} + + +void fp2neg(f2elm_t a) +{ // GF(p^2) negation, a = -a in GF(p^2). + fpneg(a[0]); + fpneg(a[1]); +} + + +__inline void fp2add(const f2elm_t a, const f2elm_t b, f2elm_t c) +{ // GF(p^2) addition, c = a+b in GF(p^2). + fpadd(a[0], b[0], c[0]); + fpadd(a[1], b[1], c[1]); +} + +__inline void fp2sub(const f2elm_t a, const f2elm_t b, f2elm_t c) +{ // GF(p^2) subtraction, c = a-b in GF(p^2). + fpsub(a[0], b[0], c[0]); + fpsub(a[1], b[1], c[1]); +} + + +void fp2div2(const f2elm_t a, f2elm_t c) +{ // GF(p^2) division by two, c = a/2 in GF(p^2). + fpdiv2(a[0], c[0]); + fpdiv2(a[1], c[1]); +} + + +void fp2correction(f2elm_t a) +{ // Modular correction, a = a in GF(p^2). + fpcorrection(a[0]); + fpcorrection(a[1]); +} + + +__inline static void mp_addfast(const digit_t *a, const digit_t *b, digit_t *c) +{ // Multiprecision addition, c = a+b. +#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) + + mp_add(a, b, c, NWORDS_FIELD); + +#elif (OS_TARGET == OS_LINUX) + + mp_add_asm(a, b, c); + +#endif +} + + +__inline static void mp2_add(const f2elm_t a, const f2elm_t b, f2elm_t c) +{ // GF(p^2) addition without correction, c = a+b in GF(p^2). + mp_addfast(a[0], b[0], c[0]); + mp_addfast(a[1], b[1], c[1]); +} + + +__inline static void mp2_sub_p2(const f2elm_t a, const f2elm_t b, f2elm_t c) +{ // GF(p^2) subtraction with correction with 2*p, c = a-b+2p in GF(p^2). + mp_sub_p2(a[0], b[0], c[0]); + mp_sub_p2(a[1], b[1], c[1]); +} + + +__inline unsigned int mp_add(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. + unsigned int i, carry = 0; + + for (i = 0; i < nwords; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + return carry; +} + + +void fp2sqr_mont(const f2elm_t a, f2elm_t c) +{ // GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2). 
+ // Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1] + // Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] + felm_t t1, t2, t3; + + mp_addfast(a[0], a[1], t1); // t1 = a0+a1 + sub_p4(a[0], a[1], t2); // t2 = a0-a1 + mp_addfast(a[0], a[0], t3); // t3 = 2a0 + fpmul_mont(t1, t2, c[0]); // c0 = (a0+a1)(a0-a1) + fpmul_mont(t3, a[1], c[1]); // c1 = 2a0*a1 +} + + +__inline unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) +{ // Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit. + unsigned int i, borrow = 0; + + for (i = 0; i < nwords; i++) + SUBC(borrow, a[i], b[i], borrow, c[i]); + + return borrow; +} + + +__inline static void mp_subaddfast(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction followed by addition with p*2^MAXBITS_FIELD, c = a-b+(p*2^MAXBITS_FIELD) if a-b < 0, otherwise c=a-b. +#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) + felm_t t1; + + digit_t mask = 0 - (digit_t)mp_sub(a, b, c, 2*NWORDS_FIELD); + for (int i = 0; i < NWORDS_FIELD; i++) + t1[i] = ((digit_t*)PRIME)[i] & mask; + mp_addfast((digit_t*)&c[NWORDS_FIELD], t1, (digit_t*)&c[NWORDS_FIELD]); + +#elif (OS_TARGET == OS_LINUX) + + mp_subaddx2_asm(a, b, c); + +#endif +} + + +__inline static void mp_dblsubfast(const digit_t *a, const digit_t *b, digit_t *c) +{ // Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD. + // Inputs should be s.t. c > a and c > b +#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) + + mp_sub(c, a, c, 2 * NWORDS_FIELD); + mp_sub(c, b, c, 2 * NWORDS_FIELD); + +#elif (OS_TARGET == OS_LINUX) + + mp_dblsubx2_asm(a, b, c); + +#endif +} + + +void fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c) +{ // GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2). 
+ // Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1] + // Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] + felm_t t1, t2; + dfelm_t tt1, tt2, tt3; + + mp_addfast(a[0], a[1], t1); // t1 = a0+a1 + mp_addfast(b[0], b[1], t2); // t2 = b0+b1 + mp_mul(a[0], b[0], tt1, NWORDS_FIELD); // tt1 = a0*b0 + mp_mul(a[1], b[1], tt2, NWORDS_FIELD); // tt2 = a1*b1 + mp_mul(t1, t2, tt3, NWORDS_FIELD); // tt3 = (a0+a1)*(b0+b1) + mp_dblsubfast(tt1, tt2, tt3); // tt3 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 + mp_subaddfast(tt1, tt2, tt1); // tt1 = a0*b0 - a1*b1 + p*2^MAXBITS_FIELD if a0*b0 - a1*b1 < 0, else tt1 = a0*b0 - a1*b1 + rdc_mont(tt3, c[1]); // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 + rdc_mont(tt1, c[0]); // c[0] = a0*b0 - a1*b1 +} + + +void fpinv_chain_mont(felm_t a) +{ // Chain to compute a^(p-3)/4 using Montgomery arithmetic. + unsigned int i, j; + +#if (NBITS_FIELD == 377) + felm_t t[15], tt; + + // Precomputed table + fpsqr_mont(a, tt); + fpmul_mont(a, tt, t[0]); + for (i = 0; i <= 13; i++) fpmul_mont(t[i], tt, t[i+1]); + + fpcopy(t[1], tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 11; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[4], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 5; i++) 
fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[3], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[7], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[1], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (j = 0; j < 37; j++) { + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + } + fpcopy(tt, a); + +#elif (NBITS_FIELD == 434) + felm_t t[31], tt; + + // Precomputed table + fpsqr_mont(a, tt); + fpmul_mont(a, tt, t[0]); + for (i = 0; i <= 29; i++) fpmul_mont(t[i], tt, t[i+1]); + + fpcopy(a, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 10; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[3], tt, 
tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[23], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[13], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[7], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[12], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[1], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[21], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[19], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[1], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[26], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[16], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[20], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[25], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[26], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[28], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + 
fpmul_mont(t[10], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[22], tt, tt); + for (j = 0; j < 35; j++) { + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + } + fpcopy(tt, a); + +#elif (NBITS_FIELD == 503) + felm_t t[15], tt; + + // Precomputed table + fpsqr_mont(a, tt); + fpmul_mont(a, tt, t[0]); + for (i = 0; i <= 13; i++) fpmul_mont(t[i], tt, t[i+1]); + + fpcopy(a, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[3], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 12; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[12], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 5; 
i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[12], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[4], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[7], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[13], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[1], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (j = 0; j < 49; j++) { + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + } + fpcopy(tt, a); + +#elif (NBITS_FIELD == 546) + felm_t t[31], tt; + + // Precomputed table + fpsqr_mont(a, tt); + fpmul_mont(a, tt, t[0]); + for (i = 0; i <= 29; i++) fpmul_mont(t[i], tt, t[i+1]); + + fpcopy(t[0], tt); + for (i = 0; i < 10; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[29], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + 
fpmul_mont(t[29], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[13], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[15], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[20], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[15], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[1], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[17], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[17], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[20], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[28], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[22], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[13], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[13], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[28], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[18], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[13], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[20], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[28], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[16], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[17], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 6; i++) 
fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 10; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[29], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[24], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[29], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[3], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[16], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[15], tt, tt); + for (j = 0; j < 45; j++) { + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + } + fpcopy(tt, a); + +#elif (NBITS_FIELD == 610) + felm_t t[31], tt; + + // Precomputed table + fpsqr_mont(a, tt); + fpmul_mont(a, tt, t[0]); + for (i = 0; i <= 29; i++) fpmul_mont(t[i], tt, t[i+1]); + + fpcopy(a, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[25], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[28], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[7], tt, tt); + for (i = 0; i < 11; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[3], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[16], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[28], tt, 
tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[16], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[4], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[3], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[20], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[15], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[15], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[19], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[27], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[28], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[29], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[1], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[3], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[25], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[28], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[22], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[3], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[22], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[7], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + 
fpmul_mont(t[4], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[20], tt, tt); + for (i = 0; i < 11; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[26], tt, tt); + for (i = 0; i < 11; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (j = 0; j < 50; j++) { + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + } + fpcopy(tt, a); + +#elif (NBITS_FIELD == 697) + felm_t t[31], tt; + + // Precomputed table + fpsqr_mont(a, tt); + fpmul_mont(a, tt, t[0]); + for (i = 0; i <= 29; i++) fpmul_mont(t[i], tt, t[i+1]); + + fpcopy(t[0], tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[18], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[27], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[13], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[18], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 10; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[16], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[23], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[13], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[12], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[13], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[27], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[27], tt, tt); 
+ for (i = 0; i < 12; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[27], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[25], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[7], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[27], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[18], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[4], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[18], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[7], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[4], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[22], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[19], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[29], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[24], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[27], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + 
fpmul_mont(t[21], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[28], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 10; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[1], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[17], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + for (j = 0; j < 58; j++) { + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + } + fpcopy(tt, a); + +#elif (NBITS_FIELD == 751) + felm_t t[27], tt; + + // Precomputed table + fpsqr_mont(a, tt); + fpmul_mont(a, tt, t[0]); + fpmul_mont(t[0], tt, t[1]); + fpmul_mont(t[1], tt, t[2]); + fpmul_mont(t[2], tt, t[3]); + fpmul_mont(t[3], tt, t[3]); + for (i = 3; i <= 8; i++) fpmul_mont(t[i], tt, t[i+1]); + fpmul_mont(t[9], tt, t[9]); + for (i = 9; i <= 20; i++) fpmul_mont(t[i], tt, t[i+1]); + fpmul_mont(t[21], tt, t[21]); + for (i = 21; i <= 24; i++) fpmul_mont(t[i], tt, t[i+1]); + fpmul_mont(t[25], tt, t[25]); + fpmul_mont(t[25], tt, t[26]); + + fpcopy(a, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[20], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[23], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 9; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 10; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[15], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[13], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[26], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[20], tt, tt); + for (i = 0; i < 
6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[4], tt, tt); + for (i = 0; i < 10; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[18], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[1], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[22], tt, tt); + for (i = 0; i < 10; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[18], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[17], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 10; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[16], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[7], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[12], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[19], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[22], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[25], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[22], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[18], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[4], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[13], tt, tt); + 
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[23], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[21], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[23], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[12], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[3], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[13], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[17], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[26], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[20], tt, tt); + for (j = 0; j < 61; j++) { + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[26], tt, tt); + } + fpcopy(tt, a); +#else + (void)a, (void)i, (void)j; +#endif +} + + +void fp2inv_mont(f2elm_t a) +{ // GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2). + f2elm_t t1; + + fpsqr_mont(a[0], t1[0]); // t10 = a0^2 + fpsqr_mont(a[1], t1[1]); // t11 = a1^2 + fpadd(t1[0], t1[1], t1[0]); // t10 = a0^2+a1^2 + fpinv_mont(t1[0]); // t10 = (a0^2+a1^2)^-1 + fpneg(a[1]); // a = a0-i*a1 + fpmul_mont(a[0], t1[0], a[0]); + fpmul_mont(a[1], t1[0], a[1]); // a = (a0-i*a1)*(a0^2+a1^2)^-1 +} + + +void to_fp2mont(const f2elm_t a, f2elm_t mc) +{ // Conversion of a GF(p^2) element to Montgomery representation, + // mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2). 
+
+    to_mont(a[0], mc[0]);
+    to_mont(a[1], mc[1]);
+}
+
+
+void from_fp2mont(const f2elm_t ma, f2elm_t c)
+{ // Conversion of a GF(p^2) element from Montgomery representation to standard representation,
+  // c_i = ma_i*R^(-1) = a_i in GF(p^2).
+
+    from_mont(ma[0], c[0]);
+    from_mont(ma[1], c[1]);
+}
+
+
+void mp_shiftleft(digit_t *x, unsigned int shift, const unsigned int nwords)
+{ // Multiprecision left shift of the nwords-digit value x by "shift" bits, in place: whole digits are moved first, then the residual bit shift is applied.
+    unsigned int i, j = 0;
+
+    while (shift >= RADIX) {                  // ">=" keeps the residual shift strictly below RADIX; a residual of exactly RADIX would shift a digit by its full width (undefined in C, assuming digit_t is RADIX bits wide)
+        j += 1;
+        shift -= RADIX;
+    }
+
+    for (i = 0; i + j < nwords; i++)          // move digits up by j positions; written as i + j < nwords so j > nwords cannot wrap around
+        x[nwords - 1 - i] = x[nwords - 1 - i - j];
+    for (i = 0; i < j && i < nwords; i++)     // clear the vacated low digits (at most nwords of them)
+        x[i] = 0;
+    if (shift != 0) {
+        for (j = nwords-1; j > 0; j--)
+            SHIFTL(x[j], x[j-1], shift, x[j], RADIX);
+        x[0] <<= shift;
+    }
+}
+
+
+void mp_shiftr1(digit_t *x, const unsigned int nwords)
+{ // Multiprecision right shift by one.
+
+    for (unsigned int i = 0; i < nwords-1; i++) {
+        SHIFTR(x[i + 1], x[i], 1, x[i], RADIX);
+    }
+    x[nwords - 1] >>= 1;
+}
+
+
+void mp_shiftl1(digit_t *x, const unsigned int nwords)
+{ // Multiprecision left shift by one.
+ + for (int i = nwords-1; i > 0; i--) { + SHIFTL(x[i], x[i-1], 1, x[i], RADIX); + } + x[0] <<= 1; +} \ No newline at end of file diff --git a/SIKE_sw/src/internal.h b/SIKE_sw/src/internal.h new file mode 100644 index 0000000..7fd9da2 --- /dev/null +++ b/SIKE_sw/src/internal.h @@ -0,0 +1,116 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: internal header file for function definitions +*********************************************************************************************/ + +#ifndef INTERNAL_H +#define INTERNAL_H + + +/**************** Function prototypes ****************/ +/************* Multiprecision functions **************/ + +// Copy wordsize digits, c = a, where lng(a) = nwords +void copy_words(const digit_t* a, digit_t* c, const unsigned int nwords); + +// Compare two byte arrays in constant time +int8_t ct_compare(const uint8_t *a, const uint8_t *b, unsigned int len) ; + +// Conditional move in constant time +void ct_cmov(uint8_t *r, const uint8_t *a, unsigned int len, int8_t selector); + +// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit +unsigned int mp_add(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords); + +// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. 
Returns the borrow bit
+unsigned int mp_sub(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords);
+
+// 2x434-bit multiprecision subtraction followed by addition with p434*2^448, c = a-b+(p434*2^448) if a-b < 0, otherwise c=a-b
+void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Multiprecision left shift by "shift" bits, where lng(x) = nwords
+void mp_shiftleft(digit_t* x, unsigned int shift, const unsigned int nwords);
+
+// Multiprecision right shift by one
+void mp_shiftr1(digit_t* x, const unsigned int nwords);
+
+// Multiprecision left shift by one
+void mp_shiftl1(digit_t* x, const unsigned int nwords);
+
+// Digit multiplication, digit * digit -> 2-digit result
+void digit_x_digit(const digit_t a, const digit_t b, digit_t* c);
+
+// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords.
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords);
+
+/************ Montgomery reduction and conversion functions *************/
+
+// Montgomery reduction, c = a mod p
+void rdc_mont(digit_t* a, digit_t* c);
+
+// Conversion to Montgomery representation
+void to_mont(const digit_t* a, digit_t* mc);
+
+// Conversion from Montgomery representation to standard representation
+void from_mont(const digit_t* ma, digit_t* c);
+
+// Conversion of a GF(p^2) element to Montgomery representation
+void to_fp2mont(const f2elm_t a, f2elm_t mc);
+
+// Conversion of a GF(p^2) element from Montgomery representation to standard representation
+void from_fp2mont(const f2elm_t ma, f2elm_t c);
+
+// n-way Montgomery inversion
+void mont_n_way_inv(const f2elm_t* vec, const int n, f2elm_t* out);
+
+/************ Elliptic curve and isogeny functions *************/
+
+// Computes the j-invariant of a Montgomery curve with projective constant.
+void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv);
+
+// Simultaneous doubling and differential addition.
+void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t XPQ, const f2elm_t ZPQ, const f2elm_t A24); + +// Doubling of a Montgomery point in projective coordinates (X:Z). +void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24); + +// Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. +void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, const int e); + +// Differential addition. +void xADD(point_proj_t P, const point_proj_t Q, const f2elm_t xPQ); + +// Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. +void get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff); + +// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny. +void eval_4_isog(point_proj_t P, f2elm_t* coeff); + +// Tripling of a Montgomery point in projective coordinates (X:Z). +void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus); + +// Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. +void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, const int e); + +// Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. +void get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff); + +// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and a point P with coefficients given in coeff. +void eval_3_isog(point_proj_t Q, const f2elm_t* coeff); + +// 3-way simultaneous inversion +void inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3); + +// Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. 
+void get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A);
+
+// Isogeny tree traversal
+void TraverseTree(f2elm_t jinv, point_proj_t R, f2elm_t A24plus, f2elm_t C24, const unsigned int *strat, unsigned int lenstrat, bool keygen, point_proj_t PhiP, point_proj_t PhiQ, point_proj_t PhiR);
+
+
+#endif
diff --git a/SIKE_sw/src/random/random.c b/SIKE_sw/src/random/random.c
new file mode 100644
index 0000000..7f445b8
--- /dev/null
+++ b/SIKE_sw/src/random/random.c
@@ -0,0 +1,61 @@
+/********************************************************************************************
+* Hardware-based random number generation function
+*
+* It uses /dev/urandom in Linux and CNG's BCryptGenRandom function in Windows
+*********************************************************************************************/
+
+#include "random.h"
+#include <stdlib.h>
+#if defined(__WINDOWS__)
+    #include <windows.h>
+    #include <bcrypt.h>
+#elif defined(__LINUX__)
+    #include <unistd.h>
+    #include <fcntl.h>
+    static int lock = -1;    // cached /dev/urandom descriptor, opened on first use and never closed
+#endif
+
+#define passed 0
+#define failed 1
+
+
+static __inline void delay(unsigned int count)
+{ // Crude busy-wait used between retries. NOTE(review): an optimizing compiler may remove this empty loop - confirm.
+    while (count--) {}
+}
+
+
+int randombytes(unsigned char* random_array, unsigned long long nbytes)
+{ // Generation of "nbytes" of random values, written to random_array. Returns "passed" (0) on success and "failed" (1) on error.
+
+#if defined(__WINDOWS__)
+    if (nbytes > 0xFFFFFFFFULL || !BCRYPT_SUCCESS(BCryptGenRandom(NULL, random_array, (unsigned long)nbytes, BCRYPT_USE_SYSTEM_PREFERRED_RNG))) {    // refuse lengths the unsigned long argument cannot hold rather than silently truncating them
+        return failed;
+    }
+
+#elif defined(__LINUX__)
+    unsigned long long n = nbytes, count = 0;    // full-width counters: the former "int n = (int)nbytes" truncated or negated large requests and then reported success
+    ssize_t r;
+    if (lock == -1) {
+        do {                                     // retry until /dev/urandom can be opened
+            lock = open("/dev/urandom", O_RDONLY);
+            if (lock == -1) {
+                delay(0xFFFFF);
+            }
+        } while (lock == -1);
+    }
+
+    while (n > 0) {
+        do {                                     // retry failed reads; each request is capped so its size fits both size_t and ssize_t
+            r = read(lock, random_array+count, (size_t)(n < 0x7FFFFFFFULL ? n : 0x7FFFFFFFULL));
+            if (r == -1) {
+                delay(0xFFFF);
+            }
+        } while (r == -1);
+        count += (unsigned long long)r;
+        n -= (unsigned long long)r;
+    }
+#endif
+
+    return passed;
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/random/random.h b/SIKE_sw/src/random/random.h
new file mode 100644
index 0000000..8eb1118
--- /dev/null
+++ b/SIKE_sw/src/random/random.h
@@ -0,0 +1,9 @@
+#ifndef 
RANDOM_H +#define RANDOM_H + + +// Generate random bytes and output the result to random_array +int randombytes(unsigned char* random_array, unsigned long long nbytes); + + +#endif \ No newline at end of file diff --git a/SIKE_sw/src/sha3/fips202.c b/SIKE_sw/src/sha3/fips202.c new file mode 100644 index 0000000..8733a9e --- /dev/null +++ b/SIKE_sw/src/sha3/fips202.c @@ -0,0 +1,573 @@ +/******************************************************************************************** +* SHA3-derived functions: SHAKE and cSHAKE +* +* Based on the public domain implementation in crypto_hash/keccakc512/simple/ +* from http://bench.cr.yp.to/supercop.html by Ronny Van Keer +* and the public domain "TweetFips202" implementation from https://twitter.com/tweetfips202 +* by Gilles Van Assche, Daniel J. Bernstein, and Peter Schwabe +* +* See NIST Special Publication 800-185 for more information: +* http://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-185.pdf +* +*********************************************************************************************/ + +#include +#include +#include "fips202.h" + +#define NROUNDS 24 +#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset))) + + +static uint64_t load64(const unsigned char *x) +{ + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + + +static void store64(uint8_t *x, uint64_t u) +{ + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = (uint8_t)u; + u >>= 8; + } +} + + +static const uint64_t KeccakF_RoundConstants[NROUNDS] = +{ + (uint64_t)0x0000000000000001ULL, + (uint64_t)0x0000000000008082ULL, + (uint64_t)0x800000000000808aULL, + (uint64_t)0x8000000080008000ULL, + (uint64_t)0x000000000000808bULL, + (uint64_t)0x0000000080000001ULL, + (uint64_t)0x8000000080008081ULL, + (uint64_t)0x8000000000008009ULL, + (uint64_t)0x000000000000008aULL, + (uint64_t)0x0000000000000088ULL, + (uint64_t)0x0000000080008009ULL, + (uint64_t)0x000000008000000aULL, + 
(uint64_t)0x000000008000808bULL, + (uint64_t)0x800000000000008bULL, + (uint64_t)0x8000000000008089ULL, + (uint64_t)0x8000000000008003ULL, + (uint64_t)0x8000000000008002ULL, + (uint64_t)0x8000000000000080ULL, + (uint64_t)0x000000000000800aULL, + (uint64_t)0x800000008000000aULL, + (uint64_t)0x8000000080008081ULL, + (uint64_t)0x8000000000008080ULL, + (uint64_t)0x0000000080000001ULL, + (uint64_t)0x8000000080008008ULL +}; + + +void KeccakF1600_StatePermute(uint64_t * state) +{ + int round; + + uint64_t Aba, Abe, Abi, Abo, Abu; + uint64_t Aga, Age, Agi, Ago, Agu; + uint64_t Aka, Ake, Aki, Ako, Aku; + uint64_t Ama, Ame, Ami, Amo, Amu; + uint64_t Asa, Ase, Asi, Aso, Asu; + uint64_t BCa, BCe, BCi, BCo, BCu; + uint64_t Da, De, Di, Do, Du; + uint64_t Eba, Ebe, Ebi, Ebo, Ebu; + uint64_t Ega, Ege, Egi, Ego, Egu; + uint64_t Eka, Eke, Eki, Eko, Eku; + uint64_t Ema, Eme, Emi, Emo, Emu; + uint64_t Esa, Ese, Esi, Eso, Esu; + + //copyFromState(A, state) + Aba = state[ 0]; + Abe = state[ 1]; + Abi = state[ 2]; + Abo = state[ 3]; + Abu = state[ 4]; + Aga = state[ 5]; + Age = state[ 6]; + Agi = state[ 7]; + Ago = state[ 8]; + Agu = state[ 9]; + Aka = state[10]; + Ake = state[11]; + Aki = state[12]; + Ako = state[13]; + Aku = state[14]; + Ama = state[15]; + Ame = state[16]; + Ami = state[17]; + Amo = state[18]; + Amu = state[19]; + Asa = state[20]; + Ase = state[21]; + Asi = state[22]; + Aso = state[23]; + Asu = state[24]; + + for( round = 0; round < NROUNDS; round += 2 ) + { + // prepareTheta + BCa = Aba^Aga^Aka^Ama^Asa; + BCe = Abe^Age^Ake^Ame^Ase; + BCi = Abi^Agi^Aki^Ami^Asi; + BCo = Abo^Ago^Ako^Amo^Aso; + BCu = Abu^Agu^Aku^Amu^Asu; + + //thetaRhoPiChiIotaPrepareTheta(round , A, E) + Da = BCu^ROL(BCe, 1); + De = BCa^ROL(BCi, 1); + Di = BCe^ROL(BCo, 1); + Do = BCi^ROL(BCu, 1); + Du = BCo^ROL(BCa, 1); + + Aba ^= Da; + BCa = Aba; + Age ^= De; + BCe = ROL(Age, 44); + Aki ^= Di; + BCi = ROL(Aki, 43); + Amo ^= Do; + BCo = ROL(Amo, 21); + Asu ^= Du; + BCu = ROL(Asu, 14); + Eba = BCa 
^((~BCe)& BCi ); + Eba ^= (uint64_t)KeccakF_RoundConstants[round]; + Ebe = BCe ^((~BCi)& BCo ); + Ebi = BCi ^((~BCo)& BCu ); + Ebo = BCo ^((~BCu)& BCa ); + Ebu = BCu ^((~BCa)& BCe ); + + Abo ^= Do; + BCa = ROL(Abo, 28); + Agu ^= Du; + BCe = ROL(Agu, 20); + Aka ^= Da; + BCi = ROL(Aka, 3); + Ame ^= De; + BCo = ROL(Ame, 45); + Asi ^= Di; + BCu = ROL(Asi, 61); + Ega = BCa ^((~BCe)& BCi ); + Ege = BCe ^((~BCi)& BCo ); + Egi = BCi ^((~BCo)& BCu ); + Ego = BCo ^((~BCu)& BCa ); + Egu = BCu ^((~BCa)& BCe ); + + Abe ^= De; + BCa = ROL(Abe, 1); + Agi ^= Di; + BCe = ROL(Agi, 6); + Ako ^= Do; + BCi = ROL(Ako, 25); + Amu ^= Du; + BCo = ROL(Amu, 8); + Asa ^= Da; + BCu = ROL(Asa, 18); + Eka = BCa ^((~BCe)& BCi ); + Eke = BCe ^((~BCi)& BCo ); + Eki = BCi ^((~BCo)& BCu ); + Eko = BCo ^((~BCu)& BCa ); + Eku = BCu ^((~BCa)& BCe ); + + Abu ^= Du; + BCa = ROL(Abu, 27); + Aga ^= Da; + BCe = ROL(Aga, 36); + Ake ^= De; + BCi = ROL(Ake, 10); + Ami ^= Di; + BCo = ROL(Ami, 15); + Aso ^= Do; + BCu = ROL(Aso, 56); + Ema = BCa ^((~BCe)& BCi ); + Eme = BCe ^((~BCi)& BCo ); + Emi = BCi ^((~BCo)& BCu ); + Emo = BCo ^((~BCu)& BCa ); + Emu = BCu ^((~BCa)& BCe ); + + Abi ^= Di; + BCa = ROL(Abi, 62); + Ago ^= Do; + BCe = ROL(Ago, 55); + Aku ^= Du; + BCi = ROL(Aku, 39); + Ama ^= Da; + BCo = ROL(Ama, 41); + Ase ^= De; + BCu = ROL(Ase, 2); + Esa = BCa ^((~BCe)& BCi ); + Ese = BCe ^((~BCi)& BCo ); + Esi = BCi ^((~BCo)& BCu ); + Eso = BCo ^((~BCu)& BCa ); + Esu = BCu ^((~BCa)& BCe ); + + // prepareTheta + BCa = Eba^Ega^Eka^Ema^Esa; + BCe = Ebe^Ege^Eke^Eme^Ese; + BCi = Ebi^Egi^Eki^Emi^Esi; + BCo = Ebo^Ego^Eko^Emo^Eso; + BCu = Ebu^Egu^Eku^Emu^Esu; + + //thetaRhoPiChiIotaPrepareTheta(round+1, E, A) + Da = BCu^ROL(BCe, 1); + De = BCa^ROL(BCi, 1); + Di = BCe^ROL(BCo, 1); + Do = BCi^ROL(BCu, 1); + Du = BCo^ROL(BCa, 1); + + Eba ^= Da; + BCa = Eba; + Ege ^= De; + BCe = ROL(Ege, 44); + Eki ^= Di; + BCi = ROL(Eki, 43); + Emo ^= Do; + BCo = ROL(Emo, 21); + Esu ^= Du; + BCu = ROL(Esu, 14); + Aba = BCa ^((~BCe)& BCi ); 
+ Aba ^= (uint64_t)KeccakF_RoundConstants[round+1]; + Abe = BCe ^((~BCi)& BCo ); + Abi = BCi ^((~BCo)& BCu ); + Abo = BCo ^((~BCu)& BCa ); + Abu = BCu ^((~BCa)& BCe ); + + Ebo ^= Do; + BCa = ROL(Ebo, 28); + Egu ^= Du; + BCe = ROL(Egu, 20); + Eka ^= Da; + BCi = ROL(Eka, 3); + Eme ^= De; + BCo = ROL(Eme, 45); + Esi ^= Di; + BCu = ROL(Esi, 61); + Aga = BCa ^((~BCe)& BCi ); + Age = BCe ^((~BCi)& BCo ); + Agi = BCi ^((~BCo)& BCu ); + Ago = BCo ^((~BCu)& BCa ); + Agu = BCu ^((~BCa)& BCe ); + + Ebe ^= De; + BCa = ROL(Ebe, 1); + Egi ^= Di; + BCe = ROL(Egi, 6); + Eko ^= Do; + BCi = ROL(Eko, 25); + Emu ^= Du; + BCo = ROL(Emu, 8); + Esa ^= Da; + BCu = ROL(Esa, 18); + Aka = BCa ^((~BCe)& BCi ); + Ake = BCe ^((~BCi)& BCo ); + Aki = BCi ^((~BCo)& BCu ); + Ako = BCo ^((~BCu)& BCa ); + Aku = BCu ^((~BCa)& BCe ); + + Ebu ^= Du; + BCa = ROL(Ebu, 27); + Ega ^= Da; + BCe = ROL(Ega, 36); + Eke ^= De; + BCi = ROL(Eke, 10); + Emi ^= Di; + BCo = ROL(Emi, 15); + Eso ^= Do; + BCu = ROL(Eso, 56); + Ama = BCa ^((~BCe)& BCi ); + Ame = BCe ^((~BCi)& BCo ); + Ami = BCi ^((~BCo)& BCu ); + Amo = BCo ^((~BCu)& BCa ); + Amu = BCu ^((~BCa)& BCe ); + + Ebi ^= Di; + BCa = ROL(Ebi, 62); + Ego ^= Do; + BCe = ROL(Ego, 55); + Eku ^= Du; + BCi = ROL(Eku, 39); + Ema ^= Da; + BCo = ROL(Ema, 41); + Ese ^= De; + BCu = ROL(Ese, 2); + Asa = BCa ^((~BCe)& BCi ); + Ase = BCe ^((~BCi)& BCo ); + Asi = BCi ^((~BCo)& BCu ); + Aso = BCo ^((~BCu)& BCa ); + Asu = BCu ^((~BCa)& BCe ); + } + + //copyToState(state, A) + state[ 0] = Aba; + state[ 1] = Abe; + state[ 2] = Abi; + state[ 3] = Abo; + state[ 4] = Abu; + state[ 5] = Aga; + state[ 6] = Age; + state[ 7] = Agi; + state[ 8] = Ago; + state[ 9] = Agu; + state[10] = Aka; + state[11] = Ake; + state[12] = Aki; + state[13] = Ako; + state[14] = Aku; + state[15] = Ama; + state[16] = Ame; + state[17] = Ami; + state[18] = Amo; + state[19] = Amu; + state[20] = Asa; + state[21] = Ase; + state[22] = Asi; + state[23] = Aso; + state[24] = Asu; + + #undef round +} + +#include +#define 
MIN(a, b) ((a) < (b) ? (a) : (b))
+
+
+static void keccak_absorb(uint64_t *s, unsigned int r, const unsigned char *m, unsigned long long int mlen, unsigned char p)
+{ // XORs mlen bytes of m into state s in r-byte blocks (permuting after each full block), then XORs in the padded last block with suffix byte p; no permutation follows that last block.
+  unsigned long long i;
+  unsigned char t[200];
+
+  while (mlen >= r)
+  {
+    for (i = 0; i < r / 8; ++i)
+      s[i] ^= load64(m + 8 * i);
+
+    KeccakF1600_StatePermute(s);
+    mlen -= r;
+    m += r;
+  }
+
+  for (i = 0; i < r; ++i)
+    t[i] = 0;
+  for (i = 0; i < mlen; ++i)
+    t[i] = m[i];
+  t[i] = p;            // i == mlen here: the suffix byte directly follows the message tail
+  t[r - 1] |= 128;     // top bit of the block's last byte closes the padding
+  for (i = 0; i < r / 8; ++i)
+    s[i] ^= load64(t + 8 * i);
+}
+
+
+static void keccak_squeezeblocks(unsigned char *h, unsigned long long int nblocks, uint64_t *s, unsigned int r)
+{ // Writes nblocks blocks of r bytes to h; the state is permuted before each block is stored.
+  unsigned int i;
+
+  while(nblocks > 0)
+  {
+    KeccakF1600_StatePermute(s);
+    for (i = 0; i < (r>>3); i++)
+    {
+      store64(h+8*i, s[i]);
+    }
+    h += r;
+    nblocks--;
+  }
+}
+
+
+/********** SHAKE128 ***********/
+
+void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen)
+{ // Absorbs input into s as given (s is not cleared here), using rate SHAKE128_RATE and suffix 0x1F.
+  keccak_absorb(s, SHAKE128_RATE, input, inputByteLen, 0x1F);
+}
+
+
+void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s)
+{ // Squeezes nblocks blocks of SHAKE128_RATE bytes each into output.
+  keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE);
+}
+
+
+void shake128(unsigned char *output, unsigned long long outlen, const unsigned char *input, unsigned long long inlen)
+{ // One-shot SHAKE128: absorbs inlen bytes of input and writes outlen bytes to output.
+  uint64_t s[25] = {0};
+  unsigned char t[SHAKE128_RATE];
+  unsigned long long nblocks = outlen/SHAKE128_RATE;
+  size_t i;
+
+  /* Absorb input */
+  keccak_absorb(s, SHAKE128_RATE, input, inlen, 0x1F);
+
+  /* Squeeze output */
+  keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE);
+
+  output += nblocks*SHAKE128_RATE;
+  outlen -= nblocks*SHAKE128_RATE;
+
+  if (outlen)          // trailing partial block goes through a temporary buffer
+  {
+    keccak_squeezeblocks(t, 1, s, SHAKE128_RATE);
+    for (i = 0; i < outlen; i++)
+      output[i] = t[i];
+  }
+}
+
+
+/********** cSHAKE128 ***********/
+
+void cshake128_simple_absorb(uint64_t s[25], uint16_t cstm, const unsigned char *in, unsigned long long inlen)
+{ // Clears s, absorbs the 8-byte customization header built from the 16-bit cstm, permutes, then absorbs inlen bytes of in with suffix 0x04.
+  unsigned char sep[8];   // header is assembled byte-wise and loaded with load64, so lane 0 does not depend on host byte order
+  unsigned int i;
+
+  for (i = 0; i < 25; i++)
+    s[i] = 0;
+
+  /* Absorb customization (domain-separation) string */
+  sep[0] = 0x01;
+  sep[1] = 0xa8;       // 0xa8 = 168 = SHAKE128_RATE
+  sep[2] = 0x01;
+  sep[3] = 0x00;
+  sep[4] = 0x01;
+  sep[5] = 16; // fixed bitlen of cstm
+  sep[6] = cstm & 0xff;
+  sep[7] = cstm >> 8;
+  s[0] = load64(sep);  // same little-endian lane encoding keccak_absorb uses; the state was zeroed above
+  KeccakF1600_StatePermute(s);
+
+  /* Absorb input */
+  keccak_absorb(s, SHAKE128_RATE, in, inlen, 0x04);
+}
+
+
+void cshake128_simple_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s)
+{ // Squeezes nblocks blocks of SHAKE128_RATE bytes each into output.
+  keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE);
+}
+
+
+void cshake128_simple(unsigned char *output, unsigned long long outlen, uint16_t cstm, const unsigned char *in, unsigned long long inlen)
+{ // One-shot cSHAKE128 with a 16-bit customization value: writes outlen bytes to output.
+  uint64_t s[25];
+  unsigned char t[SHAKE128_RATE];
+  unsigned int i;
+
+  cshake128_simple_absorb(s, cstm, in, inlen);
+
+  /* Squeeze output */
+  keccak_squeezeblocks(output, outlen/SHAKE128_RATE, s, SHAKE128_RATE);
+  output += (outlen/SHAKE128_RATE)*SHAKE128_RATE;
+
+  if (outlen%SHAKE128_RATE)
+  {
+    keccak_squeezeblocks(t, 1, s, SHAKE128_RATE);
+    for (i = 0; i < outlen%SHAKE128_RATE; i++)
+      output[i] = t[i];
+  }
+}
+
+
+/********** SHAKE256 ***********/
+
+void shake256_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen)
+{ // Absorbs input into s as given (s is not cleared here), using rate SHAKE256_RATE and suffix 0x1F.
+  keccak_absorb(s, SHAKE256_RATE, input, inputByteLen, 0x1F);
+}
+
+
+void shake256_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s)
+{ // Squeezes nblocks blocks of SHAKE256_RATE bytes each into output.
+  keccak_squeezeblocks(output, nblocks, s, SHAKE256_RATE);
+}
+
+
+void shake256(unsigned char *output, unsigned long long outlen, const unsigned char *input, unsigned long long inlen)
+{ // One-shot SHAKE256: absorbs inlen bytes of input and writes outlen bytes to output.
+  uint64_t s[25];
+  unsigned char t[SHAKE256_RATE];
+  unsigned long long nblocks = outlen/SHAKE256_RATE;
+  size_t i;
+
+  for (i = 0; i < 25; ++i)
+    s[i] = 0;
+
+  /* Absorb input */
+  keccak_absorb(s, SHAKE256_RATE, input, inlen, 0x1F);
+
+  /* Squeeze output */
+  keccak_squeezeblocks(output, nblocks, s, SHAKE256_RATE);
+
+  output += nblocks*SHAKE256_RATE;
+  outlen -= nblocks*SHAKE256_RATE;
+
+  if (outlen)          // trailing partial block goes through a temporary buffer
+  {
+    keccak_squeezeblocks(t, 1, s, SHAKE256_RATE);
+    for (i = 0; i < outlen; i++)
+      output[i] = t[i];
+  }
+}
+
+
+/********** cSHAKE256 ***********/
+
+void cshake256_simple_absorb(uint64_t s[25], uint16_t cstm, const unsigned char *in, unsigned long long inlen)
+{ // Clears s, absorbs the 8-byte customization header built from the 16-bit cstm, permutes, then absorbs inlen bytes of in with suffix 0x04.
+  unsigned char sep[8];   // header is assembled byte-wise and loaded with load64, so lane 0 does not depend on host byte order
+  unsigned int i;
+
+  for (i = 0; i < 25; i++)
+    s[i] = 0;
+
+  /* Absorb customization (domain-separation) string */
+  sep[0] = 0x01;
+  sep[1] = 0x88;       // 0x88 = 136 = SHAKE256_RATE
+  sep[2] = 0x01;
+  sep[3] = 0x00;
+  sep[4] = 0x01;
+  sep[5] = 16; // fixed bitlen of cstm
+  sep[6] = cstm & 0xff;
+  sep[7] = cstm >> 8;
+  s[0] = load64(sep);  // same little-endian lane encoding keccak_absorb uses; the state was zeroed above
+  KeccakF1600_StatePermute(s);
+
+  /* Absorb input */
+  keccak_absorb(s, SHAKE256_RATE, in, inlen, 0x04);
+}
+
+
+void cshake256_simple_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s)
+{ // Squeezes nblocks blocks of SHAKE256_RATE bytes each into output.
+  keccak_squeezeblocks(output, nblocks, s, SHAKE256_RATE);
+}
+
+
+void cshake256_simple(unsigned char *output, unsigned long long outlen, uint16_t cstm, const unsigned char *in, unsigned long long inlen)
+{ // One-shot cSHAKE256 with a 16-bit customization value: writes outlen bytes to output.
+  uint64_t s[25];
+  unsigned char t[SHAKE256_RATE];
+  unsigned int i;
+
+  cshake256_simple_absorb(s, cstm, in, inlen);
+
+  /* Squeeze output */
+  keccak_squeezeblocks(output, outlen/SHAKE256_RATE, s, SHAKE256_RATE);
+  output += (outlen/SHAKE256_RATE)*SHAKE256_RATE;
+
+  if(outlen%SHAKE256_RATE)
+  {
+    keccak_squeezeblocks(t, 1, s, SHAKE256_RATE);
+    for (i = 0; i < outlen%SHAKE256_RATE; i++)
+      output[i] = t[i];
+  }
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/sha3/fips202.h b/SIKE_sw/src/sha3/fips202.h
new file mode 100644
index 0000000..55b400a
--- /dev/null
+++ b/SIKE_sw/src/sha3/fips202.h
@@ -0,0 +1,27 @@
+#ifndef FIPS202_H
+#define FIPS202_H
+
+#include <stdint.h>
+
+
+#define SHAKE128_RATE 168
+#define SHAKE256_RATE 136
+
+void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen);
+void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s);
+void 
shake128(unsigned char *output, unsigned long long outlen, const unsigned char *input, unsigned long long inlen);
+
+void cshake128_simple_absorb(uint64_t *s, uint16_t cstm, const unsigned char *in, unsigned long long inlen);
+void cshake128_simple_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s);
+void cshake128_simple(unsigned char *output, unsigned long long outlen, uint16_t cstm, const unsigned char *in, unsigned long long inlen);
+
+void shake256_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen);
+void shake256_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s);
+void shake256(unsigned char *output, unsigned long long outlen, const unsigned char *input, unsigned long long inlen);
+
+void cshake256_simple_absorb(uint64_t *s, uint16_t cstm, const unsigned char *in, unsigned long long inlen);
+void cshake256_simple_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s);
+void cshake256_simple(unsigned char *output, unsigned long long outlen, uint16_t cstm, const unsigned char *in, unsigned long long inlen);
+
+
+#endif
diff --git a/SIKE_sw/src/sidh.c b/SIKE_sw/src/sidh.c
new file mode 100644
index 0000000..2258cac
--- /dev/null
+++ b/SIKE_sw/src/sidh.c
@@ -0,0 +1,263 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: Ephemeral supersingular isogeny Diffie-Hellman key exchange (SIDH)
+*********************************************************************************************/
+
+#include "random/random.h"
+
+
+static void init_basis(digit_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR)
+{ // Initialization of basis points
+    fpcopy(gen, XP[0]);                        // gen is read as six consecutive NWORDS_FIELD-word blocks: XP[0], XP[1], XQ[0], XQ[1], XR[0], XR[1]
+    fpcopy(gen + NWORDS_FIELD, XP[1]);
+    fpcopy(gen + 2*NWORDS_FIELD, XQ[0]);
+    fpcopy(gen + 3*NWORDS_FIELD, XQ[1]);
+    fpcopy(gen + 4*NWORDS_FIELD, XR[0]);
+    fpcopy(gen + 5*NWORDS_FIELD, XR[1]);
+}
+
+
+void random_mod_order_A(unsigned char* random_digits)
+{ // Generation of Alice's secret key
+  // Outputs random value in [0, 2^eA - 1]
+
+    randombytes(random_digits, SECRETKEY_A_BYTES);    // random_digits must hold SECRETKEY_A_BYTES bytes
+    random_digits[SECRETKEY_A_BYTES-1] &= MASK_ALICE;    // Masking last byte
+}
+
+
+void random_mod_order_B(unsigned char* random_digits)
+{ // Generation of Bob's secret key
+  // Outputs random value in [0, 2^Floor(Log(2, oB)) - 1]
+
+    randombytes(random_digits, SECRETKEY_B_BYTES);    // random_digits must hold SECRETKEY_B_BYTES bytes
+    random_digits[SECRETKEY_B_BYTES-1] &= MASK_BOB;     // Masking last byte
+}
+
+
+int EphemeralKeyGeneration_A(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA)
+{ // Alice's ephemeral public key generation
+  // Input:  a private key PrivateKeyA in the range [0, 2^eA - 1].
+  // Output: the public key PublicKeyA consisting of 3 elements in GF(p^2) which are encoded by removing leading 0 bytes.
+    point_proj_t R, phiP = {0}, phiQ = {0}, phiR = {0};
+    f2elm_t XPA, XQA, XRA, A24plus = {0}, C24 = {0}, A = {0}, unused;    // "unused" is passed as the first TraverseTree argument, the slot the secret-agreement routine uses for jinv
+    digit_t SecretKeyA[NWORDS_ORDER] = {0};
+
+    // Initialize basis points
+    init_basis((digit_t*)A_gen, XPA, XQA, XRA);
+    init_basis((digit_t*)B_gen, phiP->X, phiQ->X, phiR->X);    // Bob's basis x-coordinates; these are the points whose images are encoded into PublicKeyA
+    fpcopy((digit_t*)&Montgomery_one, (phiP->Z)[0]);            // Z = 1 (real part only; the rest is zero from the {0} initializers)
+    fpcopy((digit_t*)&Montgomery_one, (phiQ->Z)[0]);
+    fpcopy((digit_t*)&Montgomery_one, (phiR->Z)[0]);
+
+    // Initialize constants: A24plus = A+2C, C24 = 4C, where A=6, C=1
+    fpcopy((digit_t*)&Montgomery_one, A24plus[0]);    // A24plus = 1
+    fp2add(A24plus, A24plus, A24plus);                // A24plus = 2 (assuming fp2add(x, y, z) sets z = x + y, consistent with the comment above)
+    fp2add(A24plus, A24plus, C24);                    // C24 = 4
+    fp2add(A24plus, C24, A);                          // A = 6
+    fp2add(C24, C24, A24plus);                        // A24plus = 8
+
+    // Retrieve kernel point
+    decode_to_digits(PrivateKeyA, SecretKeyA, SECRETKEY_A_BYTES, NWORDS_ORDER);
+    LADDER3PT(XPA, XQA, XRA, SecretKeyA, ALICE, R, A);
+
+#if (OALICE_BITS % 2 == 1)
+    point_proj_t S;
+
+    xDBLe(R, S, A24plus, C24, (int)(OALICE_BITS-1));    // odd exponent: one leading 2-isogeny step before the tree traversal
+    get_2_isog(S, A24plus, C24);
+    eval_2_isog(phiP, S);
+    eval_2_isog(phiQ, S);
+    eval_2_isog(phiR, S);
+    eval_2_isog(R, S);
+#endif
+
+    // Traverse tree
+    TraverseTree(unused, R, A24plus, C24, strat_Alice, (unsigned int)MAX_Alice, true, phiP, phiQ, phiR);    // NOTE(review): phi*->X is encoded directly afterwards, so normalization presumably happens inside TraverseTree when the flag is true -- confirm
+    // Format public key
+    fp2_encode(phiP->X, PublicKeyA);
+    fp2_encode(phiQ->X, PublicKeyA + FP2_ENCODED_BYTES);
+    fp2_encode(phiR->X, PublicKeyA + 2*FP2_ENCODED_BYTES);
+
+    return 0;
+}
+
+
+int EphemeralKeyGeneration_B(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB)
+{ // Bob's ephemeral public key generation
+  // Input:  a private key PrivateKeyB in the range [0, 2^Floor(Log(2,oB)) - 1].
+  // Output: the public key PublicKeyB consisting of 3 elements in GF(p^2) which are encoded by removing leading 0 bytes.
+    point_proj_t R, phiP = {0}, phiQ = {0}, phiR = {0}, pts[MAX_INT_POINTS_BOB];
+    f2elm_t XPB, XQB, XRB, coeff[3], A24plus = {0}, A24minus = {0}, A = {0};
+    unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;    // pts/pts_index form a stack of saved points; ii walks strat_Bob
+    digit_t SecretKeyB[NWORDS_ORDER] = {0};
+
+    // Initialize basis points
+    init_basis((digit_t*)B_gen, XPB, XQB, XRB);
+    init_basis((digit_t*)A_gen, phiP->X, phiQ->X, phiR->X);    // Alice's basis x-coordinates; these are the points whose images are encoded into PublicKeyB
+    fpcopy((digit_t*)&Montgomery_one, (phiP->Z)[0]);
+    fpcopy((digit_t*)&Montgomery_one, (phiQ->Z)[0]);
+    fpcopy((digit_t*)&Montgomery_one, (phiR->Z)[0]);
+
+    // Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1
+    fpcopy((digit_t*)&Montgomery_one, A24plus[0]);    // A24plus = 1
+    fp2add(A24plus, A24plus, A24plus);                // A24plus = 2
+    fp2add(A24plus, A24plus, A24minus);               // A24minus = 4
+    fp2add(A24plus, A24minus, A);                     // A = 6
+    fp2add(A24minus, A24minus, A24plus);              // A24plus = 8
+
+    // Retrieve kernel point
+    decode_to_digits(PrivateKeyB, SecretKeyB, SECRETKEY_B_BYTES, NWORDS_ORDER);
+    LADDER3PT(XPB, XQB, XRB, SecretKeyB, BOB, R, A);
+
+    // Traverse tree
+    index = 0;
+    for (row = 1; row < MAX_Bob; row++) {
+        while (index < MAX_Bob-row) {
+            fp2copy(R->X, pts[npts]->X);              // push the current point before tripling further
+            fp2copy(R->Z, pts[npts]->Z);
+            pts_index[npts++] = index;
+            m = strat_Bob[ii++];
+            xTPLe(R, R, A24minus, A24plus, (int)m);
+            index += m;
+        }
+        get_3_isog(R, A24minus, A24plus, coeff);
+
+        for (i = 0; i < npts; i++) {
+            eval_3_isog(pts[i], coeff);
+        }
+        eval_3_isog(phiP, coeff);
+        eval_3_isog(phiQ, coeff);
+        eval_3_isog(phiR, coeff);
+
+        fp2copy(pts[npts-1]->X, R->X);                // pop the most recently saved point
+        fp2copy(pts[npts-1]->Z, R->Z);
+        index = pts_index[npts-1];
+        npts -= 1;
+    }
+
+    get_3_isog(R, A24minus, A24plus, coeff);
+    eval_3_isog(phiP, coeff);
+    eval_3_isog(phiQ, coeff);
+    eval_3_isog(phiR, coeff);
+
+    inv_3_way(phiP->Z, phiQ->Z, phiR->Z);             // NOTE(review): presumably inverts the three Z values in place -- confirm
+    fp2mul_mont(phiP->X, phiP->Z, phiP->X);           // X <- X * Z before encoding
+    fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
+    fp2mul_mont(phiR->X, phiR->Z, phiR->X);
+
+    // Format public key
+    fp2_encode(phiP->X, PublicKeyB);
+    fp2_encode(phiQ->X, PublicKeyB + FP2_ENCODED_BYTES);
+    fp2_encode(phiR->X, PublicKeyB + 2*FP2_ENCODED_BYTES);
+
+    return 0;
+}
+
+
+int EphemeralSecretAgreement_A(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA)
+{ // Alice's ephemeral shared secret computation
+  // It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB
+  // Inputs: Alice's PrivateKeyA is an integer in the range [0, oA-1].
+  //         Bob's PublicKeyB consists of 3 elements in GF(p^2) encoded by removing leading 0 bytes.
+  // Output: a shared secret SharedSecretA that consists of one element in GF(p^2) encoded by removing leading 0 bytes.
+    point_proj_t R, unused1, unused2, unused3;        // the unused* points only fill the last three TraverseTree arguments
+    f2elm_t PKB[3], jinv, A24plus = {0}, C24 = {0}, A = {0};
+    digit_t SecretKeyA[NWORDS_ORDER] = {0};
+
+    // Initialize images of Bob's basis
+    fp2_decode(PublicKeyB, PKB[0]);
+    fp2_decode(PublicKeyB + FP2_ENCODED_BYTES, PKB[1]);
+    fp2_decode(PublicKeyB + 2*FP2_ENCODED_BYTES, PKB[2]);
+
+    // Initialize constants: A24plus = A+2C, C24 = 4C, where C=1
+    get_A(PKB[0], PKB[1], PKB[2], A);
+    fpadd((digit_t*)&Montgomery_one, (digit_t*)&Montgomery_one, C24[0]);    // C24 = 2
+    fp2add(A, C24, A24plus);                                               // A24plus = A + 2
+    fpadd(C24[0], C24[0], C24[0]);                                         // C24 = 4
+
+    // Retrieve kernel point
+    decode_to_digits(PrivateKeyA, SecretKeyA, SECRETKEY_A_BYTES, NWORDS_ORDER);
+    LADDER3PT(PKB[0], PKB[1], PKB[2], SecretKeyA, ALICE, R, A);
+
+#if (OALICE_BITS % 2 == 1)
+    point_proj_t S;
+
+    xDBLe(R, S, A24plus, C24, (int)(OALICE_BITS-1));
+    get_2_isog(S, A24plus, C24);
+    eval_2_isog(R, S);
+#endif
+
+    // Traverse tree
+    TraverseTree(jinv, R, A24plus, C24, strat_Alice, (unsigned int)MAX_Alice, false, unused1, unused2, unused3);
+    // Format shared secret
+    fp2_encode(jinv, SharedSecretA);
+
+    return 0;
+}
+
+
+int EphemeralSecretAgreement_B(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB)
+{ // Bob's ephemeral shared secret computation
+  // It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA
+  // Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,oB)) - 1].
+  //         Alice's PublicKeyA consists of 3 elements in GF(p^2) encoded by removing leading 0 bytes.
+  // Output: a shared secret SharedSecretB that consists of one element in GF(p^2) encoded by removing leading 0 bytes.
+    point_proj_t R, pts[MAX_INT_POINTS_BOB];
+    f2elm_t coeff[3], PKB[3], jinv;                   // despite the name, PKB holds Alice's decoded public key in this function
+    f2elm_t A24plus = {0}, A24minus = {0}, A = {0};
+    unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;
+    digit_t SecretKeyB[NWORDS_ORDER] = {0};
+
+    // Initialize images of Alice's basis
+    fp2_decode(PublicKeyA, PKB[0]);
+    fp2_decode(PublicKeyA + FP2_ENCODED_BYTES, PKB[1]);
+    fp2_decode(PublicKeyA + 2*FP2_ENCODED_BYTES, PKB[2]);
+
+    // Initialize constants: A24plus = A+2C, A24minus = A-2C, where C=1
+    get_A(PKB[0], PKB[1], PKB[2], A);
+    fpadd((digit_t*)&Montgomery_one, (digit_t*)&Montgomery_one, A24minus[0]);    // A24minus = 2 (temporary)
+    fp2add(A, A24minus, A24plus);                                               // A24plus = A + 2
+    fp2sub(A, A24minus, A24minus);                                              // A24minus = A - 2
+
+    // Retrieve kernel point
+    decode_to_digits(PrivateKeyB, SecretKeyB, SECRETKEY_B_BYTES, NWORDS_ORDER);
+    LADDER3PT(PKB[0], PKB[1], PKB[2], SecretKeyB, BOB, R, A);
+
+    // Traverse tree
+    index = 0;
+    for (row = 1; row < MAX_Bob; row++) {
+        while (index < MAX_Bob-row) {
+            fp2copy(R->X, pts[npts]->X);              // push the current point before tripling further
+            fp2copy(R->Z, pts[npts]->Z);
+            pts_index[npts++] = index;
+            m = strat_Bob[ii++];
+            xTPLe(R, R, A24minus, A24plus, (int)m);
+            index += m;
+        }
+        get_3_isog(R, A24minus, A24plus, coeff);
+
+        for (i = 0; i < npts; i++) {
+            eval_3_isog(pts[i], coeff);
+        }
+
+        fp2copy(pts[npts-1]->X, R->X);                // pop the most recently saved point
+        fp2copy(pts[npts-1]->Z, R->Z);
+        index = pts_index[npts-1];
+        npts -= 1;
+    }
+
+    get_3_isog(R, A24minus, A24plus, coeff);
+    fp2add(A24plus, A24minus, A);                     // A = A24plus + A24minus
+    fp2add(A, A, A);                                  // A = 2*(A24plus + A24minus)
+    fp2sub(A24plus, A24minus, A24plus);               // A24plus = A24plus - A24minus, passed to j_inv as the second (C) argument
+    j_inv(A, A24plus, jinv);
+    fp2_encode(jinv, SharedSecretB);    // Format shared secret
+
+    return 0;
+}
\ No newline at end of file
diff --git
a/SIKE_sw/src/sike.c b/SIKE_sw/src/sike.c new file mode 100644 index 0000000..e99aa74 --- /dev/null +++ b/SIKE_sw/src/sike.c @@ -0,0 +1,98 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: supersingular isogeny key encapsulation (SIKE) protocol +*********************************************************************************************/ + +#include +#include "sha3/fips202.h" + + +int crypto_kem_keypair(unsigned char *pk, unsigned char *sk) +{ // SIKE's key generation + // Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes) + // public key pk (CRYPTO_PUBLICKEYBYTES bytes) + + // Generate lower portion of secret key sk <- s||SK + randombytes(sk, MSG_BYTES); + random_mod_order_B(sk + MSG_BYTES); + + // Generate public key pk + EphemeralKeyGeneration_B(sk + MSG_BYTES, pk); + + // Append public key pk to secret key sk + memcpy(&sk[MSG_BYTES + SECRETKEY_B_BYTES], pk, CRYPTO_PUBLICKEYBYTES); + + return 0; +} + + +int crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) +{ // SIKE's encapsulation + // Input: public key pk (CRYPTO_PUBLICKEYBYTES bytes) + // Outputs: shared secret ss (CRYPTO_BYTES bytes) + // ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes) + unsigned char ephemeralsk[SECRETKEY_A_BYTES]; + unsigned char jinvariant[FP2_ENCODED_BYTES]; + unsigned char h[MSG_BYTES]; + unsigned char temp[CRYPTO_CIPHERTEXTBYTES+MSG_BYTES]; + + // Generate ephemeralsk <- G(m||pk) mod oA + randombytes(temp, MSG_BYTES); + memcpy(&temp[MSG_BYTES], pk, CRYPTO_PUBLICKEYBYTES); + shake256(ephemeralsk, SECRETKEY_A_BYTES, temp, CRYPTO_PUBLICKEYBYTES+MSG_BYTES); + ephemeralsk[SECRETKEY_A_BYTES - 1] &= MASK_ALICE; + + 
// Encrypt + EphemeralKeyGeneration_A(ephemeralsk, ct); + EphemeralSecretAgreement_A(ephemeralsk, pk, jinvariant); + shake256(h, MSG_BYTES, jinvariant, FP2_ENCODED_BYTES); + for (int i = 0; i < MSG_BYTES; i++) { + ct[i + CRYPTO_PUBLICKEYBYTES] = temp[i] ^ h[i]; + } + + // Generate shared secret ss <- H(m||ct) + memcpy(&temp[MSG_BYTES], ct, CRYPTO_CIPHERTEXTBYTES); + shake256(ss, CRYPTO_BYTES, temp, CRYPTO_CIPHERTEXTBYTES+MSG_BYTES); + + return 0; +} + + +int crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) +{ // SIKE's decapsulation + // Input: secret key sk (CRYPTO_SECRETKEYBYTES = MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes) + // ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes) + // Outputs: shared secret ss (CRYPTO_BYTES bytes) + unsigned char ephemeralsk_[SECRETKEY_A_BYTES]; + unsigned char jinvariant_[FP2_ENCODED_BYTES]; + unsigned char h_[MSG_BYTES]; + unsigned char c0_[CRYPTO_PUBLICKEYBYTES]; + unsigned char temp[CRYPTO_CIPHERTEXTBYTES+MSG_BYTES]; + + // Decrypt + EphemeralSecretAgreement_B(sk + MSG_BYTES, ct, jinvariant_); + shake256(h_, MSG_BYTES, jinvariant_, FP2_ENCODED_BYTES); + for (int i = 0; i < MSG_BYTES; i++) { + temp[i] = ct[i + CRYPTO_PUBLICKEYBYTES] ^ h_[i]; + } + + // Generate ephemeralsk_ <- G(m||pk) mod oA + memcpy(&temp[MSG_BYTES], &sk[MSG_BYTES + SECRETKEY_B_BYTES], CRYPTO_PUBLICKEYBYTES); + shake256(ephemeralsk_, SECRETKEY_A_BYTES, temp, CRYPTO_PUBLICKEYBYTES+MSG_BYTES); + ephemeralsk_[SECRETKEY_A_BYTES - 1] &= MASK_ALICE; + + // Generate shared secret ss <- H(m||ct), or output ss <- H(s||ct) in case of ct verification failure + EphemeralKeyGeneration_A(ephemeralsk_, c0_); + // If selector = 0 then do ss = H(m||ct), else if selector = -1 load s to do ss = H(s||ct) + int8_t selector = ct_compare(c0_, ct, CRYPTO_PUBLICKEYBYTES); + ct_cmov(temp, sk, MSG_BYTES, selector); + memcpy(&temp[MSG_BYTES], ct, CRYPTO_CIPHERTEXTBYTES); + shake256(ss, 
CRYPTO_BYTES, temp, CRYPTO_CIPHERTEXTBYTES+MSG_BYTES); + + return 0; +} \ No newline at end of file diff --git a/SIKE_sw/tests/arith_tests-p377.c b/SIKE_sw/tests/arith_tests-p377.c new file mode 100644 index 0000000..3ad7052 --- /dev/null +++ b/SIKE_sw/tests/arith_tests-p377.c @@ -0,0 +1,616 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: testing code for field arithmetic, elliptic curve and isogeny functions +*********************************************************************************************/ + +#include "../src/config.h" +#include "../src/P377/P377_internal.h" +#include "../src/internal.h" +#include "test_extras.h" +#include + + +// Benchmark and test parameters +#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) + #define BENCH_LOOPS 100 // Number of iterations per bench + #define SMALL_BENCH_LOOPS 100 // Number of iterations per bench + #define TEST_LOOPS 10 // Number of iterations per test +#else + #define BENCH_LOOPS 100000 + #define SMALL_BENCH_LOOPS 10000 + #define TEST_LOOPS 100 +#endif + + +bool fp_test() +{ // Tests for the field arithmetic + bool OK = true; + int n, passed; + felm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf; + + printf("\n--------------------------------------------------------------------------------------------------------\n\n"); + printf("Testing field arithmetic over GF(p377): \n\n"); + + // Field addition over the prime p377 + passed = 1; + for (n=0; n + + +// Benchmark and test parameters +#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) + #define BENCH_LOOPS 100 // Number of iterations per bench + #define SMALL_BENCH_LOOPS 100 // Number of iterations per bench + #define TEST_LOOPS 10 // Number of iterations per test +#else + #define BENCH_LOOPS 
100000 + #define SMALL_BENCH_LOOPS 10000 + #define TEST_LOOPS 100 +#endif + + +bool fp_test() +{ // Tests for the field arithmetic + bool OK = true; + int n, passed; + felm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf; + + printf("\n--------------------------------------------------------------------------------------------------------\n\n"); + printf("Testing field arithmetic over GF(p434): \n\n"); + + // Field addition over the prime p434 + passed = 1; + for (n=0; n + + +// Benchmark and test parameters +#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) + #define BENCH_LOOPS 100 // Number of iterations per bench + #define SMALL_BENCH_LOOPS 100 // Number of iterations per bench + #define TEST_LOOPS 10 // Number of iterations per test +#else + #define BENCH_LOOPS 100000 + #define SMALL_BENCH_LOOPS 10000 + #define TEST_LOOPS 100 +#endif + + +bool fp_test() +{ // Tests for the field arithmetic + bool OK = true; + int n, passed; + felm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf; + + printf("\n--------------------------------------------------------------------------------------------------------\n\n"); + printf("Testing field arithmetic over GF(p503): \n\n"); + + // Field addition over the prime p503 + passed = 1; + for (n=0; n + + +// Benchmark and test parameters +#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) + #define BENCH_LOOPS 100 // Number of iterations per bench + #define SMALL_BENCH_LOOPS 100 // Number of iterations per bench + #define TEST_LOOPS 10 // Number of iterations per test +#else + #define BENCH_LOOPS 100000 + #define SMALL_BENCH_LOOPS 10000 + #define TEST_LOOPS 100 +#endif + + +bool fp_test() +{ // Tests for the field arithmetic + bool OK = true; + int n, passed; + felm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf; + + printf("\n--------------------------------------------------------------------------------------------------------\n\n"); + printf("Testing field arithmetic over GF(p546): \n\n"); + + // Field addition over the 
prime p546 + passed = 1; + for (n=0; n + + +// Benchmark and test parameters +#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) + #define BENCH_LOOPS 100 // Number of iterations per bench + #define SMALL_BENCH_LOOPS 100 // Number of iterations per bench + #define TEST_LOOPS 10 // Number of iterations per test +#else + #define BENCH_LOOPS 100000 + #define SMALL_BENCH_LOOPS 10000 + #define TEST_LOOPS 100 +#endif + + +bool fp_test() +{ // Tests for the field arithmetic + bool OK = true; + int n, passed; + felm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf; + + printf("\n--------------------------------------------------------------------------------------------------------\n\n"); + printf("Testing field arithmetic over GF(p610): \n\n"); + + // Field addition over the prime p610 + passed = 1; + for (n=0; n + + +// Benchmark and test parameters +#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) + #define BENCH_LOOPS 100 // Number of iterations per bench + #define SMALL_BENCH_LOOPS 100 // Number of iterations per bench + #define TEST_LOOPS 10 // Number of iterations per test +#else + #define BENCH_LOOPS 100000 + #define SMALL_BENCH_LOOPS 10000 + #define TEST_LOOPS 100 +#endif + + +bool fp_test() +{ // Tests for the field arithmetic + bool OK = true; + int n, passed; + felm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf; + + printf("\n--------------------------------------------------------------------------------------------------------\n\n"); + printf("Testing field arithmetic over GF(p697): \n\n"); + + // Field addition over the prime p697 + passed = 1; + for (n=0; n + + +// Benchmark and test parameters +#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) + #define BENCH_LOOPS 100 // Number of iterations per bench + #define SMALL_BENCH_LOOPS 100 // Number of iterations per bench + #define TEST_LOOPS 10 // Number of iterations per test +#else + #define BENCH_LOOPS 100000 + #define SMALL_BENCH_LOOPS 10000 + #define TEST_LOOPS 100 +#endif + + 
+bool fp_test() +{ // Tests for the field arithmetic + bool OK = true; + int n, passed; + felm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf; + + printf("\n--------------------------------------------------------------------------------------------------------\n\n"); + printf("Testing field arithmetic over GF(p751): \n\n"); + + // Field addition over the prime p751 + passed = 1; + for (n=0; n +#include +#include "test_extras.h" +#include "../src/P377/P377_api.h" + + +#define SCHEME_NAME "SIKEp377" + +#define crypto_kem_keypair crypto_kem_keypair_SIKEp377 +#define crypto_kem_enc crypto_kem_enc_SIKEp377 +#define crypto_kem_dec crypto_kem_dec_SIKEp377 + +#include "test_sike.c" \ No newline at end of file diff --git a/SIKE_sw/tests/test_SIKEp434.c b/SIKE_sw/tests/test_SIKEp434.c new file mode 100644 index 0000000..fce38b4 --- /dev/null +++ b/SIKE_sw/tests/test_SIKEp434.c @@ -0,0 +1,19 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: benchmarking/testing isogeny-based key encapsulation mechanism SIKEp434 +*********************************************************************************************/ + +#include +#include +#include "test_extras.h" +#include "../src/P434/P434_api.h" + + +#define SCHEME_NAME "SIKEp434" + +#define crypto_kem_keypair crypto_kem_keypair_SIKEp434 +#define crypto_kem_enc crypto_kem_enc_SIKEp434 +#define crypto_kem_dec crypto_kem_dec_SIKEp434 + +#include "test_sike.c" \ No newline at end of file diff --git a/SIKE_sw/tests/test_SIKEp503.c b/SIKE_sw/tests/test_SIKEp503.c new file mode 100644 index 0000000..e1e9d0e --- /dev/null +++ b/SIKE_sw/tests/test_SIKEp503.c @@ -0,0 +1,19 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: benchmarking/testing isogeny-based key encapsulation mechanism SIKEp503 
+*********************************************************************************************/ + +#include +#include +#include "test_extras.h" +#include "../src/P503/P503_api.h" + + +#define SCHEME_NAME "SIKEp503" + +#define crypto_kem_keypair crypto_kem_keypair_SIKEp503 +#define crypto_kem_enc crypto_kem_enc_SIKEp503 +#define crypto_kem_dec crypto_kem_dec_SIKEp503 + +#include "test_sike.c" \ No newline at end of file diff --git a/SIKE_sw/tests/test_SIKEp546.c b/SIKE_sw/tests/test_SIKEp546.c new file mode 100644 index 0000000..2735d1e --- /dev/null +++ b/SIKE_sw/tests/test_SIKEp546.c @@ -0,0 +1,17 @@ +/******************************************************************************************** +* NEW benchmarking/testing isogeny-based key encapsulation mechanism SIKEp546 +*********************************************************************************************/ + +#include +#include +#include "test_extras.h" +#include "../src/P546/P546_api.h" + + +#define SCHEME_NAME "SIKEp546" + +#define crypto_kem_keypair crypto_kem_keypair_SIKEp546 +#define crypto_kem_enc crypto_kem_enc_SIKEp546 +#define crypto_kem_dec crypto_kem_dec_SIKEp546 + +#include "test_sike.c" \ No newline at end of file diff --git a/SIKE_sw/tests/test_SIKEp610.c b/SIKE_sw/tests/test_SIKEp610.c new file mode 100644 index 0000000..b52b0d1 --- /dev/null +++ b/SIKE_sw/tests/test_SIKEp610.c @@ -0,0 +1,19 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: benchmarking/testing isogeny-based key encapsulation mechanism SIKEp610 +*********************************************************************************************/ + +#include +#include +#include "test_extras.h" +#include "../src/P610/P610_api.h" + + +#define SCHEME_NAME "SIKEp610" + +#define crypto_kem_keypair crypto_kem_keypair_SIKEp610 +#define crypto_kem_enc crypto_kem_enc_SIKEp610 +#define crypto_kem_dec 
crypto_kem_dec_SIKEp610 + +#include "test_sike.c" \ No newline at end of file diff --git a/SIKE_sw/tests/test_SIKEp697.c b/SIKE_sw/tests/test_SIKEp697.c new file mode 100644 index 0000000..fd6a4ee --- /dev/null +++ b/SIKE_sw/tests/test_SIKEp697.c @@ -0,0 +1,17 @@ +/******************************************************************************************** +* NEW benchmarking/testing isogeny-based key encapsulation mechanism SIKEp697 +*********************************************************************************************/ + +#include +#include +#include "test_extras.h" +#include "../src/P697/P697_api.h" + + +#define SCHEME_NAME "SIKEp697" + +#define crypto_kem_keypair crypto_kem_keypair_SIKEp697 +#define crypto_kem_enc crypto_kem_enc_SIKEp697 +#define crypto_kem_dec crypto_kem_dec_SIKEp697 + +#include "test_sike.c" \ No newline at end of file diff --git a/SIKE_sw/tests/test_SIKEp751.c b/SIKE_sw/tests/test_SIKEp751.c new file mode 100644 index 0000000..1a1bfaf --- /dev/null +++ b/SIKE_sw/tests/test_SIKEp751.c @@ -0,0 +1,19 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: benchmarking/testing isogeny-based key encapsulation mechanism SIKEp751 +*********************************************************************************************/ + +#include +#include +#include "test_extras.h" +#include "../src/P751/P751_api.h" + + +#define SCHEME_NAME "SIKEp751" + +#define crypto_kem_keypair crypto_kem_keypair_SIKEp751 +#define crypto_kem_enc crypto_kem_enc_SIKEp751 +#define crypto_kem_dec crypto_kem_dec_SIKEp751 + +#include "test_sike.c" \ No newline at end of file diff --git a/SIKE_sw/tests/test_extras.c b/SIKE_sw/tests/test_extras.c new file mode 100644 index 0000000..643edeb --- /dev/null +++ b/SIKE_sw/tests/test_extras.c @@ -0,0 +1,283 @@ +/******************************************************************************************** +* 
SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: utility functions for testing and benchmarking +*********************************************************************************************/ + +#include "test_extras.h" +#if (OS_TARGET == OS_WIN) + #include + #include +#elif (OS_TARGET == OS_LINUX) + #if (TARGET == TARGET_ARM64) + #include + #endif + #include +#endif +#include + +static uint64_t p377[6] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x0B46D546BC2A5699, 0xA879CC6988CE7CF5, 0x015B702E0C542196 }; +static uint64_t p434[7] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFDC1767AE2FFFFFF, + 0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344 }; +static uint64_t p503[8] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xABFFFFFFFFFFFFFF, + 0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E }; +static uint64_t p546[9] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xC1CCF59098E1FFFF, + 0x91CA3591A0810F4F, 0xC3A747738CBAAD7D, 0x3E568459654D5F6B, 0x000000030F5EBA42 }; +static uint64_t p610[10] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x6E01FFFFFFFFFFFF, + 0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768 }; +static uint64_t p697[11] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x604054AFFFFFFFFF, + 0xDF4970CF7313736F, 0x719AEC973BF54225, 0x40E474DA88B90FFE, 0x9A0E279D6CEB3C8E, 0x01B39F97671708CF }; +static uint64_t p751[12] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF, + 0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 
0x0E12909F97BADC66, 0x00006FE5D541F71C }; + +#define NBITS_FIELD377 377 +#define NBITS_FIELD434 434 +#define NBITS_FIELD503 503 +#define NBITS_FIELD546 546 +#define NBITS_FIELD610 610 +#define NBITS_FIELD697 697 +#define NBITS_FIELD751 751 + + +int64_t cpucycles(void) +{ // Access system counter for benchmarking +#if (OS_TARGET == OS_WIN) && (TARGET == TARGET_AMD64 || TARGET == TARGET_x86) + return __rdtsc(); +#elif (OS_TARGET == OS_LINUX) && (TARGET == TARGET_AMD64 || TARGET == TARGET_x86) + unsigned int hi, lo; + + __asm volatile ("rdtsc\n\t" : "=a" (lo), "=d"(hi)); + return ((int64_t)lo) | (((int64_t)hi) << 32); +#elif (OS_TARGET == OS_LINUX) && (TARGET == TARGET_ARM64) + struct timespec time; + + clock_gettime(CLOCK_REALTIME, &time); + return (int64_t)time.tv_sec*1000000000LL + (int64_t)time.tv_nsec; // Integer arithmetic: a double cannot hold epoch nanoseconds exactly +#else + return 0; +#endif +} + + +int compare_words(digit_t* a, digit_t* b, unsigned int nwords) +{ // Comparing "nword" elements, a=b? : (1) a>b, (0) a=b, (-1) a<b + // SECURITY NOTE: this function does not have constant-time execution. It is for TESTING ONLY. + int i; + + for (i = (int)nwords-1; i >= 0; i--) + { + if (a[i] > b[i]) return 1; + else if (a[i] < b[i]) return -1; + } + + return 0; +} + + +static void sub_test(digit_t* a, digit_t* b, digit_t* c, unsigned int nwords) +{ // Subtraction without borrow, c = a-b where a>b + // SECURITY NOTE: this function does not have constant-time execution. It is for TESTING ONLY. + unsigned int i; + digit_t res, carry, borrow = 0; + + for (i = 0; i < nwords; i++) + { + res = a[i] - b[i]; + carry = (a[i] < b[i]); + c[i] = res - borrow; + borrow = carry || (res < borrow); + } +} + + +void fprandom377_test(digit_t* a) +{ // Generating a pseudo-random field element in [0, p377-1] + // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+ unsigned int i, diff = 384-NBITS_FIELD377, nwords = NBITS_TO_NWORDS(NBITS_FIELD377); + unsigned char* string = NULL; + + string = (unsigned char*)a; + for (i = 0; i < sizeof(digit_t)*nwords; i++) { + *(string + i) = (unsigned char)rand(); // Obtain 384-bit number + } + a[nwords-1] &= (((digit_t)(-1) << diff) >> diff); + + while (compare_words((digit_t*)p377, a, nwords) < 1) { // Force it to [0, modulus-1] + sub_test(a, (digit_t*)p377, a, nwords); + } +} + + +void fprandom434_test(digit_t* a) +{ // Generating a pseudo-random field element in [0, p434-1] + // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY. + unsigned int i, diff = 448-NBITS_FIELD434, nwords = NBITS_TO_NWORDS(NBITS_FIELD434); + unsigned char* string = NULL; + + string = (unsigned char*)a; + for (i = 0; i < sizeof(digit_t)*nwords; i++) { + *(string + i) = (unsigned char)rand(); // Obtain 448-bit number + } + a[nwords-1] &= (((digit_t)(-1) << diff) >> diff); + + while (compare_words((digit_t*)p434, a, nwords) < 1) { // Force it to [0, modulus-1] + sub_test(a, (digit_t*)p434, a, nwords); + } +} + + +void fprandom503_test(digit_t* a) +{ // Generating a pseudo-random field element in [0, p503-1] + // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY. + unsigned int i, diff = 512-NBITS_FIELD503, nwords = NBITS_TO_NWORDS(NBITS_FIELD503); + unsigned char* string = NULL; + + string = (unsigned char*)a; + for (i = 0; i < sizeof(digit_t)*nwords; i++) { + *(string + i) = (unsigned char)rand(); // Obtain 512-bit number + } + a[nwords-1] &= (((digit_t)(-1) << diff) >> diff); + + while (compare_words((digit_t*)p503, a, nwords) < 1) { // Force it to [0, modulus-1] + sub_test(a, (digit_t*)p503, a, nwords); + } +} + + +void fprandom546_test(digit_t* a) +{ // Generating a pseudo-random field element in [0, p546-1] + // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY. 
+ unsigned int i, diff = 576-NBITS_FIELD546, nwords = NBITS_TO_NWORDS(NBITS_FIELD546); + unsigned char* string = NULL; + + string = (unsigned char*)a; + for (i = 0; i < sizeof(digit_t)*nwords; i++) { + *(string + i) = (unsigned char)rand(); // Obtain 576-bit number + } + a[nwords-1] &= (((digit_t)(-1) << diff) >> diff); + + while (compare_words((digit_t*)p546, a, nwords) < 1) { // Force it to [0, modulus-1] + sub_test(a, (digit_t*)p546, a, nwords); + } +} + + +void fprandom610_test(digit_t* a) +{ // Generating a pseudo-random field element in [0, p610-1] + // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY. + unsigned int i, diff = 640-NBITS_FIELD610, nwords = NBITS_TO_NWORDS(NBITS_FIELD610); + unsigned char* string = NULL; + + string = (unsigned char*)a; + for (i = 0; i < sizeof(digit_t)*nwords; i++) { + *(string + i) = (unsigned char)rand(); // Obtain 640-bit number + } + a[nwords-1] &= (((digit_t)(-1) << diff) >> diff); + + while (compare_words((digit_t*)p610, a, nwords) < 1) { // Force it to [0, modulus-1] + sub_test(a, (digit_t*)p610, a, nwords); + } +} + + +void fprandom697_test(digit_t* a) +{ // Generating a pseudo-random field element in [0, p697-1] + // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY. + unsigned int i, diff = 704-NBITS_FIELD697, nwords = NBITS_TO_NWORDS(NBITS_FIELD697); + unsigned char* string = NULL; + + string = (unsigned char*)a; + for (i = 0; i < sizeof(digit_t)*nwords; i++) { + *(string + i) = (unsigned char)rand(); // Obtain 704-bit number + } + a[nwords-1] &= (((digit_t)(-1) << diff) >> diff); + + while (compare_words((digit_t*)p697, a, nwords) < 1) { // Force it to [0, modulus-1] + sub_test(a, (digit_t*)p697, a, nwords); + } +} + + +void fprandom751_test(digit_t* a) +{ // Generating a pseudo-random field element in [0, p751-1] + // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+ unsigned int i, diff = 768-NBITS_FIELD751, nwords = NBITS_TO_NWORDS(NBITS_FIELD751); + unsigned char* string = NULL; + + string = (unsigned char*)a; + for (i = 0; i < sizeof(digit_t)*nwords; i++) { + *(string + i) = (unsigned char)rand(); // Obtain 768-bit number + } + a[nwords-1] &= (((digit_t)(-1) << diff) >> diff); + + while (compare_words((digit_t*)p751, a, nwords) < 1) { // Force it to [0, modulus-1] + sub_test(a, (digit_t*)p751, a, nwords); + } +} + + +void fp2random377_test(digit_t* a) +{ // Generating a pseudo-random element in GF(p377^2) + // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY. + + fprandom377_test(a); + fprandom377_test(a+NBITS_TO_NWORDS(NBITS_FIELD377)); +} + + +void fp2random434_test(digit_t* a) +{ // Generating a pseudo-random element in GF(p434^2) + // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY. + + fprandom434_test(a); + fprandom434_test(a+NBITS_TO_NWORDS(NBITS_FIELD434)); +} + + +void fp2random503_test(digit_t* a) +{ // Generating a pseudo-random element in GF(p503^2) + // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY. + + fprandom503_test(a); + fprandom503_test(a+NBITS_TO_NWORDS(NBITS_FIELD503)); +} + + +void fp2random546_test(digit_t* a) +{ // Generating a pseudo-random element in GF(p546^2) + // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY. + + fprandom546_test(a); + fprandom546_test(a+NBITS_TO_NWORDS(NBITS_FIELD546)); +} + + +void fp2random610_test(digit_t* a) +{ // Generating a pseudo-random element in GF(p610^2) + // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY. + + fprandom610_test(a); + fprandom610_test(a+NBITS_TO_NWORDS(NBITS_FIELD610)); +} + + +void fp2random697_test(digit_t* a) +{ // Generating a pseudo-random element in GF(p697^2) + // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY. 
+ + fprandom697_test(a); + fprandom697_test(a+NBITS_TO_NWORDS(NBITS_FIELD697)); +} + + +void fp2random751_test(digit_t* a) +{ // Generating a pseudo-random element in GF(p751^2) + // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY. + + fprandom751_test(a); + fprandom751_test(a+NBITS_TO_NWORDS(NBITS_FIELD751)); +} \ No newline at end of file diff --git a/SIKE_sw/tests/test_extras.h b/SIKE_sw/tests/test_extras.h new file mode 100644 index 0000000..0a784e1 --- /dev/null +++ b/SIKE_sw/tests/test_extras.h @@ -0,0 +1,76 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: utility header file for tests +*********************************************************************************************/ + +#ifndef TEST_EXTRAS_H +#define TEST_EXTRAS_H + +#include "../src/config.h" + +#define PASSED 0 +#define FAILURE 1 + + +#if (TARGET == TARGET_ARM64) + #define print_unit printf("nsec"); +#else + #define print_unit printf("cycles"); +#endif + + +// Access system counter for benchmarking +int64_t cpucycles(void); + +// Comparing "nword" elements, a=b? 
: (1) a>b, (0) a=b, (-1) a<b +int compare_words(digit_t* a, digit_t* b, unsigned int nwords); + +// Generating a pseudo-random field element in [0, p377-1] +void fprandom377_test(digit_t* a); + +// Generating a pseudo-random element in GF(p377^2) +void fp2random377_test(digit_t* a); + +// Generating a pseudo-random field element in [0, p434-1] +void fprandom434_test(digit_t* a); + +// Generating a pseudo-random element in GF(p434^2) +void fp2random434_test(digit_t* a); + +// Generating a pseudo-random field element in [0, p503-1] +void fprandom503_test(digit_t* a); + +// Generating a pseudo-random element in GF(p503^2) +void fp2random503_test(digit_t* a); + +// Generating a pseudo-random field element in [0, p546-1] +void fprandom546_test(digit_t* a); + +// Generating a pseudo-random element in GF(p546^2) +void fp2random546_test(digit_t* a); + +// Generating a pseudo-random field element in [0, p610-1] +void fprandom610_test(digit_t* a); + +// Generating a pseudo-random element in GF(p610^2) +void fp2random610_test(digit_t* a); + +// Generating a pseudo-random field element in [0, p697-1] +void fprandom697_test(digit_t* a); + +// Generating a pseudo-random element in GF(p697^2) +void fp2random697_test(digit_t* a); + +// Generating a pseudo-random field element in [0, p751-1] +void fprandom751_test(digit_t* a); + +// Generating a pseudo-random element in GF(p751^2) +void fp2random751_test(digit_t* a); + + +#endif \ No newline at end of file diff --git a/SIKE_sw/tests/test_sike.c b/SIKE_sw/tests/test_sike.c new file mode 100644 index 0000000..6bb42fe --- /dev/null +++ b/SIKE_sw/tests/test_sike.c @@ -0,0 +1,132 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* Copyright (c) Microsoft Corporation +* +* Website: https://github.com/microsoft/PQCrypto-SIDH +* Released under MIT license +* +* Abstract: benchmarking/testing isogeny-based key encapsulation mechanism
+*********************************************************************************************/ + +#include "../src/random/random.h" + + +// Benchmark and test parameters +#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) + #define BENCH_LOOPS 5 // Number of iterations per bench + #define TEST_LOOPS 5 // Number of iterations per test +#else + #define BENCH_LOOPS 100 + #define TEST_LOOPS 10 +#endif + + +int cryptotest_kem() +{ // Testing KEM + unsigned int i; + unsigned char sk[CRYPTO_SECRETKEYBYTES] = {0}; + unsigned char pk[CRYPTO_PUBLICKEYBYTES] = {0}; + unsigned char ct[CRYPTO_CIPHERTEXTBYTES] = {0}; + unsigned char ss[CRYPTO_BYTES] = {0}; + unsigned char ss_[CRYPTO_BYTES] = {0}; + unsigned char bytes[4]; + uint32_t* pos = (uint32_t*)bytes; + bool passed = true; + + printf("\n\nTESTING ISOGENY-BASED KEY ENCAPSULATION MECHANISM %s\n", SCHEME_NAME); + printf("--------------------------------------------------------------------------------------------------------\n\n"); + + for (i = 0; i < TEST_LOOPS; i++) + { + crypto_kem_keypair(pk, sk); + crypto_kem_enc(ct, ss, pk); + crypto_kem_dec(ss_, ct, sk); + + if (memcmp(ss, ss_, CRYPTO_BYTES) != 0) { + passed = false; + break; + } + + // Testing decapsulation after changing one bit of ct + randombytes(bytes, 4); + *pos %= CRYPTO_CIPHERTEXTBYTES; + ct[*pos] ^= 1; + crypto_kem_dec(ss_, ct, sk); + + if (memcmp(ss, ss_, CRYPTO_BYTES) == 0) { + passed = false; + break; + } + } + + if (passed == true) printf(" KEM tests .................................................... PASSED"); + else { printf(" KEM tests ... 
FAILURE"); printf("\n"); return FAILURE; } + printf("\n"); + + return PASSED; +} + + +int cryptorun_kem() +{ // Benchmarking key exchange + unsigned int n; + unsigned char sk[CRYPTO_SECRETKEYBYTES] = {0}; + unsigned char pk[CRYPTO_PUBLICKEYBYTES] = {0}; + unsigned char ct[CRYPTO_CIPHERTEXTBYTES] = {0}; + unsigned char ss[CRYPTO_BYTES] = {0}; + unsigned char ss_[CRYPTO_BYTES] = {0}; + unsigned long long cycles_keygen = 0, cycles_encaps = 0, cycles_decaps = 0, cycles1, cycles2; + + printf("\n\nBENCHMARKING ISOGENY-BASED KEY ENCAPSULATION MECHANISM %s\n", SCHEME_NAME); + printf("--------------------------------------------------------------------------------------------------------\n\n"); + + for (n = 0; n < BENCH_LOOPS; n++) + { + // Benchmarking key generation + cycles1 = cpucycles(); + crypto_kem_keypair(pk, sk); + cycles2 = cpucycles(); + cycles_keygen = cycles_keygen+(cycles2-cycles1); + + // Benchmarking encapsulation + cycles1 = cpucycles(); + crypto_kem_enc(ct, ss, pk); + cycles2 = cpucycles(); + cycles_encaps = cycles_encaps+(cycles2-cycles1); + + // Benchmarking decapsulation + cycles1 = cpucycles(); + crypto_kem_dec(ss_, ct, sk); + cycles2 = cpucycles(); + cycles_decaps = cycles_decaps+(cycles2-cycles1); + } + + printf(" Key generation runs in ....................................... %10lld ", cycles_keygen/BENCH_LOOPS); print_unit; + printf("\n"); + printf(" Encapsulation runs in ........................................ %10lld ", cycles_encaps/BENCH_LOOPS); print_unit; + printf("\n"); + printf(" Decapsulation runs in ........................................ 
%10lld ", cycles_decaps/BENCH_LOOPS); print_unit; + printf("\n"); + + return PASSED; +} + + +int main() +{ + int Status = PASSED; + + Status = cryptotest_kem(); // Test key encapsulation mechanism + if (Status != PASSED) { + printf("\n\n Error detected: KEM_ERROR_SHARED_KEY \n\n"); + return FAILURE; + } + + Status = cryptorun_kem(); // Benchmark key encapsulation mechanism + if (Status != PASSED) { + printf("\n\n Error detected: KEM_ERROR_SHARED_KEY \n\n"); + return FAILURE; + } + + return Status; +} \ No newline at end of file diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_add.v b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_add.v similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_add.v rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_add.v diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_sub.v b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_sub.v similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_sub.v rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_sub.v diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/.gitignore b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/.gitignore similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/.gitignore rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/.gitignore diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Makefile b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Makefile 
similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Makefile rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Makefile diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Montgomery_multiplier_tb.v b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Montgomery_multiplier_tb.v similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Montgomery_multiplier_tb.v rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Montgomery_multiplier_tb.v diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_add.sage b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_add.sage similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_add.sage rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_add.sage diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_sub.sage b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_sub.sage similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_sub.sage rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_sub.sage diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/README b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/README similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/README rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/README 
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/.gitignore similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/.gitignore rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/.gitignore diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/Makefile similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/Makefile rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/Makefile diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.tcl similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.tcl rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.tcl diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.xdc similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.xdc rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.xdc diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/program.tcl similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/program.tcl rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/program.tcl diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/proj.src 
similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/proj.src rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/proj.src diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/multiplier.v b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/multiplier.v similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/multiplier.v rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/multiplier.v diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/step_add.v b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/step_add.v similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/step_add.v rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/step_add.v diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/step_sub.v b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/step_sub.v similarity index 100% rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/step_sub.v rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/step_sub.v diff --git a/src/hardware/README b/SIKE_vOW_hw-sw/hardware/README similarity index 100% rename from src/hardware/README rename to SIKE_vOW_hw-sw/hardware/README diff --git a/src/hardware/controller_eval_4_isog/README b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/README similarity index 100% rename from src/hardware/controller_eval_4_isog/README rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/README diff --git a/src/hardware/controller_eval_4_isog/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/.gitignore similarity index 100% rename from src/hardware/controller_eval_4_isog/Vivado/.gitignore rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/.gitignore diff --git 
a/src/hardware/controller_eval_4_isog/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/Makefile similarity index 100% rename from src/hardware/controller_eval_4_isog/Vivado/Makefile rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/Makefile diff --git a/src/hardware/controller_eval_4_isog/Vivado/batch-synth.sh b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/batch-synth.sh similarity index 100% rename from src/hardware/controller_eval_4_isog/Vivado/batch-synth.sh rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/batch-synth.sh diff --git a/src/hardware/controller_eval_4_isog/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/board.tcl similarity index 100% rename from src/hardware/controller_eval_4_isog/Vivado/board.tcl rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/board.tcl diff --git a/src/hardware/controller_eval_4_isog/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/board.xdc similarity index 100% rename from src/hardware/controller_eval_4_isog/Vivado/board.xdc rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/board.xdc diff --git a/src/hardware/controller_eval_4_isog/Vivado/params.mk b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/params.mk similarity index 100% rename from src/hardware/controller_eval_4_isog/Vivado/params.mk rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/params.mk diff --git a/src/hardware/controller_eval_4_isog/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/program.tcl similarity index 100% rename from src/hardware/controller_eval_4_isog/Vivado/program.tcl rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/program.tcl diff --git a/src/hardware/controller_eval_4_isog/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/proj.src similarity index 100% rename from src/hardware/controller_eval_4_isog/Vivado/proj.src rename to 
SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/proj.src diff --git a/src/hardware/controller_eval_4_isog/controller.v b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller.v similarity index 100% rename from src/hardware/controller_eval_4_isog/controller.v rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller.v diff --git a/src/hardware/controller_eval_4_isog/controller_tb/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/.gitignore similarity index 100% rename from src/hardware/controller_eval_4_isog/controller_tb/.gitignore rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/.gitignore diff --git a/src/hardware/controller_eval_4_isog/controller_tb/Makefile b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/Makefile similarity index 100% rename from src/hardware/controller_eval_4_isog/controller_tb/Makefile rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/Makefile diff --git a/src/hardware/controller_eval_4_isog/controller_tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/batch-sim.sh similarity index 100% rename from src/hardware/controller_eval_4_isog/controller_tb/batch-sim.sh rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/batch-sim.sh diff --git a/src/hardware/controller_eval_4_isog/controller_tb/controller_tb.v b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/controller_tb.v similarity index 100% rename from src/hardware/controller_eval_4_isog/controller_tb/controller_tb.v rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/controller_tb.v diff --git a/src/hardware/controller_eval_4_isog/controller_tb/gen_test.sage b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/gen_test.sage similarity index 100% rename from src/hardware/controller_eval_4_isog/controller_tb/gen_test.sage rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/gen_test.sage 
diff --git a/src/hardware/controller_eval_4_isog/eval_4_isog_FSM.v b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/eval_4_isog_FSM.v similarity index 100% rename from src/hardware/controller_eval_4_isog/eval_4_isog_FSM.v rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/eval_4_isog_FSM.v diff --git a/src/hardware/controller_eval_4_isog/single_to_double_memory_wrapper.v b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/single_to_double_memory_wrapper.v similarity index 100% rename from src/hardware/controller_eval_4_isog/single_to_double_memory_wrapper.v rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/single_to_double_memory_wrapper.v diff --git a/src/hardware/controller_get_4_isog/README b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/README similarity index 100% rename from src/hardware/controller_get_4_isog/README rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/README diff --git a/src/hardware/controller_get_4_isog/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/.gitignore similarity index 100% rename from src/hardware/controller_get_4_isog/Vivado/.gitignore rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/.gitignore diff --git a/src/hardware/controller_get_4_isog/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/Makefile similarity index 100% rename from src/hardware/controller_get_4_isog/Vivado/Makefile rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/Makefile diff --git a/src/hardware/controller_get_4_isog/Vivado/batch-synth.sh b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/batch-synth.sh similarity index 100% rename from src/hardware/controller_get_4_isog/Vivado/batch-synth.sh rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/batch-synth.sh diff --git a/src/hardware/controller_get_4_isog/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/board.tcl similarity index 100% rename from 
src/hardware/controller_get_4_isog/Vivado/board.tcl rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/board.tcl diff --git a/src/hardware/controller_get_4_isog/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/board.xdc similarity index 100% rename from src/hardware/controller_get_4_isog/Vivado/board.xdc rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/board.xdc diff --git a/src/hardware/controller_get_4_isog/Vivado/params.mk b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/params.mk similarity index 100% rename from src/hardware/controller_get_4_isog/Vivado/params.mk rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/params.mk diff --git a/src/hardware/controller_get_4_isog/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/program.tcl similarity index 100% rename from src/hardware/controller_get_4_isog/Vivado/program.tcl rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/program.tcl diff --git a/src/hardware/controller_get_4_isog/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/proj.src similarity index 100% rename from src/hardware/controller_get_4_isog/Vivado/proj.src rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/proj.src diff --git a/src/hardware/controller_get_4_isog/controller.v b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller.v similarity index 100% rename from src/hardware/controller_get_4_isog/controller.v rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller.v diff --git a/src/hardware/controller_get_4_isog/controller_tb/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/.gitignore similarity index 100% rename from src/hardware/controller_get_4_isog/controller_tb/.gitignore rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/.gitignore diff --git a/src/hardware/controller_get_4_isog/controller_tb/Makefile 
b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/Makefile similarity index 100% rename from src/hardware/controller_get_4_isog/controller_tb/Makefile rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/Makefile diff --git a/src/hardware/controller_get_4_isog/controller_tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/batch-sim.sh similarity index 100% rename from src/hardware/controller_get_4_isog/controller_tb/batch-sim.sh rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/batch-sim.sh diff --git a/src/hardware/controller_get_4_isog/controller_tb/controller_tb.v b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/controller_tb.v similarity index 100% rename from src/hardware/controller_get_4_isog/controller_tb/controller_tb.v rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/controller_tb.v diff --git a/src/hardware/controller_get_4_isog/controller_tb/gen_test.sage b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/gen_test.sage similarity index 100% rename from src/hardware/controller_get_4_isog/controller_tb/gen_test.sage rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/gen_test.sage diff --git a/src/hardware/controller_get_4_isog/double_to_single_memory_wrapper.v b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/double_to_single_memory_wrapper.v similarity index 100% rename from src/hardware/controller_get_4_isog/double_to_single_memory_wrapper.v rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/double_to_single_memory_wrapper.v diff --git a/src/hardware/controller_get_4_isog/get_4_isog_FSM.v b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/get_4_isog_FSM.v similarity index 100% rename from src/hardware/controller_get_4_isog/get_4_isog_FSM.v rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/get_4_isog_FSM.v diff --git a/src/hardware/controller_xADD/README b/SIKE_vOW_hw-sw/hardware/controller_xADD/README similarity 
index 100% rename from src/hardware/controller_xADD/README rename to SIKE_vOW_hw-sw/hardware/controller_xADD/README diff --git a/src/hardware/controller_xADD/controller.v b/SIKE_vOW_hw-sw/hardware/controller_xADD/controller.v similarity index 100% rename from src/hardware/controller_xADD/controller.v rename to SIKE_vOW_hw-sw/hardware/controller_xADD/controller.v diff --git a/src/hardware/controller_xADD/controller_tb/Makefile b/SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/Makefile similarity index 100% rename from src/hardware/controller_xADD/controller_tb/Makefile rename to SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/Makefile diff --git a/src/hardware/controller_xADD/controller_tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/batch-sim.sh similarity index 100% rename from src/hardware/controller_xADD/controller_tb/batch-sim.sh rename to SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/batch-sim.sh diff --git a/src/hardware/controller_xADD/controller_tb/controller_tb.v b/SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/controller_tb.v similarity index 100% rename from src/hardware/controller_xADD/controller_tb/controller_tb.v rename to SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/controller_tb.v diff --git a/src/hardware/controller_xADD/controller_tb/gen_test.sage b/SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/gen_test.sage similarity index 100% rename from src/hardware/controller_xADD/controller_tb/gen_test.sage rename to SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/gen_test.sage diff --git a/src/hardware/controller_xADD/double_to_single_memory_wrapper.v b/SIKE_vOW_hw-sw/hardware/controller_xADD/double_to_single_memory_wrapper.v similarity index 100% rename from src/hardware/controller_xADD/double_to_single_memory_wrapper.v rename to SIKE_vOW_hw-sw/hardware/controller_xADD/double_to_single_memory_wrapper.v diff --git a/src/hardware/controller_xADD/xADD_FSM.v 
b/SIKE_vOW_hw-sw/hardware/controller_xADD/xADD_FSM.v similarity index 100% rename from src/hardware/controller_xADD/xADD_FSM.v rename to SIKE_vOW_hw-sw/hardware/controller_xADD/xADD_FSM.v diff --git a/src/hardware/controller_xDBL/README b/SIKE_vOW_hw-sw/hardware/controller_xDBL/README similarity index 100% rename from src/hardware/controller_xDBL/README rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/README diff --git a/src/hardware/controller_xDBL/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/.gitignore similarity index 100% rename from src/hardware/controller_xDBL/Vivado/.gitignore rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/.gitignore diff --git a/src/hardware/controller_xDBL/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/Makefile similarity index 100% rename from src/hardware/controller_xDBL/Vivado/Makefile rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/Makefile diff --git a/src/hardware/controller_xDBL/Vivado/batch-synth.sh b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/batch-synth.sh similarity index 100% rename from src/hardware/controller_xDBL/Vivado/batch-synth.sh rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/batch-synth.sh diff --git a/src/hardware/controller_xDBL/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/board.tcl similarity index 100% rename from src/hardware/controller_xDBL/Vivado/board.tcl rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/board.tcl diff --git a/src/hardware/controller_xDBL/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/board.xdc similarity index 100% rename from src/hardware/controller_xDBL/Vivado/board.xdc rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/board.xdc diff --git a/src/hardware/controller_xDBL/Vivado/params.mk b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/params.mk similarity index 100% rename from src/hardware/controller_xDBL/Vivado/params.mk rename to 
SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/params.mk diff --git a/src/hardware/controller_xDBL/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/program.tcl similarity index 100% rename from src/hardware/controller_xDBL/Vivado/program.tcl rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/program.tcl diff --git a/src/hardware/controller_xDBL/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/proj.src similarity index 100% rename from src/hardware/controller_xDBL/Vivado/proj.src rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/proj.src diff --git a/src/hardware/controller_xDBL/controller.v b/SIKE_vOW_hw-sw/hardware/controller_xDBL/controller.v similarity index 100% rename from src/hardware/controller_xDBL/controller.v rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/controller.v diff --git a/src/hardware/controller_xDBL/controller_tb/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/.gitignore similarity index 100% rename from src/hardware/controller_xDBL/controller_tb/.gitignore rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/.gitignore diff --git a/src/hardware/controller_xDBL/controller_tb/Makefile b/SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/Makefile similarity index 100% rename from src/hardware/controller_xDBL/controller_tb/Makefile rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/Makefile diff --git a/src/hardware/controller_xDBL/controller_tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/batch-sim.sh similarity index 100% rename from src/hardware/controller_xDBL/controller_tb/batch-sim.sh rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/batch-sim.sh diff --git a/src/hardware/controller_xDBL/controller_tb/controller_tb.v b/SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/controller_tb.v similarity index 100% rename from src/hardware/controller_xDBL/controller_tb/controller_tb.v rename to 
SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/controller_tb.v diff --git a/src/hardware/controller_xDBL/controller_tb/gen_test.sage b/SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/gen_test.sage similarity index 100% rename from src/hardware/controller_xDBL/controller_tb/gen_test.sage rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/gen_test.sage diff --git a/src/hardware/controller_xDBL/double_to_single_memory_wrapper.v b/SIKE_vOW_hw-sw/hardware/controller_xDBL/double_to_single_memory_wrapper.v similarity index 100% rename from src/hardware/controller_xDBL/double_to_single_memory_wrapper.v rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/double_to_single_memory_wrapper.v diff --git a/src/hardware/controller_xDBL/xDBL_FSM.v b/SIKE_vOW_hw-sw/hardware/controller_xDBL/xDBL_FSM.v similarity index 100% rename from src/hardware/controller_xDBL/xDBL_FSM.v rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/xDBL_FSM.v diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/README b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/README similarity index 100% rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/README rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/README diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/.gitignore similarity index 100% rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/.gitignore rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/.gitignore diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/Makefile similarity index 100% rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/Makefile rename to 
SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/Makefile diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/batch-synth.sh b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/batch-synth.sh similarity index 100% rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/batch-synth.sh rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/batch-synth.sh diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.tcl similarity index 100% rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.tcl rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.tcl diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.xdc similarity index 100% rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.xdc rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.xdc diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/program.tcl similarity index 100% rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/program.tcl rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/program.tcl diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/proj.src similarity index 100% rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/proj.src rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/proj.src diff 
--git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller.v b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller.v similarity index 100% rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller.v rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller.v diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/.gitignore similarity index 100% rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/.gitignore rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/.gitignore diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/Makefile b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/Makefile similarity index 100% rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/Makefile rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/Makefile diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/batch-sim.sh similarity index 100% rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/batch-sim.sh rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/batch-sim.sh diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/controller_tb.v b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/controller_tb.v similarity index 100% rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/controller_tb.v rename to 
SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/controller_tb.v diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/single_to_double_memory_wrapper.v b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/single_to_double_memory_wrapper.v similarity index 100% rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/single_to_double_memory_wrapper.v rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/single_to_double_memory_wrapper.v diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/README b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/README similarity index 100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/README rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/README diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/.gitignore similarity index 100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/.gitignore rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/.gitignore diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/Makefile similarity index 100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/Makefile rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/Makefile diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/batch-synth.sh b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/batch-synth.sh similarity index 100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/batch-synth.sh rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/batch-synth.sh diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.tcl similarity 
index 100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.tcl rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.tcl diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.xdc similarity index 100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.xdc rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.xdc diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/params.mk b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/params.mk similarity index 100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/params.mk rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/params.mk diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/program.tcl similarity index 100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/program.tcl rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/program.tcl diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/proj.src similarity index 100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/proj.src rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/proj.src diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul.v b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul.v similarity index 100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul.v rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul.v diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/.gitignore b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/.gitignore similarity index 
100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/.gitignore rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/.gitignore diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/Makefile b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/Makefile similarity index 100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/Makefile rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/Makefile diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/batch-sim.sh similarity index 100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/batch-sim.sh rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/batch-sim.sh diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/fp2_mont_mul_tb.v b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/fp2_mont_mul_tb.v similarity index 100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/fp2_mont_mul_tb.v rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/fp2_mont_mul_tb.v diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/gen_input.sage b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/gen_input.sage similarity index 100% rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/gen_input.sage rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/gen_input.sage diff --git a/src/hardware/fp2_sub_add_correction/.gitignore b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/.gitignore similarity index 100% rename from src/hardware/fp2_sub_add_correction/.gitignore rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/.gitignore diff --git 
a/src/hardware/fp2_sub_add_correction/README b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/README similarity index 100% rename from src/hardware/fp2_sub_add_correction/README rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/README diff --git a/src/hardware/fp2_sub_add_correction/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/.gitignore similarity index 100% rename from src/hardware/fp2_sub_add_correction/Vivado/.gitignore rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/.gitignore diff --git a/src/hardware/fp2_sub_add_correction/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/Makefile similarity index 100% rename from src/hardware/fp2_sub_add_correction/Vivado/Makefile rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/Makefile diff --git a/src/hardware/fp2_sub_add_correction/Vivado/batch-synth.sh b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/batch-synth.sh similarity index 100% rename from src/hardware/fp2_sub_add_correction/Vivado/batch-synth.sh rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/batch-synth.sh diff --git a/src/hardware/fp2_sub_add_correction/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/board.tcl similarity index 100% rename from src/hardware/fp2_sub_add_correction/Vivado/board.tcl rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/board.tcl diff --git a/src/hardware/fp2_sub_add_correction/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/board.xdc similarity index 100% rename from src/hardware/fp2_sub_add_correction/Vivado/board.xdc rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/board.xdc diff --git a/src/hardware/fp2_sub_add_correction/Vivado/params.mk b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/params.mk similarity index 100% rename from src/hardware/fp2_sub_add_correction/Vivado/params.mk rename to 
SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/params.mk diff --git a/src/hardware/fp2_sub_add_correction/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/program.tcl similarity index 100% rename from src/hardware/fp2_sub_add_correction/Vivado/program.tcl rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/program.tcl diff --git a/src/hardware/fp2_sub_add_correction/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/proj.src similarity index 100% rename from src/hardware/fp2_sub_add_correction/Vivado/proj.src rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/proj.src diff --git a/src/hardware/fp2_sub_add_correction/fp2_sub_add_correction.v b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction.v similarity index 100% rename from src/hardware/fp2_sub_add_correction/fp2_sub_add_correction.v rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction.v diff --git a/src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/.gitignore b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/.gitignore similarity index 100% rename from src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/.gitignore rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/.gitignore diff --git a/src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/Makefile b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/Makefile similarity index 100% rename from src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/Makefile rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/Makefile diff --git a/src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/batch-sim.sh similarity index 100% rename from 
src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/batch-sim.sh rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/batch-sim.sh diff --git a/src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/fp2_sub_add_correction_tb.v b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/fp2_sub_add_correction_tb.v similarity index 100% rename from src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/fp2_sub_add_correction_tb.v rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/fp2_sub_add_correction_tb.v diff --git a/src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/gen_test.sage b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/gen_test.sage similarity index 100% rename from src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/gen_test.sage rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/gen_test.sage diff --git a/src/hardware/fp2_sub_add_correction/gen.mk b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/gen.mk similarity index 100% rename from src/hardware/fp2_sub_add_correction/gen.mk rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/gen.mk diff --git a/src/hardware/fp2_sub_add_correction/gen_serial_comparator.py b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/gen_serial_comparator.py similarity index 100% rename from src/hardware/fp2_sub_add_correction/gen_serial_comparator.py rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/gen_serial_comparator.py diff --git a/src/hardware/fp_sub_and_add/README b/SIKE_vOW_hw-sw/hardware/fp_sub_and_add/README similarity index 100% rename from src/hardware/fp_sub_and_add/README rename to SIKE_vOW_hw-sw/hardware/fp_sub_and_add/README diff --git a/src/hardware/fp_sub_and_add/fp_add_and_compare.v b/SIKE_vOW_hw-sw/hardware/fp_sub_and_add/fp_add_and_compare.v similarity index 100% rename from 
src/hardware/fp_sub_and_add/fp_add_and_compare.v rename to SIKE_vOW_hw-sw/hardware/fp_sub_and_add/fp_add_and_compare.v diff --git a/src/hardware/fp_sub_and_add/fp_adder.v b/SIKE_vOW_hw-sw/hardware/fp_sub_and_add/fp_adder.v similarity index 100% rename from src/hardware/fp_sub_and_add/fp_adder.v rename to SIKE_vOW_hw-sw/hardware/fp_sub_and_add/fp_adder.v diff --git a/src/hardware/fp_sub_and_add/fp_sub_and_compare.v b/SIKE_vOW_hw-sw/hardware/fp_sub_and_add/fp_sub_and_compare.v similarity index 100% rename from src/hardware/fp_sub_and_add/fp_sub_and_compare.v rename to SIKE_vOW_hw-sw/hardware/fp_sub_and_add/fp_sub_and_compare.v diff --git a/src/hardware/fp_sub_and_add/gen.mk b/SIKE_vOW_hw-sw/hardware/fp_sub_and_add/gen.mk similarity index 100% rename from src/hardware/fp_sub_and_add/gen.mk rename to SIKE_vOW_hw-sw/hardware/fp_sub_and_add/gen.mk diff --git a/src/hardware/fp_sub_and_add/gen_serial_comparator.py b/SIKE_vOW_hw-sw/hardware/fp_sub_and_add/gen_serial_comparator.py similarity index 100% rename from src/hardware/fp_sub_and_add/gen_serial_comparator.py rename to SIKE_vOW_hw-sw/hardware/fp_sub_and_add/gen_serial_comparator.py diff --git a/src/hardware/fp_sub_and_add/unit_adder.v b/SIKE_vOW_hw-sw/hardware/fp_sub_and_add/unit_adder.v similarity index 100% rename from src/hardware/fp_sub_and_add/unit_adder.v rename to SIKE_vOW_hw-sw/hardware/fp_sub_and_add/unit_adder.v diff --git a/src/hardware/top_controller/README b/SIKE_vOW_hw-sw/hardware/top_controller/README similarity index 100% rename from src/hardware/top_controller/README rename to SIKE_vOW_hw-sw/hardware/top_controller/README diff --git a/src/hardware/top_controller/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/.gitignore similarity index 100% rename from src/hardware/top_controller/Vivado/.gitignore rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/.gitignore diff --git a/src/hardware/top_controller/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/Makefile 
similarity index 100% rename from src/hardware/top_controller/Vivado/Makefile rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/Makefile diff --git a/src/hardware/top_controller/Vivado/batch-synth.sh b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/batch-synth.sh similarity index 100% rename from src/hardware/top_controller/Vivado/batch-synth.sh rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/batch-synth.sh diff --git a/src/hardware/top_controller/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/board.tcl similarity index 100% rename from src/hardware/top_controller/Vivado/board.tcl rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/board.tcl diff --git a/src/hardware/top_controller/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/board.xdc similarity index 100% rename from src/hardware/top_controller/Vivado/board.xdc rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/board.xdc diff --git a/src/hardware/top_controller/Vivado/gen.mk b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/gen.mk similarity index 100% rename from src/hardware/top_controller/Vivado/gen.mk rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/gen.mk diff --git a/src/hardware/top_controller/Vivado/gen_p_mem.sage b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/gen_p_mem.sage similarity index 100% rename from src/hardware/top_controller/Vivado/gen_p_mem.sage rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/gen_p_mem.sage diff --git a/src/hardware/top_controller/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/program.tcl similarity index 100% rename from src/hardware/top_controller/Vivado/program.tcl rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/program.tcl diff --git a/src/hardware/top_controller/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/proj.src similarity index 100% rename from src/hardware/top_controller/Vivado/proj.src rename to 
SIKE_vOW_hw-sw/hardware/top_controller/Vivado/proj.src diff --git a/src/hardware/top_controller/gen_mem_wrapper.py b/SIKE_vOW_hw-sw/hardware/top_controller/gen_mem_wrapper.py similarity index 100% rename from src/hardware/top_controller/gen_mem_wrapper.py rename to SIKE_vOW_hw-sw/hardware/top_controller/gen_mem_wrapper.py diff --git a/src/hardware/top_controller/opt/top_controller.v b/SIKE_vOW_hw-sw/hardware/top_controller/opt/top_controller.v similarity index 100% rename from src/hardware/top_controller/opt/top_controller.v rename to SIKE_vOW_hw-sw/hardware/top_controller/opt/top_controller.v diff --git a/src/hardware/top_controller/tb/.gitignore b/SIKE_vOW_hw-sw/hardware/top_controller/tb/.gitignore similarity index 100% rename from src/hardware/top_controller/tb/.gitignore rename to SIKE_vOW_hw-sw/hardware/top_controller/tb/.gitignore diff --git a/src/hardware/top_controller/tb/Makefile b/SIKE_vOW_hw-sw/hardware/top_controller/tb/Makefile similarity index 100% rename from src/hardware/top_controller/tb/Makefile rename to SIKE_vOW_hw-sw/hardware/top_controller/tb/Makefile diff --git a/src/hardware/top_controller/tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/top_controller/tb/batch-sim.sh similarity index 100% rename from src/hardware/top_controller/tb/batch-sim.sh rename to SIKE_vOW_hw-sw/hardware/top_controller/tb/batch-sim.sh diff --git a/src/hardware/top_controller/tb/gen_test.sage b/SIKE_vOW_hw-sw/hardware/top_controller/tb/gen_test.sage similarity index 100% rename from src/hardware/top_controller/tb/gen_test.sage rename to SIKE_vOW_hw-sw/hardware/top_controller/tb/gen_test.sage diff --git a/src/hardware/top_controller/tb/top_tb.v b/SIKE_vOW_hw-sw/hardware/top_controller/tb/top_tb.v similarity index 100% rename from src/hardware/top_controller/tb/top_tb.v rename to SIKE_vOW_hw-sw/hardware/top_controller/tb/top_tb.v diff --git a/src/hardware/util/clog2.v b/SIKE_vOW_hw-sw/hardware/util/clog2.v similarity index 100% rename from src/hardware/util/clog2.v rename to 
SIKE_vOW_hw-sw/hardware/util/clog2.v diff --git a/src/hardware/util/delay.v b/SIKE_vOW_hw-sw/hardware/util/delay.v similarity index 100% rename from src/hardware/util/delay.v rename to SIKE_vOW_hw-sw/hardware/util/delay.v diff --git a/src/hardware/util/single_port_mem.v b/SIKE_vOW_hw-sw/hardware/util/single_port_mem.v similarity index 100% rename from src/hardware/util/single_port_mem.v rename to SIKE_vOW_hw-sw/hardware/util/single_port_mem.v diff --git a/src/murax/README b/SIKE_vOW_hw-sw/murax/README similarity index 100% rename from src/murax/README rename to SIKE_vOW_hw-sw/murax/README diff --git a/src/murax/software/README.md b/SIKE_vOW_hw-sw/murax/software/README.md similarity index 100% rename from src/murax/software/README.md rename to SIKE_vOW_hw-sw/murax/software/README.md diff --git a/src/murax/software/VexRiscvSocSoftware/README.md b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/README.md similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/README.md rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/README.md diff --git a/src/murax/software/VexRiscvSocSoftware/libs/gpio.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/gpio.h similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/libs/gpio.h rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/gpio.h diff --git a/src/murax/software/VexRiscvSocSoftware/libs/hex.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/hex.h similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/libs/hex.h rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/hex.h diff --git a/src/murax/software/VexRiscvSocSoftware/libs/interrupt.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/interrupt.h similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/libs/interrupt.h rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/interrupt.h diff --git 
a/src/murax/software/VexRiscvSocSoftware/libs/prescaler.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/prescaler.h similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/libs/prescaler.h rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/prescaler.h diff --git a/src/murax/software/VexRiscvSocSoftware/libs/timer.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/timer.h similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/libs/timer.h rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/timer.h diff --git a/src/murax/software/VexRiscvSocSoftware/libs/uart.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/uart.h similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/libs/uart.h rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/uart.h diff --git a/src/murax/software/VexRiscvSocSoftware/libs/vga.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/vga.h similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/libs/vga.h rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/vga.h diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/hex/cmd.gbd b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/cmd.gbd similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/projects/murax/hex/cmd.gbd rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/cmd.gbd diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/hex/makefile b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/makefile similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/projects/murax/hex/makefile rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/makefile diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/crt.S 
b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/crt.S similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/crt.S rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/crt.S diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/main.c b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/main.c similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/main.c rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/main.c diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/libs/linker.ld b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/linker.ld similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/projects/murax/libs/linker.ld rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/linker.ld diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/libs/makefile b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/makefile similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/projects/murax/libs/makefile rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/makefile diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax.h similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax.h rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax.h diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax_hex.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax_hex.h similarity index 100% rename from src/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax_hex.h rename to 
SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax_hex.h