From 58841a0f02fee18ad989787c7b4a7d8f931e8486 Mon Sep 17 00:00:00 2001
From: Patrick Longa <plonga@microsoft.com>
Date: Tue, 25 May 2021 21:46:27 -0700
Subject: [PATCH] Add SIKE software

---
 Python_script/script_security45nm.py          | 1147 ++++++
 README.md                                     |   15 +-
 SIKE_sw/Makefile                              |  263 ++
 SIKE_sw/README.md                             |   57 +
 SIKE_sw/Visual Studio/SIDH/SIDH.sln           |  293 ++
 SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj   |  590 +++
 .../SIDH/SIDHp377.vcxproj.filters             |   81 +
 SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj   |  587 +++
 .../SIDH/SIDHp434.vcxproj.filters             |   81 +
 SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj   |  598 ++++
 .../SIDH/SIDHp503.vcxproj.filters             |   81 +
 SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj   |  584 +++
 .../SIDH/SIDHp546.vcxproj.filters             |   81 +
 SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj   |  577 +++
 .../SIDH/SIDHp610.vcxproj.filters             |   81 +
 SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj   |  572 +++
 .../SIDH/SIDHp697.vcxproj.filters             |   81 +
 SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj   |  598 ++++
 .../SIDH/SIDHp751.vcxproj.filters             |   81 +
 .../arith_tests/arith_tests-P377.vcxproj      |  432 +++
 .../arith_tests-P377.vcxproj.filters          |   30 +
 .../arith_tests/arith_tests-P434.vcxproj      |  432 +++
 .../arith_tests-P434.vcxproj.filters          |   30 +
 .../arith_tests/arith_tests-P503.vcxproj      |  432 +++
 .../arith_tests-P503.vcxproj.filters          |   30 +
 .../arith_tests/arith_tests-P546.vcxproj      |  432 +++
 .../arith_tests-P546.vcxproj.filters          |   30 +
 .../arith_tests/arith_tests-P610.vcxproj      |  432 +++
 .../arith_tests-P610.vcxproj.filters          |   30 +
 .../arith_tests/arith_tests-P697.vcxproj      |  432 +++
 .../arith_tests-P697.vcxproj.filters          |   30 +
 .../arith_tests/arith_tests-P751.vcxproj      |  432 +++
 .../arith_tests-P751.vcxproj.filters          |   30 +
 .../kem_tests/test-SIKEp377.vcxproj           |  487 +++
 .../kem_tests/test-SIKEp377.vcxproj.filters   |   33 +
 .../kem_tests/test-SIKEp434.vcxproj           |  483 +++
 .../kem_tests/test-SIKEp434.vcxproj.filters   |   33 +
 .../kem_tests/test-SIKEp503.vcxproj           |  483 +++
 .../kem_tests/test-SIKEp503.vcxproj.filters   |   33 +
 .../kem_tests/test-SIKEp546.vcxproj           |  483 +++
 .../kem_tests/test-SIKEp546.vcxproj.filters   |   33 +
 .../kem_tests/test-SIKEp610.vcxproj           |  487 +++
 .../kem_tests/test-SIKEp610.vcxproj.filters   |   33 +
 .../kem_tests/test-SIKEp697.vcxproj           |  487 +++
 .../kem_tests/test-SIKEp697.vcxproj.filters   |   33 +
 .../kem_tests/test-SIKEp751.vcxproj           |  487 +++
 .../kem_tests/test-SIKEp751.vcxproj.filters   |   33 +
 SIKE_sw/src/P377/AMD64/fp_x64.c               |  439 +++
 SIKE_sw/src/P377/AMD64/fp_x64_asm.S           |  747 ++++
 SIKE_sw/src/P377/P377.c                       |  114 +
 SIKE_sw/src/P377/P377_api.h                   |  112 +
 SIKE_sw/src/P377/P377_internal.h              |  165 +
 SIKE_sw/src/P377/generic/fp_generic.c         |  259 ++
 SIKE_sw/src/P434/AMD64/fp_x64.c               |  491 +++
 SIKE_sw/src/P434/AMD64/fp_x64_asm.S           | 1024 ++++++
 SIKE_sw/src/P434/P434.c                       |  133 +
 SIKE_sw/src/P434/P434_api.h                   |  112 +
 SIKE_sw/src/P434/P434_internal.h              |  175 +
 SIKE_sw/src/P434/generic/fp_generic.c         |  259 ++
 SIKE_sw/src/P503/AMD64/fp_x64.c               |  572 +++
 SIKE_sw/src/P503/AMD64/fp_x64_asm.S           | 1824 ++++++++++
 SIKE_sw/src/P503/P503.c                       |  138 +
 SIKE_sw/src/P503/P503_api.h                   |  112 +
 SIKE_sw/src/P503/P503_internal.h              |  175 +
 SIKE_sw/src/P503/generic/fp_generic.c         |  259 ++
 SIKE_sw/src/P546/AMD64/fp_x64.c               |  634 ++++
 SIKE_sw/src/P546/AMD64/fp_x64_asm.S           | 1353 +++++++
 SIKE_sw/src/P546/P546.c                       |  135 +
 SIKE_sw/src/P546/P546_api.h                   |  112 +
 SIKE_sw/src/P546/P546_internal.h              |  175 +
 SIKE_sw/src/P546/generic/fp_generic.c         |  259 ++
 SIKE_sw/src/P610/AMD64/fp_x64.c               |  729 ++++
 SIKE_sw/src/P610/AMD64/fp_x64_asm.S           | 1314 +++++++
 SIKE_sw/src/P610/P610.c                       |  140 +
 SIKE_sw/src/P610/P610_api.h                   |  112 +
 SIKE_sw/src/P610/P610_internal.h              |  174 +
 SIKE_sw/src/P610/generic/fp_generic.c         |  259 ++
 SIKE_sw/src/P697/AMD64/fp_x64.c               |  802 +++++
 SIKE_sw/src/P697/AMD64/fp_x64_asm.S           | 1681 +++++++++
 SIKE_sw/src/P697/P697.c                       |  139 +
 SIKE_sw/src/P697/P697_api.h                   |  112 +
 SIKE_sw/src/P697/P697_internal.h              |  175 +
 SIKE_sw/src/P697/generic/fp_generic.c         |  259 ++
 SIKE_sw/src/P751/AMD64/fp_x64.c               |  910 +++++
 SIKE_sw/src/P751/AMD64/fp_x64_asm.S           | 3147 +++++++++++++++++
 SIKE_sw/src/P751/P751.c                       |  142 +
 SIKE_sw/src/P751/P751_api.h                   |  112 +
 SIKE_sw/src/P751/P751_internal.h              |  175 +
 SIKE_sw/src/P751/generic/fp_generic.c         |  259 ++
 SIKE_sw/src/config.h                          |  271 ++
 SIKE_sw/src/ec_isogeny.c                      |  416 +++
 SIKE_sw/src/fpx.c                             | 1103 ++++++
 SIKE_sw/src/internal.h                        |  116 +
 SIKE_sw/src/random/random.c                   |   61 +
 SIKE_sw/src/random/random.h                   |    9 +
 SIKE_sw/src/sha3/fips202.c                    |  573 +++
 SIKE_sw/src/sha3/fips202.h                    |   27 +
 SIKE_sw/src/sidh.c                            |  263 ++
 SIKE_sw/src/sike.c                            |   98 +
 SIKE_sw/tests/arith_tests-p377.c              |  616 ++++
 SIKE_sw/tests/arith_tests-p434.c              |  616 ++++
 SIKE_sw/tests/arith_tests-p503.c              |  616 ++++
 SIKE_sw/tests/arith_tests-p546.c              |  616 ++++
 SIKE_sw/tests/arith_tests-p610.c              |  617 ++++
 SIKE_sw/tests/arith_tests-p697.c              |  616 ++++
 SIKE_sw/tests/arith_tests-p751.c              |  617 ++++
 SIKE_sw/tests/test_SIKEp377.c                 |   17 +
 SIKE_sw/tests/test_SIKEp434.c                 |   19 +
 SIKE_sw/tests/test_SIKEp503.c                 |   19 +
 SIKE_sw/tests/test_SIKEp546.c                 |   17 +
 SIKE_sw/tests/test_SIKEp610.c                 |   19 +
 SIKE_sw/tests/test_SIKEp697.c                 |   17 +
 SIKE_sw/tests/test_SIKEp751.c                 |   19 +
 SIKE_sw/tests/test_extras.c                   |  283 ++
 SIKE_sw/tests/test_extras.h                   |   76 +
 SIKE_sw/tests/test_sike.c                     |  132 +
 .../Montgomery_multiplier_add.v               |    0
 .../Montgomery_multiplier_sub.v               |    0
 .../Montgomery_multiplier_tb/.gitignore       |    0
 .../Montgomery_multiplier_tb/Makefile         |    0
 .../Montgomery_multiplier_tb.v                |    0
 .../gen_test_add.sage                         |    0
 .../gen_test_sub.sage                         |    0
 .../README                                    |    0
 .../Vivado/.gitignore                         |    0
 .../Vivado/Makefile                           |    0
 .../Vivado/board.tcl                          |    0
 .../Vivado/board.xdc                          |    0
 .../Vivado/program.tcl                        |    0
 .../Vivado/proj.src                           |    0
 .../multiplier.v                              |    0
 .../step_add.v                                |    0
 .../step_sub.v                                |    0
 {src => SIKE_vOW_hw-sw}/hardware/README       |    0
 .../hardware/controller_eval_4_isog/README    |    0
 .../controller_eval_4_isog/Vivado/.gitignore  |    0
 .../controller_eval_4_isog/Vivado/Makefile    |    0
 .../Vivado/batch-synth.sh                     |    0
 .../controller_eval_4_isog/Vivado/board.tcl   |    0
 .../controller_eval_4_isog/Vivado/board.xdc   |    0
 .../controller_eval_4_isog/Vivado/params.mk   |    0
 .../controller_eval_4_isog/Vivado/program.tcl |    0
 .../controller_eval_4_isog/Vivado/proj.src    |    0
 .../controller_eval_4_isog/controller.v       |    0
 .../controller_tb/.gitignore                  |    0
 .../controller_tb/Makefile                    |    0
 .../controller_tb/batch-sim.sh                |    0
 .../controller_tb/controller_tb.v             |    0
 .../controller_tb/gen_test.sage               |    0
 .../controller_eval_4_isog/eval_4_isog_FSM.v  |    0
 .../single_to_double_memory_wrapper.v         |    0
 .../hardware/controller_get_4_isog/README     |    0
 .../controller_get_4_isog/Vivado/.gitignore   |    0
 .../controller_get_4_isog/Vivado/Makefile     |    0
 .../Vivado/batch-synth.sh                     |    0
 .../controller_get_4_isog/Vivado/board.tcl    |    0
 .../controller_get_4_isog/Vivado/board.xdc    |    0
 .../controller_get_4_isog/Vivado/params.mk    |    0
 .../controller_get_4_isog/Vivado/program.tcl  |    0
 .../controller_get_4_isog/Vivado/proj.src     |    0
 .../controller_get_4_isog/controller.v        |    0
 .../controller_tb/.gitignore                  |    0
 .../controller_tb/Makefile                    |    0
 .../controller_tb/batch-sim.sh                |    0
 .../controller_tb/controller_tb.v             |    0
 .../controller_tb/gen_test.sage               |    0
 .../double_to_single_memory_wrapper.v         |    0
 .../controller_get_4_isog/get_4_isog_FSM.v    |    0
 .../hardware/controller_xADD/README           |    0
 .../hardware/controller_xADD/controller.v     |    0
 .../controller_xADD/controller_tb/Makefile    |    0
 .../controller_tb/batch-sim.sh                |    0
 .../controller_tb/controller_tb.v             |    0
 .../controller_tb/gen_test.sage               |    0
 .../double_to_single_memory_wrapper.v         |    0
 .../hardware/controller_xADD/xADD_FSM.v       |    0
 .../hardware/controller_xDBL/README           |    0
 .../controller_xDBL/Vivado/.gitignore         |    0
 .../hardware/controller_xDBL/Vivado/Makefile  |    0
 .../controller_xDBL/Vivado/batch-synth.sh     |    0
 .../hardware/controller_xDBL/Vivado/board.tcl |    0
 .../hardware/controller_xDBL/Vivado/board.xdc |    0
 .../hardware/controller_xDBL/Vivado/params.mk |    0
 .../controller_xDBL/Vivado/program.tcl        |    0
 .../hardware/controller_xDBL/Vivado/proj.src  |    0
 .../hardware/controller_xDBL/controller.v     |    0
 .../controller_xDBL/controller_tb/.gitignore  |    0
 .../controller_xDBL/controller_tb/Makefile    |    0
 .../controller_tb/batch-sim.sh                |    0
 .../controller_tb/controller_tb.v             |    0
 .../controller_tb/gen_test.sage               |    0
 .../double_to_single_memory_wrapper.v         |    0
 .../hardware/controller_xDBL/xDBL_FSM.v       |    0
 .../README                                    |    0
 .../Vivado/.gitignore                         |    0
 .../Vivado/Makefile                           |    0
 .../Vivado/batch-synth.sh                     |    0
 .../Vivado/board.tcl                          |    0
 .../Vivado/board.xdc                          |    0
 .../Vivado/program.tcl                        |    0
 .../Vivado/proj.src                           |    0
 .../controller.v                              |    0
 .../controller_tb/.gitignore                  |    0
 .../controller_tb/Makefile                    |    0
 .../controller_tb/batch-sim.sh                |    0
 .../controller_tb/controller_tb.v             |    0
 .../single_to_double_memory_wrapper.v         |    0
 .../fp2_mont_mul_one_cycle_pipeline/README    |    0
 .../Vivado/.gitignore                         |    0
 .../Vivado/Makefile                           |    0
 .../Vivado/batch-synth.sh                     |    0
 .../Vivado/board.tcl                          |    0
 .../Vivado/board.xdc                          |    0
 .../Vivado/params.mk                          |    0
 .../Vivado/program.tcl                        |    0
 .../Vivado/proj.src                           |    0
 .../fp2_mont_mul.v                            |    0
 .../fp2_mont_mul_tb/.gitignore                |    0
 .../fp2_mont_mul_tb/Makefile                  |    0
 .../fp2_mont_mul_tb/batch-sim.sh              |    0
 .../fp2_mont_mul_tb/fp2_mont_mul_tb.v         |    0
 .../fp2_mont_mul_tb/gen_input.sage            |    0
 .../fp2_sub_add_correction/.gitignore         |    0
 .../hardware/fp2_sub_add_correction/README    |    0
 .../fp2_sub_add_correction/Vivado/.gitignore  |    0
 .../fp2_sub_add_correction/Vivado/Makefile    |    0
 .../Vivado/batch-synth.sh                     |    0
 .../fp2_sub_add_correction/Vivado/board.tcl   |    0
 .../fp2_sub_add_correction/Vivado/board.xdc   |    0
 .../fp2_sub_add_correction/Vivado/params.mk   |    0
 .../fp2_sub_add_correction/Vivado/program.tcl |    0
 .../fp2_sub_add_correction/Vivado/proj.src    |    0
 .../fp2_sub_add_correction.v                  |    0
 .../fp2_sub_add_correction_tb/.gitignore      |    0
 .../fp2_sub_add_correction_tb/Makefile        |    0
 .../fp2_sub_add_correction_tb/batch-sim.sh    |    0
 .../fp2_sub_add_correction_tb.v               |    0
 .../fp2_sub_add_correction_tb/gen_test.sage   |    0
 .../hardware/fp2_sub_add_correction/gen.mk    |    0
 .../gen_serial_comparator.py                  |    0
 .../hardware/fp_sub_and_add/README            |    0
 .../fp_sub_and_add/fp_add_and_compare.v       |    0
 .../hardware/fp_sub_and_add/fp_adder.v        |    0
 .../fp_sub_and_add/fp_sub_and_compare.v       |    0
 .../hardware/fp_sub_and_add/gen.mk            |    0
 .../fp_sub_and_add/gen_serial_comparator.py   |    0
 .../hardware/fp_sub_and_add/unit_adder.v      |    0
 .../hardware/top_controller/README            |    0
 .../hardware/top_controller/Vivado/.gitignore |    0
 .../hardware/top_controller/Vivado/Makefile   |    0
 .../top_controller/Vivado/batch-synth.sh      |    0
 .../hardware/top_controller/Vivado/board.tcl  |    0
 .../hardware/top_controller/Vivado/board.xdc  |    0
 .../hardware/top_controller/Vivado/gen.mk     |    0
 .../top_controller/Vivado/gen_p_mem.sage      |    0
 .../top_controller/Vivado/program.tcl         |    0
 .../hardware/top_controller/Vivado/proj.src   |    0
 .../top_controller/gen_mem_wrapper.py         |    0
 .../top_controller/opt/top_controller.v       |    0
 .../hardware/top_controller/tb/.gitignore     |    0
 .../hardware/top_controller/tb/Makefile       |    0
 .../hardware/top_controller/tb/batch-sim.sh   |    0
 .../hardware/top_controller/tb/gen_test.sage  |    0
 .../hardware/top_controller/tb/top_tb.v       |    0
 {src => SIKE_vOW_hw-sw}/hardware/util/clog2.v |    0
 {src => SIKE_vOW_hw-sw}/hardware/util/delay.v |    0
 .../hardware/util/single_port_mem.v           |    0
 {src => SIKE_vOW_hw-sw}/murax/README          |    0
 .../murax/software/README.md                  |    0
 .../software/VexRiscvSocSoftware/README.md    |    0
 .../software/VexRiscvSocSoftware/libs/gpio.h  |    0
 .../software/VexRiscvSocSoftware/libs/hex.h   |    0
 .../VexRiscvSocSoftware/libs/interrupt.h      |    0
 .../VexRiscvSocSoftware/libs/prescaler.h      |    0
 .../software/VexRiscvSocSoftware/libs/timer.h |    0
 .../software/VexRiscvSocSoftware/libs/uart.h  |    0
 .../software/VexRiscvSocSoftware/libs/vga.h   |    0
 .../projects/murax/hex/cmd.gbd                |    0
 .../projects/murax/hex/makefile               |    0
 .../projects/murax/hex/src/crt.S              |    0
 .../projects/murax/hex/src/main.c             |    0
 .../projects/murax/libs/linker.ld             |    0
 .../projects/murax/libs/makefile              |    0
 .../projects/murax/libs/murax.h               |    0
 .../projects/murax/libs/murax_hex.h           |    0
 285 files changed, 41593 insertions(+), 5 deletions(-)
 create mode 100644 Python_script/script_security45nm.py
 create mode 100644 SIKE_sw/Makefile
 create mode 100644 SIKE_sw/README.md
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDH.sln
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj.filters
 create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj
 create mode 100644 SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj.filters
 create mode 100644 SIKE_sw/src/P377/AMD64/fp_x64.c
 create mode 100644 SIKE_sw/src/P377/AMD64/fp_x64_asm.S
 create mode 100644 SIKE_sw/src/P377/P377.c
 create mode 100644 SIKE_sw/src/P377/P377_api.h
 create mode 100644 SIKE_sw/src/P377/P377_internal.h
 create mode 100644 SIKE_sw/src/P377/generic/fp_generic.c
 create mode 100644 SIKE_sw/src/P434/AMD64/fp_x64.c
 create mode 100644 SIKE_sw/src/P434/AMD64/fp_x64_asm.S
 create mode 100644 SIKE_sw/src/P434/P434.c
 create mode 100644 SIKE_sw/src/P434/P434_api.h
 create mode 100644 SIKE_sw/src/P434/P434_internal.h
 create mode 100644 SIKE_sw/src/P434/generic/fp_generic.c
 create mode 100644 SIKE_sw/src/P503/AMD64/fp_x64.c
 create mode 100644 SIKE_sw/src/P503/AMD64/fp_x64_asm.S
 create mode 100644 SIKE_sw/src/P503/P503.c
 create mode 100644 SIKE_sw/src/P503/P503_api.h
 create mode 100644 SIKE_sw/src/P503/P503_internal.h
 create mode 100644 SIKE_sw/src/P503/generic/fp_generic.c
 create mode 100644 SIKE_sw/src/P546/AMD64/fp_x64.c
 create mode 100644 SIKE_sw/src/P546/AMD64/fp_x64_asm.S
 create mode 100644 SIKE_sw/src/P546/P546.c
 create mode 100644 SIKE_sw/src/P546/P546_api.h
 create mode 100644 SIKE_sw/src/P546/P546_internal.h
 create mode 100644 SIKE_sw/src/P546/generic/fp_generic.c
 create mode 100644 SIKE_sw/src/P610/AMD64/fp_x64.c
 create mode 100644 SIKE_sw/src/P610/AMD64/fp_x64_asm.S
 create mode 100644 SIKE_sw/src/P610/P610.c
 create mode 100644 SIKE_sw/src/P610/P610_api.h
 create mode 100644 SIKE_sw/src/P610/P610_internal.h
 create mode 100644 SIKE_sw/src/P610/generic/fp_generic.c
 create mode 100644 SIKE_sw/src/P697/AMD64/fp_x64.c
 create mode 100644 SIKE_sw/src/P697/AMD64/fp_x64_asm.S
 create mode 100644 SIKE_sw/src/P697/P697.c
 create mode 100644 SIKE_sw/src/P697/P697_api.h
 create mode 100644 SIKE_sw/src/P697/P697_internal.h
 create mode 100644 SIKE_sw/src/P697/generic/fp_generic.c
 create mode 100644 SIKE_sw/src/P751/AMD64/fp_x64.c
 create mode 100644 SIKE_sw/src/P751/AMD64/fp_x64_asm.S
 create mode 100644 SIKE_sw/src/P751/P751.c
 create mode 100644 SIKE_sw/src/P751/P751_api.h
 create mode 100644 SIKE_sw/src/P751/P751_internal.h
 create mode 100644 SIKE_sw/src/P751/generic/fp_generic.c
 create mode 100644 SIKE_sw/src/config.h
 create mode 100644 SIKE_sw/src/ec_isogeny.c
 create mode 100644 SIKE_sw/src/fpx.c
 create mode 100644 SIKE_sw/src/internal.h
 create mode 100644 SIKE_sw/src/random/random.c
 create mode 100644 SIKE_sw/src/random/random.h
 create mode 100644 SIKE_sw/src/sha3/fips202.c
 create mode 100644 SIKE_sw/src/sha3/fips202.h
 create mode 100644 SIKE_sw/src/sidh.c
 create mode 100644 SIKE_sw/src/sike.c
 create mode 100644 SIKE_sw/tests/arith_tests-p377.c
 create mode 100644 SIKE_sw/tests/arith_tests-p434.c
 create mode 100644 SIKE_sw/tests/arith_tests-p503.c
 create mode 100644 SIKE_sw/tests/arith_tests-p546.c
 create mode 100644 SIKE_sw/tests/arith_tests-p610.c
 create mode 100644 SIKE_sw/tests/arith_tests-p697.c
 create mode 100644 SIKE_sw/tests/arith_tests-p751.c
 create mode 100644 SIKE_sw/tests/test_SIKEp377.c
 create mode 100644 SIKE_sw/tests/test_SIKEp434.c
 create mode 100644 SIKE_sw/tests/test_SIKEp503.c
 create mode 100644 SIKE_sw/tests/test_SIKEp546.c
 create mode 100644 SIKE_sw/tests/test_SIKEp610.c
 create mode 100644 SIKE_sw/tests/test_SIKEp697.c
 create mode 100644 SIKE_sw/tests/test_SIKEp751.c
 create mode 100644 SIKE_sw/tests/test_extras.c
 create mode 100644 SIKE_sw/tests/test_extras.h
 create mode 100644 SIKE_sw/tests/test_sike.c
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_add.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_sub.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Montgomery_multiplier_tb.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_add.sage (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_sub.sage (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/README (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.xdc (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/program.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/proj.src (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/multiplier.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/step_add.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/Montgomery_multiplier_two_cycle_pipeline/step_sub.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/README (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/README (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/batch-synth.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/board.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/board.xdc (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/params.mk (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/program.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/Vivado/proj.src (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/controller.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/controller_tb/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/controller_tb/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/controller_tb/batch-sim.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/controller_tb/controller_tb.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/controller_tb/gen_test.sage (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/eval_4_isog_FSM.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_eval_4_isog/single_to_double_memory_wrapper.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/README (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/batch-synth.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/board.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/board.xdc (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/params.mk (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/program.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/Vivado/proj.src (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/controller.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/controller_tb/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/controller_tb/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/controller_tb/batch-sim.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/controller_tb/controller_tb.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/controller_tb/gen_test.sage (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/double_to_single_memory_wrapper.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_get_4_isog/get_4_isog_FSM.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/README (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/controller.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/controller_tb/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/controller_tb/batch-sim.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/controller_tb/controller_tb.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/controller_tb/gen_test.sage (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/double_to_single_memory_wrapper.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xADD/xADD_FSM.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/README (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/batch-synth.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/board.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/board.xdc (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/params.mk (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/program.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/Vivado/proj.src (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/controller.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/controller_tb/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/controller_tb/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/controller_tb/batch-sim.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/controller_tb/controller_tb.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/controller_tb/gen_test.sage (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/double_to_single_memory_wrapper.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL/xDBL_FSM.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/README (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/batch-synth.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.xdc (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/program.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/proj.src (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/batch-sim.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/controller_tb.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/single_to_double_memory_wrapper.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/README (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/batch-synth.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.xdc (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/params.mk (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/program.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/proj.src (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/batch-sim.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/fp2_mont_mul_tb.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/gen_input.sage (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/README (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/batch-synth.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/board.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/board.xdc (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/params.mk (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/program.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/Vivado/proj.src (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/fp2_sub_add_correction.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/batch-sim.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/fp2_sub_add_correction_tb.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/gen_test.sage (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/gen.mk (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp2_sub_add_correction/gen_serial_comparator.py (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp_sub_and_add/README (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp_sub_and_add/fp_add_and_compare.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp_sub_and_add/fp_adder.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp_sub_and_add/fp_sub_and_compare.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp_sub_and_add/gen.mk (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp_sub_and_add/gen_serial_comparator.py (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/fp_sub_and_add/unit_adder.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/README (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/batch-synth.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/board.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/board.xdc (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/gen.mk (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/gen_p_mem.sage (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/program.tcl (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/Vivado/proj.src (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/gen_mem_wrapper.py (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/opt/top_controller.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/tb/.gitignore (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/tb/Makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/tb/batch-sim.sh (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/tb/gen_test.sage (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/top_controller/tb/top_tb.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/util/clog2.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/util/delay.v (100%)
 rename {src => SIKE_vOW_hw-sw}/hardware/util/single_port_mem.v (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/README (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/README.md (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/README.md (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/libs/gpio.h (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/libs/hex.h (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/libs/interrupt.h (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/libs/prescaler.h (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/libs/timer.h (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/libs/uart.h (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/libs/vga.h (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/hex/cmd.gbd (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/hex/makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/crt.S (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/main.c (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/libs/linker.ld (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/libs/makefile (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax.h (100%)
 rename {src => SIKE_vOW_hw-sw}/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax_hex.h (100%)

diff --git a/Python_script/script_security45nm.py b/Python_script/script_security45nm.py
new file mode 100644
index 0000000..75dd20a
--- /dev/null
+++ b/Python_script/script_security45nm.py
@@ -0,0 +1,1147 @@
+#####################################################################################################################
+# Python3 script to calculate security estimates using a budget-based cost model on ASICs 
+# Targeted primitives: SIKE, AES and SHA-3
+# Technology used by the hardware implementations used in the model: NanGate 45nm open-cell library
+#
+# The script produces all the figures and security estimates included in the paper:
+#      "The Cost to Break SIKE: A Comparative Hardware-Based Analysis with AES and SHA-3",
+#      Patrick Longa, Wen Wang, Jakub Szefer. CRYPTO 2021
+#      https://eprint.iacr.org/2020/1457
+#####################################################################################################################
+
+import math
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.ticker import MultipleLocator
+
+# Assumptions and constants
+NumberTransPerGate = 4           # Number of transistors per 2-NAND CMOS Gate Equivalent (GE)
+SecondsPerYear = 3600*24*365     # Seconds in a year
+MoneyOptions = [1e6, 10e6, 100e6, 1000e6, 10000e6, 100000e6, 1000000e6]  # One million, ten million, hundred million, one billion, ten billion, hundred billion, one trillion (in US$)
+titlefigure = "on"
+dividepricebyfactor = "on"       # Reduction factor applied to the transistor and memory release prices. 
+reductionpricefactor = 7.40      # This factor is obtained using the estimated transistor cost at production for year 2020 (reference: Khan and Mann (2020)) 
+                                 # In contrast to release prices, the adjusted prices are expected to match more closely production costs in bulk.
+
+############################################################################################
+#### Historical prices of memory and transistors/gates (see paper for references)
+
+# Hard drive disk (HDD) cost US$, years 2000-2020
+CostHDD = [125.00, 259.00, 146.00, 89.99, 97.50, 130.00, 69.99, 99.99, 99.99, 69.99, 89.99, 54.99, 54.99, 54.99, 104.99, 84.99, 221.63, 99.99, 93.49, 149.99, 129.99]
+# Hard drive disk (HDD) bytes, years 2000-2020
+BytesHDD = [3.07e10, 1e11, 1.2e11, 1.2e11, 1.6e11, 3.2e11, 3.2e11, 5.0e11, 1.0e12, 1.0e12, 2.0e12, 1.5e12, 1.5e12, 1.5e12, 3.0e12, 3.0e12, 8.0e12, 4.0e12, 4.0e12, 8.0e12, 8.0e12]
+
+# Dynamic random-access memory (DRAM) cost US$, years 2000-2020
+CostDRAM = [89.00, 18.89, 34.19, 39.00, 39.00, 39.00, 148.99, 49.95, 39.99, 39.99, 39.99, 41.99, 29.99, 29.99, 29.99, 29.99, 44.99, 44.99, 44.99, 44.99, 44.99]
+# Dynamic random-access memory (DRAM) bytes, years 2000-2020
+BytesDRAM = [1.31e8, 1.31e8, 2.62e8, 5.24e8, 5.24e8, 5.24e8, 20.97e8, 20.97e8, 41.94e8, 41.94e8, 41.94e8, 83.89e8, 83.89e8, 83.89e8, 83.89e8, 83.89e8, 167.77e8, 167.77e8, 167.77e8, 167.77e8, 167.77e8]
+
+# Solid state drive (SSD) cost US$, years 2000-2020
+CostSSD = [None, None, None, None, None, None, None, None, None, None, None, None, None, 159.99, 179.99, 59.99, 194.99, 194.99, 49.99, 75.99, 75.99]
+# Solid state drive (SSD) bytes, years 2000-2020
+BytesSSD = [None, None, None, None, None, None, None, None, None, None, None, None, None, 2.56e11, 4.80e11, 2.40e11, 9.60e11, 9.60e11, 4.80e11, 9.60e11, 9.60e11]
+
+# MPU cost US$ (Intel), years 2000-2020
+CostMPU_Intel = [112.0, 64.0, 33.0, 33.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 70.0, 42.0, 117.0, 122.0, 42.0, 42.0, None, None, None, None, None ]
+# MPU cost US$ (AMD), years 2000-2020
+CostMPU_AMD = [None, None, None, None, None, None, None, None, None, None, None, 79.0, 71.0, 71.0, 101.0, 79.0, 58.0, 51.0, 51.0, 51.0, 60.0]
+# Intel and AMD MPU costs US$, years 2000-2020 (corresponding to the lowest cost per transistor per year)
+CostMPU = [112.0, 64.0, 33.0, 33.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 70.0, 79.0, 71.0, 71.0, 42.0, 42.0, 58.0, 51.0, 51.0, 51.0, 60.0]
+
+# MPU transistors (Intel), years 2000-2020
+TransMPU_Intel = [28.1e6, 28.1e6, 55e6, 55e6, 125e6, 125e6, 125e6, 125e6, 125e6, 125e6, 382e6, 624e6, 1400e6, 1400e6, 1400e6, 1400e6, None, None, None, None, None]
+# MPU transistors (AMD), years 2000-2020
+TransMPU_AMD = [None, None, None, None, None, None, None, None, None, None, None, 1178e6, 1303e6, 1303e6, 2410e6, 2410e6, 3100e6, 3100e6, 3100e6, 3100e6, 4940e6]
+# Intel and AMD MPU transistors, years 2000-2020 (corresponding to lowest cost per transistor per year) 
+TransMPU = [28.1e6, 28.1e6, 55e6, 55e6, 125e6, 125e6, 125e6, 125e6, 125e6, 125e6, 382e6, 1178e6, 1303e6, 1303e6, 1400e6, 1400e6, 3100e6, 3100e6, 3100e6, 3100e6, 4940e6]
+
+DollarsPerByte_HDD = []
+BytesPerDollar_HDD = []
+DollarsPerByte_DRAM = []
+BytesPerDollar_DRAM = []
+DollarsPerByte_SSD = []
+BytesPerDollar_SSD = []
+DollarsPerTrans_MPU = []
+TransPerDollar_MPU = []
+DollarsPerGate_MPU = []
+GatesPerDollar_MPU = []
+BytesPerGate = []
+for i in range(0,21):   # One entry per year, index 0 = 2000 ... index 20 = 2020
+    DollarsPerByte_HDD.append(CostHDD[i]/BytesHDD[i])
+    BytesPerDollar_HDD.append(1/DollarsPerByte_HDD[i])
+    DollarsPerByte_DRAM.append(CostDRAM[i]/BytesDRAM[i])
+    BytesPerDollar_DRAM.append(1/DollarsPerByte_DRAM[i])
+    if CostSSD[i] is None:   # No SSD data point for this year: keep the placeholder in both SSD tables
+        DollarsPerByte_SSD.append(None)
+        BytesPerDollar_SSD.append(None)
+    else:
+        DollarsPerByte_SSD.append(CostSSD[i]/BytesSSD[i])
+        BytesPerDollar_SSD.append(1/DollarsPerByte_SSD[i])
+    DollarsPerTrans_MPU.append(CostMPU[i]/TransMPU[i])
+    TransPerDollar_MPU.append(TransMPU[i]/CostMPU[i])
+    DollarsPerGate_MPU.append((CostMPU[i]*NumberTransPerGate)/TransMPU[i])
+    GatesPerDollar_MPU.append(1/DollarsPerGate_MPU[i])
+    if dividepricebyfactor == 'on':   # Only the *PerDollar tables are adjusted; the DollarsPer* tables keep release prices
+        BytesPerDollar_HDD[i] *= reductionpricefactor
+        GatesPerDollar_MPU[i] *= reductionpricefactor
+        BytesPerDollar_DRAM[i] *= reductionpricefactor
+        if BytesPerDollar_SSD[i] is not None: BytesPerDollar_SSD[i] *= reductionpricefactor
+    BytesPerGate.append(BytesPerDollar_HDD[i]/GatesPerDollar_MPU[i])   # HDD bytes that cost as much as one gate
+        
+# Linley Group report (ITRS 2014) with costs of transistors, years 2002-2014 (every two years) plus 2015
+GatesPerDollar_Linley = [None, None, 2.6e6/NumberTransPerGate, None, 4.4e6/NumberTransPerGate, None, 7.3e6/NumberTransPerGate, None, 11.2e6/NumberTransPerGate, 
+                         None, 16.0e6/NumberTransPerGate, None, 20.0e6/NumberTransPerGate, None, 20.0e6/NumberTransPerGate, 19.0e6/NumberTransPerGate, None, None, None, None, None]
+        
+# ITRS 2007 forecast for costs of transistors, years 2001-2020 (one value per year; no value for 2000)
+GatesPerDollar_ITRS = [None, 1/(9.7e-7*NumberTransPerGate), 1/(6.9e-7*NumberTransPerGate), 1/(4.9e-7*NumberTransPerGate), 1/(3.4e-7*NumberTransPerGate), 
+                         1/(2.44e-7*NumberTransPerGate), 1/(1.72e-7*NumberTransPerGate), 1/(1.22e-7*NumberTransPerGate), 1/(8.6e-8*NumberTransPerGate), 
+                         1/(6.1e-8*NumberTransPerGate), 1/(4.3e-8*NumberTransPerGate), 1/(3.0e-8*NumberTransPerGate), 1/(2.2e-8*NumberTransPerGate), 
+                         1/(1.5e-8*NumberTransPerGate), 1/(1.1e-8*NumberTransPerGate), 1/(7.6e-9*NumberTransPerGate), 1/(5.4e-9*NumberTransPerGate), 
+                         1/(3.8e-9*NumberTransPerGate), 1/(2.7e-9*NumberTransPerGate), 1/(1.9e-9*NumberTransPerGate), 1/(1.3e-9*NumberTransPerGate)]
+    
+print (BytesPerDollar_HDD)
+print (BytesPerDollar_DRAM)
+print (BytesPerDollar_SSD)
+print (TransPerDollar_MPU)
+print (GatesPerDollar_MPU)
+print (BytesPerGate)
+
+#########################################################################################################
+#### "Optimistic" projections for prices of memory and transistors/gates, years 2025-2040, every 5 years.
+#### Based on a constant rate in cost reduction derived from data between years 2015 and 2020 
+#### For memory (HDD): reduction factor = BytesPerDollar_SSD[20] / BytesPerDollar_SSD[15]  (rate taken from the SSD data, applied to the HDD prices)
+#### For gates  (MPU): reduction factor = GatesPerDollar_MPU[20] / GatesPerDollar_MPU[15]
+
+memrate = BytesPerDollar_SSD[20] / BytesPerDollar_SSD[15]
+transrate = GatesPerDollar_MPU[20] / GatesPerDollar_MPU[15]
+
+ProjBytesPerDollar_HDD = [BytesPerDollar_HDD[0], BytesPerDollar_HDD[5], BytesPerDollar_HDD[10], BytesPerDollar_HDD[15], BytesPerDollar_HDD[20],
+                          BytesPerDollar_HDD[20]*memrate, BytesPerDollar_HDD[20]*memrate**2, BytesPerDollar_HDD[20]*memrate**3, BytesPerDollar_HDD[20]*memrate**4]
+
+ProjGatesPerDollar_MPU = [GatesPerDollar_MPU[0], GatesPerDollar_MPU[5], GatesPerDollar_MPU[10], GatesPerDollar_MPU[15], GatesPerDollar_MPU[20],
+                          GatesPerDollar_MPU[20]*transrate, GatesPerDollar_MPU[20]*transrate**2, GatesPerDollar_MPU[20]*transrate**3, GatesPerDollar_MPU[20]*transrate**4]
+                  
+############################################################################################
+#### AES security estimator
+
+def AES_estimator(version, AESgates, AEStime, YearIndex, Money, BytesPerDollar_HDD, GatesPerDollar_MPU):
+    """Budget-based AES key search: returns (status, log2 years, log2 memory bytes, log2 engine units)."""
+    gates_per_dollar = GatesPerDollar_MPU[YearIndex]
+    bytes_per_dollar = BytesPerDollar_HDD[YearIndex]
+    key_space = 2**version                      # Number of AES operations (search space)
+    ops_per_year = SecondsPerYear/AEStime       # Number of AES operations per year per key-search engine
+    io_bytes = version/8                        # Number of bytes to represent one input or output
+    engines = Money*gates_per_dollar/AESgates   # Number of key-search engines the whole budget buys
+    storage = engines*(2*io_bytes + io_bytes)   # Required storage: two input buffers and one output buffer per engine
+
+    # The model only holds while memory costs at most 1/8 of what the engines cost
+    if storage*gates_per_dollar > engines*bytes_per_dollar*AESgates/8:
+        return 'failed', 0, 0, 0
+    years_log2 = math.log2(key_space/(engines * ops_per_year))
+    return 'passed', years_log2, math.log2(storage), math.log2(engines)
+
+############################################################################################
+#### AES128
+
+version = 128               # AES128  
+AESgates = 11587            # Number of GEs occupied by Ueno et al.'s AES128 implementation
+node = 45                   # 45nm
+NISTgates=2**15             # AES gate complexity according to NIST
+
+if version == 128:
+    AEStime = (13.97e-9 * 10/11)     # InvThroughput of AES encryption implementation by Ueno et al. on 45nm
+elif version == 192:
+    AEStime = (17.16e-9 * 12/13)  
+elif version == 256:
+    AEStime = (19.35e-9 * 14/15)  
+
+print ("\nAES" +repr(version))
+print ("-------------------")
+print ("\nSerial key-search (NIST):")
+N=2**version
+p=1              # Processor use
+print ("N * AES in gates: 2 ^", math.log2((N * NISTgates) * p))
+
+print ("\nParallel key-search, 45nm, based on Ueno et al.'s implementation:")
+print ("Ueno et al. 2020 using 45nm: throughput of 13.97 * 10/11 = 12.7nsec/AES128 encryption, area of 11,587 GE")
+print ("N * AES in seconds: 2 ^", math.log2(N * AEStime), "\n")
+
+YearsAES128 = [[None for i in range(21)] for j in range(7)]
+MemBytesAES128 = [[None for i in range(21)] for j in range(7)]
+EngUnitsAES128 = [[None for i in range(21)] for j in range(7)]
+
+for k in range(0, 7):
+    print ("AES128: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+    # One estimate per year; an entry keeps its initial None when the year has no MPU price or the memory check fails
+    for YearIndex in range(0, 21):
+        if CostMPU[YearIndex] is not None:
+            test, LogYears, LogMemBytes, LogEngUnits = AES_estimator(version, AESgates, AEStime, YearIndex, MoneyOptions[k], BytesPerDollar_HDD, GatesPerDollar_MPU)
+            if test == 'passed':
+                YearsAES128[k][YearIndex] = LogYears
+                MemBytesAES128[k][YearIndex] = LogMemBytes
+                EngUnitsAES128[k][YearIndex] = LogEngUnits
+            else:
+                print ("ERROR: memory is not negligible")
+    print ("Log(years):", YearsAES128[k]); print ("Log(memory bytes):", MemBytesAES128[k]); print ("Log(engine units):", EngUnitsAES128[k], "\n")
+
+ProjYearsAES128 = [[None for i in range(9)] for j in range(7)]
+ProjMemBytesAES128 = [[None for i in range(9)] for j in range(7)]
+ProjEngUnitsAES128 = [[None for i in range(9)] for j in range(7)]
+
+for k in range(0, 7):
+    print ("AES128 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+    # YearIndex walks the 9-entry projection tables (5-year steps), so the guard must test those tables, not the yearly CostMPU
+    for YearIndex in range(0, 9):
+        if ProjGatesPerDollar_MPU[YearIndex] is not None:
+            test, LogYears, LogMemBytes, LogEngUnits = AES_estimator(version, AESgates, AEStime, YearIndex, MoneyOptions[k], ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+            if test == 'passed':
+                ProjYearsAES128[k][YearIndex] = LogYears
+                ProjMemBytesAES128[k][YearIndex] = LogMemBytes
+                ProjEngUnitsAES128[k][YearIndex] = LogEngUnits
+            else:
+                print ("ERROR: memory is not negligible")
+    print ("Log(years):", ProjYearsAES128[k]); print ("Log(memory bytes):", ProjMemBytesAES128[k]); print ("Log(engine units):", ProjEngUnitsAES128[k], "\n")
+
+############################################################################################
+#### AES192
+
+version = 192               # AES192 
+AESgates = 13319            # Number of GEs occupied by Ueno et al.'s AES192 implementation
+node = 45                   # 45nm
+NISTgates=2**15             # AES gate complexity according to NIST
+
+if version == 128:
+    AEStime = (13.97e-9 * 10/11)     
+elif version == 192:
+    AEStime = (17.16e-9 * 12/13)     # InvThroughput of AES encryption implementation by Ueno et al. on 45nm
+elif version == 256:
+    AEStime = (19.35e-9 * 14/15)
+
+print ("\nAES" +repr(version))
+print ("-------------------")
+print ("\nSerial key-search (NIST):")
+N=2**version
+p=1              # Processor use
+print ("N * AES in gates: 2 ^", math.log2((N * NISTgates) * p))
+
+print ("\nParallel key-search, 45nm, based on Ueno et al.'s implementation:")
+print ("Ueno et al. 2020 using 45nm: throughput of 17.16 * 12/13 = 15.84nsec/AES192 encryption, area of 13,319 GE")
+print ("N * AES in seconds: 2 ^", math.log2(N * AEStime), "\n")
+
+YearsAES192 = [[None for i in range(21)] for j in range(7)]
+MemBytesAES192 = [[None for i in range(21)] for j in range(7)]
+EngUnitsAES192 = [[None for i in range(21)] for j in range(7)]
+
+for k in range(0, 7):
+    print ("AES192: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+    # One estimate per year; an entry keeps its initial None when the year has no MPU price or the memory check fails
+    for YearIndex in range(0, 21):
+        if CostMPU[YearIndex] is not None:
+            test, LogYears, LogMemBytes, LogEngUnits = AES_estimator(version, AESgates, AEStime, YearIndex, MoneyOptions[k], BytesPerDollar_HDD, GatesPerDollar_MPU)
+            if test == 'passed':
+                YearsAES192[k][YearIndex] = LogYears
+                MemBytesAES192[k][YearIndex] = LogMemBytes
+                EngUnitsAES192[k][YearIndex] = LogEngUnits
+            else:
+                print ("ERROR: memory is not negligible")
+    print ("Log(years):", YearsAES192[k]); print ("Log(memory bytes):", MemBytesAES192[k]); print ("Log(engine units):", EngUnitsAES192[k], "\n")
+
+ProjYearsAES192 = [[None for i in range(9)] for j in range(7)]
+ProjMemBytesAES192 = [[None for i in range(9)] for j in range(7)]
+ProjEngUnitsAES192 = [[None for i in range(9)] for j in range(7)]
+
+for k in range(0, 7):
+    print ("AES192 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+    # YearIndex walks the 9-entry projection tables (5-year steps), so the guard must test those tables, not the yearly CostMPU
+    for YearIndex in range(0, 9):
+        if ProjGatesPerDollar_MPU[YearIndex] is not None:
+            test, LogYears, LogMemBytes, LogEngUnits = AES_estimator(version, AESgates, AEStime, YearIndex, MoneyOptions[k], ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+            if test == 'passed':
+                ProjYearsAES192[k][YearIndex] = LogYears
+                ProjMemBytesAES192[k][YearIndex] = LogMemBytes
+                ProjEngUnitsAES192[k][YearIndex] = LogEngUnits
+            else:
+                print ("ERROR: memory is not negligible")
+    print ("Log(years):", ProjYearsAES192[k]); print ("Log(memory bytes):", ProjMemBytesAES192[k]); print ("Log(engine units):", ProjEngUnitsAES192[k], "\n")
+
+############################################################################################
+#### AES256
+
+version = 256               # AES256 
+AESgates = 13974            # Number of GEs occupied by Ueno et al.'s AES256 implementation
+node = 45                   # 45nm
+NISTgates=2**16             # AES gate complexity according to NIST
+
+if version == 128:
+    AEStime = (13.97e-9 * 10/11)     
+elif version == 192:
+    AEStime = (17.16e-9 * 12/13)  
+elif version == 256:
+    AEStime = (19.35e-9 * 14/15)     # InvThroughput of AES encryption implementation by Ueno et al. on 45nm
+
+print ("\nAES" +repr(version))
+print ("-------------------")
+print ("\nSerial key-search (NIST):")
+N=2**version
+p=1              # Processor use
+print ("N * AES in gates: 2 ^", math.log2((N * NISTgates) * p))
+
+print ("\nParallel key-search, 45nm, based on Ueno et al.'s implementation:")
+print ("Ueno et al. 2020 using 45nm: throughput of 19.35 * 14/15 = 18.06nsec/AES256 encryption, area of 13,974 GE")
+print ("N * AES in seconds: 2 ^", math.log2(N * AEStime), "\n")
+
+YearsAES256 = [[None for i in range(21)] for j in range(7)]
+MemBytesAES256 = [[None for i in range(21)] for j in range(7)]
+EngUnitsAES256 = [[None for i in range(21)] for j in range(7)]
+
+for k in range(0, 7):
+    print ("AES256: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+    # One estimate per year; an entry keeps its initial None when the year has no MPU price or the memory check fails
+    for YearIndex in range(0, 21):
+        if CostMPU[YearIndex] is not None:
+            test, LogYears, LogMemBytes, LogEngUnits = AES_estimator(version, AESgates, AEStime, YearIndex, MoneyOptions[k], BytesPerDollar_HDD, GatesPerDollar_MPU)
+            if test == 'passed':
+                YearsAES256[k][YearIndex] = LogYears
+                MemBytesAES256[k][YearIndex] = LogMemBytes
+                EngUnitsAES256[k][YearIndex] = LogEngUnits
+            else:
+                print ("ERROR: memory is not negligible")
+    print ("Log(years):", YearsAES256[k]); print ("Log(memory bytes):", MemBytesAES256[k]); print ("Log(engine units):", EngUnitsAES256[k], "\n")
+
+ProjYearsAES256 = [[None for i in range(9)] for j in range(7)]
+ProjMemBytesAES256 = [[None for i in range(9)] for j in range(7)]
+ProjEngUnitsAES256 = [[None for i in range(9)] for j in range(7)]
+
+for k in range(0, 7):
+    print ("AES256 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+    # YearIndex walks the 9-entry projection tables (5-year steps), so the guard must test those tables, not the yearly CostMPU
+    for YearIndex in range(0, 9):
+        if ProjGatesPerDollar_MPU[YearIndex] is not None:
+            test, LogYears, LogMemBytes, LogEngUnits = AES_estimator(version, AESgates, AEStime, YearIndex, MoneyOptions[k], ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+            if test == 'passed':
+                ProjYearsAES256[k][YearIndex] = LogYears
+                ProjMemBytesAES256[k][YearIndex] = LogMemBytes
+                ProjEngUnitsAES256[k][YearIndex] = LogEngUnits
+            else:
+                print ("ERROR: memory is not negligible")
+    print ("Log(years):", ProjYearsAES256[k]); print ("Log(memory bytes):", ProjMemBytesAES256[k]); print ("Log(engine units):", ProjEngUnitsAES256[k], "\n")
+
+############################################################################################
+#### SHA-3 security estimator
+
+def SHA3_estimator(version, SHA3gates, SHA3time, YearIndex, Money, p, top_zero_bits, BytesPerDollar_HDD, GatesPerDollar_MPU):
+    """Collision-search estimate for p engines; returns (LogYears, LogMemUnits, mem_unit, LogEngUnits, SHA3inSeconds), logs all 0 if no budget is left for memory."""
+    N=2**version                                # Number of SHA-3 operations (search space)
+    theta = 2**-top_zero_bits                   # 2^-top_zero_bits (presumably the distinguished-point fraction; confirm against the paper)
+    mem_unit=version/8 + (version/8 - math.floor(top_zero_bits/8)) + 6        # Bytes per memory unit
+    engine_cost = p * SHA3gates / GatesPerDollar_MPU[YearIndex]               # US$ spent on the p collision-search engines
+    w=(Money - engine_cost) * BytesPerDollar_HDD[YearIndex] / mem_unit        # Number of memory units the remaining budget buys
+    LogYears = 0; LogMemUnits = 0; LogEngUnits = 0; SHA3inSeconds = 0         # Callers treat LogYears == 0 as "not feasible"
+    if w > 0:
+        LogMemUnits = math.log2(w)
+        LogEngUnits = math.log2(p)
+        seconds = (math.sqrt(math.pi*N/2)/p + 2.5/theta) * SHA3time           # Run time in seconds, computed once for both outputs
+        LogYears = math.log2(seconds/SecondsPerYear)
+        SHA3inSeconds = math.log2(seconds)
+    return LogYears, LogMemUnits, mem_unit, LogEngUnits, SHA3inSeconds
+
+############################################################################################
+#### SHA3-256
+
+version = 256                           # SHA3-256 
+SHA3gates = 10500 * 1.2                 # Number of GEs occupied by Akin et al.'s implementation (SMH option), scaled to include initialization and absorb stages
+SHA3time = (54.95e-9 * (45/90)**2)*1.5  # Latency of implementation (scaled to 45nm from 90nm, scaled to include initialization and absorb stages )
+node = 45                               # 45nm
+
+print ("\nSHA3-" +repr(version)+ " on " +repr(node)+ "nm node")
+print ("-------------------")
+print ("Akin et al. implementation using 90nm: 54.95nsec/Keccak computation, 10.5KGE. Area and timing results are scaled to 45nm and SHA-3")
+
+MinYearsSHA3 = [[None for i in range(21)] for j in range(7)]
+MemBytesSHA3 = [[None for i in range(21)] for j in range(7)]
+EngUnitsSHA3 = [[None for i in range(21)] for j in range(7)]
+
+top_zero_bits = 74   #### NOTE: this can be tuned per option
+
+for k in range(0, 7):
+    print ("SHA3-256: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+    # Grid search over the engine count 2^10 .. 2^99.9 (exponent step 0.1), keeping the shortest run time per year
+    for YearIndex in range(0, 21):
+        lock = 0; MinLogYears = MemBytes = EngUnits = None   # Reset so an infeasible year cannot reuse the previous optimum
+        if CostMPU[YearIndex] is not None: 
+            for i in range(10, 100):
+                for j in range(0, 10):
+                    engines = 2**(i+j/10)
+                    LogYears, LogMemUnits, mem_unit, LogEngUnits, SHA3inSeconds = SHA3_estimator(version, SHA3gates, SHA3time, YearIndex, MoneyOptions[k], engines, top_zero_bits, BytesPerDollar_HDD, GatesPerDollar_MPU)
+                    if LogYears != 0:  
+                        if lock == 0: MinLogYears = LogYears; lock = 1 
+                        if LogYears <= MinLogYears: MinLogYears = LogYears; MemBytes = math.log2(mem_unit*2**LogMemUnits); EngUnits = LogEngUnits; t = SHA3inSeconds
+            MinYearsSHA3[k][YearIndex] = MinLogYears
+            MemBytesSHA3[k][YearIndex] = MemBytes
+            EngUnitsSHA3[k][YearIndex] = EngUnits  
+            #print ("(sqrt(Pi*N/2)/p + 2.5/theta) * SHA3-256 in seconds: 2 ^", t)
+    print ("Log(years):", MinYearsSHA3[k]); print ("Log(memory bytes):", MemBytesSHA3[k]); print ("Log(engine units):", EngUnitsSHA3[k], "\n")
+
+ProjMinYearsSHA3 = [[None for i in range(9)] for j in range(7)]
+ProjMemBytesSHA3 = [[None for i in range(9)] for j in range(7)]
+ProjEngUnitsSHA3 = [[None for i in range(9)] for j in range(7)]
+
+top_zero_bits = 77   #### NOTE: this can be tuned per option
+
+for k in range(0, 7):    
+    print ("SHA3-256 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+    # Grid search over the engine count 2^10 .. 2^99.9 (exponent step 0.1), keeping the shortest run time per entry
+    for YearIndex in range(0, 9):
+        lock = 0; MinLogYears = MemBytes = EngUnits = None   # Reset so an infeasible entry cannot reuse the previous optimum
+        for i in range(10, 100):
+            for j in range(0, 10):
+                engines = 2**(i+j/10)
+                LogYears, LogMemUnits, mem_unit, LogEngUnits, SHA3inSeconds = SHA3_estimator(version, SHA3gates, SHA3time, YearIndex, MoneyOptions[k], engines, top_zero_bits, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+                if LogYears != 0:  
+                    if lock == 0: MinLogYears = LogYears; lock = 1 
+                    if LogYears <= MinLogYears: MinLogYears = LogYears; MemBytes = math.log2(mem_unit*2**LogMemUnits); EngUnits = LogEngUnits; t = SHA3inSeconds
+        ProjMinYearsSHA3[k][YearIndex] = MinLogYears
+        ProjMemBytesSHA3[k][YearIndex] = MemBytes
+        ProjEngUnitsSHA3[k][YearIndex] = EngUnits        
+        #print ("(sqrt(Pi*N/2)/p + 2.5/theta) * SHA3-256 in seconds: 2 ^", t)
+    print ("Log(years):", ProjMinYearsSHA3[k]); print ("Log(memory bytes):", ProjMemBytesSHA3[k]); print ("Log(engine units):", ProjEngUnitsSHA3[k], "\n")
+
+############################################################################################
+#### SHA3-384
+
+version = 384                           # SHA3-384 
+SHA3gates = 10500 * 1.2                 # Number of GEs occupied by Akin et al.'s implementation (SMH option), scaled to include initialization and absorb stages
+SHA3time = (54.95e-9 * (45/90)**2)*1.5  # Latency of implementation (scaled to 45nm from 90nm, scaled to include initialization and absorb stages )
+node = 45                               # 45nm
+
+print ("\nSHA3-" +repr(version)+ " on " +repr(node)+ "nm node")
+print ("-------------------")
+print ("Akin et al. implementation using 90nm: 54.95nsec/Keccak computation, 10.5KGE. Area and timing results are scaled to 45nm and SHA-3")
+
+MinYearsSHA3_384 = [[None for i in range(21)] for j in range(7)]
+MemBytesSHA3_384 = [[None for i in range(21)] for j in range(7)]
+EngUnitsSHA3_384 = [[None for i in range(21)] for j in range(7)]
+
+top_zero_bits = 74   #### NOTE: this can be tuned per option
+
+for k in range(0, 7):
+    print ("SHA3-384: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+    # Grid search over the engine count 2^10 .. 2^99.9 (exponent step 0.1), keeping the shortest run time per year
+    for YearIndex in range(0, 21):
+        lock = 0; MinLogYears = MemBytes = EngUnits = None   # Reset so an infeasible year cannot reuse the previous optimum
+        if CostMPU[YearIndex] is not None: 
+            for i in range(10, 100):
+                for j in range(0, 10):
+                    engines = 2**(i+j/10)
+                    LogYears, LogMemUnits, mem_unit, LogEngUnits, SHA3inSeconds = SHA3_estimator(version, SHA3gates, SHA3time, YearIndex, MoneyOptions[k], engines, top_zero_bits, BytesPerDollar_HDD, GatesPerDollar_MPU)
+                    if LogYears != 0:  
+                        if lock == 0: MinLogYears = LogYears; lock = 1 
+                        if LogYears <= MinLogYears: MinLogYears = LogYears; MemBytes = math.log2(mem_unit*2**LogMemUnits); EngUnits = LogEngUnits; t = SHA3inSeconds
+            MinYearsSHA3_384[k][YearIndex] = MinLogYears
+            MemBytesSHA3_384[k][YearIndex] = MemBytes
+            EngUnitsSHA3_384[k][YearIndex] = EngUnits  
+            #print ("(sqrt(Pi*N/2)/p + 2.5/theta) * SHA3-384 in seconds: 2 ^", t)
+    print ("Log(years):", MinYearsSHA3_384[k]); print ("Log(memory bytes):", MemBytesSHA3_384[k]); print ("Log(engine units):", EngUnitsSHA3_384[k], "\n")
+
+ProjMinYearsSHA3_384 = [[None for i in range(9)] for j in range(7)]
+ProjMemBytesSHA3_384 = [[None for i in range(9)] for j in range(7)]
+ProjEngUnitsSHA3_384 = [[None for i in range(9)] for j in range(7)]
+
+top_zero_bits = 77   #### NOTE: this can be tuned per option
+
+for k in range(0, 7):    
+    print ("SHA3-384 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 9):
+        lock = 0
+        for i in range(10, 100):
+            for j in range(0, 10):
+                engines = 2**(i+j/10)
+                LogYears, LogMemUnits, mem_unit, LogEngUnits, SHA3inSeconds = SHA3_estimator(version, SHA3gates, SHA3time, YearIndex, MoneyOptions[k], engines, top_zero_bits, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+                if LogYears != 0:  
+                    if lock == 0: MinLogYears = LogYears; lock = 1 
+                    if LogYears <= MinLogYears: MinLogYears = LogYears; MemBytes = math.log2(mem_unit*2**LogMemUnits); EngUnits = LogEngUnits; t = SHA3inSeconds
+        ProjMinYearsSHA3_384[k][YearIndex] = MinLogYears
+        ProjMemBytesSHA3_384[k][YearIndex] = MemBytes
+        ProjEngUnitsSHA3_384[k][YearIndex] = EngUnits        
+        #print ("(sqrt(Pi*N/2)/p + 2.5/theta) * SHA3-384 in seconds: 2 ^", t)
+    print ("Log(years):", ProjMinYearsSHA3_384[k]); print ("Log(memory bytes):", ProjMemBytesSHA3_384[k]); print ("Log(engine units):", ProjEngUnitsSHA3_384[k], "\n")
+
+############################################################################################
+#### SIKE security estimator
+
+def SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, Money, memory, BytesPerDollar_HDD, GatesPerDollar_MPU):
+    """Estimate the cost of a collision-search attack on SIKE for one budget/memory choice.
+
+    version:   SIKE parameter set (377, 434, 503, 546, 610, 697 or 751).
+    isogeny:   2 selects the 2-isogeny search space; any other value selects the
+               3-isogeny one where it is defined (versions 377 and 697 only).
+    SIKEgates: gate equivalents (GE) occupied by one collision-search engine.
+    SIKEtime:  seconds per half-degree isogeny computation.
+    YearIndex: index into the two price tables below.
+    Money:     total budget in dollars; memory: bytes of storage to buy.
+    BytesPerDollar_HDD, GatesPerDollar_MPU: price tables indexed by YearIndex.
+
+    Returns (LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds), all base-2 logs.
+    All four are 0 when the memory cost leaves no money for engines (callers
+    treat LogYears == 0 as "not affordable").
+    Raises ValueError for an unknown version (previously an UnboundLocalError).
+    """
+    SIKEtime45nm = SIKEtime              # Latency of half-degree isogeny implementation by the proposed implementation on 45nm
+    t=SecondsPerYear/SIKEtime45nm        # Number of half-degree isogeny operations per year per collision-search engine
+
+    # Search-space exponents per version: (exponent of 2, exponent of 3 or None)
+    exponents = {
+        377: ((191-1)/2, (117-1)/2),
+        434: (216/2-1, None),
+        503: (250/2-1, None),
+        546: ((273-1)/2, None),
+        610: ((305-1)/2, None),
+        697: (356/2-1, (215-1)/2),
+        751: (372/2-1, None),
+    }
+    if version not in exponents:
+        raise ValueError("unsupported SIKE version: " + repr(version))
+    exp2, exp3 = exponents[version]
+    N = 2**exp2 if (isogeny == 2 or exp3 is None) else 3**exp3   # Search space
+
+    mem_unit=math.ceil((2*math.log2(N) + math.log2(20))/8)   # Bytes per memory unit
+    w=memory/mem_unit   # Memory units
+    p=(Money - (1/BytesPerDollar_HDD[YearIndex] * w * mem_unit))*GatesPerDollar_MPU[YearIndex]/SIKEgates  # Number of engines I can buy
+    LogYears = 0; LogMemUnits = 0; LogEngUnits = 0; SIKEinSeconds = 0
+    if p > 0:
+        LogMemUnits = math.log2(w)
+        LogEngUnits = math.log2(p)
+        LogYears = math.log2(2.5*math.sqrt(N**3/w)/(p * t))
+        SIKEinSeconds = math.log2(2.5*math.sqrt(N**3/w) * SIKEtime45nm)
+    return LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds
+
+############################################################################################
+#### SIKEp377, 2-isogeny attack
+
+version = 377               # SIKE377
+isogeny = 2
+SIKEgates = 341300          # Number of GEs occupied by the proposed implementation
+SIKEtime = 2.347e-3         # Latency of half-degree isogeny implementation on 45nm
+node = 45                   # 45nm
+
+print ("\nSIKEp" +repr(version)+ " on " +repr(node)+ "nm node, using " +repr(isogeny)+ "-isogenies")
+print ("-------------------")
+print ("Proposed implementation using 45nm, radix = 32: 2.347msec/half-degree isogeny, area of 341,300 GE")
+
+MinYearsSIKEp377 = [[None for i in range(21)] for j in range(7)]
+MemBytesSIKEp377 = [[None for i in range(21)] for j in range(7)]
+EngUnitsSIKEp377 = [[None for i in range(21)] for j in range(7)]
+
+for k in range(0, 7):   # one pass per budget option in MoneyOptions
+    print ("SIKEp377: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 21):
+        MinLogYears = MemBytes = EngUnits = None   # reset per year so a year with no affordable configuration cannot inherit stale results
+        if CostMPU[YearIndex] is not None:
+            for i in range(10, 100):   # sweep memory sizes 2^10 .. 2^99.9 bytes in steps of 2^0.1
+                for j in range(0, 10):
+                    memory = 2**(i+j/10)
+                    LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, BytesPerDollar_HDD, GatesPerDollar_MPU)
+                    # SIKE_estimator returns LogYears == 0 when the memory cost leaves no money for engines
+                    if LogYears != 0 and (MinLogYears is None or LogYears <= MinLogYears):
+                        MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds
+            MinYearsSIKEp377[k][YearIndex] = MinLogYears
+            MemBytesSIKEp377[k][YearIndex] = MemBytes
+            EngUnitsSIKEp377[k][YearIndex] = EngUnits
+            #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t)
+    print ("Log(years):", MinYearsSIKEp377[k]); print ("Log(memory bytes):", MemBytesSIKEp377[k]); print ("Log(engine units):", EngUnitsSIKEp377[k], "\n")
+
+ProjMinYearsSIKEp377 = [[None for i in range(9)] for j in range(7)]
+ProjMemBytesSIKEp377 = [[None for i in range(9)] for j in range(7)]
+ProjEngUnitsSIKEp377 = [[None for i in range(9)] for j in range(7)]
+
+for k in range(0, 7):   # one pass per budget option in MoneyOptions
+    print ("SIKEp377 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 9):
+        MinLogYears = MemBytes = EngUnits = None   # reset per year so a year with no affordable configuration cannot inherit stale results
+        for i in range(10, 100):   # sweep memory sizes 2^10 .. 2^99.9 bytes in steps of 2^0.1
+            for j in range(0, 10):
+                memory = 2**(i+j/10)
+                LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+                # SIKE_estimator returns LogYears == 0 when the memory cost leaves no money for engines
+                if LogYears != 0 and (MinLogYears is None or LogYears <= MinLogYears):
+                    MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds
+        ProjMinYearsSIKEp377[k][YearIndex] = MinLogYears
+        ProjMemBytesSIKEp377[k][YearIndex] = MemBytes
+        ProjEngUnitsSIKEp377[k][YearIndex] = EngUnits
+        #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t)
+    print ("Log(years):", ProjMinYearsSIKEp377[k]); print ("Log(memory bytes):", ProjMemBytesSIKEp377[k]); print ("Log(engine units):", ProjEngUnitsSIKEp377[k], "\n")
+
+############################################################################################
+#### SIKEp434
+
+version = 434               # SIKE434
+isogeny = 2
+SIKEgates = 372200          # Number of GEs occupied by the proposed implementation
+SIKEtime = 3.253e-3         # Latency of half-degree isogeny implementation on 45nm
+node = 45                   # 45nm
+
+print ("\nSIKEp" +repr(version)+ " on " +repr(node)+ "nm node, using " +repr(isogeny)+ "-isogenies")
+print ("-------------------")
+print ("Proposed implementation using 45nm, radix = 32: 3.253msec/half-degree isogeny, area of 372,200 GE")
+
+MinYearsSIKEp434 = [[None for i in range(21)] for j in range(7)]
+MemBytesSIKEp434 = [[None for i in range(21)] for j in range(7)]
+EngUnitsSIKEp434 = [[None for i in range(21)] for j in range(7)]
+
+for k in range(0, 7):   # one pass per budget option in MoneyOptions
+    print ("SIKEp434: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 21):
+        MinLogYears = MemBytes = EngUnits = None   # reset per year so a year with no affordable configuration cannot inherit stale results
+        if CostMPU[YearIndex] is not None:
+            for i in range(10, 100):   # sweep memory sizes 2^10 .. 2^99.9 bytes in steps of 2^0.1
+                for j in range(0, 10):
+                    memory = 2**(i+j/10)
+                    LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, BytesPerDollar_HDD, GatesPerDollar_MPU)
+                    # SIKE_estimator returns LogYears == 0 when the memory cost leaves no money for engines
+                    if LogYears != 0 and (MinLogYears is None or LogYears <= MinLogYears):
+                        MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds
+            MinYearsSIKEp434[k][YearIndex] = MinLogYears
+            MemBytesSIKEp434[k][YearIndex] = MemBytes
+            EngUnitsSIKEp434[k][YearIndex] = EngUnits
+            #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t)
+    print ("Log(years):", MinYearsSIKEp434[k]); print ("Log(memory bytes):", MemBytesSIKEp434[k]); print ("Log(engine units):", EngUnitsSIKEp434[k], "\n")
+
+ProjMinYearsSIKEp434 = [[None for i in range(9)] for j in range(7)]
+ProjMemBytesSIKEp434 = [[None for i in range(9)] for j in range(7)]
+ProjEngUnitsSIKEp434 = [[None for i in range(9)] for j in range(7)]
+
+for k in range(0, 7):   # one pass per budget option in MoneyOptions
+    print ("SIKEp434 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 9):
+        MinLogYears = MemBytes = EngUnits = None   # reset per year so a year with no affordable configuration cannot inherit stale results
+        for i in range(10, 100):   # sweep memory sizes 2^10 .. 2^99.9 bytes in steps of 2^0.1
+            for j in range(0, 10):
+                memory = 2**(i+j/10)
+                LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+                # SIKE_estimator returns LogYears == 0 when the memory cost leaves no money for engines
+                if LogYears != 0 and (MinLogYears is None or LogYears <= MinLogYears):
+                    MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds
+        ProjMinYearsSIKEp434[k][YearIndex] = MinLogYears
+        ProjMemBytesSIKEp434[k][YearIndex] = MemBytes
+        ProjEngUnitsSIKEp434[k][YearIndex] = EngUnits
+        #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t)
+    print ("Log(years):", ProjMinYearsSIKEp434[k]); print ("Log(memory bytes):", ProjMemBytesSIKEp434[k]); print ("Log(engine units):", ProjEngUnitsSIKEp434[k], "\n")
+
+############################################################################################
+#### SIKEp503
+
+version = 503               # SIKE503
+isogeny = 2
+SIKEgates = 409500          # Number of GEs occupied by the proposed implementation
+SIKEtime = 4.814e-3         # Latency of half-degree isogeny implementation on 45nm
+node = 45                   # 45nm
+
+print ("\nSIKEp" +repr(version)+ " on " +repr(node)+ "nm node, using " +repr(isogeny)+ "-isogenies")
+print ("-------------------")
+print ("Proposed implementation using 45nm, radix = 32: 4.814msec/half-degree isogeny, area of 409,500 GE")
+
+MinYearsSIKEp503 = [[None for i in range(21)] for j in range(7)]
+MemBytesSIKEp503 = [[None for i in range(21)] for j in range(7)]
+EngUnitsSIKEp503 = [[None for i in range(21)] for j in range(7)]
+
+for k in range(0, 7):   # one pass per budget option in MoneyOptions
+    print ("SIKEp503: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 21):
+        MinLogYears = MemBytes = EngUnits = None   # reset per year so a year with no affordable configuration cannot inherit stale results
+        if CostMPU[YearIndex] is not None:
+            for i in range(10, 100):   # sweep memory sizes 2^10 .. 2^99.9 bytes in steps of 2^0.1
+                for j in range(0, 10):
+                    memory = 2**(i+j/10)
+                    LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, BytesPerDollar_HDD, GatesPerDollar_MPU)
+                    # SIKE_estimator returns LogYears == 0 when the memory cost leaves no money for engines
+                    if LogYears != 0 and (MinLogYears is None or LogYears <= MinLogYears):
+                        MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds
+            MinYearsSIKEp503[k][YearIndex] = MinLogYears
+            MemBytesSIKEp503[k][YearIndex] = MemBytes
+            EngUnitsSIKEp503[k][YearIndex] = EngUnits
+            #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t)
+    print ("Log(years):", MinYearsSIKEp503[k]); print ("Log(memory bytes):", MemBytesSIKEp503[k]); print ("Log(engine units):", EngUnitsSIKEp503[k], "\n")
+
+ProjMinYearsSIKEp503 = [[None for i in range(9)] for j in range(7)]
+ProjMemBytesSIKEp503 = [[None for i in range(9)] for j in range(7)]
+ProjEngUnitsSIKEp503 = [[None for i in range(9)] for j in range(7)]
+
+for k in range(0, 7):   # one pass per budget option in MoneyOptions
+    print ("SIKEp503 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 9):
+        MinLogYears = MemBytes = EngUnits = None   # reset per year so a year with no affordable configuration cannot inherit stale results
+        for i in range(10, 100):   # sweep memory sizes 2^10 .. 2^99.9 bytes in steps of 2^0.1
+            for j in range(0, 10):
+                memory = 2**(i+j/10)
+                LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+                # SIKE_estimator returns LogYears == 0 when the memory cost leaves no money for engines
+                if LogYears != 0 and (MinLogYears is None or LogYears <= MinLogYears):
+                    MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds
+        ProjMinYearsSIKEp503[k][YearIndex] = MinLogYears
+        ProjMemBytesSIKEp503[k][YearIndex] = MemBytes
+        ProjEngUnitsSIKEp503[k][YearIndex] = EngUnits
+        #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t)
+    print ("Log(years):", ProjMinYearsSIKEp503[k]); print ("Log(memory bytes):", ProjMemBytesSIKEp503[k]); print ("Log(engine units):", ProjEngUnitsSIKEp503[k], "\n")
+
+############################################################################################
+#### SIKEp546
+
+version = 546               # SIKE546
+isogeny = 2
+SIKEgates = 441100          # Number of GEs occupied by the proposed implementation
+SIKEtime = 7.095e-3         # Latency of half-degree isogeny implementation on 45nm
+node = 45                   # 45nm
+
+print ("\nSIKEp" +repr(version)+ " on " +repr(node)+ "nm node, using " +repr(isogeny)+ "-isogenies")
+print ("-------------------")
+print ("Proposed implementation using 45nm, radix = 32: 7.095msec/half-degree isogeny, area of 441,100 GE")
+
+MinYearsSIKEp546 = [[None for i in range(21)] for j in range(7)]
+MemBytesSIKEp546 = [[None for i in range(21)] for j in range(7)]
+EngUnitsSIKEp546 = [[None for i in range(21)] for j in range(7)]
+
+for k in range(0, 7):   # one pass per budget option in MoneyOptions
+    print ("SIKEp546: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 21):
+        MinLogYears = MemBytes = EngUnits = None   # reset per year so a year with no affordable configuration cannot inherit stale results
+        if CostMPU[YearIndex] is not None:
+            for i in range(10, 100):   # sweep memory sizes 2^10 .. 2^99.9 bytes in steps of 2^0.1
+                for j in range(0, 10):
+                    memory = 2**(i+j/10)
+                    LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, BytesPerDollar_HDD, GatesPerDollar_MPU)
+                    # SIKE_estimator returns LogYears == 0 when the memory cost leaves no money for engines
+                    if LogYears != 0 and (MinLogYears is None or LogYears <= MinLogYears):
+                        MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds
+            MinYearsSIKEp546[k][YearIndex] = MinLogYears
+            MemBytesSIKEp546[k][YearIndex] = MemBytes
+            EngUnitsSIKEp546[k][YearIndex] = EngUnits
+            #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t)
+    print ("Log(years):", MinYearsSIKEp546[k]); print ("Log(memory bytes):", MemBytesSIKEp546[k]); print ("Log(engine units):", EngUnitsSIKEp546[k], "\n")
+
+ProjMinYearsSIKEp546 = [[None for i in range(9)] for j in range(7)]
+ProjMemBytesSIKEp546 = [[None for i in range(9)] for j in range(7)]
+ProjEngUnitsSIKEp546 = [[None for i in range(9)] for j in range(7)]
+
+for k in range(0, 7):   # one pass per budget option in MoneyOptions
+    print ("SIKEp546 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 9):
+        MinLogYears = MemBytes = EngUnits = None   # reset per year so a year with no affordable configuration cannot inherit stale results
+        for i in range(10, 100):   # sweep memory sizes 2^10 .. 2^99.9 bytes in steps of 2^0.1
+            for j in range(0, 10):
+                memory = 2**(i+j/10)
+                LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+                # SIKE_estimator returns LogYears == 0 when the memory cost leaves no money for engines
+                if LogYears != 0 and (MinLogYears is None or LogYears <= MinLogYears):
+                    MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds
+        ProjMinYearsSIKEp546[k][YearIndex] = MinLogYears
+        ProjMemBytesSIKEp546[k][YearIndex] = MemBytes
+        ProjEngUnitsSIKEp546[k][YearIndex] = EngUnits
+        #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t)
+    print ("Log(years):", ProjMinYearsSIKEp546[k]); print ("Log(memory bytes):", ProjMemBytesSIKEp546[k]); print ("Log(engine units):", ProjEngUnitsSIKEp546[k], "\n")
+
+############################################################################################
+#### SIKEp610
+
+version = 610               # SIKE610
+isogeny = 2
+SIKEgates = 748000          # Number of GEs occupied by the proposed implementation
+SIKEtime = 5.803e-3         # Latency of half-degree isogeny implementation on 45nm
+node = 45                   # 45nm
+
+print ("\nSIKEp" +repr(version)+ " on " +repr(node)+ "nm node, using " +repr(isogeny)+ "-isogenies")
+print ("-------------------")
+print ("Proposed implementation using 45nm, radix = 64: 5.803msec/half-degree isogeny, area of 748,000 GE")
+
+MinYearsSIKEp610 = [[None for i in range(21)] for j in range(7)]
+MemBytesSIKEp610 = [[None for i in range(21)] for j in range(7)]
+EngUnitsSIKEp610 = [[None for i in range(21)] for j in range(7)]
+
+for k in range(0, 7):   # one pass per budget option in MoneyOptions
+    print ("SIKEp610: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 21):
+        MinLogYears = MemBytes = EngUnits = None   # reset per year so a year with no affordable configuration cannot inherit stale results
+        if CostMPU[YearIndex] is not None:
+            for i in range(10, 100):   # sweep memory sizes 2^10 .. 2^99.9 bytes in steps of 2^0.1
+                for j in range(0, 10):
+                    memory = 2**(i+j/10)
+                    LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, BytesPerDollar_HDD, GatesPerDollar_MPU)
+                    # SIKE_estimator returns LogYears == 0 when the memory cost leaves no money for engines
+                    if LogYears != 0 and (MinLogYears is None or LogYears <= MinLogYears):
+                        MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds
+            MinYearsSIKEp610[k][YearIndex] = MinLogYears
+            MemBytesSIKEp610[k][YearIndex] = MemBytes
+            EngUnitsSIKEp610[k][YearIndex] = EngUnits
+            #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t)
+    print ("Log(years):", MinYearsSIKEp610[k]); print ("Log(memory bytes):", MemBytesSIKEp610[k]); print ("Log(engine units):", EngUnitsSIKEp610[k], "\n")
+
+ProjMinYearsSIKEp610 = [[None for i in range(9)] for j in range(7)]
+ProjMemBytesSIKEp610 = [[None for i in range(9)] for j in range(7)]
+ProjEngUnitsSIKEp610 = [[None for i in range(9)] for j in range(7)]
+
+for k in range(0, 7):   # one pass per budget option in MoneyOptions
+    print ("SIKEp610 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 9):
+        MinLogYears = MemBytes = EngUnits = None   # reset per year so a year with no affordable configuration cannot inherit stale results
+        for i in range(10, 100):   # sweep memory sizes 2^10 .. 2^99.9 bytes in steps of 2^0.1
+            for j in range(0, 10):
+                memory = 2**(i+j/10)
+                LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+                # SIKE_estimator returns LogYears == 0 when the memory cost leaves no money for engines
+                if LogYears != 0 and (MinLogYears is None or LogYears <= MinLogYears):
+                    MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds
+        ProjMinYearsSIKEp610[k][YearIndex] = MinLogYears
+        ProjMemBytesSIKEp610[k][YearIndex] = MemBytes
+        ProjEngUnitsSIKEp610[k][YearIndex] = EngUnits
+        #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t)
+    print ("Log(years):", ProjMinYearsSIKEp610[k]); print ("Log(memory bytes):", ProjMemBytesSIKEp610[k]); print ("Log(engine units):", ProjEngUnitsSIKEp610[k], "\n")
+
+############################################################################################
+#### SIKEp697
+
+version = 697               # SIKE697
+isogeny = 2
+SIKEgates = 798900          # Number of GEs occupied by the proposed implementation
+SIKEtime = 8.595e-3         # Latency of half-degree isogeny implementation on 45nm
+node = 45                   # 45nm
+
+print ("\nSIKEp" +repr(version)+ " on " +repr(node)+ "nm node, using " +repr(isogeny)+ "-isogenies")
+print ("-------------------")
+print ("Proposed implementation using 45nm, radix = 64: 8.595msec/half-degree isogeny, area of 798,900 GE")
+
+MinYearsSIKEp697 = [[None for i in range(21)] for j in range(7)]
+MemBytesSIKEp697 = [[None for i in range(21)] for j in range(7)]
+EngUnitsSIKEp697 = [[None for i in range(21)] for j in range(7)]
+
+for k in range(0, 7):   # one pass per budget option in MoneyOptions
+    print ("SIKEp697: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 21):
+        MinLogYears = MemBytes = EngUnits = None   # reset per year so a year with no affordable configuration cannot inherit stale results
+        if CostMPU[YearIndex] is not None:
+            for i in range(10, 100):   # sweep memory sizes 2^10 .. 2^99.9 bytes in steps of 2^0.1
+                for j in range(0, 10):
+                    memory = 2**(i+j/10)
+                    LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, BytesPerDollar_HDD, GatesPerDollar_MPU)
+                    # SIKE_estimator returns LogYears == 0 when the memory cost leaves no money for engines
+                    if LogYears != 0 and (MinLogYears is None or LogYears <= MinLogYears):
+                        MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds
+            MinYearsSIKEp697[k][YearIndex] = MinLogYears
+            MemBytesSIKEp697[k][YearIndex] = MemBytes
+            EngUnitsSIKEp697[k][YearIndex] = EngUnits
+            #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t)
+    print ("Log(years):", MinYearsSIKEp697[k]); print ("Log(memory bytes):", MemBytesSIKEp697[k]); print ("Log(engine units):", EngUnitsSIKEp697[k], "\n")
+
+ProjMinYearsSIKEp697 = [[None for i in range(9)] for j in range(7)]
+ProjMemBytesSIKEp697 = [[None for i in range(9)] for j in range(7)]
+ProjEngUnitsSIKEp697 = [[None for i in range(9)] for j in range(7)]
+
+for k in range(0, 7):   # one pass per budget option in MoneyOptions
+    print ("SIKEp697 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 9):
+        MinLogYears = MemBytes = EngUnits = None   # reset per year so a year with no affordable configuration cannot inherit stale results
+        for i in range(10, 100):   # sweep memory sizes 2^10 .. 2^99.9 bytes in steps of 2^0.1
+            for j in range(0, 10):
+                memory = 2**(i+j/10)
+                LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+                # SIKE_estimator returns LogYears == 0 when the memory cost leaves no money for engines
+                if LogYears != 0 and (MinLogYears is None or LogYears <= MinLogYears):
+                    MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds
+        ProjMinYearsSIKEp697[k][YearIndex] = MinLogYears
+        ProjMemBytesSIKEp697[k][YearIndex] = MemBytes
+        ProjEngUnitsSIKEp697[k][YearIndex] = EngUnits
+        #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t)
+    print ("Log(years):", ProjMinYearsSIKEp697[k]); print ("Log(memory bytes):", ProjMemBytesSIKEp697[k]); print ("Log(engine units):", ProjEngUnitsSIKEp697[k], "\n")
+
+############################################################################################
+#### SIKEp751
+
+version = 751               # SIKE751
+isogeny = 2
+SIKEgates = 822300          # Number of GEs occupied by the proposed implementation
+SIKEtime = 9.703e-3         # Latency of half-degree isogeny implementation on 45nm
+node = 45                   # 45nm
+
+print ("\nSIKEp" +repr(version)+ " on " +repr(node)+ "nm node, using " +repr(isogeny)+ "-isogenies")
+print ("-------------------")
+print ("Proposed implementation using 45nm, radix = 64: 9.703msec/half-degree isogeny, area of 822,300 GE")
+
+MinYearsSIKEp751 = [[None for i in range(21)] for j in range(7)]
+MemBytesSIKEp751 = [[None for i in range(21)] for j in range(7)]
+EngUnitsSIKEp751 = [[None for i in range(21)] for j in range(7)]
+
+for k in range(0, 7):   # one pass per budget option in MoneyOptions
+    print ("SIKEp751: results per year (2000-2020), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 21):
+        MinLogYears = MemBytes = EngUnits = None   # reset per year so a year with no affordable configuration cannot inherit stale results
+        if CostMPU[YearIndex] is not None:
+            for i in range(10, 100):   # sweep memory sizes 2^10 .. 2^99.9 bytes in steps of 2^0.1
+                for j in range(0, 10):
+                    memory = 2**(i+j/10)
+                    LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, BytesPerDollar_HDD, GatesPerDollar_MPU)
+                    # SIKE_estimator returns LogYears == 0 when the memory cost leaves no money for engines
+                    if LogYears != 0 and (MinLogYears is None or LogYears <= MinLogYears):
+                        MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds
+            MinYearsSIKEp751[k][YearIndex] = MinLogYears
+            MemBytesSIKEp751[k][YearIndex] = MemBytes
+            EngUnitsSIKEp751[k][YearIndex] = EngUnits
+            #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t)
+    print ("Log(years):", MinYearsSIKEp751[k]); print ("Log(memory bytes):", MemBytesSIKEp751[k]); print ("Log(engine units):", EngUnitsSIKEp751[k], "\n")
+
+ProjMinYearsSIKEp751 = [[None for i in range(9)] for j in range(7)]
+ProjMemBytesSIKEp751 = [[None for i in range(9)] for j in range(7)]
+ProjEngUnitsSIKEp751 = [[None for i in range(9)] for j in range(7)]
+
+for k in range(0, 7):   # one pass per budget option in MoneyOptions
+    print ("SIKEp751 (projection): results every 5 years (2000-2040), budget (millions of dollars) = " +repr(MoneyOptions[k]/10**6))
+
+    for YearIndex in range(0, 9):
+        MinLogYears = MemBytes = EngUnits = None   # reset per year so a year with no affordable configuration cannot inherit stale results
+        for i in range(10, 100):   # sweep memory sizes 2^10 .. 2^99.9 bytes in steps of 2^0.1
+            for j in range(0, 10):
+                memory = 2**(i+j/10)
+                LogYears, LogMemUnits, LogEngUnits, SIKEinSeconds = SIKE_estimator(version, isogeny, SIKEgates, SIKEtime, YearIndex, MoneyOptions[k], memory, ProjBytesPerDollar_HDD, ProjGatesPerDollar_MPU)
+                # SIKE_estimator returns LogYears == 0 when the memory cost leaves no money for engines
+                if LogYears != 0 and (MinLogYears is None or LogYears <= MinLogYears):
+                    MinLogYears = LogYears; MemBytes = math.log2(memory); EngUnits = LogEngUnits; t = SIKEinSeconds
+        ProjMinYearsSIKEp751[k][YearIndex] = MinLogYears
+        ProjMemBytesSIKEp751[k][YearIndex] = MemBytes
+        ProjEngUnitsSIKEp751[k][YearIndex] = EngUnits
+        #print ("2.5*sqrt(N^3/w) * SIKE in seconds: 2 ^", t)
+    print ("Log(years):", ProjMinYearsSIKEp751[k]); print ("Log(memory bytes):", ProjMemBytesSIKEp751[k]); print ("Log(engine units):", ProjEngUnitsSIKEp751[k], "\n")
+
+############################################################################################
+#### Graph of security estimates (in years) using historical prices of memory (bytes) and
+#### computing resources (gates), years 2000-2020 
+
+def grapher_historical(k):
+    """Plot, show and save the 2000-2020 security estimates (log years) for budget option k.
+    k indexes MoneyOptions and the rows of the per-budget AES, SHA-3 and SIKE result tables.
+    The figure is written to historical_estimates_<budget in millions>million.png.
+    Reads the module-level flag titlefigure ('on' adds a title to the figure).
+    """
+    x = np.linspace(0, 20, 21)
+    y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12 = YearsAES128[k], YearsAES192[k], YearsAES256[k], MinYearsSHA3[k], MinYearsSHA3_384[k], MinYearsSIKEp377[k], MinYearsSIKEp434[k], MinYearsSIKEp503[k], MinYearsSIKEp546[k], MinYearsSIKEp610[k], MinYearsSIKEp697[k], MinYearsSIKEp751[k]
+    # Setting the figure size and resolution
+    fig, ax = plt.subplots(figsize=(10, 3), dpi=300)
+    # Minor ticks and grid: use the Axes returned by subplots(). A bare plt.axes() adds a
+    # new Axes in current Matplotlib, so the locators would land on a throwaway Axes.
+    ax.xaxis.set_minor_locator(MultipleLocator(1))
+    ax.yaxis.set_minor_locator(MultipleLocator(5))
+    plt.grid(color='gray', ls = '-.', lw = 0.25)
+
+    # Setting the color, linewidth, linestyle and legend
+    plt.plot(x, y1, color="crimson", linewidth=1.0, linestyle="-", label="AES128")
+    plt.plot(x, y2, color="crimson", linewidth=1.0, linestyle="--", label="AES192")
+    plt.plot(x, y3, color="crimson", linewidth=1.0, linestyle="-.", label="AES256")
+    plt.plot(x, y4, color="tab:brown", linewidth=1.0, linestyle="--", label="SHA3-256")
+    plt.plot(x, y5, color="tab:brown", linewidth=1.0, linestyle="-.", label="SHA3-384")
+    plt.plot(x, y6, color="royalblue", linewidth=1.0, linestyle="-", label="SIKEp377")
+    plt.plot(x, y7, color="royalblue", linewidth=1.0, linestyle="--", label="SIKEp434")
+    plt.plot(x, y8, color="royalblue", linewidth=1.0, linestyle="-.", label="SIKEp503")
+    plt.plot(x, y9, color="royalblue", linewidth=1.0, linestyle=(0, (5, 1)), label="SIKEp546")
+    plt.plot(x, y10, color="royalblue", linewidth=1.0, linestyle=(0, (5, 10)), label="SIKEp610")
+    plt.plot(x, y11, color="royalblue", linewidth=1.0, linestyle=(0, (5, 5)), label="SIKEp697")
+    plt.plot(x, y12, color="royalblue", linewidth=1.0, linestyle=(0, (1, 5)), label="SIKEp751")
+    leg = plt.legend(loc='upper right', prop={'size': 6}, frameon=True)
+    plt.draw() # Draw the figure so you can find the position of the legend
+
+    # Get the bounding box of the original legend in axes coordinates (Bbox.inverse_transformed was removed from Matplotlib)
+    bb = leg.get_bbox_to_anchor().transformed(ax.transAxes.inverted())
+    # Change to location of the legend
+    xOffset = -0.01
+    bb.x0 += xOffset
+    bb.x1 += xOffset
+    leg.set_bbox_to_anchor(bb, transform = ax.transAxes)
+
+    # Use Latex to set tick labels
+    plt.xticks([0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20], [r'2000', r'$2002$', r'$2004$', r'$2006$', r'$2008$', r'$2010$', r'$2012$', r'$2014$', r'$2016$', r'$2018$', r'$2020$'])
+    plt.xticks(fontsize=8, rotation=0)
+    plt.yticks(fontsize=8, rotation=0)
+    plt.xlabel('Year')  # add x-label
+    plt.ylabel('Log(Years)')  # add y-label
+    if titlefigure == 'on': plt.title('Security estimates in years, budget = US$' +repr(int(MoneyOptions[k]/1e6))+ ' million')  # add title
+
+    # Setting the boundaries of the figure (upper limit is 1.5x the AES256 value at index 19)
+    plt.xlim(x.min()*1.0, x.max()*1.0)
+    plt.ylim(0, y3[19]*1.5)
+    plt.gcf().subplots_adjust(bottom=0.12)
+    plt.show() # show figure
+    fig.savefig("historical_estimates_" +repr(int(MoneyOptions[k]/1e6))+ "million.png", dpi = 300) # save figure
+    
+############################################################################################
+#### Produce the historical figure for each of the seven budget options (indices 0-6)
+
+for i in range(7):
+    grapher_historical(i)
+
+############################################################################################
+#### Graph of security estimates (in years) using projection of prices of memory (bytes) and
+#### computing resources (gates), years 2000-2040
+#### Uses historical prices for 2000-2020, projections for 2025-2040 
+
+def grapher_projection(k):
+    """Plot, show and save the projected security estimates (2000-2040) for budget option k.
+
+    k indexes MoneyOptions and every Proj* table; output file: projection_estimates_<budget/1e6>million.png
+    """
+    x = np.linspace(0, 8, 9)  # nine samples, one per tick label (2000, 2005, ..., 2040)
+    y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12 = ProjYearsAES128[k], ProjYearsAES192[k], ProjYearsAES256[k], ProjMinYearsSHA3[k], ProjMinYearsSHA3_384[k], ProjMinYearsSIKEp377[k], ProjMinYearsSIKEp434[k], ProjMinYearsSIKEp503[k], ProjMinYearsSIKEp546[k], ProjMinYearsSIKEp610[k], ProjMinYearsSIKEp697[k], ProjMinYearsSIKEp751[k]
+
+    # Setting the figure size and resolution
+    fig, ax = plt.subplots(figsize=(10, 3), dpi=300)
+
+    # Minor ticks and grid; use the Axes from plt.subplots(), since a bare plt.axes() adds a new Axes in recent Matplotlib
+    ax.xaxis.set_minor_locator(MultipleLocator(1))
+    ax.yaxis.set_minor_locator(MultipleLocator(5))
+    plt.grid(color='gray', ls = '-.', lw = 0.25)
+
+    # Setting the color, linewidth, linestyle and legend
+    plt.plot(x, y1, color="crimson", linewidth=1.3, linestyle="-", label="AES128")
+    plt.plot(x, y2, color="crimson", linewidth=1.3, linestyle="--", label="AES192")
+    plt.plot(x, y3, color="crimson", linewidth=1.3, linestyle="-.", label="AES256")
+    plt.plot(x, y4, color="tab:brown", linewidth=1.3, linestyle="--", label="SHA3-256")
+    plt.plot(x, y5, color="tab:brown", linewidth=1.3, linestyle="-.", label="SHA3-384")
+    plt.plot(x, y6, color="royalblue", linewidth=1.3, linestyle="-", label="SIKEp377")
+    plt.plot(x, y7, color="royalblue", linewidth=1.3, linestyle=(0, (5, 1)), label="SIKEp434")
+    plt.plot(x, y8, color="royalblue", linewidth=1.3, linestyle="-.", label="SIKEp503")
+    plt.plot(x, y9, color="royalblue", linewidth=1.3, linestyle="--", label="SIKEp546")
+    plt.plot(x, y10, color="royalblue", linewidth=1.3, linestyle=(0, (5, 5)), label="SIKEp610")
+    plt.plot(x, y11, color="royalblue", linewidth=1.3, linestyle=(0, (5, 10)), label="SIKEp697")
+    plt.plot(x, y12, color="royalblue", linewidth=1.3, linestyle=(0, (1, 5)), label="SIKEp751")
+
+    if titlefigure == 'on':
+        leg = plt.legend(loc='upper right', prop={'size': 7}, frameon=True)
+        plt.draw() # Draw the figure so the position of the legend is known
+        # Legend bounding box in axes coordinates (Bbox.inverse_transformed() is gone from current Matplotlib)
+        bb = leg.get_bbox_to_anchor().transformed(ax.transAxes.inverted())
+        # Shift the legend slightly to the left
+        xOffset = -0.01
+        bb.x0 += xOffset
+        bb.x1 += xOffset
+        leg.set_bbox_to_anchor(bb, transform = ax.transAxes)
+
+    # Use Latex to set tick labels
+    plt.xticks([0, 1, 2, 3, 4, 5, 6, 7, 8], [r'$2000$', r'$2005$', r'$2010$', r'$2015$', r'$2020$', r'$2025$', r'$2030$', r'$2035$', r'$2040$'])
+    plt.xticks(fontsize=8, rotation=0)
+    plt.yticks(fontsize=8, rotation=0)
+    plt.xlabel('Year')  # add x-label
+    plt.ylabel('Log(Years)')  # add y-label
+    if titlefigure == 'on': plt.title('Security estimates in years (projection), budget = US$' +repr(int(MoneyOptions[k]/1e6))+ ' million')  # add title
+
+    # In-plot curve labels, anchored to each curve's value at a fixed sample index plus a vertical offset
+    plt.text(0.8,  y1[1]-12, 'AES128', color='crimson', fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+    plt.text(1.7,  y4[2]-12, 'SHA3-256', color='tab:brown', fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+    plt.text(0.8,  y6[1]+2, 'SIKEp377', color='royalblue', fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+    plt.text(0.8,  y7[1]+2, 'SIKEp434', color='royalblue', fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+    plt.text(0.8, y8[1]-12, 'SIKEp503', color='royalblue', fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+    if k < 3:
+        plt.text(0.8, y2[1]+2, 'AES192', color='crimson', fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+        plt.text(1.7, y5[2]+1, 'SHA3-384', color='tab:brown', fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+    else:
+        plt.text(0.8, y2[1]-10, 'AES192', color='crimson', fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+        plt.text(1.7, y5[2]-11, 'SHA3-384', color='tab:brown', fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+    plt.text(0.8, y9[1]+2, 'SIKEp546', color='royalblue', fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+    plt.text(0.8, y10[1]+2, 'SIKEp610', color='royalblue', fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+    plt.text(0.8, y3[1]-12, 'AES256', color='crimson', fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+    plt.text(0.8, y11[1]+2, 'SIKEp697', color='royalblue', fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+    plt.text(0.8, y12[1]+2, 'SIKEp751', color='royalblue', fontsize=7, verticalalignment='bottom', horizontalalignment='left')
+
+    # Setting the boundaries of the figure
+    plt.xlim(x.min()*1.0, x.max()*1.0)
+    plt.ylim(0, y3[8]*1.5)
+    plt.gcf().subplots_adjust(bottom=0.12)
+    plt.show() # show figure
+    fig.savefig("projection_estimates_" +repr(int(MoneyOptions[k]/1e6))+ "million.png", dpi = 300) # save figure
+    
+############################################################################################
+#### Produce the projection figure for each of the seven budget options (indices 0-6)
+
+for i in range(7):
+    grapher_projection(i)
+
+############################################################################################
+#### Historical graph of number of components (bytes/gates) that can be bought per dollar
+
+x = np.linspace(0, 20, 21)
+x4 = [i for i in range(0,21)]
+# Base-2 logs of each yearly series (index 0-20 = 2000-2020); entries without data stay None
+LogBytesPerDollar_HDD = [None] * 21
+LogBytesPerDollar_DRAM = [None] * 21
+LogBytesPerDollar_SSD = [None] * 21
+LogGatesPerDollar_MPU = [None] * 21
+LogGatesPerDollar_Linley = [None] * 21
+LogGatesPerDollar_ITRS = [None] * 21
+LogBytesPerGate = [None] * 21
+
+for i in range(0,21):
+    if BytesPerDollar_HDD[i] is not None:
+        LogBytesPerDollar_HDD[i] = math.log2(BytesPerDollar_HDD[i])
+    if BytesPerDollar_DRAM[i] is not None:
+        LogBytesPerDollar_DRAM[i] = math.log2(BytesPerDollar_DRAM[i])
+    if BytesPerDollar_SSD[i] is not None:
+        LogBytesPerDollar_SSD[i] = math.log2(BytesPerDollar_SSD[i])
+    if GatesPerDollar_MPU[i] is not None:
+        LogGatesPerDollar_MPU[i] = math.log2(GatesPerDollar_MPU[i])
+    if GatesPerDollar_Linley[i] is not None:
+        LogGatesPerDollar_Linley[i] = math.log2(GatesPerDollar_Linley[i])
+    if GatesPerDollar_ITRS[i] is not None:
+        LogGatesPerDollar_ITRS[i] = math.log2(GatesPerDollar_ITRS[i])
+    if BytesPerGate[i] is not None:
+        LogBytesPerGate[i] = math.log2(BytesPerGate[i])
+
+y1, y2, y3, y4, y5, y6, y7 = LogBytesPerDollar_HDD, LogGatesPerDollar_MPU, LogGatesPerDollar_Linley, LogGatesPerDollar_ITRS, LogBytesPerGate, LogBytesPerDollar_DRAM, LogBytesPerDollar_SSD
+
+print(LogBytesPerDollar_HDD)
+print(LogBytesPerDollar_DRAM)
+print(LogBytesPerDollar_SSD)
+print(LogGatesPerDollar_MPU)
+print(LogGatesPerDollar_Linley)
+print(LogGatesPerDollar_ITRS)
+print(LogBytesPerGate)
+
+# Setting the figure size and resolution
+fig, ax = plt.subplots(figsize=(10, 3), dpi=300)
+
+# Minor ticks and grid; use the Axes from plt.subplots(), since a bare plt.axes() adds a new Axes in recent Matplotlib
+ax.xaxis.set_minor_locator(MultipleLocator(1))
+ax.yaxis.set_minor_locator(MultipleLocator(5))
+plt.grid(color='gray', ls = '-.', lw = 0.25)
+
+# Setting the color, linewidth, linestyle and legend
+plt.plot(x, y1, color="crimson", linewidth=1.0, linestyle="-", label="Bytes/dollar (HDD)")
+plt.plot(x, y2, color="royalblue", linewidth=1.0, linestyle="-", label="Gates/dollar (MPU)")
+plt.scatter(x, y3, color="royalblue", s = 7.0, marker='^', label="Gates/dollar (Linley Group)")
+plt.scatter(x, y4, color="aquamarine", s = 5.0, marker='x', label="Gates/dollar (ITRS 2001-2007, forecast)")
+plt.plot(x, y5, color="olivedrab", linewidth=1.5, linestyle="-", label="Bytes (HDD)/gate (MPU) ratio")
+plt.plot(x, y6, color="crimson", linewidth=1.0, linestyle="--", label="Bytes/dollar (DRAM)")
+plt.plot(x, y7, color="crimson", linewidth=1.0, linestyle="-.", label="Bytes/dollar (SSD)")
+plt.legend(loc='upper left', prop={'size': 6}, frameon=True)
+
+# Use Latex to set tick labels
+plt.xticks([0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20], [r'$2000$', r'$2002$', r'$2004$', r'$2006$', r'$2008$', r'$2010$', r'$2012$', r'$2014$', r'$2016$', r'$2018$', r'$2020$'])
+plt.xticks(fontsize=8, rotation=0)
+plt.yticks(fontsize=8, rotation=0)
+plt.xlabel('Year')  # add x-label
+plt.ylabel('Log(components/dollar)')  # add y-label
+if titlefigure == 'on': plt.title('Historical prices of memory and gates (MPUs), 2000-2020')  # add title
+
+# Setting the boundaries of the figure
+plt.xlim(x.min()*1.0, x.max()*1.0)
+plt.ylim(5, y1[20]*1.5)
+
+plt.gcf().subplots_adjust(bottom=0.12)
+plt.show() # show figure
+fig.savefig("historical_mpu_hdd.png", dpi = 300) # save figure
\ No newline at end of file
diff --git a/README.md b/README.md
index a55e297..7fc1ef9 100644
--- a/README.md
+++ b/README.md
@@ -26,19 +26,24 @@ relative to AES and SHA-3.
 - - -
 ## File Organization
 
-- `platform/AC701/` contains hardware development files targeting the Artix-7 AC701 XC7A200TFBG676 FPGA.
+- `platforms/AC701/` contains hardware development files targeting the Artix-7 AC701 XC7A200TFBG676 FPGA.
  
 - `platforms/Murax/` contains the scala source code for generating the Murax SoC.
 
 - `platforms/rtl` contains the APB bridge modules developed for the communication between the software and hardware.
 
-- `src/hardware` contains hardware accelerators source code.
+- `Python_script` contains the Python3 script for the security estimation of SIKE relative to AES and SHA-3.
 
-- `src/murax` contains Murax library files.
+- `SIKE_sw` contains the software implementation of SIKE, including the new parameter sets SIKEp377, SIKEp546 and SIKEp697.
 
-- `src/ref_c` contains the software implementation of vOW on SIKE, which is based on [3] and the [vOW4SIKE library](https://github.com/microsoft/vOW4SIKE).
+- `SIKE_vOW_hw-sw/hardware` contains the hardware accelerators source code.
 
-- `src/ref_c_riscv` contains the software libraries for calling the hardware accelerators and RISC-V testing files.
+- `SIKE_vOW_hw-sw/murax` contains the Murax library files.
+
+- `SIKE_vOW_hw-sw/ref_c` contains the software implementation of vOW on SIKE, which is based on [3] and the [vOW4SIKE library](https://github.com/microsoft/vOW4SIKE).
+
+- `SIKE_vOW_hw-sw/ref_c_riscv` contains the hardware/software co-design of vOW on SIKE. 
+  It contains the software libraries for calling the hardware accelerators and RISC-V testing files.
 
 - `LICENSE` MIT license covering all the implementations, except for the files that are labeled as created by third parties.
 
diff --git a/SIKE_sw/Makefile b/SIKE_sw/Makefile
new file mode 100644
index 0000000..33a75fe
--- /dev/null
+++ b/SIKE_sw/Makefile
@@ -0,0 +1,263 @@
+####  Makefile for compilation on Linux  ####
+
+OPT=-O3   # Optimization option by default
+
+CC=clang
+
+ifeq "$(CC)" "gcc"
+    COMPILER=gcc
+else ifeq "$(CC)" "clang"
+    COMPILER=clang
+endif
+
+ARCHITECTURE=_AMD64_
+USE_OPT_LEVEL=_FAST_
+ifeq "$(ARCH)" "x64"
+    ARCHITECTURE=_AMD64_
+    USE_OPT_LEVEL=_FAST_
+else ifeq "$(ARCH)" "x86"
+    ARCHITECTURE=_X86_
+    USE_OPT_LEVEL=_GENERIC_
+endif
+
+ifeq "$(OPT_LEVEL)" "GENERIC"
+    USE_OPT_LEVEL=_GENERIC_
+endif
+
+ifeq "$(ARCHITECTURE)" "_AMD64_"
+	ifeq "$(USE_OPT_LEVEL)" "_FAST_"
+		MULX=-D _MULX_
+		ifeq "$(USE_MULX)" "FALSE"
+			MULX=
+		else
+			ADX=-D _ADX_
+			ifeq "$(USE_ADX)" "FALSE"
+				ADX=
+			endif
+		endif
+	endif
+endif
+
+ifeq "$(SET)" "EXTENDED"
+    ADDITIONAL_SETTINGS=-fwrapv -fomit-frame-pointer -march=native
+endif
+
+AR=ar rcs
+RANLIB=ranlib
+
+CFLAGS=$(OPT) $(ADDITIONAL_SETTINGS) -D $(ARCHITECTURE) -D __LINUX__ -D $(USE_OPT_LEVEL) $(MULX) $(ADX) -fPIE
+LDFLAGS=-lm
+
+ifeq "$(ARCHITECTURE)" "_AMD64_"
+ifeq "$(USE_OPT_LEVEL)" "_FAST_"
+CFLAGS += -mavx2 -maes -msse2
+endif
+endif
+
+ifeq "$(USE_OPT_LEVEL)" "_GENERIC_"
+    EXTRA_OBJECTS_377=objs377/fp_generic.o
+    EXTRA_OBJECTS_434=objs434/fp_generic.o
+    EXTRA_OBJECTS_503=objs503/fp_generic.o
+    EXTRA_OBJECTS_546=objs546/fp_generic.o
+    EXTRA_OBJECTS_610=objs610/fp_generic.o
+    EXTRA_OBJECTS_697=objs697/fp_generic.o
+    EXTRA_OBJECTS_751=objs751/fp_generic.o
+else ifeq "$(USE_OPT_LEVEL)" "_FAST_"
+ifeq "$(ARCHITECTURE)" "_AMD64_"
+	EXTRA_OBJECTS_377=objs377/fp_x64.o objs377/fp_x64_asm.o
+	EXTRA_OBJECTS_434=objs434/fp_x64.o objs434/fp_x64_asm.o
+	EXTRA_OBJECTS_503=objs503/fp_x64.o objs503/fp_x64_asm.o
+	EXTRA_OBJECTS_546=objs546/fp_x64.o objs546/fp_x64_asm.o
+	EXTRA_OBJECTS_610=objs610/fp_x64.o objs610/fp_x64_asm.o
+	EXTRA_OBJECTS_697=objs697/fp_x64.o objs697/fp_x64_asm.o
+	EXTRA_OBJECTS_751=objs751/fp_x64.o objs751/fp_x64_asm.o
+	CFLAGS+= -fPIC
+endif
+endif
+OBJECTS_377=objs377/P377.o $(EXTRA_OBJECTS_377) objs/random.o objs/fips202.o
+OBJECTS_434=objs434/P434.o $(EXTRA_OBJECTS_434) objs/random.o objs/fips202.o
+OBJECTS_503=objs503/P503.o $(EXTRA_OBJECTS_503) objs/random.o objs/fips202.o
+OBJECTS_546=objs546/P546.o $(EXTRA_OBJECTS_546) objs/random.o objs/fips202.o
+OBJECTS_610=objs610/P610.o $(EXTRA_OBJECTS_610) objs/random.o objs/fips202.o
+OBJECTS_697=objs697/P697.o $(EXTRA_OBJECTS_697) objs/random.o objs/fips202.o
+OBJECTS_751=objs751/P751.o $(EXTRA_OBJECTS_751) objs/random.o objs/fips202.o
+
+all: lib377 lib434 lib503 lib546 lib610 lib697 lib751 tests tests_sike
+
+objs377/%.o: src/P377/%.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+objs434/%.o: src/P434/%.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+objs503/%.o: src/P503/%.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+objs546/%.o: src/P546/%.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+objs610/%.o: src/P610/%.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+objs697/%.o: src/P697/%.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+objs751/%.o: src/P751/%.c
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+ifeq "$(USE_OPT_LEVEL)" "_GENERIC_"  # explicit rules create their own output directory (safe under make -j)
+objs377/fp_generic.o: src/P377/generic/fp_generic.c
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs434/fp_generic.o: src/P434/generic/fp_generic.c
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs503/fp_generic.o: src/P503/generic/fp_generic.c
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs546/fp_generic.o: src/P546/generic/fp_generic.c
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs610/fp_generic.o: src/P610/generic/fp_generic.c
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs697/fp_generic.o: src/P697/generic/fp_generic.c
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs751/fp_generic.o: src/P751/generic/fp_generic.c
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+else ifeq "$(USE_OPT_LEVEL)" "_FAST_"
+ifeq "$(ARCHITECTURE)" "_AMD64_"
+objs377/fp_x64.o: src/P377/AMD64/fp_x64.c
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs377/fp_x64_asm.o: src/P377/AMD64/fp_x64_asm.S
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs434/fp_x64.o: src/P434/AMD64/fp_x64.c
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs434/fp_x64_asm.o: src/P434/AMD64/fp_x64_asm.S
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs503/fp_x64.o: src/P503/AMD64/fp_x64.c
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs503/fp_x64_asm.o: src/P503/AMD64/fp_x64_asm.S
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs546/fp_x64.o: src/P546/AMD64/fp_x64.c
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs546/fp_x64_asm.o: src/P546/AMD64/fp_x64_asm.S
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs610/fp_x64.o: src/P610/AMD64/fp_x64.c
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs610/fp_x64_asm.o: src/P610/AMD64/fp_x64_asm.S
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs697/fp_x64.o: src/P697/AMD64/fp_x64.c
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs697/fp_x64_asm.o: src/P697/AMD64/fp_x64_asm.S
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs751/fp_x64.o: src/P751/AMD64/fp_x64.c
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+
+objs751/fp_x64_asm.o: src/P751/AMD64/fp_x64_asm.S
+	mkdir -p $(@D) && $(CC) -c $(CFLAGS) $< -o $@
+endif
+endif
+
+INDEPENDENT_OBJS=objs/random.o objs/fips202.o
+objs/random.o: src/random/random.c
+objs/fips202.o: src/sha3/fips202.c
+
+$(INDEPENDENT_OBJS):
+	@mkdir -p $(@D)
+	$(CC) -c $(CFLAGS) $< -o $@
+
+lib377: $(OBJECTS_377)
+	rm -rf lib377 sike377
+	mkdir lib377 sike377
+	$(AR) lib377/libsidh.a $^
+	$(RANLIB) lib377/libsidh.a
+
+lib434: $(OBJECTS_434)
+	rm -rf lib434 sike434
+	mkdir lib434 sike434
+	$(AR) lib434/libsidh.a $^
+	$(RANLIB) lib434/libsidh.a
+
+lib503: $(OBJECTS_503)
+	rm -rf lib503 sike503
+	mkdir lib503 sike503
+	$(AR) lib503/libsidh.a $^
+	$(RANLIB) lib503/libsidh.a
+
+lib546: $(OBJECTS_546)
+	rm -rf lib546 sike546
+	mkdir lib546 sike546
+	$(AR) lib546/libsidh.a $^
+	$(RANLIB) lib546/libsidh.a
+
+lib610: $(OBJECTS_610)
+	rm -rf lib610 sike610
+	mkdir lib610 sike610
+	$(AR) lib610/libsidh.a $^
+	$(RANLIB) lib610/libsidh.a
+
+lib697: $(OBJECTS_697)
+	rm -rf lib697 sike697
+	mkdir lib697 sike697
+	$(AR) lib697/libsidh.a $^
+	$(RANLIB) lib697/libsidh.a
+
+lib751: $(OBJECTS_751)
+	rm -rf lib751 sike751
+	mkdir lib751 sike751
+	$(AR) lib751/libsidh.a $^
+	$(RANLIB) lib751/libsidh.a
+
+tests: lib377 lib434 lib503 lib546 lib610 lib697 lib751
+	$(CC) $(CFLAGS) -L./lib377 tests/arith_tests-p377.c tests/test_extras.c -lsidh $(LDFLAGS) -o arith_tests-p377 $(ARM_SETTING)
+	$(CC) $(CFLAGS) -L./lib434 tests/arith_tests-p434.c tests/test_extras.c -lsidh $(LDFLAGS) -o arith_tests-p434 $(ARM_SETTING)
+	$(CC) $(CFLAGS) -L./lib503 tests/arith_tests-p503.c tests/test_extras.c -lsidh $(LDFLAGS) -o arith_tests-p503 $(ARM_SETTING)
+	$(CC) $(CFLAGS) -L./lib546 tests/arith_tests-p546.c tests/test_extras.c -lsidh $(LDFLAGS) -o arith_tests-p546 $(ARM_SETTING)
+	$(CC) $(CFLAGS) -L./lib610 tests/arith_tests-p610.c tests/test_extras.c -lsidh $(LDFLAGS) -o arith_tests-p610 $(ARM_SETTING)
+	$(CC) $(CFLAGS) -L./lib697 tests/arith_tests-p697.c tests/test_extras.c -lsidh $(LDFLAGS) -o arith_tests-p697 $(ARM_SETTING)
+	$(CC) $(CFLAGS) -L./lib751 tests/arith_tests-p751.c tests/test_extras.c -lsidh $(LDFLAGS) -o arith_tests-p751 $(ARM_SETTING)
+	
+tests_sike377: lib377
+	$(CC) $(CFLAGS) -L./lib377 tests/test_SIKEp377.c tests/test_extras.c -lsidh $(LDFLAGS) -o sike377/test_SIKE $(ARM_SETTING)
+tests_sike434: lib434
+	$(CC) $(CFLAGS) -L./lib434 tests/test_SIKEp434.c tests/test_extras.c -lsidh $(LDFLAGS) -o sike434/test_SIKE $(ARM_SETTING)
+tests_sike503: lib503
+	$(CC) $(CFLAGS) -L./lib503 tests/test_SIKEp503.c tests/test_extras.c -lsidh $(LDFLAGS) -o sike503/test_SIKE $(ARM_SETTING)
+tests_sike546: lib546
+	$(CC) $(CFLAGS) -L./lib546 tests/test_SIKEp546.c tests/test_extras.c -lsidh $(LDFLAGS) -o sike546/test_SIKE $(ARM_SETTING)
+tests_sike610: lib610
+	$(CC) $(CFLAGS) -L./lib610 tests/test_SIKEp610.c tests/test_extras.c -lsidh $(LDFLAGS) -o sike610/test_SIKE $(ARM_SETTING)
+tests_sike697: lib697
+	$(CC) $(CFLAGS) -L./lib697 tests/test_SIKEp697.c tests/test_extras.c -lsidh $(LDFLAGS) -o sike697/test_SIKE $(ARM_SETTING)
+tests_sike751: lib751
+	$(CC) $(CFLAGS) -L./lib751 tests/test_SIKEp751.c tests/test_extras.c -lsidh $(LDFLAGS) -o sike751/test_SIKE $(ARM_SETTING)
+
+tests_sike: tests_sike377 tests_sike434 tests_sike503 tests_sike546 tests_sike610 tests_sike697 tests_sike751
+
+check: tests tests_sike
+
+# 'tests' is also a source directory (tests/*.c), so it and the other command-only targets must be phony.
+.PHONY: all tests tests_sike tests_sike377 tests_sike434 tests_sike503 tests_sike546 tests_sike610 tests_sike697 tests_sike751 check clean
+clean:
+	rm -rf *.req objs* lib* sike* arith_tests-*
diff --git a/SIKE_sw/README.md b/SIKE_sw/README.md
new file mode 100644
index 0000000..fd454ae
--- /dev/null
+++ b/SIKE_sw/README.md
@@ -0,0 +1,57 @@
+# Software implementation of SIKE 
+# Paper "The Cost to Break SIKE: A Comparative Hardware-Based Analysis with AES and SHA-3"
+
+This library contains efficient C implementations of the CCA-secure key encapsulation mechanism "SIKE". 
+This scheme is conjectured to be secure against quantum computer attacks.
+The software is based on the SIDH library version 3.3 (https://github.com/microsoft/PQCrypto-SIDH).
+
+This library includes the following KEM schemes:
+
+* SIKEp377: matching the post-quantum security of AES128 (NEW, level 1).
+* SIKEp434: matching the post-quantum security of AES128 (level 1).
+* SIKEp503: matching the post-quantum security of SHA3-256 (level 2).
+* SIKEp546: matching the post-quantum security of AES192 (NEW, level 3).
+* SIKEp610: matching the post-quantum security of AES192 (level 3).
+* SIKEp697: matching the post-quantum security of AES256 (NEW, level 5).
+* SIKEp751: matching the post-quantum security of AES256 (level 5).
+
+## Contents
+
+In the remainder, pXXX is one of {p377,p434,p503,p546,p610,p697,p751}.
+
+* [`src folder`](src/): C and header files. Public APIs can be found in src/PXXX/PXXX_api.h.
+* Optimized x64 implementation for pXXX (src/PXXX/AMD64/): optimized implementation of the field arithmetic over the prime pXXX for x64 platforms. 
+* Generic implementation for pXXX (src/PXXX/generic/): implementation of the field arithmetic over the prime pXXX in portable C.
+* [`random folder`](src/random/): randombytes function using the system random number generator.
+* [`sha3 folder`](src/sha3/): SHAKE256 implementation.  
+* [`Test folder`](tests/): test files.   
+* [`Visual Studio folder`](Visual%20Studio/): Visual Studio 2015 files for compilation in Windows.
+* [`Makefile`](Makefile): Makefile for compilation using the GNU GCC or clang compilers on Linux. 
+* [`Readme`](README.md): this readme file.
+
+## Instructions for Linux
+
+By executing:
+
+```sh
+$ make
+```
+
+the library is compiled by default for x64 using clang with optimization level `FAST`, which uses assembly-optimized arithmetic
+(this option requires CPU support for the MULX and ADX instructions).
+
+Other options for x64:
+
+```sh
+$ make CC=[gcc/clang] OPT_LEVEL=[FAST/GENERIC]
+```
+
+The use of `OPT_LEVEL=GENERIC` disables the use of assembly-optimized arithmetic.
+
+To run the tests and benchmarks, execute:
+
+```sh
+$ ./arith_tests-pXXX
+$ ./sikeXXX/test_SIKE
+```
+
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDH.sln b/SIKE_sw/Visual Studio/SIDH/SIDH.sln
new file mode 100644
index 0000000..f82ec43
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDH.sln	
@@ -0,0 +1,293 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.25420.1
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "P751", "SIDHp751.vcxproj", "{8283DD76-E88A-4B63-ABDE-33F014178413}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "P503", "SIDHp503.vcxproj", "{BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-SIKEp503", "..\kem_tests\test-SIKEp503.vcxproj", "{EF9FE361-D94D-4CE0-8873-739A925326A3}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-SIKEp751", "..\kem_tests\test-SIKEp751.vcxproj", "{0D570915-7551-4D5F-A2F0-A4A6200185F9}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "P434", "SIDHp434.vcxproj", "{E46FD055-7619-4C50-8360-FA3BC2F650FB}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-SIKEp434", "..\kem_tests\test-SIKEp434.vcxproj", "{53B2CD97-2FE6-4927-86A7-B16E436CFBD5}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "P377", "SIDHp377.vcxproj", "{05CEF530-F410-4C21-AC70-A7EF991DEE6A}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-SIKEp377", "..\kem_tests\test-SIKEp377.vcxproj", "{0D497554-D408-4061-BA26-2A65F4272841}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arith_tests-P377", "..\arith_tests\arith_tests-P377.vcxproj", "{10C4B543-0224-43D3-B84D-390665AA6C25}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arith_tests-P434", "..\arith_tests\arith_tests-P434.vcxproj", "{8944AC47-A218-4F4D-8AF1-AF704160A727}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arith_tests-P503", "..\arith_tests\arith_tests-P503.vcxproj", "{464B689B-7C93-47A2-B2F5-FE162A4EF404}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arith_tests-P610", "..\arith_tests\arith_tests-P610.vcxproj", "{2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arith_tests-P751", "..\arith_tests\arith_tests-P751.vcxproj", "{C9639168-C3FF-4427-BC3B-D907FF11DE73}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "P610", "SIDHp610.vcxproj", "{ED1BA17A-58EA-4D9F-9B19-7061395E22BB}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-SIKEp610", "..\kem_tests\test-SIKEp610.vcxproj", "{DC10CB31-A905-402E-B466-46ADCD1AD61C}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "P546", "SIDHp546.vcxproj", "{48010B78-5594-4FE9-81AC-909B670C1516}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arith_tests-P546", "..\arith_tests\arith_tests-P546.vcxproj", "{5572CD5B-7F2F-4F44-B7AC-844291850C6E}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-SIKEp546", "..\kem_tests\test-SIKEp546.vcxproj", "{E52D3FE9-FD9F-4D93-9712-8172DB469831}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arith_tests-P697", "..\arith_tests\arith_tests-P697.vcxproj", "{C0892335-7EB7-48D4-83A1-500953D0526B}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test-SIKEp697", "..\kem_tests\test-SIKEp697.vcxproj", "{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "P697", "SIDHp697.vcxproj", "{F7447653-7518-4BF0-934D-65801C74D42A}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Fast|x64 = Fast|x64
+		Fast|x86 = Fast|x86
+		Generic|x64 = Generic|x64
+		Generic|x86 = Generic|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{8283DD76-E88A-4B63-ABDE-33F014178413}.Debug|x64.ActiveCfg = Debug|x64
+		{8283DD76-E88A-4B63-ABDE-33F014178413}.Debug|x64.Build.0 = Debug|x64
+		{8283DD76-E88A-4B63-ABDE-33F014178413}.Debug|x86.ActiveCfg = Debug|Win32
+		{8283DD76-E88A-4B63-ABDE-33F014178413}.Debug|x86.Build.0 = Debug|Win32
+		{8283DD76-E88A-4B63-ABDE-33F014178413}.Fast|x64.ActiveCfg = Fast|x64
+		{8283DD76-E88A-4B63-ABDE-33F014178413}.Fast|x64.Build.0 = Fast|x64
+		{8283DD76-E88A-4B63-ABDE-33F014178413}.Fast|x86.ActiveCfg = Fast|Win32
+		{8283DD76-E88A-4B63-ABDE-33F014178413}.Generic|x64.ActiveCfg = Generic|x64
+		{8283DD76-E88A-4B63-ABDE-33F014178413}.Generic|x64.Build.0 = Generic|x64
+		{8283DD76-E88A-4B63-ABDE-33F014178413}.Generic|x86.ActiveCfg = Generic|Win32
+		{8283DD76-E88A-4B63-ABDE-33F014178413}.Generic|x86.Build.0 = Generic|Win32
+		{BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Debug|x64.ActiveCfg = Debug|x64
+		{BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Debug|x64.Build.0 = Debug|x64
+		{BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Debug|x86.ActiveCfg = Debug|Win32
+		{BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Debug|x86.Build.0 = Debug|Win32
+		{BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Fast|x64.ActiveCfg = Fast|x64
+		{BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Fast|x64.Build.0 = Fast|x64
+		{BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Fast|x86.ActiveCfg = Fast|Win32
+		{BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Generic|x64.ActiveCfg = Generic|x64
+		{BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Generic|x64.Build.0 = Generic|x64
+		{BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Generic|x86.ActiveCfg = Generic|Win32
+		{BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}.Generic|x86.Build.0 = Generic|Win32
+		{EF9FE361-D94D-4CE0-8873-739A925326A3}.Debug|x64.ActiveCfg = Debug|x64
+		{EF9FE361-D94D-4CE0-8873-739A925326A3}.Debug|x64.Build.0 = Debug|x64
+		{EF9FE361-D94D-4CE0-8873-739A925326A3}.Debug|x86.ActiveCfg = Debug|Win32
+		{EF9FE361-D94D-4CE0-8873-739A925326A3}.Debug|x86.Build.0 = Debug|Win32
+		{EF9FE361-D94D-4CE0-8873-739A925326A3}.Fast|x64.ActiveCfg = Fast|x64
+		{EF9FE361-D94D-4CE0-8873-739A925326A3}.Fast|x64.Build.0 = Fast|x64
+		{EF9FE361-D94D-4CE0-8873-739A925326A3}.Fast|x86.ActiveCfg = Fast|Win32
+		{EF9FE361-D94D-4CE0-8873-739A925326A3}.Generic|x64.ActiveCfg = Generic|x64
+		{EF9FE361-D94D-4CE0-8873-739A925326A3}.Generic|x64.Build.0 = Generic|x64
+		{EF9FE361-D94D-4CE0-8873-739A925326A3}.Generic|x86.ActiveCfg = Generic|Win32
+		{EF9FE361-D94D-4CE0-8873-739A925326A3}.Generic|x86.Build.0 = Generic|Win32
+		{0D570915-7551-4D5F-A2F0-A4A6200185F9}.Debug|x64.ActiveCfg = Debug|x64
+		{0D570915-7551-4D5F-A2F0-A4A6200185F9}.Debug|x64.Build.0 = Debug|x64
+		{0D570915-7551-4D5F-A2F0-A4A6200185F9}.Debug|x86.ActiveCfg = Debug|Win32
+		{0D570915-7551-4D5F-A2F0-A4A6200185F9}.Debug|x86.Build.0 = Debug|Win32
+		{0D570915-7551-4D5F-A2F0-A4A6200185F9}.Fast|x64.ActiveCfg = Fast|x64
+		{0D570915-7551-4D5F-A2F0-A4A6200185F9}.Fast|x64.Build.0 = Fast|x64
+		{0D570915-7551-4D5F-A2F0-A4A6200185F9}.Fast|x86.ActiveCfg = Fast|Win32
+		{0D570915-7551-4D5F-A2F0-A4A6200185F9}.Generic|x64.ActiveCfg = Generic|x64
+		{0D570915-7551-4D5F-A2F0-A4A6200185F9}.Generic|x64.Build.0 = Generic|x64
+		{0D570915-7551-4D5F-A2F0-A4A6200185F9}.Generic|x86.ActiveCfg = Generic|Win32
+		{0D570915-7551-4D5F-A2F0-A4A6200185F9}.Generic|x86.Build.0 = Generic|Win32
+		{E46FD055-7619-4C50-8360-FA3BC2F650FB}.Debug|x64.ActiveCfg = Debug|x64
+		{E46FD055-7619-4C50-8360-FA3BC2F650FB}.Debug|x64.Build.0 = Debug|x64
+		{E46FD055-7619-4C50-8360-FA3BC2F650FB}.Debug|x86.ActiveCfg = Debug|Win32
+		{E46FD055-7619-4C50-8360-FA3BC2F650FB}.Debug|x86.Build.0 = Debug|Win32
+		{E46FD055-7619-4C50-8360-FA3BC2F650FB}.Fast|x64.ActiveCfg = Fast|x64
+		{E46FD055-7619-4C50-8360-FA3BC2F650FB}.Fast|x64.Build.0 = Fast|x64
+		{E46FD055-7619-4C50-8360-FA3BC2F650FB}.Fast|x86.ActiveCfg = Fast|Win32
+		{E46FD055-7619-4C50-8360-FA3BC2F650FB}.Generic|x64.ActiveCfg = Generic|x64
+		{E46FD055-7619-4C50-8360-FA3BC2F650FB}.Generic|x64.Build.0 = Generic|x64
+		{E46FD055-7619-4C50-8360-FA3BC2F650FB}.Generic|x86.ActiveCfg = Generic|Win32
+		{E46FD055-7619-4C50-8360-FA3BC2F650FB}.Generic|x86.Build.0 = Generic|Win32
+		{53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Debug|x64.ActiveCfg = Debug|x64
+		{53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Debug|x64.Build.0 = Debug|x64
+		{53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Debug|x86.ActiveCfg = Debug|Win32
+		{53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Debug|x86.Build.0 = Debug|Win32
+		{53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Fast|x64.ActiveCfg = Fast|x64
+		{53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Fast|x64.Build.0 = Fast|x64
+		{53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Fast|x86.ActiveCfg = Fast|Win32
+		{53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Generic|x64.ActiveCfg = Generic|x64
+		{53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Generic|x64.Build.0 = Generic|x64
+		{53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Generic|x86.ActiveCfg = Generic|Win32
+		{53B2CD97-2FE6-4927-86A7-B16E436CFBD5}.Generic|x86.Build.0 = Generic|Win32
+		{05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Debug|x64.ActiveCfg = Debug|x64
+		{05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Debug|x64.Build.0 = Debug|x64
+		{05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Debug|x86.ActiveCfg = Debug|Win32
+		{05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Debug|x86.Build.0 = Debug|Win32
+		{05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Fast|x64.ActiveCfg = Fast|x64
+		{05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Fast|x64.Build.0 = Fast|x64
+		{05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Fast|x86.ActiveCfg = Fast|Win32
+		{05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Generic|x64.ActiveCfg = Generic|x64
+		{05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Generic|x64.Build.0 = Generic|x64
+		{05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Generic|x86.ActiveCfg = Generic|Win32
+		{05CEF530-F410-4C21-AC70-A7EF991DEE6A}.Generic|x86.Build.0 = Generic|Win32
+		{0D497554-D408-4061-BA26-2A65F4272841}.Debug|x64.ActiveCfg = Debug|x64
+		{0D497554-D408-4061-BA26-2A65F4272841}.Debug|x64.Build.0 = Debug|x64
+		{0D497554-D408-4061-BA26-2A65F4272841}.Debug|x86.ActiveCfg = Debug|Win32
+		{0D497554-D408-4061-BA26-2A65F4272841}.Debug|x86.Build.0 = Debug|Win32
+		{0D497554-D408-4061-BA26-2A65F4272841}.Fast|x64.ActiveCfg = Fast|x64
+		{0D497554-D408-4061-BA26-2A65F4272841}.Fast|x64.Build.0 = Fast|x64
+		{0D497554-D408-4061-BA26-2A65F4272841}.Fast|x86.ActiveCfg = Fast|Win32
+		{0D497554-D408-4061-BA26-2A65F4272841}.Generic|x64.ActiveCfg = Generic|x64
+		{0D497554-D408-4061-BA26-2A65F4272841}.Generic|x64.Build.0 = Generic|x64
+		{0D497554-D408-4061-BA26-2A65F4272841}.Generic|x86.ActiveCfg = Generic|Win32
+		{0D497554-D408-4061-BA26-2A65F4272841}.Generic|x86.Build.0 = Generic|Win32
+		{10C4B543-0224-43D3-B84D-390665AA6C25}.Debug|x64.ActiveCfg = Debug|x64
+		{10C4B543-0224-43D3-B84D-390665AA6C25}.Debug|x64.Build.0 = Debug|x64
+		{10C4B543-0224-43D3-B84D-390665AA6C25}.Debug|x86.ActiveCfg = Debug|Win32
+		{10C4B543-0224-43D3-B84D-390665AA6C25}.Debug|x86.Build.0 = Debug|Win32
+		{10C4B543-0224-43D3-B84D-390665AA6C25}.Fast|x64.ActiveCfg = Fast|x64
+		{10C4B543-0224-43D3-B84D-390665AA6C25}.Fast|x64.Build.0 = Fast|x64
+		{10C4B543-0224-43D3-B84D-390665AA6C25}.Fast|x86.ActiveCfg = Fast|Win32
+		{10C4B543-0224-43D3-B84D-390665AA6C25}.Generic|x64.ActiveCfg = Generic|x64
+		{10C4B543-0224-43D3-B84D-390665AA6C25}.Generic|x64.Build.0 = Generic|x64
+		{10C4B543-0224-43D3-B84D-390665AA6C25}.Generic|x86.ActiveCfg = Generic|Win32
+		{10C4B543-0224-43D3-B84D-390665AA6C25}.Generic|x86.Build.0 = Generic|Win32
+		{8944AC47-A218-4F4D-8AF1-AF704160A727}.Debug|x64.ActiveCfg = Debug|x64
+		{8944AC47-A218-4F4D-8AF1-AF704160A727}.Debug|x64.Build.0 = Debug|x64
+		{8944AC47-A218-4F4D-8AF1-AF704160A727}.Debug|x86.ActiveCfg = Debug|Win32
+		{8944AC47-A218-4F4D-8AF1-AF704160A727}.Debug|x86.Build.0 = Debug|Win32
+		{8944AC47-A218-4F4D-8AF1-AF704160A727}.Fast|x64.ActiveCfg = Fast|x64
+		{8944AC47-A218-4F4D-8AF1-AF704160A727}.Fast|x64.Build.0 = Fast|x64
+		{8944AC47-A218-4F4D-8AF1-AF704160A727}.Fast|x86.ActiveCfg = Fast|Win32
+		{8944AC47-A218-4F4D-8AF1-AF704160A727}.Generic|x64.ActiveCfg = Generic|x64
+		{8944AC47-A218-4F4D-8AF1-AF704160A727}.Generic|x64.Build.0 = Generic|x64
+		{8944AC47-A218-4F4D-8AF1-AF704160A727}.Generic|x86.ActiveCfg = Generic|Win32
+		{8944AC47-A218-4F4D-8AF1-AF704160A727}.Generic|x86.Build.0 = Generic|Win32
+		{464B689B-7C93-47A2-B2F5-FE162A4EF404}.Debug|x64.ActiveCfg = Debug|x64
+		{464B689B-7C93-47A2-B2F5-FE162A4EF404}.Debug|x64.Build.0 = Debug|x64
+		{464B689B-7C93-47A2-B2F5-FE162A4EF404}.Debug|x86.ActiveCfg = Debug|Win32
+		{464B689B-7C93-47A2-B2F5-FE162A4EF404}.Debug|x86.Build.0 = Debug|Win32
+		{464B689B-7C93-47A2-B2F5-FE162A4EF404}.Fast|x64.ActiveCfg = Fast|x64
+		{464B689B-7C93-47A2-B2F5-FE162A4EF404}.Fast|x64.Build.0 = Fast|x64
+		{464B689B-7C93-47A2-B2F5-FE162A4EF404}.Fast|x86.ActiveCfg = Fast|Win32
+		{464B689B-7C93-47A2-B2F5-FE162A4EF404}.Generic|x64.ActiveCfg = Generic|x64
+		{464B689B-7C93-47A2-B2F5-FE162A4EF404}.Generic|x64.Build.0 = Generic|x64
+		{464B689B-7C93-47A2-B2F5-FE162A4EF404}.Generic|x86.ActiveCfg = Generic|Win32
+		{464B689B-7C93-47A2-B2F5-FE162A4EF404}.Generic|x86.Build.0 = Generic|Win32
+		{2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Debug|x64.ActiveCfg = Debug|x64
+		{2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Debug|x64.Build.0 = Debug|x64
+		{2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Debug|x86.ActiveCfg = Debug|Win32
+		{2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Debug|x86.Build.0 = Debug|Win32
+		{2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Fast|x64.ActiveCfg = Fast|x64
+		{2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Fast|x64.Build.0 = Fast|x64
+		{2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Fast|x86.ActiveCfg = Fast|Win32
+		{2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Generic|x64.ActiveCfg = Generic|x64
+		{2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Generic|x64.Build.0 = Generic|x64
+		{2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Generic|x86.ActiveCfg = Generic|Win32
+		{2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}.Generic|x86.Build.0 = Generic|Win32
+		{C9639168-C3FF-4427-BC3B-D907FF11DE73}.Debug|x64.ActiveCfg = Debug|x64
+		{C9639168-C3FF-4427-BC3B-D907FF11DE73}.Debug|x64.Build.0 = Debug|x64
+		{C9639168-C3FF-4427-BC3B-D907FF11DE73}.Debug|x86.ActiveCfg = Debug|Win32
+		{C9639168-C3FF-4427-BC3B-D907FF11DE73}.Debug|x86.Build.0 = Debug|Win32
+		{C9639168-C3FF-4427-BC3B-D907FF11DE73}.Fast|x64.ActiveCfg = Fast|x64
+		{C9639168-C3FF-4427-BC3B-D907FF11DE73}.Fast|x64.Build.0 = Fast|x64
+		{C9639168-C3FF-4427-BC3B-D907FF11DE73}.Fast|x86.ActiveCfg = Fast|Win32
+		{C9639168-C3FF-4427-BC3B-D907FF11DE73}.Generic|x64.ActiveCfg = Generic|x64
+		{C9639168-C3FF-4427-BC3B-D907FF11DE73}.Generic|x64.Build.0 = Generic|x64
+		{C9639168-C3FF-4427-BC3B-D907FF11DE73}.Generic|x86.ActiveCfg = Generic|Win32
+		{C9639168-C3FF-4427-BC3B-D907FF11DE73}.Generic|x86.Build.0 = Generic|Win32
+		{ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Debug|x64.ActiveCfg = Debug|x64
+		{ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Debug|x64.Build.0 = Debug|x64
+		{ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Debug|x86.ActiveCfg = Debug|Win32
+		{ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Debug|x86.Build.0 = Debug|Win32
+		{ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Fast|x64.ActiveCfg = Fast|x64
+		{ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Fast|x64.Build.0 = Fast|x64
+		{ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Fast|x86.ActiveCfg = Fast|Win32
+		{ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Generic|x64.ActiveCfg = Generic|x64
+		{ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Generic|x64.Build.0 = Generic|x64
+		{ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Generic|x86.ActiveCfg = Generic|Win32
+		{ED1BA17A-58EA-4D9F-9B19-7061395E22BB}.Generic|x86.Build.0 = Generic|Win32
+		{DC10CB31-A905-402E-B466-46ADCD1AD61C}.Debug|x64.ActiveCfg = Debug|x64
+		{DC10CB31-A905-402E-B466-46ADCD1AD61C}.Debug|x64.Build.0 = Debug|x64
+		{DC10CB31-A905-402E-B466-46ADCD1AD61C}.Debug|x86.ActiveCfg = Debug|Win32
+		{DC10CB31-A905-402E-B466-46ADCD1AD61C}.Debug|x86.Build.0 = Debug|Win32
+		{DC10CB31-A905-402E-B466-46ADCD1AD61C}.Fast|x64.ActiveCfg = Fast|x64
+		{DC10CB31-A905-402E-B466-46ADCD1AD61C}.Fast|x64.Build.0 = Fast|x64
+		{DC10CB31-A905-402E-B466-46ADCD1AD61C}.Fast|x86.ActiveCfg = Fast|Win32
+		{DC10CB31-A905-402E-B466-46ADCD1AD61C}.Generic|x64.ActiveCfg = Generic|x64
+		{DC10CB31-A905-402E-B466-46ADCD1AD61C}.Generic|x64.Build.0 = Generic|x64
+		{DC10CB31-A905-402E-B466-46ADCD1AD61C}.Generic|x86.ActiveCfg = Generic|Win32
+		{DC10CB31-A905-402E-B466-46ADCD1AD61C}.Generic|x86.Build.0 = Generic|Win32
+		{48010B78-5594-4FE9-81AC-909B670C1516}.Debug|x64.ActiveCfg = Debug|x64
+		{48010B78-5594-4FE9-81AC-909B670C1516}.Debug|x64.Build.0 = Debug|x64
+		{48010B78-5594-4FE9-81AC-909B670C1516}.Debug|x86.ActiveCfg = Debug|Win32
+		{48010B78-5594-4FE9-81AC-909B670C1516}.Debug|x86.Build.0 = Debug|Win32
+		{48010B78-5594-4FE9-81AC-909B670C1516}.Fast|x64.ActiveCfg = Fast|x64
+		{48010B78-5594-4FE9-81AC-909B670C1516}.Fast|x64.Build.0 = Fast|x64
+		{48010B78-5594-4FE9-81AC-909B670C1516}.Fast|x86.ActiveCfg = Fast|Win32
+		{48010B78-5594-4FE9-81AC-909B670C1516}.Generic|x64.ActiveCfg = Generic|x64
+		{48010B78-5594-4FE9-81AC-909B670C1516}.Generic|x64.Build.0 = Generic|x64
+		{48010B78-5594-4FE9-81AC-909B670C1516}.Generic|x86.ActiveCfg = Generic|Win32
+		{48010B78-5594-4FE9-81AC-909B670C1516}.Generic|x86.Build.0 = Generic|Win32
+		{5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Debug|x64.ActiveCfg = Debug|x64
+		{5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Debug|x64.Build.0 = Debug|x64
+		{5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Debug|x86.ActiveCfg = Debug|Win32
+		{5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Debug|x86.Build.0 = Debug|Win32
+		{5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Fast|x64.ActiveCfg = Fast|x64
+		{5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Fast|x64.Build.0 = Fast|x64
+		{5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Fast|x86.ActiveCfg = Fast|Win32
+		{5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Generic|x64.ActiveCfg = Generic|x64
+		{5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Generic|x64.Build.0 = Generic|x64
+		{5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Generic|x86.ActiveCfg = Generic|Win32
+		{5572CD5B-7F2F-4F44-B7AC-844291850C6E}.Generic|x86.Build.0 = Generic|Win32
+		{E52D3FE9-FD9F-4D93-9712-8172DB469831}.Debug|x64.ActiveCfg = Debug|x64
+		{E52D3FE9-FD9F-4D93-9712-8172DB469831}.Debug|x64.Build.0 = Debug|x64
+		{E52D3FE9-FD9F-4D93-9712-8172DB469831}.Debug|x86.ActiveCfg = Debug|Win32
+		{E52D3FE9-FD9F-4D93-9712-8172DB469831}.Debug|x86.Build.0 = Debug|Win32
+		{E52D3FE9-FD9F-4D93-9712-8172DB469831}.Fast|x64.ActiveCfg = Fast|x64
+		{E52D3FE9-FD9F-4D93-9712-8172DB469831}.Fast|x64.Build.0 = Fast|x64
+		{E52D3FE9-FD9F-4D93-9712-8172DB469831}.Fast|x86.ActiveCfg = Fast|Win32
+		{E52D3FE9-FD9F-4D93-9712-8172DB469831}.Generic|x64.ActiveCfg = Generic|x64
+		{E52D3FE9-FD9F-4D93-9712-8172DB469831}.Generic|x64.Build.0 = Generic|x64
+		{E52D3FE9-FD9F-4D93-9712-8172DB469831}.Generic|x86.ActiveCfg = Generic|Win32
+		{E52D3FE9-FD9F-4D93-9712-8172DB469831}.Generic|x86.Build.0 = Generic|Win32
+		{C0892335-7EB7-48D4-83A1-500953D0526B}.Debug|x64.ActiveCfg = Debug|x64
+		{C0892335-7EB7-48D4-83A1-500953D0526B}.Debug|x64.Build.0 = Debug|x64
+		{C0892335-7EB7-48D4-83A1-500953D0526B}.Debug|x86.ActiveCfg = Debug|Win32
+		{C0892335-7EB7-48D4-83A1-500953D0526B}.Debug|x86.Build.0 = Debug|Win32
+		{C0892335-7EB7-48D4-83A1-500953D0526B}.Fast|x64.ActiveCfg = Fast|x64
+		{C0892335-7EB7-48D4-83A1-500953D0526B}.Fast|x64.Build.0 = Fast|x64
+		{C0892335-7EB7-48D4-83A1-500953D0526B}.Fast|x86.ActiveCfg = Fast|Win32
+		{C0892335-7EB7-48D4-83A1-500953D0526B}.Generic|x64.ActiveCfg = Generic|x64
+		{C0892335-7EB7-48D4-83A1-500953D0526B}.Generic|x64.Build.0 = Generic|x64
+		{C0892335-7EB7-48D4-83A1-500953D0526B}.Generic|x86.ActiveCfg = Generic|Win32
+		{C0892335-7EB7-48D4-83A1-500953D0526B}.Generic|x86.Build.0 = Generic|Win32
+		{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Debug|x64.ActiveCfg = Debug|x64
+		{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Debug|x64.Build.0 = Debug|x64
+		{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Debug|x86.ActiveCfg = Debug|Win32
+		{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Debug|x86.Build.0 = Debug|Win32
+		{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Fast|x64.ActiveCfg = Fast|x64
+		{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Fast|x64.Build.0 = Fast|x64
+		{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Fast|x86.ActiveCfg = Fast|Win32
+		{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Generic|x64.ActiveCfg = Generic|x64
+		{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Generic|x64.Build.0 = Generic|x64
+		{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Generic|x86.ActiveCfg = Generic|Win32
+		{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}.Generic|x86.Build.0 = Generic|Win32
+		{F7447653-7518-4BF0-934D-65801C74D42A}.Debug|x64.ActiveCfg = Debug|x64
+		{F7447653-7518-4BF0-934D-65801C74D42A}.Debug|x64.Build.0 = Debug|x64
+		{F7447653-7518-4BF0-934D-65801C74D42A}.Debug|x86.ActiveCfg = Debug|Win32
+		{F7447653-7518-4BF0-934D-65801C74D42A}.Debug|x86.Build.0 = Debug|Win32
+		{F7447653-7518-4BF0-934D-65801C74D42A}.Fast|x64.ActiveCfg = Fast|x64
+		{F7447653-7518-4BF0-934D-65801C74D42A}.Fast|x64.Build.0 = Fast|x64
+		{F7447653-7518-4BF0-934D-65801C74D42A}.Fast|x86.ActiveCfg = Fast|Win32
+		{F7447653-7518-4BF0-934D-65801C74D42A}.Generic|x64.ActiveCfg = Generic|x64
+		{F7447653-7518-4BF0-934D-65801C74D42A}.Generic|x64.Build.0 = Generic|x64
+		{F7447653-7518-4BF0-934D-65801C74D42A}.Generic|x86.ActiveCfg = Generic|Win32
+		{F7447653-7518-4BF0-934D-65801C74D42A}.Generic|x86.Build.0 = Generic|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj b/SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj
new file mode 100644
index 0000000..cb653eb
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj	
@@ -0,0 +1,590 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{05CEF530-F410-4C21-AC70-A7EF991DEE6A}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>isoECClib</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <ProjectName>P377</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets"> <!-- Property sheets, Release|x64: imports the per-user Microsoft.Cpp.$(Platform).user.props from $(UserRootDir), guarded by an exists() check. The groups below repeat the same guarded import for the remaining configuration/platform pairs. -->
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" /> <!-- Empty group: no user macros are defined. -->
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'"> <!-- Output locations. Every x64 configuration uses OutDir = $(SolutionDir)$(Platform)\$(Configuration)\SIDHp377\ and an IntDir with the same tail but no $(SolutionDir) prefix (a relative path). -->
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp377\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp377\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp377\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp377\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp377\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp377\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"> <!-- Win32 configurations from here on: same layout as x64 but without the $(Platform) path component. -->
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp377\</OutDir>
+    <IntDir>$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp377\</OutDir>
+    <IntDir>$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp377\</OutDir>
+    <IntDir>$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp377\</OutDir>
+    <IntDir>$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp377\</OutDir>
+    <IntDir>$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp377\</OutDir>
+    <IntDir>$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Debug|Win32 tool defaults: unoptimized compile with __WINDOWS__, _X86_ and _GENERIC_ defined. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <IntrinsicFunctions>false</IntrinsicFunctions>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> <!-- NOTE(review): MultiThreadedDLL is the non-debug DLL runtime (/MD); confirm this is intended for a Debug configuration. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies> <!-- bcrypt.lib is passed to the librarian as an additional dependency. -->
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- Debug|x64 tool defaults: unoptimized compile with __WINDOWS__, _FAST_ and _AMD64_ defined and AVX code generation enabled. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <IntrinsicFunctions>false</IntrinsicFunctions>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <MinimalRebuild>true</MinimalRebuild> <!-- Differs from Debug|Win32, where MinimalRebuild is false. -->
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> <!-- NOTE(review): MultiThreadedDLL is the non-debug DLL runtime (/MD); confirm this is intended for a Debug configuration. -->
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+    <Lib> <!-- The two empty elements below set no value; only bcrypt.lib is specified. -->
+      <IgnoreAllDefaultLibraries>
+      </IgnoreAllDefaultLibraries>
+      <LinkTimeCodeGeneration>
+      </LinkTimeCodeGeneration>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+    <ProjectReference>
+      <LinkLibraryDependencies>
+      </LinkLibraryDependencies>
+    </ProjectReference>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Release|Win32 tool defaults: MaxSpeed optimization with intrinsics; defines __WINDOWS__, _X86_ and _GENERIC_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies> <!-- bcrypt.lib is passed to the librarian as an additional dependency. -->
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'"> <!-- Optimized-fast|Win32 tool defaults: MaxSpeed with intrinsics. The defines are _X86_ and _GENERIC_ (no _OPTIMIZED_FAST_ on Win32), matching Release|Win32. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies> <!-- bcrypt.lib is passed to the librarian as an additional dependency. -->
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'"> <!-- Fast|Win32 tool defaults: MaxSpeed with intrinsics. The defines are _X86_ and _GENERIC_ (no _FAST_ on Win32), matching Release|Win32. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions> <!-- NOTE(review): _GENERIC_ is defined here, yet the source list excludes fp_generic.c for Fast|Win32; confirm intended. -->
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies> <!-- bcrypt.lib is passed to the librarian as an additional dependency. -->
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <!-- Release|x64 tool defaults: MaxSpeed with intrinsics and AVX code generation; defines __WINDOWS__, _OPTIMIZED_FAST_ and _AMD64_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies> <!-- bcrypt.lib is passed to the librarian as an additional dependency. -->
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'"> <!-- Optimized-fast|x64 tool defaults: MaxSpeed with intrinsics and AVX; defines __WINDOWS__, _OPTIMIZED_FAST_ and _AMD64_ (the same settings as Release|x64). -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies> <!-- bcrypt.lib is passed to the librarian as an additional dependency. -->
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'"> <!-- Fast|x64 tool defaults: MaxSpeed with intrinsics and AVX; defines __WINDOWS__, _FAST_ and _AMD64_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies> <!-- bcrypt.lib is passed to the librarian as an additional dependency. -->
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"> <!-- Generic|Win32 tool defaults: MaxSpeed with intrinsics; defines __WINDOWS__, _X86_ and _GENERIC_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies> <!-- bcrypt.lib is passed to the librarian as an additional dependency. -->
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'"> <!-- Optimized-generic|Win32 tool defaults: MaxSpeed with intrinsics; defines __WINDOWS__, _X86_ and _GENERIC_. Unlike the x64 counterpart, _OPTIMIZED_GENERIC_ is not defined here. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies> <!-- bcrypt.lib is passed to the librarian as an additional dependency. -->
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'"> <!-- Generic|x64 tool defaults: MaxSpeed with intrinsics and AVX; defines __WINDOWS__, _GENERIC_ and _AMD64_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies> <!-- bcrypt.lib is passed to the librarian as an additional dependency. -->
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'"> <!-- Optimized-generic|x64 tool defaults: MaxSpeed with intrinsics and AVX; defines __WINDOWS__, _OPTIMIZED_GENERIC_, _AMD64_ and _GENERIC_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies> <!-- bcrypt.lib is passed to the librarian as an additional dependency. -->
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup> <!-- C sources of the project. ExcludedFromBuild entries switch individual files off for specific configuration/platform pairs. -->
+    <ClCompile Include="..\..\src\ec_isogeny.c"> <!-- Excluded for all twelve configuration/platform pairs listed below, so it is never compiled as its own translation unit here; presumably it is #included by another source file (TODO confirm). -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fpx.c"> <!-- Excluded for all twelve configuration/platform pairs listed below (same pattern as ec_isogeny.c). -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P377\AMD64\fp_x64.c"> <!-- Excluded for every Win32 configuration and for Generic|x64. NOTE(review): not excluded for Release|x64, Optimized-fast|x64 or Optimized-generic|x64, where fp_generic.c is also built; confirm both files are meant to compile there. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+      </ExcludedFromBuild> <!-- Empty value for Debug|x64: no exclusion is set for that configuration. -->
+    </ClCompile>
+    <ClCompile Include="..\..\src\P377\generic\fp_generic.c"> <!-- Excluded for Fast|x64, Fast|Win32 and Debug|x64. NOTE(review): fp_x64.c is also excluded for Fast|Win32, so neither of the two fp_* files is built in that configuration; confirm intended. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P377\P377.c" /> <!-- P377.c, random.c and fips202.c carry no exclusions and are built in every configuration. -->
+    <ClCompile Include="..\..\src\random\random.c" />
+    <ClCompile Include="..\..\src\sha3\fips202.c" />
+    <ClCompile Include="..\..\src\sidh.c"> <!-- Excluded for all twelve configuration/platform pairs listed below (same pattern as ec_isogeny.c). -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sike.c"> <!-- Excluded for all twelve configuration/platform pairs listed below (same pattern as ec_isogeny.c). -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup> <!-- Header files listed in the project: shared headers from src, the P377-specific API/internal headers, and the random and sha3 headers. -->
+    <ClInclude Include="..\..\src\internal.h" />
+    <ClInclude Include="..\..\src\P377\P377_api.h" />
+    <ClInclude Include="..\..\src\P377\P377_internal.h" />
+    <ClInclude Include="..\..\src\random\random.h" />
+    <ClInclude Include="..\..\src\sha3\fips202.h" />
+    <ClInclude Include="..\..\src\config.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj.filters b/SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj.filters
new file mode 100644
index 0000000..6c8f8bb
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDHp377.vcxproj.filters	
@@ -0,0 +1,81 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup> <!-- Filter (virtual folder) definitions. The three top-level filters carry extension lists; the nested ones below are identified only by a GUID. -->
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files"> <!-- Defined here, but no item in this file is assigned to it. -->
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+    <Filter Include="Source Files\generic">
+      <UniqueIdentifier>{c12e408e-2171-41d7-8815-33244cd7b1db}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\x64">
+      <UniqueIdentifier>{e81738a2-8bd8-449a-8918-07266c29f2b7}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\random">
+      <UniqueIdentifier>{41f562a7-d335-4517-9c95-c6a4ce94c2f0}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Header Files\random">
+      <UniqueIdentifier>{d2fd7e3f-38db-40e6-9994-0c979863e36b}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\sha3">
+      <UniqueIdentifier>{75ac45a5-2e31-48af-986d-44d27e6a2a42}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup> <!-- Assigns each C source to a filter: protocol and curve sources directly under "Source Files", the rest under the generic, x64, random and sha3 sub-filters. -->
+    <ClCompile Include="..\..\src\random\random.c">
+      <Filter>Source Files\random</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\ec_isogeny.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fpx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sha3\fips202.c">
+      <Filter>Source Files\sha3</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sidh.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sike.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P377\P377.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P377\generic\fp_generic.c">
+      <Filter>Source Files\generic</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P377\AMD64\fp_x64.c">
+      <Filter>Source Files\x64</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup> <!-- Assigns each header to a filter; all headers sit under "Header Files" or its random sub-filter. -->
+    <ClInclude Include="..\..\src\random\random.h">
+      <Filter>Header Files\random</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\sha3\fips202.h">
+      <Filter>Header Files</Filter> <!-- Filed with the other headers; this file defines no "Header Files\sha3" filter. -->
+    </ClInclude>
+    <ClInclude Include="..\..\src\config.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\P377\P377_api.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\P377\P377_internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj b/SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj
new file mode 100644
index 0000000..db4e574
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj	
@@ -0,0 +1,587 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations"><!-- Declares the 12 build targets: Debug, Fast, Generic, Optimized-fast, Optimized-generic and Release, each for Win32 and x64. -->
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals"><!-- Project identity: GUID, Win32Proj keyword, root namespace isoECClib, WindowsTargetPlatformVersion 8.1 and the display name P434. -->
+    <ProjectGuid>{E46FD055-7619-4C50-8360-FA3BC2F650FB}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>isoECClib</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <ProjectName>P434</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"><!-- Per-configuration toolchain settings start here. Every configuration builds a StaticLibrary with the v140 toolset and the Unicode character set. Only the two Debug configurations set UseDebugLibraries to true, and they leave WholeProgramOptimization unset. -->
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"><!-- From here on, all non-Debug configurations share identical values, with WholeProgramOptimization enabled. -->
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'"><!-- Output layout, x64 configurations: OutDir is $(SolutionDir)$(Platform)\$(Configuration)\ and IntDir is $(Platform)\$(Configuration)\SIDHp434\. -->
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"><!-- Output layout, Win32 configurations: same scheme without the $(Platform) path component. -->
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"><!-- Debug|Win32: optimization disabled, intrinsics off, PDB debug info; defines __WINDOWS__, _X86_ and _GENERIC_. NOTE(review): RuntimeLibrary is the non-debug MultiThreadedDLL; confirm this matches the projects that link this library. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <IntrinsicFunctions>false</IntrinsicFunctions>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"><!-- Debug|x64: optimization disabled, intrinsics off, PDB debug info, AdvancedVectorExtensions enabled; defines __WINDOWS__, _FAST_ and _AMD64_. Unlike Debug|Win32, MinimalRebuild is true and FunctionLevelLinking is left empty. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <IntrinsicFunctions>false</IntrinsicFunctions>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+    <Lib>
+      <IgnoreAllDefaultLibraries>
+      </IgnoreAllDefaultLibraries>
+      <LinkTimeCodeGeneration>
+      </LinkTimeCodeGeneration>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+    <ProjectReference>
+      <LinkLibraryDependencies>
+      </LinkLibraryDependencies>
+    </ProjectReference>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"><!-- Release|Win32: MaxSpeed with intrinsics and function-level linking; defines __WINDOWS__, _X86_ and _GENERIC_; bcrypt.lib is listed as a Lib dependency. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'"><!-- Optimized-fast|Win32: MaxSpeed with intrinsics; defines __WINDOWS__, _X86_ and _GENERIC_. NOTE(review): no _OPTIMIZED_FAST_ or _FAST_ define here, unlike the x64 variant; confirm this is the intended Win32 fallback. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'"><!-- Fast|Win32: MaxSpeed with intrinsics; defines __WINDOWS__, _X86_ and _GENERIC_ (no _FAST_ define, unlike Fast|x64). -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"><!-- Release|x64: MaxSpeed with intrinsics and AdvancedVectorExtensions; defines __WINDOWS__, _OPTIMIZED_FAST_ and _AMD64_, the same definitions as Optimized-fast|x64. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'"><!-- Optimized-fast|x64: MaxSpeed with intrinsics and AdvancedVectorExtensions; defines __WINDOWS__, _OPTIMIZED_FAST_ and _AMD64_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'"><!-- Fast|x64: MaxSpeed with intrinsics and AdvancedVectorExtensions; defines __WINDOWS__, _FAST_ and _AMD64_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"><!-- Generic|Win32: MaxSpeed with intrinsics; defines __WINDOWS__, _X86_ and _GENERIC_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'"><!-- Optimized-generic|Win32: MaxSpeed with intrinsics; defines __WINDOWS__, _X86_ and _GENERIC_. NOTE(review): _OPTIMIZED_GENERIC_ is not defined here although Optimized-generic|x64 defines it; confirm intended. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'"><!-- Generic|x64: MaxSpeed with intrinsics and AdvancedVectorExtensions; defines __WINDOWS__, _GENERIC_ and _AMD64_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'"><!-- Optimized-generic|x64: MaxSpeed with intrinsics and AdvancedVectorExtensions; defines __WINDOWS__, _OPTIMIZED_GENERIC_, _AMD64_ and _GENERIC_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup><!-- C sources of the P434 library, each with the configurations in which it is excluded from the build. -->
+    <ClCompile Include="..\..\src\ec_isogeny.c"><!-- Excluded in all 12 configurations. NOTE(review): presumably compiled by being #included from another source file such as P434.c; confirm. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fpx.c"><!-- Excluded in all 12 configurations. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P434\AMD64\fp_x64.c"><!-- Excluded in every Win32 configuration and in Generic|x64. NOTE(review): not excluded in Optimized-generic|x64, which also defines _GENERIC_; confirm intended. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P434\generic\fp_generic.c"><!-- Excluded only in Debug|x64 and Fast|x64. NOTE(review): in Release|x64, Optimized-fast|x64 and Optimized-generic|x64 both this file and fp_x64.c are built; confirm the sources guard against duplicate definitions. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P434\P434.c" /><!-- P434.c, random.c and fips202.c carry no exclusions and are built in every configuration. -->
+    <ClCompile Include="..\..\src\random\random.c" />
+    <ClCompile Include="..\..\src\sha3\fips202.c" />
+    <ClCompile Include="..\..\src\sidh.c"><!-- Excluded in all 12 configurations. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sike.c"><!-- Excluded in all 12 configurations. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup><!-- Header files that belong to the P434 project: shared headers plus the P434 API and internal headers. -->
+    <ClInclude Include="..\..\src\internal.h" />
+    <ClInclude Include="..\..\src\P434\P434_api.h" />
+    <ClInclude Include="..\..\src\P434\P434_internal.h" />
+    <ClInclude Include="..\..\src\random\random.h" />
+    <ClInclude Include="..\..\src\sha3\fips202.h" />
+    <ClInclude Include="..\..\src\config.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj.filters b/SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj.filters
new file mode 100644
index 0000000..37c7bb1
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDHp434.vcxproj.filters	
@@ -0,0 +1,81 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+    <Filter Include="Source Files\generic">
+      <UniqueIdentifier>{c12e408e-2171-41d7-8815-33244cd7b1db}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\x64">
+      <UniqueIdentifier>{e81738a2-8bd8-449a-8918-07266c29f2b7}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\random">
+      <UniqueIdentifier>{41f562a7-d335-4517-9c95-c6a4ce94c2f0}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Header Files\random">
+      <UniqueIdentifier>{d2fd7e3f-38db-40e6-9994-0c979863e36b}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\sha3">
+      <UniqueIdentifier>{75ac45a5-2e31-48af-986d-44d27e6a2a42}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\random\random.c">
+      <Filter>Source Files\random</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\ec_isogeny.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fpx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sha3\fips202.c">
+      <Filter>Source Files\sha3</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sidh.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sike.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P434\P434.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P434\generic\fp_generic.c">
+      <Filter>Source Files\generic</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P434\AMD64\fp_x64.c">
+      <Filter>Source Files\x64</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src\random\random.h">
+      <Filter>Header Files\random</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\sha3\fips202.h">
+      <Filter>Source Files\sha3</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\config.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\P434\P434_api.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\P434\P434_internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj b/SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj
new file mode 100644
index 0000000..3b570eb
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj	
@@ -0,0 +1,598 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{BBC8647D-B9E2-469F-A9A4-BB55B614ADBE}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>isoECClib</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <ProjectName>P503</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <IntrinsicFunctions>false</IntrinsicFunctions>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <IntrinsicFunctions>false</IntrinsicFunctions>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+    <Lib>
+      <IgnoreAllDefaultLibraries>
+      </IgnoreAllDefaultLibraries>
+      <LinkTimeCodeGeneration>
+      </LinkTimeCodeGeneration>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+    <ProjectReference>
+      <LinkLibraryDependencies>
+      </LinkLibraryDependencies>
+    </ProjectReference>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\ec_isogeny.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fpx.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P503\AMD64\fp_x64.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+      </ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P503\generic\fp_generic.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+      </ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+      </ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+      </ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P503\P503.c" />
+    <ClCompile Include="..\..\src\random\random.c" />
+    <ClCompile Include="..\..\src\sha3\fips202.c" />
+    <ClCompile Include="..\..\src\sidh.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sike.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src\internal.h" />
+    <ClInclude Include="..\..\src\P503\P503_api.h" />
+    <ClInclude Include="..\..\src\P503\P503_internal.h" />
+    <ClInclude Include="..\..\src\random\random.h" />
+    <ClInclude Include="..\..\src\sha3\fips202.h" />
+    <ClInclude Include="..\..\src\config.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj.filters b/SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj.filters
new file mode 100644
index 0000000..78d3cbe
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDHp503.vcxproj.filters	
@@ -0,0 +1,81 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+    <Filter Include="Source Files\generic">
+      <UniqueIdentifier>{c12e408e-2171-41d7-8815-33244cd7b1db}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\x64">
+      <UniqueIdentifier>{e81738a2-8bd8-449a-8918-07266c29f2b7}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\random">
+      <UniqueIdentifier>{41f562a7-d335-4517-9c95-c6a4ce94c2f0}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Header Files\random">
+      <UniqueIdentifier>{d2fd7e3f-38db-40e6-9994-0c979863e36b}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\sha3">
+      <UniqueIdentifier>{75ac45a5-2e31-48af-986d-44d27e6a2a42}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\random\random.c">
+      <Filter>Source Files\random</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\ec_isogeny.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fpx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sha3\fips202.c">
+      <Filter>Source Files\sha3</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sidh.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sike.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P503\generic\fp_generic.c">
+      <Filter>Source Files\generic</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P503\AMD64\fp_x64.c">
+      <Filter>Source Files\x64</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P503\P503.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src\random\random.h">
+      <Filter>Header Files\random</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\sha3\fips202.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\config.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\P503\P503_api.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\P503\P503_internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj b/SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj
new file mode 100644
index 0000000..285372f
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj	
@@ -0,0 +1,584 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{48010B78-5594-4FE9-81AC-909B670C1516}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>isoECClib</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <ProjectName>P546</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <IntrinsicFunctions>false</IntrinsicFunctions>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <IntrinsicFunctions>false</IntrinsicFunctions>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+    <Lib>
+      <IgnoreAllDefaultLibraries>
+      </IgnoreAllDefaultLibraries>
+      <LinkTimeCodeGeneration>
+      </LinkTimeCodeGeneration>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+    <ProjectReference>
+      <LinkLibraryDependencies>
+      </LinkLibraryDependencies>
+    </ProjectReference>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\ec_isogeny.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fpx.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P546\AMD64\fp_x64.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P546\generic\fp_generic.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P546\P546.c" />
+    <ClCompile Include="..\..\src\random\random.c" />
+    <ClCompile Include="..\..\src\sha3\fips202.c" />
+    <ClCompile Include="..\..\src\sidh.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sike.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src\internal.h" />
+    <ClInclude Include="..\..\src\P546\P546_api.h" />
+    <ClInclude Include="..\..\src\P546\P546_internal.h" />
+    <ClInclude Include="..\..\src\random\random.h" />
+    <ClInclude Include="..\..\src\sha3\fips202.h" />
+    <ClInclude Include="..\..\src\config.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj.filters b/SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj.filters
new file mode 100644
index 0000000..844bde6
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDHp546.vcxproj.filters	
@@ -0,0 +1,81 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+    <Filter Include="Source Files\generic">
+      <UniqueIdentifier>{c12e408e-2171-41d7-8815-33244cd7b1db}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\x64">
+      <UniqueIdentifier>{e81738a2-8bd8-449a-8918-07266c29f2b7}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\random">
+      <UniqueIdentifier>{41f562a7-d335-4517-9c95-c6a4ce94c2f0}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Header Files\random">
+      <UniqueIdentifier>{d2fd7e3f-38db-40e6-9994-0c979863e36b}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\sha3">
+      <UniqueIdentifier>{75ac45a5-2e31-48af-986d-44d27e6a2a42}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\random\random.c">
+      <Filter>Source Files\random</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\ec_isogeny.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fpx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sha3\fips202.c">
+      <Filter>Source Files\sha3</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sidh.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sike.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P546\P546.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P546\AMD64\fp_x64.c">
+      <Filter>Source Files\x64</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P546\generic\fp_generic.c">
+      <Filter>Source Files\generic</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src\random\random.h">
+      <Filter>Header Files\random</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\sha3\fips202.h">
+      <Filter>Source Files\sha3</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\config.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\P546\P546_api.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\P546\P546_internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj b/SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj
new file mode 100644
index 0000000..976809f
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj	
@@ -0,0 +1,577 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{ED1BA17A-58EA-4D9F-9B19-7061395E22BB}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>isoECClib</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <ProjectName>P610</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp610\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp610\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp610\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp610\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp610\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp610\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp610\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <IntrinsicFunctions>false</IntrinsicFunctions>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <IntrinsicFunctions>false</IntrinsicFunctions>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+    <Lib>
+      <IgnoreAllDefaultLibraries>
+      </IgnoreAllDefaultLibraries>
+      <LinkTimeCodeGeneration>
+      </LinkTimeCodeGeneration>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+    <ProjectReference>
+      <LinkLibraryDependencies>
+      </LinkLibraryDependencies>
+    </ProjectReference>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\ec_isogeny.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fpx.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P610\AMD64\fp_x64.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+      </ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P610\generic\fp_generic.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P610\P610.c" />
+    <ClCompile Include="..\..\src\random\random.c" />
+    <ClCompile Include="..\..\src\sha3\fips202.c" />
+    <ClCompile Include="..\..\src\sidh.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sike.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src\config.h" />
+    <ClInclude Include="..\..\src\internal.h" />
+    <ClInclude Include="..\..\src\P610\P610_api.h" />
+    <ClInclude Include="..\..\src\P610\P610_internal.h" />
+    <ClInclude Include="..\..\src\random\random.h" />
+    <ClInclude Include="..\..\src\sha3\fips202.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj.filters b/SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj.filters
new file mode 100644
index 0000000..8d9ac78
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDHp610.vcxproj.filters	
@@ -0,0 +1,81 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+    <Filter Include="Source Files\generic">
+      <UniqueIdentifier>{c12e408e-2171-41d7-8815-33244cd7b1db}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\x64">
+      <UniqueIdentifier>{e81738a2-8bd8-449a-8918-07266c29f2b7}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\random">
+      <UniqueIdentifier>{41f562a7-d335-4517-9c95-c6a4ce94c2f0}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Header Files\random">
+      <UniqueIdentifier>{d2fd7e3f-38db-40e6-9994-0c979863e36b}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\sha3">
+      <UniqueIdentifier>{597545a3-c4e5-4065-9d91-c7bec60b6da4}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\random\random.c">
+      <Filter>Source Files\random</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\ec_isogeny.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fpx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sha3\fips202.c">
+      <Filter>Source Files\sha3</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P610\AMD64\fp_x64.c">
+      <Filter>Source Files\x64</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P610\generic\fp_generic.c">
+      <Filter>Source Files\generic</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sidh.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sike.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P610\P610.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src\random\random.h">
+      <Filter>Header Files\random</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\sha3\fips202.h">
+      <Filter>Source Files\sha3</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\config.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\P610\P610_internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\P610\P610_api.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj b/SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj
new file mode 100644
index 0000000..cbdb6fb
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj	
@@ -0,0 +1,572 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{F7447653-7518-4BF0-934D-65801C74D42A}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>isoECClib</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <ProjectName>P697</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp697\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp697\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp697\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp697\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp697\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\SIDHp697\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp697\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp697\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp697\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp697\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp697\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\SIDHp697\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <IntrinsicFunctions>false</IntrinsicFunctions>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <IntrinsicFunctions>false</IntrinsicFunctions>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+    <Lib>
+      <IgnoreAllDefaultLibraries>
+      </IgnoreAllDefaultLibraries>
+      <LinkTimeCodeGeneration>
+      </LinkTimeCodeGeneration>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+    <ProjectReference>
+      <LinkLibraryDependencies>
+      </LinkLibraryDependencies>
+    </ProjectReference>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\ec_isogeny.c"> <!-- Listed for the IDE but excluded from build in all 12 configuration/platform pairs below; presumably compiled via #include from another source file (TODO confirm). -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fpx.c"> <!-- Listed for the IDE but excluded from build in all 12 configuration/platform pairs below; presumably compiled via #include from another source file (TODO confirm). -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P697\AMD64\fp_x64.c"> <!-- x64 field arithmetic source; skipped for the Generic builds and for Fast/Debug on Win32. NOTE(review): Release, Optimized-fast and Optimized-generic on Win32 define _X86_ and _GENERIC_ yet are not excluded here; confirm intended. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P697\generic\fp_generic.c"> <!-- Generic field arithmetic source; skipped only for Fast|x64 and Debug|x64 (both define _FAST_). NOTE(review): Release|x64 and Optimized-fast|x64 do not define _GENERIC_ but still build this file; confirm intended. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P697\P697.c" />
+    <ClCompile Include="..\..\src\random\random.c" />
+    <ClCompile Include="..\..\src\sha3\fips202.c" />
+    <ClCompile Include="..\..\src\sidh.c"> <!-- Excluded from build for Debug, Fast and Generic on both platforms. NOTE(review): Release, Optimized-fast and Optimized-generic are not excluded, unlike the ec_isogeny.c and fpx.c entries of this project; confirm intended. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sike.c"> <!-- Excluded from build for Debug, Fast and Generic on both platforms. NOTE(review): Release, Optimized-fast and Optimized-generic are not excluded, unlike the ec_isogeny.c and fpx.c entries of this project; confirm intended. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src\config.h" />
+    <ClInclude Include="..\..\src\internal.h" />
+    <ClInclude Include="..\..\src\P697\P697_api.h" />
+    <ClInclude Include="..\..\src\P697\P697_internal.h" />
+    <ClInclude Include="..\..\src\random\random.h" />
+    <ClInclude Include="..\..\src\sha3\fips202.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj.filters b/SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj.filters
new file mode 100644
index 0000000..c15458a
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDHp697.vcxproj.filters	
@@ -0,0 +1,81 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+    <Filter Include="Source Files\generic">
+      <UniqueIdentifier>{c12e408e-2171-41d7-8815-33244cd7b1db}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\x64">
+      <UniqueIdentifier>{e81738a2-8bd8-449a-8918-07266c29f2b7}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\random">
+      <UniqueIdentifier>{41f562a7-d335-4517-9c95-c6a4ce94c2f0}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Header Files\random">
+      <UniqueIdentifier>{d2fd7e3f-38db-40e6-9994-0c979863e36b}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\sha3">
+      <UniqueIdentifier>{597545a3-c4e5-4065-9d91-c7bec60b6da4}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\random\random.c">
+      <Filter>Source Files\random</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\ec_isogeny.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fpx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sha3\fips202.c">
+      <Filter>Source Files\sha3</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sidh.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sike.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P697\P697.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P697\generic\fp_generic.c">
+      <Filter>Source Files\generic</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P697\AMD64\fp_x64.c">
+      <Filter>Source Files\x64</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src\random\random.h">
+      <Filter>Header Files\random</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\sha3\fips202.h"> <!-- Header file: grouped under "Header Files" like the other ClInclude items (no sha3 header filter is declared in this file). -->
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\config.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\P697\P697_api.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\P697\P697_internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj b/SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj
new file mode 100644
index 0000000..c26ea03
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj	
@@ -0,0 +1,598 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{8283DD76-E88A-4B63-ABDE-33F014178413}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>isoECClib</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <ProjectName>P751</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <IntrinsicFunctions>false</IntrinsicFunctions>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+    <Lib>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile> <!-- Compiler settings for Debug|x64: unoptimized build of the _FAST_ / _AMD64_ code path with AVX code generation enabled. -->
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <IntrinsicFunctions>false</IntrinsicFunctions>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <MinimalRebuild>false</MinimalRebuild> <!-- /Gm off: matches the Debug|Win32 block of this project; the option is deprecated in newer MSVC toolsets. -->
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+    <Lib>
+      <IgnoreAllDefaultLibraries>
+      </IgnoreAllDefaultLibraries>
+      <LinkTimeCodeGeneration>
+      </LinkTimeCodeGeneration>
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+    <ProjectReference>
+      <LinkLibraryDependencies>
+      </LinkLibraryDependencies>
+    </ProjectReference>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Release|Win32: MaxSpeed with intrinsics; defines __WINDOWS__, _X86_ and _GENERIC_ -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib> <!-- bcrypt.lib is passed to the librarian as an additional dependency -->
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'"> <!-- Optimized-fast|Win32: MaxSpeed with intrinsics; on Win32 this still defines _X86_ and _GENERIC_ (same settings as Release|Win32) -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib> <!-- bcrypt.lib is passed to the librarian as an additional dependency -->
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'"> <!-- Fast|Win32: MaxSpeed with intrinsics; on Win32 this still defines _X86_ and _GENERIC_ (same settings as Release|Win32) -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib> <!-- bcrypt.lib is passed to the librarian as an additional dependency -->
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <!-- Release|x64: MaxSpeed with AVX; defines __WINDOWS__, _OPTIMIZED_FAST_ and _AMD64_ (same settings as Optimized-fast|x64) -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib> <!-- bcrypt.lib is passed to the librarian as an additional dependency -->
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'"> <!-- Optimized-fast|x64: MaxSpeed with AVX; defines __WINDOWS__, _OPTIMIZED_FAST_ and _AMD64_ -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib> <!-- bcrypt.lib is passed to the librarian as an additional dependency -->
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'"> <!-- Fast|x64: MaxSpeed with AVX; defines __WINDOWS__, _FAST_ and _AMD64_ -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib> <!-- bcrypt.lib is passed to the librarian as an additional dependency -->
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"> <!-- Generic|Win32: MaxSpeed with intrinsics; defines __WINDOWS__, _X86_ and _GENERIC_ -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib> <!-- bcrypt.lib is passed to the librarian as an additional dependency -->
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'"> <!-- Optimized-generic|Win32: MaxSpeed with intrinsics; defines __WINDOWS__, _X86_ and _GENERIC_ -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions> <!-- NOTE(review): unlike Optimized-generic|x64, _OPTIMIZED_GENERIC_ is not defined here; confirm this is intended -->
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib> <!-- bcrypt.lib is passed to the librarian as an additional dependency -->
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'"> <!-- Generic|x64: MaxSpeed with AVX; defines __WINDOWS__, _GENERIC_ and _AMD64_ -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib> <!-- bcrypt.lib is passed to the librarian as an additional dependency -->
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'"> <!-- Optimized-generic|x64: MaxSpeed with AVX; defines __WINDOWS__, _OPTIMIZED_GENERIC_, _AMD64_ and _GENERIC_ -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Lib> <!-- bcrypt.lib is passed to the librarian as an additional dependency -->
+      <AdditionalDependencies>bcrypt.lib</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup> <!-- Source files of the P751 library and their per-configuration build exclusions -->
+    <ClCompile Include="..\..\src\ec_isogeny.c"> <!-- excluded in all 12 configurations; presumably compiled via #include from another source file (TODO confirm) -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fpx.c"> <!-- excluded in all 12 configurations; presumably compiled via #include from another source file (TODO confirm) -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P751\AMD64\fp_x64.c"> <!-- excluded on every Win32 configuration and on Generic / Optimized-generic x64; the empty values leave it in the build for Debug, Release, Optimized-fast and Fast on x64 -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+      </ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+      </ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+      </ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+      </ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P751\generic\fp_generic.c"> <!-- excluded only for Release, Optimized-fast, Fast and Debug on x64, i.e. the configurations that build fp_x64.c -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P751\P751.c" /> <!-- built in every configuration -->
+    <ClCompile Include="..\..\src\random\random.c" /> <!-- built in every configuration -->
+    <ClCompile Include="..\..\src\sha3\fips202.c" /> <!-- built in every configuration -->
+    <ClCompile Include="..\..\src\sidh.c"> <!-- excluded in all 12 configurations; presumably compiled via #include from another source file (TODO confirm) -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sike.c"> <!-- excluded in all 12 configurations; presumably compiled via #include from another source file (TODO confirm) -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup> <!-- Header files listed in the project: shared headers, P751-specific headers, and the random / sha3 helper headers -->
+    <ClInclude Include="..\..\src\config.h" />
+    <ClInclude Include="..\..\src\internal.h" />
+    <ClInclude Include="..\..\src\P751\P751_api.h" />
+    <ClInclude Include="..\..\src\P751\P751_internal.h" />
+    <ClInclude Include="..\..\src\random\random.h" />
+    <ClInclude Include="..\..\src\sha3\fips202.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj.filters b/SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj.filters
new file mode 100644
index 0000000..348c901
--- /dev/null
+++ b/SIKE_sw/Visual Studio/SIDH/SIDHp751.vcxproj.filters	
@@ -0,0 +1,81 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup> <!-- Filter (virtual folder) definitions referenced by the file-to-filter mappings in this file -->
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+    <Filter Include="Source Files\generic"> <!-- holds fp_generic.c -->
+      <UniqueIdentifier>{c12e408e-2171-41d7-8815-33244cd7b1db}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\x64"> <!-- holds fp_x64.c -->
+      <UniqueIdentifier>{e81738a2-8bd8-449a-8918-07266c29f2b7}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\random"> <!-- holds random.c -->
+      <UniqueIdentifier>{41f562a7-d335-4517-9c95-c6a4ce94c2f0}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Header Files\random"> <!-- holds random.h -->
+      <UniqueIdentifier>{d2fd7e3f-38db-40e6-9994-0c979863e36b}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\sha3"> <!-- holds fips202.c -->
+      <UniqueIdentifier>{597545a3-c4e5-4065-9d91-c7bec60b6da4}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup> <!-- Assigns each source file of the P751 project to one of the filters defined in this file -->
+    <ClCompile Include="..\..\src\random\random.c">
+      <Filter>Source Files\random</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\ec_isogeny.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fpx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sha3\fips202.c">
+      <Filter>Source Files\sha3</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sike.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\sidh.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P751\AMD64\fp_x64.c">
+      <Filter>Source Files\x64</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P751\generic\fp_generic.c">
+      <Filter>Source Files\generic</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\P751\P751.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup> <!-- Assigns each header to a filter; every header lives under a Header Files filter -->
+    <ClInclude Include="..\..\src\random\random.h">
+      <Filter>Header Files\random</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\sha3\fips202.h"> <!-- was filed under Source Files\sha3; moved next to the other headers (no Header Files\sha3 filter is defined) -->
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\config.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\P751\P751_api.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\P751\P751_internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj
new file mode 100644
index 0000000..09521b0
--- /dev/null
+++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj	
@@ -0,0 +1,432 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations"> <!-- Declares 12 build targets: Debug, Fast, Generic, Optimized-fast, Optimized-generic and Release, each for Win32 and x64 -->
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup> <!-- Header of the shared test helpers -->
+    <ClInclude Include="..\..\tests\test_extras.h" />
+  </ItemGroup>
+  <ItemGroup> <!-- Test sources: the P377 arithmetic tests plus the shared test helpers -->
+    <ClCompile Include="..\..\tests\arith_tests-p377.c" />
+    <ClCompile Include="..\..\tests\test_extras.c" />
+  </ItemGroup>
+  <ItemGroup> <!-- Project-to-project reference to the SIDHp377 project, identified by its project GUID -->
+    <ProjectReference Include="..\SIDH\SIDHp377.vcxproj">
+      <Project>{05cef530-f410-4c21-ac70-a7ef991dee6a}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <PropertyGroup Label="Globals"> <!-- Project identity and target Windows SDK version -->
+    <ProjectGuid>{10C4B543-0224-43D3-B84D-390665AA6C25}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fp_tests</RootNamespace> <!-- NOTE(review): differs from ProjectName (arith_tests-P377); looks like a leftover name, confirm it is harmless -->
+    <ProjectName>arith_tests-P377</ProjectName>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <!-- Debug configurations: Application, debug libraries, v140 toolset, Unicode -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> <!-- Release, Optimized-fast and Fast: Application, no debug libraries, whole-program optimization enabled -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"> <!-- NOTE(review): the four Generic / Optimized-generic groups set only PlatformToolset; ConfigurationType, WholeProgramOptimization and CharacterSet are not set here, confirm this is intended -->
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Win32 groups: outputs go to SolutionDir\Configuration\; incremental linking is on only for Debug -->
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp377\</IntDir> <!-- NOTE(review): intermediate folder is named SIDHp377 although this is the arith_tests-P377 project; confirm the name is intended -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- x64 groups: outputs go to SolutionDir\Platform\Configuration\ -->
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'"> <!-- Generic|x64 sets only the directories; LinkIncremental is not set here -->
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp377\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- Compiler and linker settings applied when building Debug|x64 -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions> <!-- NOTE(review): arith_tests-P434 Debug|x64 defines _GENERIC_ where this project defines _FAST_; confirm which is intended -->
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet> <!-- Compiles with AVX enabled for this configuration -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'"> <!-- Compiler and linker settings applied when building Optimized-fast|x64 -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): optimization is turned off although the configuration is named Optimized-fast; confirm this is deliberate for the test code -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet> <!-- Compiles with AVX enabled for this configuration -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj.filters b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj.filters
new file mode 100644
index 0000000..e5e4541
--- /dev/null
+++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P377.vcxproj.filters	
@@ -0,0 +1,30 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\tests\test_extras.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\tests\test_extras.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\arith_tests-p377.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj
new file mode 100644
index 0000000..9dd17c1
--- /dev/null
+++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj	
@@ -0,0 +1,432 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\tests\test_extras.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\tests\arith_tests-p434.c" />
+    <ClCompile Include="..\..\tests\test_extras.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\SIDH\SIDHp434.vcxproj">
+      <Project>{10224e47-baef-430e-a8a0-969cc6ceb96b}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{8944AC47-A218-4F4D-8AF1-AF704160A727}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fp_tests</RootNamespace>
+    <ProjectName>arith_tests-P434</ProjectName>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp434\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Compiler and linker settings applied when building Debug|Win32 -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> <!-- NOTE(review): MultiThreadedDLL is the non-debug CRT, yet this configuration sets UseDebugLibraries to true; confirm the mix is intended -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'"> <!-- Compiler and linker settings applied when building Optimized-fast|x64 -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): optimization is turned off although the configuration is named Optimized-fast and sets WholeProgramOptimization to true; confirm this is deliberate -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet> <!-- Compiles with AVX enabled for this configuration -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'"> <!-- Compiler and linker settings used for Fast|x64 builds. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): compiler optimization is off for this configuration; confirm this is intended. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions> <!-- The only group in this section of the file that defines _FAST_. -->
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet> <!-- Generates AVX code. -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"> <!-- Compiler and linker settings used for Generic|Win32 builds; no SubSystem or RuntimeLibrary is set in this group. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): optimization is off while whole-program optimization is on; confirm this combination is intended. -->
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'"> <!-- Compiler and linker settings used for Optimized-generic|Win32 builds; element values match the Generic|Win32 group. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions> <!-- NOTE(review): _OPTIMIZED_GENERIC_ is not defined here although the x64 Optimized-generic group defines it; confirm. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'"> <!-- Compiler and linker settings used for Generic|x64 builds; no SubSystem or RuntimeLibrary is set in this group. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet> <!-- Generates AVX code. -->
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): optimization is off while whole-program optimization is on; confirm this combination is intended. -->
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'"> <!-- Compiler and linker settings used for Optimized-generic|x64 builds. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_;</PreprocessorDefinitions> <!-- Defines both _OPTIMIZED_GENERIC_ and _GENERIC_. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet> <!-- Generates AVX code. -->
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): optimization is off despite the configuration name; confirm this is intended. -->
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj.filters b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj.filters
new file mode 100644
index 0000000..66b8267
--- /dev/null
+++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P434.vcxproj.filters	
@@ -0,0 +1,30 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- Filter definitions and file-to-filter assignments for the P434 arithmetic test project. -->
+  <ItemGroup> <!-- The three filters that files can be assigned to, each with the file extensions it lists. -->
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files"> <!-- No item in this file is assigned to this filter. -->
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup> <!-- Header placed under "Header Files". -->
+    <ClInclude Include="..\..\tests\test_extras.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup> <!-- C sources placed under "Source Files". -->
+    <ClCompile Include="..\..\tests\test_extras.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\arith_tests-p434.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj
new file mode 100644
index 0000000..3a7b23d
--- /dev/null
+++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj	
@@ -0,0 +1,432 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations"> <!-- Declares 12 build variants: Debug, Fast, Generic, Optimized-fast, Optimized-generic and Release, each for Win32 and x64. -->
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup> <!-- Header belonging to the shared test helpers. -->
+    <ClInclude Include="..\..\tests\test_extras.h" />
+  </ItemGroup>
+  <ItemGroup> <!-- C sources compiled into this test executable. -->
+    <ClCompile Include="..\..\tests\arith_tests-p503.c" />
+    <ClCompile Include="..\..\tests\test_extras.c" />
+  </ItemGroup>
+  <ItemGroup> <!-- Reference to the SIDHp503 project in the sibling SIDH folder. -->
+    <ProjectReference Include="..\SIDH\SIDHp503.vcxproj">
+      <Project>{bbc8647d-b9e2-469f-a9a4-bb55b614adbe}</Project> <!-- NOTE(review): should equal the ProjectGuid of SIDHp503.vcxproj, which is not visible here; verify. -->
+    </ProjectReference>
+  </ItemGroup>
+  <PropertyGroup Label="Globals"> <!-- Project identity and target Windows SDK version. -->
+    <ProjectGuid>{464B689B-7C93-47A2-B2F5-FE162A4EF404}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fp_tests</RootNamespace> <!-- Differs from ProjectName below. -->
+    <ProjectName>arith_tests-P503</ProjectName>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion> <!-- NOTE(review): presumably requires the Windows 8.1 SDK on the build machine; confirm. -->
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <!-- Per-configuration build type: every group in this run builds an Application with toolset v140 and the Unicode character set. -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> <!-- From here on the groups disable debug libraries and enable WholeProgramOptimization. -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"> <!-- The four generic groups set only the toolset; ConfigurationType, UseDebugLibraries and CharacterSet are not set for them in this file. -->
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings"> <!-- Intentionally empty. -->
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Each group below imports the per-user, per-platform property sheet when that file exists. Only Debug, Release, Optimized-fast and Fast have such a group; the generic configurations have none. -->
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" /> <!-- No user macros are defined. -->
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Output layout: Win32 groups write to $(SolutionDir)$(Configuration)\ and x64 groups to $(SolutionDir)$(Platform)\$(Configuration)\; intermediates go to a SIDHp503 subfolder. -->
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Incremental linking is enabled only in the two Debug groups above. -->
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'"> <!-- Output and intermediate directories for the four generic configurations; LinkIncremental is not set in these groups. -->
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"> <!-- Win32 variants omit the $(Platform) path component. -->
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp503\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Compiler and linker settings used for Debug|Win32 builds. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> <!-- NOTE(review): this is the non-debug DLL runtime although the configuration sets UseDebugLibraries to true; confirm. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- Compiler and linker settings used for Debug|x64 builds. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> <!-- NOTE(review): this is the non-debug DLL runtime although the configuration sets UseDebugLibraries to true; confirm. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet> <!-- Generates AVX code. -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Compiler and linker settings used for Release|Win32 builds. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): optimization is off in a Release configuration; confirm this is intended. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions> <!-- Same macro set as the Generic|Win32 group. -->
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'"> <!-- Compiler and linker settings used for Optimized-fast|Win32 builds. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): compiler optimization is off despite the configuration name; confirm this is intended. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions> <!-- NOTE(review): defines _GENERIC_ rather than _OPTIMIZED_FAST_ as the x64 group does; presumably the fast path is x64-only, confirm. -->
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'"> <!-- Compiler and linker settings used for Fast|Win32 builds. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): compiler optimization is off for this configuration; confirm this is intended. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions> <!-- NOTE(review): defines _GENERIC_ rather than _FAST_ as the x64 group does; presumably the fast path is x64-only, confirm. -->
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <!-- Compiler and linker settings used for Release|x64 builds. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): optimization is off in a Release configuration; confirm this is intended. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions> <!-- Same macro set as the Optimized-fast|x64 group. -->
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet> <!-- Generates AVX code. -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'"> <!-- Compiler and linker settings used for Optimized-fast|x64 builds. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): compiler optimization is off despite the configuration name; confirm this is intended. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet> <!-- Generates AVX code. -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'"> <!-- Compiler and linker settings used for Fast|x64 builds. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): compiler optimization is off for this configuration; confirm this is intended. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions> <!-- The only group in this project that defines _FAST_. -->
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet> <!-- Generates AVX code. -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"> <!-- Compiler and linker settings used for Generic|Win32 builds; no SubSystem or RuntimeLibrary is set in this group. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): optimization is off while whole-program optimization is on; confirm this combination is intended. -->
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'"> <!-- Compiler and linker settings used for Optimized-generic|Win32 builds; element values match the Generic|Win32 group. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions> <!-- NOTE(review): _OPTIMIZED_GENERIC_ is not defined here although the x64 Optimized-generic group defines it; confirm. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'"> <!-- Compiler and linker settings used for Generic|x64 builds; no SubSystem or RuntimeLibrary is set in this group. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet> <!-- Generates AVX code. -->
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): optimization is off while whole-program optimization is on; confirm this combination is intended. -->
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'"> <!-- Compiler and linker settings used for Optimized-generic|x64 builds. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_;</PreprocessorDefinitions> <!-- Defines both _OPTIMIZED_GENERIC_ and _GENERIC_. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet> <!-- Generates AVX code. -->
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): optimization is off despite the configuration name; confirm this is intended. -->
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj.filters b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj.filters
new file mode 100644
index 0000000..4716cee
--- /dev/null
+++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P503.vcxproj.filters	
@@ -0,0 +1,30 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- Filter definitions and file-to-filter assignments for the P503 arithmetic test project. -->
+  <ItemGroup> <!-- The three filters that files can be assigned to, each with the file extensions it lists. -->
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files"> <!-- No item in this file is assigned to this filter. -->
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup> <!-- Header placed under "Header Files". -->
+    <ClInclude Include="..\..\tests\test_extras.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup> <!-- C sources placed under "Source Files". -->
+    <ClCompile Include="..\..\tests\test_extras.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\arith_tests-p503.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj
new file mode 100644
index 0000000..9da4cc2
--- /dev/null
+++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj	
@@ -0,0 +1,432 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\tests\test_extras.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\tests\arith_tests-p546.c" />
+    <ClCompile Include="..\..\tests\test_extras.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\SIDH\SIDHp546.vcxproj">
+      <Project>{48010b78-5594-4fe9-81ac-909b670c1516}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{5572CD5B-7F2F-4F44-B7AC-844291850C6E}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fp_tests</RootNamespace>
+    <ProjectName>arith_tests-P546</ProjectName>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp546\</IntDir> <!-- P546-specific intermediate folder, so object files are not mixed with those of other arith_tests projects -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp546\</IntDir> <!-- P546-specific intermediate folder, so object files are not mixed with those of other arith_tests projects -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp546\</IntDir> <!-- P546-specific intermediate folder, so object files are not mixed with those of other arith_tests projects -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp546\</IntDir> <!-- P546-specific intermediate folder, so object files are not mixed with those of other arith_tests projects -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp546\</IntDir> <!-- P546-specific intermediate folder, so object files are not mixed with those of other arith_tests projects -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp546\</IntDir> <!-- P546-specific intermediate folder, so object files are not mixed with those of other arith_tests projects -->
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj.filters b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj.filters
new file mode 100644
index 0000000..9abce0e
--- /dev/null
+++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P546.vcxproj.filters	
@@ -0,0 +1,30 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\tests\test_extras.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\tests\test_extras.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\arith_tests-p546.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj
new file mode 100644
index 0000000..b0ed324
--- /dev/null
+++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj	
@@ -0,0 +1,432 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations"> <!-- Declares 12 build configurations: {Debug, Fast, Generic, Optimized-fast, Optimized-generic, Release} x {Win32, x64} -->
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup> <!-- Every Configuration|Platform pair listed above is matched by Condition attributes further down in this project -->
+  <ItemGroup> <!-- Header that belongs to this test project -->
+    <ClInclude Include="..\..\tests\test_extras.h" />
+  </ItemGroup>
+  <ItemGroup> <!-- C sources compiled by this project: the P610 arithmetic test and test_extras.c -->
+    <ClCompile Include="..\..\tests\arith_tests-p610.c" />
+    <ClCompile Include="..\..\tests\test_extras.c" />
+  </ItemGroup>
+  <ItemGroup> <!-- Reference to the sibling SIDHp610 project; the GUID below must equal that project's ProjectGuid (TODO confirm against SIDHp610.vcxproj) -->
+    <ProjectReference Include="..\SIDH\SIDHp610.vcxproj">
+      <Project>{ed1ba17a-58ea-4d9f-9b19-7061395e22bb}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <PropertyGroup Label="Globals"> <!-- Project identity and target SDK -->
+    <ProjectGuid>{2A6A9BF4-B07F-4F2F-B418-6C39E54F8C6F}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fp_tests</RootNamespace> <!-- NOTE(review): differs from ProjectName (arith_tests-P610); looks like a leftover name, confirm it is intentional -->
+    <ProjectName>arith_tests-P610</ProjectName>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion> <!-- Windows SDK 8.1; every configuration below selects the v140 toolset -->
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <!-- Debug groups: Application, debug libraries, v140 toolset, Unicode; no WholeProgramOptimization -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> <!-- Release, Optimized-fast and Fast groups (both platforms) are identical: Application, non-debug libraries, v140, WholeProgramOptimization, Unicode -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"> <!-- Generic and Optimized-generic groups set only the toolset; ConfigurationType, CharacterSet etc. presumably come from the imported defaults (TODO confirm) -->
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings"> <!-- intentionally empty: no extension settings are imported -->
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Each PropertySheets group imports the per-user platform property sheet, but only when that file exists -->
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup> <!-- NOTE(review): there is no PropertySheets group for the Generic and Optimized-generic configurations; confirm that user property sheets are meant to be skipped there -->
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Win32 groups: output in $(SolutionDir)$(Configuration)\, intermediates in $(Configuration)\SIDHp610\. NOTE(review): the intermediate folder is named SIDHp610 although this is the arith_tests project; confirm -->
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- x64 groups: same layout with a leading $(Platform)\ path component -->
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Release, Optimized-fast and Fast turn incremental linking off -->
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'"> <!-- Generic and Optimized-generic groups set only the directories; LinkIncremental is not set here -->
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Debug|Win32 compiler and linker settings: no optimization, defines __WINDOWS__, _X86_, _GENERIC_; console subsystem -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> <!-- NOTE(review): non-debug DLL runtime selected in a Debug configuration; confirm intended -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- Debug|x64: as Debug|Win32 but defines _AMD64_ instead of _X86_ and enables the AVX instruction set -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Release, Optimized-fast and Fast on Win32 share identical settings; all three define _X86_ and _GENERIC_ -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): optimization is disabled in a non-debug configuration; confirm this is deliberate for the arithmetic tests -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'"> <!-- NOTE(review): defines _GENERIC_, not _OPTIMIZED_FAST_ as the x64 counterpart does; presumably the fast code path is x64 only, confirm -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'"> <!-- NOTE(review): defines _GENERIC_, not _FAST_ as the x64 counterpart does; confirm -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <!-- Release|x64: defines _OPTIMIZED_FAST_ and _AMD64_, AVX enabled. NOTE(review): same defines as Optimized-fast|x64, whereas Release|Win32 uses _GENERIC_; confirm -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): optimization is disabled in a non-debug configuration; confirm this is deliberate for the arithmetic tests -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'"> <!-- Optimized-fast|x64: defines _OPTIMIZED_FAST_ and _AMD64_, AVX enabled -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'"> <!-- Fast|x64: defines _FAST_ and _AMD64_, AVX enabled -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"> <!-- Generic|Win32: defines _X86_ and _GENERIC_; whole program optimization requested at compile and link time -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): whole program optimization is requested while Optimization is Disabled; confirm this combination is intended -->
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'"> <!-- NOTE(review): identical to Generic|Win32; _OPTIMIZED_GENERIC_ is not defined here although the x64 counterpart defines it, confirm -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'"> <!-- Generic|x64: defines _GENERIC_ and _AMD64_, AVX enabled -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'"> <!-- Optimized-generic|x64: defines both _OPTIMIZED_GENERIC_ and _GENERIC_ plus _AMD64_, AVX enabled -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj.filters b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj.filters
new file mode 100644
index 0000000..fc52aa6
--- /dev/null
+++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P610.vcxproj.filters	
@@ -0,0 +1,30 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup> <!-- Filter definitions: each entry gives a filter name, a unique GUID and the file extensions it lists -->
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files"> <!-- declared, but no item in this file is assigned to it -->
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup> <!-- test_extras.h is placed under the "Header Files" filter -->
+    <ClInclude Include="..\..\tests\test_extras.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup> <!-- Both C sources of the P610 arithmetic test are placed under the "Source Files" filter -->
+    <ClCompile Include="..\..\tests\test_extras.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\arith_tests-p610.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj
new file mode 100644
index 0000000..4414eae
--- /dev/null
+++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj	
@@ -0,0 +1,432 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\tests\test_extras.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\tests\arith_tests-p697.c" />
+    <ClCompile Include="..\..\tests\test_extras.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\SIDH\SIDHp697.vcxproj">
+      <Project>{f7447653-7518-4bf0-934d-65801c74d42a}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{C0892335-7EB7-48D4-83A1-500953D0526B}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fp_tests</RootNamespace>
+    <ProjectName>arith_tests-P697</ProjectName>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj.filters b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj.filters
new file mode 100644
index 0000000..f1efc19
--- /dev/null
+++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P697.vcxproj.filters	
@@ -0,0 +1,30 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\tests\test_extras.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\tests\test_extras.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\arith_tests-p697.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj
new file mode 100644
index 0000000..28d7031
--- /dev/null
+++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj	
@@ -0,0 +1,432 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations"><!-- Declares the 12 configuration/platform pairs of this project: Debug, Fast, Generic, Optimized-fast, Optimized-generic and Release, each for Win32 and x64. -->
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup><!-- Header file that is part of the project. -->
+    <ClInclude Include="..\..\tests\test_extras.h" />
+  </ItemGroup>
+  <ItemGroup><!-- C sources compiled by this project: the p751 arithmetic test and the shared test helpers. -->
+    <ClCompile Include="..\..\tests\arith_tests-p751.c" />
+    <ClCompile Include="..\..\tests\test_extras.c" />
+  </ItemGroup>
+  <ItemGroup><!-- Reference to the sibling SIDHp751 project, identified by relative path and project GUID. -->
+    <ProjectReference Include="..\SIDH\SIDHp751.vcxproj">
+      <Project>{8283dd76-e88a-4b63-abde-33f014178413}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <PropertyGroup Label="Globals"><!-- Project-wide identity: GUID, project keyword, names and the targeted Windows SDK version. -->
+    <ProjectGuid>{C9639168-C3FF-4427-BC3B-D907FF11DE73}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fp_tests</RootNamespace><!-- NOTE(review): differs from ProjectName below; confirm whether "fp_tests" is a leftover name. -->
+    <ProjectName>arith_tests-P751</ProjectName>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /><!-- Default C++ properties are imported before the per-configuration groups below. -->
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"><!-- Debug groups: Application output, debug libraries, v140 toolset, Unicode character set. -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"><!-- Release, Optimized-fast and Fast groups (both platforms): non-debug libraries with WholeProgramOptimization enabled. -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"><!-- NOTE(review): this and the three following Generic/Optimized-generic groups set only PlatformToolset; ConfigurationType, UseDebugLibraries, WholeProgramOptimization and CharacterSet are not set for them here. Confirm the defaults are what is wanted. -->
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /><!-- Imported after the Configuration groups above. -->
+  <ImportGroup Label="ExtensionSettings"><!-- Intentionally empty. -->
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"><!-- Each PropertySheets group imports the per-user, per-platform property sheet, guarded by an exists() check. NOTE(review): no such group is declared for the Generic and Optimized-generic configurations; confirm that is intended. -->
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" /><!-- No user macros are defined. -->
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"><!-- Directory layout used by every group below: Win32 builds output to $(SolutionDir)$(Configuration)\ and x64 builds to $(SolutionDir)$(Platform)\$(Configuration)\. NOTE(review): the intermediate sub-folder is named SIDHp751 rather than after this test project; confirm this is intended. -->
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"><!-- Incremental linking is switched off for Release, Optimized-fast and Fast. -->
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"><!-- The Generic and Optimized-generic groups set only OutDir and IntDir; LinkIncremental is not set for them here. -->
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp751\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"><!-- Debug/Win32 compiler and linker settings: no optimization, warning level 4, console subsystem, debug information generated. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary><!-- NOTE(review): the non-debug DLL runtime is selected although UseDebugLibraries is true for Debug; confirm this is intended. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"><!-- Debug/x64: same as Debug/Win32 except that _AMD64_ replaces _X86_ and the AVX instruction set is enabled. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"><!-- The Release, Optimized-fast and Fast groups for Win32 are identical apart from their Condition; all three define _X86_ and _GENERIC_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization><!-- NOTE(review): optimization is disabled even though this is a Release configuration with WholeProgramOptimization enabled; confirm this is intentional for the test build. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"><!-- x64 Release: defines _OPTIMIZED_FAST_ and _AMD64_ and enables AVX. NOTE(review): these definitions are the same as in the Optimized-fast x64 group below; confirm Release is meant to mirror it. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization><!-- NOTE(review): optimization is disabled in this Release configuration; confirm this is intentional for the test build. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'"><!-- x64 Optimized-fast: defines _OPTIMIZED_FAST_ and _AMD64_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'"><!-- x64 Fast: differs from the two groups above only in defining _FAST_ instead of _OPTIMIZED_FAST_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"><!-- The four Generic/Optimized-generic groups enable WholeProgramOptimization at compile level and link-time code generation. NOTE(review): unlike the other configurations they set no SubSystem or RuntimeLibrary; confirm the defaults are acceptable. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'"><!-- NOTE(review): identical to Generic/Win32 apart from the Condition; _OPTIMIZED_GENERIC_ is not defined here, unlike the x64 counterpart. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'"><!-- x64 Generic: defines _GENERIC_ and _AMD64_ and enables AVX. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'"><!-- x64 Optimized-generic: defines _OPTIMIZED_GENERIC_ in addition to _AMD64_ and _GENERIC_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_; _GENERIC_;</PreprocessorDefinitions>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <Optimization>Disabled</Optimization>
+    </ClCompile>
+    <Link>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /><!-- Imported last, after all properties and item definitions above. -->
+  <ImportGroup Label="ExtensionTargets"><!-- Intentionally empty. -->
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj.filters b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj.filters
new file mode 100644
index 0000000..456616e
--- /dev/null
+++ b/SIKE_sw/Visual Studio/arith_tests/arith_tests-P751.vcxproj.filters	
@@ -0,0 +1,30 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"><!-- Filter layout for the arith_tests-P751 project: declares three filters and assigns the project's files to them. -->
+  <ItemGroup><!-- Filter declarations: each has a name, a unique GUID and a list of associated file extensions. -->
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files"><!-- Declared, but no item in this file is assigned to it. -->
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup><!-- Header assigned to the "Header Files" filter. -->
+    <ClInclude Include="..\..\tests\test_extras.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup><!-- C sources assigned to the "Source Files" filter: the shared test helpers and the p751 arithmetic test. -->
+    <ClCompile Include="..\..\tests\test_extras.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\arith_tests-p751.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj
new file mode 100644
index 0000000..4508e20
--- /dev/null
+++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj	
@@ -0,0 +1,487 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations"><!-- Declares the 12 configuration/platform pairs of this project: Debug, Fast, Generic, Optimized-fast, Optimized-generic and Release, each for Win32 and x64. -->
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup><!-- C sources of the project. -->
+    <ClCompile Include="..\..\tests\test_sike.c"><!-- Listed in the project but excluded from the build in all 12 configurations. NOTE(review): presumably it is #included by test_SIKEp377.c rather than compiled on its own; confirm against that file. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_extras.c" /><!-- This and test_SIKEp377.c below are compiled in every configuration. -->
+    <ClCompile Include="..\..\tests\test_SIKEp377.c" />
+  </ItemGroup>
+  <ItemGroup><!-- Header file that is part of the project. -->
+    <ClInclude Include="..\..\tests\test_extras.h" />
+  </ItemGroup>
+  <ItemGroup><!-- Reference to the sibling SIDHp377 project, identified by relative path and project GUID. -->
+    <ProjectReference Include="..\SIDH\SIDHp377.vcxproj">
+      <Project>{05cef530-f410-4c21-ac70-a7ef991dee6a}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <PropertyGroup Label="Globals"><!-- Project-wide identity: GUID, project keyword, names and the targeted Windows SDK version. -->
+    <ProjectGuid>{0D497554-D408-4061-BA26-2A65F4272841}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>kex_tests</RootNamespace><!-- NOTE(review): differs from ProjectName below; confirm whether "kex_tests" is a leftover name. -->
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <ProjectName>test-SIKEp377</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration"> <!-- Per-configuration build type: each non-debug group below builds an Application with the v140 toolset, non-debug libraries, whole-program optimization and the Unicode character set. -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> <!-- x64 groups start here; their settings are identical to the Win32 group above. -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <!-- Standard C++ property import; placed after the "Configuration" property groups above. -->
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- One group per configuration/platform pair; each imports the per-user, per-platform property sheet only when that file exists. -->
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" /> <!-- The groups below set, per configuration/platform, the linker mode, the output directory (OutDir) and this project's private intermediate directory (IntDir). Every IntDir ends in test_SIKEp377 so object files never share a directory with another project. -->
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Win32 builds write under the solution directory by configuration name only. -->
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- x64 builds add a platform directory level to both OutDir and IntDir. -->
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Incremental linking is enabled only for the two Debug groups above. -->
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp377\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp377\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Debug|Win32 compile/link defaults: warning level 4, no optimization, AVX code generation, console subsystem with debug information. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- Debug|x64 defines _FAST_ and _AMD64_ instead of _X86_ and _GENERIC_, and additionally pins the runtime library, the Windows import libraries and library-dependency linking for project references. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> <!-- NOTE(review): this selects the non-debug DLL runtime in a Debug configuration; confirm it matches the referenced library project. -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <IgnoreAllDefaultLibraries>
+      </IgnoreAllDefaultLibraries>
+      <AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+    <ProjectReference>
+      <LinkLibraryDependencies>true</LinkLibraryDependencies>
+    </ProjectReference>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- The five Win32 non-debug groups below carry identical settings: warning level 4, function-level linking, intrinsics, __WINDOWS__/_X86_/_GENERIC_ defines, AVX, console subsystem, COMDAT folding and reference optimization. NOTE(review): Optimization is Disabled in every one of them, including Release and the Optimized-* configurations; confirm this is intended. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'"> <!-- NOTE(review): defines _GENERIC_ rather than a fast/optimized macro, unlike Optimized-fast|x64; confirm this is deliberate for Win32. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <!-- The five x64 non-debug groups below differ only in PreprocessorDefinitions; all define __WINDOWS__ and _AMD64_ plus a configuration-specific macro. Release|x64 uses the same defines as Optimized-fast|x64. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'"> <!-- Defines _OPTIMIZED_FAST_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'"> <!-- Defines _FAST_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'"> <!-- Defines _GENERIC_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'"> <!-- Defines both _OPTIMIZED_GENERIC_ and _GENERIC_. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_;  _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <!-- Imports the C++ targets file; it comes after all property and item definitions in this project. -->
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj.filters b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj.filters
new file mode 100644
index 0000000..493d88c
--- /dev/null
+++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp377.vcxproj.filters	
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup> <!-- Filter definitions: three filters, each with a unique identifier and the file extensions it covers. -->
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup> <!-- Assigns the three test C sources (paths relative to this project file) to the "Source Files" filter. -->
+    <ClCompile Include="..\..\tests\test_extras.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_sike.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_SIKEp377.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup> <!-- Assigns the shared test header to the "Header Files" filter. -->
+    <ClInclude Include="..\..\tests\test_extras.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj
new file mode 100644
index 0000000..a7563b7
--- /dev/null
+++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj	
@@ -0,0 +1,483 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations"> <!-- Declares the 12 build combinations of this project: six configurations (Debug, Fast, Generic, Optimized-fast, Optimized-generic, Release), each for Win32 and x64. -->
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup> <!-- C sources of the test program. -->
+    <ClCompile Include="..\..\tests\test_sike.c"> <!-- Excluded from the build in the 8 configurations listed below. NOTE(review): Fast|Win32, Optimized-fast|Win32, Optimized-generic|Win32 and Release|Win32 are not listed, so this file is compiled there; confirm that asymmetry is intended. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_extras.c" />
+    <ClCompile Include="..\..\tests\test_SIKEp434.c" />
+  </ItemGroup>
+  <ItemGroup> <!-- Header listed with the project. -->
+    <ClInclude Include="..\..\tests\test_extras.h" />
+  </ItemGroup>
+  <ItemGroup> <!-- Reference to the SIDHp434 project, identified by path and project GUID. -->
+    <ProjectReference Include="..\SIDH\SIDHp434.vcxproj">
+      <Project>{e46fd055-7619-4c50-8360-fa3bc2f650fb}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <PropertyGroup Label="Globals"> <!-- Project identity: GUID, project keyword, root namespace, target Windows SDK version and display name. -->
+    <ProjectGuid>{53B2CD97-2FE6-4927-86A7-B16E436CFBD5}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>kex_tests</RootNamespace> <!-- NOTE(review): this project file lives under kem_tests; confirm the kex_tests namespace is intentional. -->
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <ProjectName>test-SIKEp434</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> <!-- Default C++ properties, imported before the per-configuration groups below. -->
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <!-- Debug groups: Application, debug libraries, v140 toolset, Unicode; no whole-program optimization. -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> <!-- All ten non-debug groups from here on are identical: Application, non-debug libraries, v140 toolset, whole-program optimization, Unicode. -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <!-- Standard C++ property import; placed after the "Configuration" property groups above. -->
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- One group per configuration/platform pair; each imports the per-user, per-platform property sheet only when that file exists. -->
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" /> <!-- The groups below set, per configuration/platform, the linker mode, the output directory (OutDir) and the intermediate directory (IntDir); every IntDir ends in test_SIKEp434. -->
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Win32 builds write under the solution directory by configuration name only. -->
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- x64 builds add a platform directory level to both OutDir and IntDir. -->
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Incremental linking is enabled only for the two Debug groups above. -->
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp434\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp434\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- Compiler, linker and project-reference defaults applied when building Debug for x64. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions> <!-- Replaces any inherited definitions: %(PreprocessorDefinitions) is not appended. -->
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> <!-- NOTE(review): this is the non-debug DLL runtime although the configuration is Debug; confirm it matches the libraries this test links against. -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <IgnoreAllDefaultLibraries>
+      </IgnoreAllDefaultLibraries>
+      <AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+    <ProjectReference>
+      <LinkLibraryDependencies>true</LinkLibraryDependencies> <!-- Only this Debug|x64 group sets LinkLibraryDependencies explicitly; the other configuration groups shown in this file leave it at its default. -->
+    </ProjectReference>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_;  _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj.filters b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj.filters
new file mode 100644
index 0000000..c8ef8f7
--- /dev/null
+++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp434.vcxproj.filters	
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- Filter layout for the test-SIKEp434 project: groups its files under named filters in the IDE. -->
+  <ItemGroup> <!-- Filter definitions: each has a GUID and the list of file extensions associated with it. -->
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files"> <!-- Defined, but no item in this file is assigned to it. -->
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup> <!-- C sources of the test program, all placed under "Source Files". Paths are relative to the project directory. -->
+    <ClCompile Include="..\..\tests\test_extras.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_sike.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_SIKEp434.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup> <!-- The single header of the test program, placed under "Header Files". -->
+    <ClInclude Include="..\..\tests\test_extras.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj
new file mode 100644
index 0000000..02557a3
--- /dev/null
+++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj	
@@ -0,0 +1,483 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <!-- test_sike.c stays in the project so it is visible in the IDE, but it is
+         excluded from the build unconditionally, i.e. for all 12 configuration and
+         platform combinations. Per-combination conditions are easy to leave
+         incomplete (Release, Fast, Optimized-fast and Optimized-generic on Win32
+         were not covered), which lets the file compile as its own translation unit.
+         NOTE(review): presumably test_SIKEp503.c pulls this file in via #include,
+         which is why it must not be compiled separately; confirm. -->
+    <ClCompile Include="..\..\tests\test_sike.c">
+      <ExcludedFromBuild>true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_SIKEp503.c" />
+    <ClCompile Include="..\..\tests\test_extras.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\tests\test_extras.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\SIDH\SIDHp503.vcxproj">
+      <Project>{bbc8647d-b9e2-469f-a9a4-bb55b614adbe}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <PropertyGroup Label="Globals"> <!-- Project-wide identity and SDK settings for the test-SIKEp503 program. -->
+    <ProjectGuid>{EF9FE361-D94D-4CE0-8873-739A925326A3}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>kex_tests</RootNamespace> <!-- NOTE(review): "kex_tests" differs from the kem_tests folder this project lives in; looks like a leftover name, confirm. -->
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion> <!-- Every configuration shown in this file pairs this SDK version with the v140 platform toolset. -->
+    <ProjectName>test-SIKEp503</ProjectName> <!-- The IntDir values in this file use "test_SIKEp503" (underscore) rather than this hyphenated name. -->
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp503\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp503\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- Compiler, linker and project-reference defaults applied when building Debug for x64. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions> <!-- Replaces any inherited definitions: %(PreprocessorDefinitions) is not appended. -->
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> <!-- NOTE(review): non-debug DLL runtime, while UseDebugLibraries is true for Debug|x64 in this file; confirm it matches the referenced SIDHp503 project. -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <IgnoreAllDefaultLibraries>
+      </IgnoreAllDefaultLibraries>
+      <AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+    <ProjectReference>
+      <LinkLibraryDependencies>true</LinkLibraryDependencies> <!-- Applies to the ..\SIDH\SIDHp503.vcxproj reference declared in this file. -->
+    </ProjectReference>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Compiler and linker defaults applied when building Release for Win32. -->
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization> <!-- NOTE(review): optimization is off in a Release configuration whose Configuration group sets WholeProgramOptimization to true; confirm this is intentional. -->
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions> <!-- Same definitions as the Debug, Optimized-fast and Fast groups for Win32 in this file; %(PreprocessorDefinitions) is not appended. -->
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation> <!-- Debug information is still produced for this Release build. -->
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_;  _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj.filters b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj.filters
new file mode 100644
index 0000000..a38608b
--- /dev/null
+++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp503.vcxproj.filters	
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\tests\test_extras.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_SIKEp503.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_sike.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\tests\test_extras.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj
new file mode 100644
index 0000000..2bb1ce8
--- /dev/null
+++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj	
@@ -0,0 +1,483 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\tests\test_sike.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_extras.c" />
+    <ClCompile Include="..\..\tests\test_SIKEp546.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\tests\test_extras.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\SIDH\SIDHp546.vcxproj">
+      <Project>{48010b78-5594-4fe9-81ac-909b670c1516}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{E52D3FE9-FD9F-4D93-9712-8172DB469831}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>kex_tests</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <ProjectName>test-SIKEp546</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp546\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp546\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <IgnoreAllDefaultLibraries>
+      </IgnoreAllDefaultLibraries>
+      <AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+    <ProjectReference>
+      <LinkLibraryDependencies>true</LinkLibraryDependencies>
+    </ProjectReference>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_;  _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj.filters b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj.filters
new file mode 100644
index 0000000..90a64f2
--- /dev/null
+++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp546.vcxproj.filters	
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- Filter layout for test-SIKEp546: declares three named filters and assigns each project file to one of them. -->
+  <ItemGroup> <!-- Filter declarations: each has a fixed GUID and the list of file extensions associated with it. -->
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files"> <!-- Declared, but no item in this file is assigned to it. -->
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup> <!-- The three C files under ..\..\tests, all placed in the Source Files filter. -->
+    <ClCompile Include="..\..\tests\test_extras.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_sike.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_SIKEp546.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup> <!-- The single header, placed in the Header Files filter. -->
+    <ClInclude Include="..\..\tests\test_extras.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj
new file mode 100644
index 0000000..997f20e
--- /dev/null
+++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj	
@@ -0,0 +1,487 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations"> <!-- Declares the twelve configuration/platform pairs of this project: Debug, Fast, Generic, Optimized-fast, Optimized-generic and Release, each for Win32 and x64. -->
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup> <!-- C sources of the test program. Only test_extras.c and test_SIKEp610.c are compiled as separate translation units. -->
+    <ClCompile Include="..\..\tests\test_sike.c"> <!-- Listed in the project but excluded from the build in all twelve configuration/platform pairs. Presumably it is textually included by test_SIKEp610.c; TODO confirm against that file. -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_extras.c" />
+    <ClCompile Include="..\..\tests\test_SIKEp610.c" />
+  </ItemGroup>
+  <ItemGroup> <!-- Header files that belong to the project; only test_extras.h is listed. -->
+    <ClInclude Include="..\..\tests\test_extras.h" />
+  </ItemGroup>
+  <ItemGroup> <!-- Project-to-project dependency on the SIDHp610 project in the sibling SIDH directory. -->
+    <ProjectReference Include="..\SIDH\SIDHp610.vcxproj">
+      <Project>{ed1ba17a-58ea-4d9f-9b19-7061395e22bb}</Project> <!-- Must equal the ProjectGuid declared in SIDHp610.vcxproj; that file is not visible here, so verify if either side changes. -->
+    </ProjectReference>
+  </ItemGroup>
+  <PropertyGroup Label="Globals"> <!-- Project identity: GUID, display name test-SIKEp610, and the Windows SDK version (8.1) to target. -->
+    <ProjectGuid>{DC10CB31-A905-402E-B466-46ADCD1AD61C}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>kex_tests</RootNamespace> <!-- NOTE(review): namespace is kex_tests although the project is named test-SIKEp610 and lives under kem_tests; looks like a leftover, confirm. -->
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <ProjectName>test-SIKEp610</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <!-- Debug|Win32: builds an Application with debug libraries, the v140 toolset and the Unicode character set. WholeProgramOptimization is not set. -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> <!-- Debug|x64: same values as Debug|Win32. -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> <!-- This group and the nine that follow cover every non-Debug configuration/platform pair with identical values: Application, no debug libraries, v140 toolset, WholeProgramOptimization on, Unicode. -->
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings"> <!-- Intentionally empty: no extension settings are imported. -->
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- This group and the eleven that follow are identical apart from their Condition: each imports the per-user Microsoft.Cpp.$(Platform).user.props sheet, and only when that file exists. -->
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" /> <!-- No user macros are defined. The groups below set incremental linking and the output/intermediate directories per configuration/platform pair. -->
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Win32 pattern: binaries go to $(SolutionDir)$(Configuration)\ and intermediates to $(Configuration)\SIDHp610\. Only the two Debug groups enable incremental linking. -->
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- x64 pattern: same as Win32 with a leading $(Platform)\ path component in both directories. NOTE(review): the intermediate folder is named SIDHp610 although this is the test-SIKEp610 project; confirm that is intentional. -->
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp610\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Compiler and linker defaults that apply only when building Debug for Win32. -->
+    <ClCompile> <!-- Level4 warnings, optimization disabled, AVX code generation, and the macros __WINDOWS__, _X86_ and _GENERIC_. -->
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link> <!-- Console subsystem with debug information. -->
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- Compiler, linker and project-reference defaults that apply only when building Debug for x64. -->
+    <ClCompile> <!-- Level4 warnings, optimization disabled, AVX code generation, and the macros __WINDOWS__, _GENERIC_ and _AMD64_. -->
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> <!-- NOTE(review): selects the non-debug DLL runtime in a Debug configuration that sets UseDebugLibraries to true; confirm it is meant to match the referenced library project. -->
+    </ClCompile>
+    <Link> <!-- Console subsystem with debug information, plus an explicit list of system import libraries ahead of the inherited dependencies. -->
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <IgnoreAllDefaultLibraries>
+      </IgnoreAllDefaultLibraries>
+      <AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+    <ProjectReference> <!-- Only this configuration sets LinkLibraryDependencies explicitly for referenced projects. -->
+      <LinkLibraryDependencies>true</LinkLibraryDependencies>
+    </ProjectReference>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Compiler and linker defaults that apply only when building Release for Win32. -->
+    <ClCompile> <!-- Level4 warnings, AVX code generation, and the macros __WINDOWS__, _X86_ and _GENERIC_. NOTE(review): Optimization is Disabled in a Release configuration; confirm this is intended for the test program. -->
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link> <!-- Console subsystem; debug information is generated; COMDAT folding and removal of unreferenced code are enabled. -->
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'"> <!-- Compiler and linker defaults that apply only when building Optimized-fast for Win32. -->
+    <ClCompile> <!-- Level4 warnings, AVX code generation, and the macros __WINDOWS__, _X86_ and _GENERIC_. NOTE(review): on Win32 this configuration defines _GENERIC_ rather than a fast-variant macro, and Optimization is Disabled; confirm both are intended. -->
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link> <!-- Console subsystem; debug information is generated; COMDAT folding and removal of unreferenced code are enabled. -->
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'"> <!-- Compiler and linker defaults that apply only when building Fast for Win32. -->
+    <ClCompile> <!-- Level4 warnings, AVX code generation, and the macros __WINDOWS__, _X86_ and _GENERIC_. NOTE(review): on Win32 this configuration defines _GENERIC_ rather than _FAST_, and Optimization is Disabled; confirm both are intended. -->
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link> <!-- Console subsystem; debug information is generated; COMDAT folding and removal of unreferenced code are enabled. -->
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'"> <!-- Compiler and linker defaults that apply only when building Generic for Win32. -->
+    <ClCompile> <!-- Level4 warnings, optimization disabled, AVX code generation, and the macros __WINDOWS__, _X86_ and _GENERIC_. -->
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link> <!-- Console subsystem; debug information is generated; COMDAT folding and removal of unreferenced code are enabled. -->
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'"> <!-- Compiler and linker defaults that apply only when building Optimized-generic for Win32. -->
+    <ClCompile> <!-- Level4 warnings, AVX code generation, and the macros __WINDOWS__, _X86_ and _GENERIC_. NOTE(review): _OPTIMIZED_GENERIC_ is not defined here although the x64 counterpart defines it, and Optimization is Disabled; confirm both are intended. -->
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link> <!-- Console subsystem; debug information is generated; COMDAT folding and removal of unreferenced code are enabled. -->
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <!-- Compiler and linker defaults that apply only when building Release for x64. -->
+    <ClCompile> <!-- Level4 warnings, AVX code generation, and the macros __WINDOWS__, _OPTIMIZED_FAST_ and _AMD64_ (the same macro set as Optimized-fast for x64). NOTE(review): Optimization is Disabled in a Release configuration; confirm this is intended. -->
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link> <!-- Console subsystem; debug information is generated; COMDAT folding and removal of unreferenced code are enabled. -->
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'"> <!-- Compiler and linker defaults that apply only when building Optimized-fast for x64. -->
+    <ClCompile> <!-- Level4 warnings, AVX code generation, and the macros __WINDOWS__, _OPTIMIZED_FAST_ and _AMD64_. NOTE(review): Optimization is Disabled although the configuration is named Optimized; confirm this is intended. -->
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link> <!-- Console subsystem; debug information is generated; COMDAT folding and removal of unreferenced code are enabled. -->
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'"> <!-- Compiler and linker defaults that apply only when building Fast for x64. -->
+    <ClCompile> <!-- Level4 warnings, AVX code generation, and the macros __WINDOWS__, _FAST_ and _AMD64_. NOTE(review): Optimization is Disabled; confirm this is intended. -->
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link> <!-- Console subsystem; debug information is generated; COMDAT folding and removal of unreferenced code are enabled. -->
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'"> <!-- Compiler and linker defaults that apply only when building Generic for x64. -->
+    <ClCompile> <!-- Level4 warnings, optimization disabled, AVX code generation, and the macros __WINDOWS__, _GENERIC_ and _AMD64_. -->
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link> <!-- Console subsystem; debug information is generated; COMDAT folding and removal of unreferenced code are enabled. -->
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'"> <!-- Compiler and linker defaults that apply only when building Optimized-generic for x64. -->
+    <ClCompile> <!-- Level4 warnings, AVX code generation, and the macros __WINDOWS__, _OPTIMIZED_GENERIC_, _AMD64_ and _GENERIC_. NOTE(review): Optimization is Disabled although the configuration is named Optimized; confirm this is intended. -->
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_;  _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link> <!-- Console subsystem; debug information is generated; COMDAT folding and removal of unreferenced code are enabled. -->
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets"> <!-- Intentionally empty: no extension target files are imported by this project. -->
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj.filters b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj.filters
new file mode 100644
index 0000000..14944a1
--- /dev/null
+++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp610.vcxproj.filters	
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- Filter layout for test-SIKEp610: declares three named filters and assigns each project file to one of them. -->
+  <ItemGroup> <!-- Filter declarations: each has a fixed GUID and the list of file extensions associated with it. -->
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files"> <!-- Declared, but no item in this file is assigned to it. -->
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup> <!-- The three C files under ..\..\tests, all placed in the Source Files filter. -->
+    <ClCompile Include="..\..\tests\test_extras.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_sike.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_SIKEp610.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup> <!-- The single header, placed in the Header Files filter. -->
+    <ClInclude Include="..\..\tests\test_extras.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj
new file mode 100644
index 0000000..b596a73
--- /dev/null
+++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj	
@@ -0,0 +1,487 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\tests\test_sike.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_extras.c" />
+    <ClCompile Include="..\..\tests\test_SIKEp697.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\tests\test_extras.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\SIDH\SIDHp697.vcxproj">
+      <Project>{f7447653-7518-4bf0-934d-65801c74d42a}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{1B4248D6-99FD-47E0-B91F-8EF78F3A5D7B}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>kex_tests</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <ProjectName>test-SIKEp697</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir> <!-- p697-specific obj dir, same as the Debug/Fast/Generic configs of this project -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir> <!-- p697-specific obj dir, same as the Debug/Fast/Generic configs of this project -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\SIDHp697\</IntDir> <!-- p697-specific obj dir, same as the Debug/Fast/Generic configs of this project -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir> <!-- p697-specific obj dir, same as the Debug/Fast/Generic configs of this project -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir> <!-- p697-specific obj dir, same as the Debug/Fast/Generic configs of this project -->
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\SIDHp697\</IntDir> <!-- p697-specific obj dir, same as the Debug/Fast/Generic configs of this project -->
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <IgnoreAllDefaultLibraries>
+      </IgnoreAllDefaultLibraries>
+      <AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+    <ProjectReference>
+      <LinkLibraryDependencies>true</LinkLibraryDependencies>
+    </ProjectReference>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_;  _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj.filters b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj.filters
new file mode 100644
index 0000000..8873662
--- /dev/null
+++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp697.vcxproj.filters	
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\tests\test_extras.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_sike.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_SIKEp697.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\tests\test_extras.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj
new file mode 100644
index 0000000..68f51c2
--- /dev/null
+++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj	
@@ -0,0 +1,487 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|Win32">
+      <Configuration>Fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Fast|x64">
+      <Configuration>Fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|Win32">
+      <Configuration>Generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Generic|x64">
+      <Configuration>Generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|Win32">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-fast|x64">
+      <Configuration>Optimized-fast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|Win32">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Optimized-generic|x64">
+      <Configuration>Optimized-generic</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup> <!-- Test sources compiled into the test-SIKEp751 executable. -->
+    <ClCompile Include="..\..\tests\test_sike.c"> <!-- Listed for the IDE but excluded from the build in all 12 configurations below; presumably it is #included by test_SIKEp751.c instead of being compiled on its own (TODO confirm). -->
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_SIKEp751.c" /> <!-- Compiled in every configuration (no exclusions). -->
+    <ClCompile Include="..\..\tests\test_extras.c" /> <!-- Compiled in every configuration (no exclusions). -->
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\tests\test_extras.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\SIDH\SIDHp751.vcxproj">
+      <Project>{8283dd76-e88a-4b63-abde-33f014178413}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <PropertyGroup Label="Globals"> <!-- Project-wide identity settings for the test-SIKEp751 project. -->
+    <ProjectGuid>{0D570915-7551-4D5F-A2F0-A4A6200185F9}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>kex_tests</RootNamespace> <!-- NOTE(review): this project lives under kem_tests; "kex_tests" looks like a leftover name. Confirm against the sibling projects before renaming. -->
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+    <ProjectName>test-SIKEp751</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\test_SIKEp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp751\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\test_SIKEp751\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <IgnoreAllDefaultLibraries>
+      </IgnoreAllDefaultLibraries>
+      <AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+    <ProjectReference>
+      <LinkLibraryDependencies>true</LinkLibraryDependencies>
+    </ProjectReference>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _X86_; _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Fast|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _FAST_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__; _GENERIC_; _AMD64_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Optimized-generic|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>Disabled</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>__WINDOWS__;_OPTIMIZED_GENERIC_; _AMD64_;  _GENERIC_;</PreprocessorDefinitions>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj.filters b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj.filters
new file mode 100644
index 0000000..668ae12
--- /dev/null
+++ b/SIKE_sw/Visual Studio/kem_tests/test-SIKEp751.vcxproj.filters	
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\tests\test_extras.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_SIKEp751.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\tests\test_sike.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\tests\test_extras.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/SIKE_sw/src/P377/AMD64/fp_x64.c b/SIKE_sw/src/P377/AMD64/fp_x64.c
new file mode 100644
index 0000000..d49c8f8
--- /dev/null
+++ b/SIKE_sw/src/P377/AMD64/fp_x64.c
@@ -0,0 +1,439 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: modular arithmetic optimized for x64 platforms for P377
+*********************************************************************************************/
+
+#include "../P377_internal.h"
+#include "../../internal.h"
+
+// Global constants
+extern const uint64_t p377[NWORDS_FIELD];
+extern const uint64_t p377p1[NWORDS_FIELD]; 
+extern const uint64_t p377x2[NWORDS_FIELD];   
+extern const uint64_t p377x4[NWORDS_FIELD];
+
+
+__inline void mp_sub377_p2(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 2*p377: c = a - b + 2*p377, over NWORDS_FIELD digits.
+#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
+    unsigned int i, borrow = 0;
+    // Portable path. SUBC/ADDC are project macros; the argument pattern suggests (carry-in, x, y, carry-out, result) - confirm.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]);                     // c = a - b, digit by digit
+    }
+    // The borrow left by the subtraction is dropped here; the variable is reused as the carry of the addition.
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p377x2)[i], borrow, c[i]);    // c = c + 2*p377; the final carry is not used
+    }
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    // Linux path: same operation, delegated to the assembly routine.
+    mp_sub377_p2_asm(a, b, c);    
+
+#endif
+} 
+
+
+__inline void mp_sub377_p4(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 4*p377: c = a - b + 4*p377, over NWORDS_FIELD digits.
+#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
+    unsigned int i, borrow = 0;
+    // Portable path. SUBC/ADDC are project macros; the argument pattern suggests (carry-in, x, y, carry-out, result) - confirm.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]);                     // c = a - b, digit by digit
+    }
+    // The borrow left by the subtraction is dropped here; the variable is reused as the carry of the addition.
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p377x4)[i], borrow, c[i]);    // c = c + 4*p377; the final carry is not used
+    }
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    // Linux path: same operation, delegated to the assembly routine.
+    mp_sub377_p4_asm(a, b, c);    
+
+#endif
+}
+
+
+__inline void fpadd377(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular addition, c = a+b mod p377.
+  // Inputs: a, b in [0, 2*p377-1] 
+  // Output: c in [0, 2*p377-1] 
+  // NOTE: only OS_WIN and OS_LINUX have a branch below; for any other OS_TARGET the body is empty and c is not written.
+#if (OS_TARGET == OS_WIN)
+    unsigned int i, carry = 0;
+    digit_t mask;
+    // Step 1: c = a + b.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], b[i], carry, c[i]); 
+    }
+    // Step 2: c = c - 2*p377; the carry variable now acts as the borrow.
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(carry, c[i], ((digit_t*)p377x2)[i], carry, c[i]); 
+    }
+    mask = 0 - (digit_t)carry;              // All-ones when the final borrow is 1 (assuming SUBC yields 0 or 1), zero otherwise
+    // Step 3: add back (2*p377 & mask), i.e. undo step 2 only when it borrowed; no data-dependent branch is taken.
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, c[i], ((digit_t*)p377x2)[i] & mask, carry, c[i]); 
+    } 
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    // Linux path: delegated to the assembly routine.
+    fpadd377_asm(a, b, c);    
+
+#endif
+} 
+
+
+__inline void fpsub377(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular subtraction, c = a-b mod p377.
+  // Inputs: a, b in [0, 2*p377-1] 
+  // Output: c in [0, 2*p377-1] 
+  // NOTE: only OS_WIN and OS_LINUX have a branch below; for any other OS_TARGET the body is empty and c is not written.
+#if (OS_TARGET == OS_WIN)
+    unsigned int i, borrow = 0;
+    digit_t mask;
+    // Step 1: c = a - b.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    mask = 0 - (digit_t)borrow;             // All-ones when the final borrow is 1 (assuming SUBC yields 0 or 1), zero otherwise
+    // Step 2: add (2*p377 & mask), i.e. correct by 2*p377 only when step 1 borrowed; no data-dependent branch is taken.
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p377x2)[i] & mask, borrow, c[i]); 
+    }
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    // Linux path: delegated to the assembly routine.
+    fpsub377_asm(a, b, c);    
+
+#endif
+}
+
+
+__inline void fpneg377(digit_t* a)
+{ // Modular negation, a = -a mod p377, computed in place as a = 2*p377 - a.
+  // Input/output: a in [0, 2*p377-1] 
+    unsigned int i, borrow = 0;
+    // Digit-by-digit subtraction from the constant 2*p377; the borrow out of the last digit is not used.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, ((digit_t*)p377x2)[i], a[i], borrow, a[i]); 
+    }
+}
+
+
+void fpdiv2_377(const digit_t* a, digit_t* c)
+{ // Modular division by two, c = a/2 mod p377.
+  // Input : a in [0, 2*p377-1] 
+  // Output: c in [0, 2*p377-1] 
+    unsigned int i, carry = 0;
+    digit_t mask;
+    // The mask selects p377 without branching: all-ones when the lowest bit of a is set, zero otherwise.
+    mask = 0 - (digit_t)(a[0] & 1);    // If a is odd compute a+p377
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], ((digit_t*)p377)[i] & mask, carry, c[i]); 
+    }
+    // mp_shiftr1 is defined elsewhere; presumably a one-bit right shift over NWORDS_FIELD digits. The final carry above is not passed to it.
+    mp_shiftr1(c, NWORDS_FIELD);
+}  
+
+
+void fpcorrection377(digit_t* a)
+{ // Modular correction to reduce field element a in [0, 2*p377-1] to [0, p377-1].
+    unsigned int i, borrow = 0;
+    digit_t mask;
+    // Step 1: a = a - p377, in place.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], ((digit_t*)p377)[i], borrow, a[i]); 
+    }
+    mask = 0 - (digit_t)borrow;             // All-ones when step 1 borrowed (assuming SUBC yields 0 or 1), zero otherwise
+    // Step 2: add back (p377 & mask), restoring a only when step 1 borrowed; no data-dependent branch is taken.
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, a[i], ((digit_t*)p377)[i] & mask, borrow, a[i]); 
+    }
+}
+
+
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords.
+  // NOTE: nwords is ignored; both branches read a[0..5], b[0..5] and write c[0..11].
+    (void)nwords;
+
+#if (OS_TARGET == OS_WIN)
+    digit_t t = 0;
+    uint128_t uv = {0};
+    unsigned int carry = 0;
+    // Column-wise product: column k sums every a[i]*b[j] with i+j = k into (t, uv[1], uv[0]), stores uv[0] to c[k], then shifts down one digit.
+    MULADD128(a[0], b[0], uv, carry, uv);
+    t += carry;
+    c[0] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 1
+    MULADD128(a[0], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[0], uv, carry, uv);
+    t += carry;
+    c[1] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 2
+    MULADD128(a[0], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[0], uv, carry, uv);
+    t += carry;
+    c[2] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 3
+    MULADD128(a[0], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[0], uv, carry, uv);
+    t += carry;
+    c[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 4
+    MULADD128(a[0], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[0], uv, carry, uv);
+    t += carry;
+    c[4] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 5 (the widest column: six partial products)
+    MULADD128(a[0], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[0], uv, carry, uv);
+    t += carry;
+    c[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 6
+    MULADD128(a[5], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[5], uv, carry, uv);
+    t += carry;
+    c[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 7
+    MULADD128(a[5], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[5], uv, carry, uv);
+    t += carry;
+    c[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 8
+    MULADD128(a[5], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[5], uv, carry, uv);
+    t += carry;
+    c[8] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 9
+    MULADD128(a[5], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[5], uv, carry, uv);
+    t += carry;
+    c[9] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    // Column 10: the last product; its carry is not accumulated and both accumulator digits are stored as c[10], c[11].
+    MULADD128(a[5], b[5], uv, carry, uv);
+    c[10] = uv[0];
+    c[11] = uv[1];
+
+#elif (OS_TARGET == OS_LINUX)
+    // Linux path: delegated to the assembly routine.
+    mul377_asm(a, b, c);
+
+#endif
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Montgomery reduction exploiting special form of the prime.
+  // mc = ma*R^-1 mod p377x2, where R = 2^384.
+  // If ma < 2^384*p377, the output mc is in the range [0, 2*p377-1].
+  // ma is assumed to be in Montgomery representation.
+  // Reads ma[0..11], writes mc[0..5]. Only p377p1[2..5] are used; digits 0 and 1 never appear (presumably they are zero - confirm).
+#if (OS_TARGET == OS_WIN)
+    unsigned int carry;
+    digit_t t = 0;
+    uint128_t uv = {0};
+    // Columns 0 and 1 have no product terms: the input digits are copied unchanged.
+    mc[0] = ma[0];
+    mc[1] = ma[1];
+    MUL128(mc[0], ((digit_t*)p377p1)[2], uv);
+    ADDC(0, uv[0], ma[2], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    mc[2] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = 0;
+    // Column 3. The carry of the first MULADD128 is not added to t (uv[1] is zero on entry).
+    MULADD128(mc[0], ((digit_t*)p377p1)[3], uv, carry, uv);
+    MULADD128(mc[1], ((digit_t*)p377p1)[2], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[3], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 4: sum of mc[i]*p377p1[j] with i+j = 4, plus ma[4].
+    MULADD128(mc[0], ((digit_t*)p377p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p377p1)[3], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p377p1)[2], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[4], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[4] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 5: the last column whose stored digit is reused as a multiplier below.
+    MULADD128(mc[0], ((digit_t*)p377p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p377p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p377p1)[3], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p377p1)[2], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[5], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 6: its low digit overwrites mc[0], which is not read again after this point.
+    MULADD128(mc[1], ((digit_t*)p377p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p377p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p377p1)[3], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p377p1)[2], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[6], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[0] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 7: its low digit overwrites mc[1].
+    MULADD128(mc[2], ((digit_t*)p377p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p377p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p377p1)[3], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p377p1)[2], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[7], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[1] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 8: its low digit overwrites mc[2].
+    MULADD128(mc[3], ((digit_t*)p377p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p377p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p377p1)[3], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[8], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[2] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 9: its low digit overwrites mc[3].
+    MULADD128(mc[4], ((digit_t*)p377p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p377p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[9], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 10 and top digit: results go straight to mc[4] and mc[5]; t and the last carry are not used afterwards.
+    MULADD128(mc[5], ((digit_t*)p377p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[10], carry, mc[4]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    ADDC(0, uv[1], ma[11], carry, mc[5]); 
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    // Linux path: delegated to the assembly routine.
+    rdc377_asm(ma, mc);    
+
+#endif
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/P377/AMD64/fp_x64_asm.S b/SIKE_sw/src/P377/AMD64/fp_x64_asm.S
new file mode 100644
index 0000000..af1c4e4
--- /dev/null
+++ b/SIKE_sw/src/P377/AMD64/fp_x64_asm.S
@@ -0,0 +1,747 @@
+//*******************************************************************************************
+// SIDH: an efficient supersingular isogeny cryptography library 
+// Copyright (c) Microsoft Corporation
+//
+// Website: https://github.com/microsoft/PQCrypto-SIDH
+// Released under MIT license 
+//
+// Abstract: field arithmetic in x64 assembly for P377 on Linux
+//*******************************************************************************************  
+
+.intel_syntax noprefix  
+
+// Format function and variable names for Mac OS X
+#if defined(__APPLE__)
+    #define fmt(f)    _##f
+#else
+    #define fmt(f)    f
+#endif
+
+// Registers that are used for parameter passing:
+#define reg_p1  rdi
+#define reg_p2  rsi
+#define reg_p3  rdx
+
+// Define addition instructions
+#ifdef _MULX_
+#ifdef _ADX_
+
+#define ADD     adcx
+#define ADC     adcx
+
+#else
+
+#define ADD     add
+#define ADC     adc
+
+#endif    
+#endif
+
+
+.text
+//***********************************************************************
+//  Field addition
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] - 2*p377, with 2*p377 added back if that subtraction borrowed
+//*********************************************************************** 
+.global fmt(fpadd377_asm)
+fmt(fpadd377_asm):
+  push   r12
+  push   r13
+  push   r14
+  push   r15
+  
+  xor    rax, rax                       // rax = 0, becomes the correction mask below
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  add    r8, [reg_p2] 
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  adc    r12, [reg_p2+32] 
+  adc    r13, [reg_p2+40]
+  // Subtract 2*p377. fmt() applies the platform symbol prefix to the C constant, as it does for the labels.
+  mov    rcx, [rip+fmt(p377x2)]
+  sub    r8, rcx
+  mov    rdi, [rip+fmt(p377x2)+8]       // reg_p1 is no longer needed; word 1 is reused for word 2 below
+  sbb    r9, rdi
+  sbb    r10, rdi
+  mov    rsi, [rip+fmt(p377x2)+24]      // reg_p2 is no longer needed
+  sbb    r11, rsi
+  mov    r14, [rip+fmt(p377x2)+32]
+  sbb    r12, r14
+  mov    r15, [rip+fmt(p377x2)+40]
+  sbb    r13, r15
+  sbb    rax, 0                         // rax = all-ones if the subtraction borrowed, else 0
+  
+  and    rcx, rax
+  and    rdi, rax
+  and    rsi, rax
+  and    r14, rax
+  and    r15, rax
+  // Add back the masked 2*p377 (a no-op when rax is 0).
+  add    r8, rcx  
+  adc    r9, rdi  
+  adc    r10, rdi 
+  adc    r11, rsi 
+  adc    r12, r14   
+  adc    r13, r15
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12 
+  mov    [reg_p3+40], r13
+  
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Field subtraction
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2], with 2*p377 added if the subtraction borrowed
+//*********************************************************************** 
+.global fmt(fpsub377_asm)
+fmt(fpsub377_asm):
+  push   r12
+  push   r13
+  
+  xor    rax, rax                       // rax = 0, becomes the correction mask below
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  sub    r8, [reg_p2] 
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40]
+  sbb    rax, 0                         // rax = all-ones if a - b borrowed, else 0
+  // Low four words of the masked 2*p377. fmt() applies the platform symbol prefix to the C constant.
+  mov    rcx, [rip+fmt(p377x2)]
+  mov    rdi, [rip+fmt(p377x2)+8]       // Word 1 is reused for word 2 below
+  mov    rsi, [rip+fmt(p377x2)+24]
+  and    rcx, rax
+  and    rdi, rax
+  and    rsi, rax    
+  add    r8, rcx  
+  adc    r9, rdi  
+  adc    r10, rdi 
+  adc    r11, rsi
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11
+  setc   cl                             // Save the carry: the and instructions below clear CF
+  // High two words of the masked 2*p377.
+  mov    rdi, [rip+fmt(p377x2)+32]
+  mov    rsi, [rip+fmt(p377x2)+40]
+  and    rdi, rax
+  and    rsi, rax 
+  bt     rcx, 0                         // Restore the saved carry into CF
+  adc    r12, rdi   
+  adc    r13, rsi
+  mov    [reg_p3+32], r12 
+  mov    [reg_p3+40], r13
+  
+  pop    r13
+  pop    r12
+  ret
+
+
+///////////////////////////////////////////////////////////////// MACRO
+.macro SUB377_PX  P0                    // c [reg_p3] = a [reg_p1] - b [reg_p2] + P0, six 64-bit words; P0 is a symbol name
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    rax, [reg_p1+32]
+  mov    rcx, [reg_p1+40]
+  sub    r8, [reg_p2] 
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    rax, [reg_p2+32] 
+  sbb    rcx, [reg_p2+40]               // The borrow out of the subtraction is not kept
+  // From here on rdi/rsi (reg_p1/reg_p2) are reused as scratch for the words of P0.
+  mov    rdi, [rip+\P0]
+  mov    rsi, [rip+\P0+8]
+  add    r8, rdi
+  adc    r9, rsi  
+  adc    r10, rsi                       // Word 1 of P0 is reused as word 2: P0+16 is never loaded
+  mov    rdi, [rip+\P0+24]
+  mov    rsi, [rip+\P0+32]
+  adc    r11, rdi  
+  mov    rdi, [rip+\P0+40]  
+  adc    rax, rsi  
+  adc    rcx, rdi
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], rax 
+  mov    [reg_p3+40], rcx
+  .endm
+
+
+//***********************************************************************
+//  Multiprecision subtraction with correction with 2*p377
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p377
+//*********************************************************************** 
+.global fmt(mp_sub377_p2_asm)
+fmt(mp_sub377_p2_asm):
+  // fmt() gives the C constant the same platform symbol prefix as the labels (expanded by cpp before the macro call)
+  SUB377_PX  fmt(p377x2)
+  ret
+
+
+//***********************************************************************
+//  Multiprecision subtraction with correction with 4*p377
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p377
+//*********************************************************************** 
+.global fmt(mp_sub377_p4_asm)
+fmt(mp_sub377_p4_asm):
+  // fmt() gives the C constant the same platform symbol prefix as the labels (expanded by cpp before the macro call)
+  SUB377_PX  fmt(p377x4)
+  ret
+
+
+#ifdef _MULX_
+    
+///////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication
+// Inputs:  memory pointers M0 and M1
+// Outputs: memory pointer C and regs T1, T2, T0
+// Temps:   regs T0:T6
+/////////////////////////////////////////////////////////////////
+
+#ifdef _ADX_
+.macro MUL192_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6    // ADX variant: two carry chains, adcx on CF and adox on OF
+    mov    rdx, \M0
+    mulx   \T0, \T1, \M1     // T0:T1 = A0*B0
+    mov    \C, \T1           // C0_final
+    mulx   \T1, \T2, 8\M1    // T1:T2 = A0*B1
+    xor    rax, rax          // rax = 0; also clears CF and OF
+    adox   \T0, \T2        
+    mulx   \T2, \T3, 16\M1   // T2:T3 = A0*B2
+    adox   \T1, \T3
+           
+    mov    rdx, 8\M0
+    mulx   \T3, \T4, \M1     // T3:T4 = A1*B0
+    adox   \T2, rax          // Fold the pending OF carry into the top word
+    xor    rax, rax          // Restart both carry chains
+    mulx   \T5, \T6, 8\M1    // T5:T6 = A1*B1
+    adox   \T4, \T0
+    mov    8\C, \T4          // C1_final  
+    adcx   \T3, \T6      
+    mulx   \T6, \T0, 16\M1   // T6:T0 = A1*B2 
+    adox   \T3, \T1  
+    adcx   \T5, \T0     
+    adcx   \T6, rax 
+    adox   \T5, \T2	
+    
+    mov    rdx, 16\M0
+    mulx   \T1, \T0, \M1     // T1:T0 = A2*B0
+    adox   \T6, rax          // Fold the pending OF carry into the top word
+    xor    rax, rax          // Restart both carry chains
+    mulx   \T2, \T4, 8\M1    // T2:T4 = A2*B1
+    adox   \T0, \T3   
+    mov    16\C, \T0         // C2_final 
+    adcx   \T1, \T5    
+    mulx   \T0, \T3, 16\M1   // T0:T3 = A2*B2
+    adcx   \T2, \T6  
+    adcx   \T0, rax
+    adox   \T1, \T4
+    adox   \T2, \T3
+    adox   \T0, rax          // High half of the product is left in T1, T2, T0 (low to high)
+.endm
+
+#else
+
+.macro MUL192_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6    // Non-ADX variant: a single add/adc carry chain on CF
+    mov    rdx, \M0
+    mulx   \T0, \T1, \M1     // T0:T1 = A0*B0
+    mov    \C, \T1           // C0_final
+    mulx   \T1, \T2, 8\M1    // T1:T2 = A0*B1
+    add    \T0, \T2        
+    mulx   \T2, \T3, 16\M1   // T2:T3 = A0*B2
+    adc    \T1, \T3
+           
+    mov    rdx, 8\M0
+    mulx   \T3, \T4, \M1     // T3:T4 = A1*B0
+    adc    \T2, 0            // mulx and mov leave the flags untouched, so the carry from above is still pending
+    mulx   \T5, \T6, 8\M1    // T5:T6 = A1*B1
+    add    \T4, \T0
+    mov    8\C, \T4          // C1_final
+    adc    \T3, \T1  
+    adc    \T5, \T2	    
+    mulx   \T2, \T1, 16\M1   // T2:T1 = A1*B2
+    adc    \T2, 0    
+
+    add    \T3, \T6  
+    adc    \T5, \T1     
+    adc    \T2, 0
+    
+    mov    rdx, 16\M0
+    mulx   \T1, \T0, \M1     // T1:T0 = A2*B0
+    add    \T0, \T3   
+    mov    16\C, \T0         // C2_final 
+    mulx   \T4, \T6, 8\M1    // T4:T6 = A2*B1
+    adc    \T1, \T5    
+    adc    \T2, \T4 
+    mulx   \T0, \T3, 16\M1   // T0:T3 = A2*B2 
+    adc    \T0, 0
+    add    \T1, \T6
+    adc    \T2, \T3
+    adc    \T0, 0            // High half of the product is left in T1, T2, T0 (low to high)
+.endm
+#endif
+
+
+//*****************************************************************************
+//  377-bit multiplication using Karatsuba (one level), schoolbook (one level)
+//***************************************************************************** 
+.global fmt(mul377_asm)
+fmt(mul377_asm):    
+    push   r14 
+    push   r15
+    mov    rcx, reg_p3                  // rdx (reg_p3) is clobbered by mulx, so the output pointer lives in rcx
+
+    // r8-r10 <- AH + AL, rax <- mask
+    xor    rax, rax
+    mov    r8, [reg_p1]
+    mov    r9, [reg_p1+8]
+    mov    r10, [reg_p1+16]
+    push   rbx 
+    push   rbp
+    add    r8, [reg_p1+24]
+    adc    r9, [reg_p1+32]
+    adc    r10, [reg_p1+40]
+    sbb    rax, 0                       // rax = all-ones if AH + AL carried out of 192 bits, else 0
+    push   r12
+    push   r13 
+
+    // r11-r13 <- BH + BL, rbx <- mask
+    xor    rbx, rbx
+    mov    r11, [reg_p2]
+    mov    r12, [reg_p2+8]
+    mov    r13, [reg_p2+16]
+    sub    rsp, 48                      // 48 bytes of stack scratch: the two 3-word sums, later reused for AH x BH low
+    add    r11, [reg_p2+24]
+    adc    r12, [reg_p2+32]
+    adc    r13, [reg_p2+40]
+    sbb    rbx, 0                       // rbx = all-ones if BH + BL carried out of 192 bits, else 0
+    mov    [rsp], r8
+    mov    [rsp+8], r9
+    mov    [rsp+16], r10
+    mov    [rsp+24], r11
+    mov    [rsp+32], r12
+    mov    [rsp+40], r13
+    
+    // r11-r13 <- masked (BH + BL)
+    and    r11, rax
+    and    r12, rax
+    and    r13, rax
+
+    // r8-r10 <- masked (AH + AL)
+    and    r8, rbx
+    and    r9, rbx
+    and    r10, rbx
+
+    // r8-r10 <- masked (AH + AL) + masked (BH + BL)
+    add    r8, r11
+    adc    r9, r12
+    adc    r10, r13
+
+    // [rcx+48] <- (AH+AL) x (BH+BL), low part 
+    MUL192_SCHOOL  [rsp], [rsp+24], [rcx+48], r15, rbx, rbp, r11, r12, r13, r14 
+    mov    [rcx+72], rbx         
+    mov    [rcx+80], rbp         
+    mov    [rcx+88], r15         
+
+    // [rcx] <- AL x BL
+    MUL192_SCHOOL  [reg_p1], [reg_p2], [rcx], r15, rbx, rbp, r11, r12, r13, r14     // Result C0-C2 
+    mov    [rcx+24], rbx         
+    mov    [rcx+32], rbp         
+    mov    [rcx+40], r15         
+
+    // [rsp], rbx, rbp, r15 <- AH x BH 
+    MUL192_SCHOOL  [reg_p1+24], [reg_p2+24], [rsp], r15, rbx, rbp, r11, r12, r13, r14
+    
+    // r8-r10 <- (AH+AL) x (BH+BL), final step
+    add    r8, [rcx+72]
+    adc    r9, [rcx+80]
+    adc    r10, [rcx+88]
+    
+    // r11-r13, r8-r10 <- (AH+AL) x (BH+BL) - ALxBL
+    mov    r11, [rcx+48]
+    mov    r12, [rcx+56]
+    mov    r13, [rcx+64]
+    sub    r11, [rcx]
+    sbb    r12, [rcx+8]
+    sbb    r13, [rcx+16]
+    sbb    r8, [rcx+24]
+    sbb    r9, [rcx+32]
+    sbb    r10, [rcx+40]
+    
+    // r11-r13, r8-r10 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
+    mov    rdi, [rsp]                   // reg_p1, reg_p2 and rdx are no longer needed as pointers
+    mov    rsi, [rsp+8]
+    mov    rdx, [rsp+16]
+    add    rsp, 48  
+    sub    r11, rdi
+    sbb    r12, rsi
+    sbb    r13, rdx
+    sbb    r8, rbx
+    sbb    r9, rbp
+    sbb    r10, r15
+    
+    add    r11, [rcx+24]
+    adc    r12, [rcx+32]
+    adc    r13, [rcx+40]
+    mov    [rcx+24], r11    // Result C3-C5
+    mov    [rcx+32], r12
+    mov    [rcx+40], r13
+    pop    r13
+    pop    r12
+    adc    r8, rdi 
+    adc    r9, rsi
+    adc    r10, rdx
+    mov    [rcx+48], r8    // Result C6-C8
+    mov    [rcx+56], r9 
+    mov    [rcx+64], r10
+    adc    rbx, 0
+    adc    rbp, 0
+    adc    r15, 0
+    mov    [rcx+72], rbx   // Result C9-C11
+    mov    [rcx+80], rbp 
+    mov    [rcx+88], r15    
+    
+    pop    rbp  
+    pop    rbx  
+    pop    r15 
+    pop    r14
+    ret
+
+#else
+
+# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE"
+
+#endif
+
+
+#ifdef _MULX_
+
+///////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication (second part; the caller issues the first two mulx)
+// Inputs:  regs I0, I1 and memory pointer M1; on entry rdx = I0, T1:T0 = I0*M1[0], T2:T4 = I0*M1[1]
+// Outputs: regs T0:T4
+// Temp:    regs T0:T5 and rax (zeroed)
+/////////////////////////////////////////////////////////////////
+
+.macro MUL128x192_SCHOOL I0, I1, M1, T0, T1, T2, T3, T4, T5
+    xor    rax, rax            // rax = 0; also clears the carry flag
+    mulx   \T3, \T5, 16\M1     // T3:T5 = I0*M1[2]; rdx still holds I0 here
+    ADD    \T1, \T4               
+    ADC    \T2, \T5 
+    ADC    \T3, rax   
+    
+    xor    rax, rax
+    mov    rdx, \I1 
+    mulx   \T4, \T5, \M1 
+    ADD    \T1, \T5            // T1 <- C1_final 
+    ADC    \T2, \T4     
+    mulx   \T4, \T5, 8\M1
+    ADC    \T3, rax
+    ADD    \T2, \T5            // T2 <- C2_final  
+    ADC    \T3, \T4       
+    mulx   \T4, \T5, 16\M1  
+    ADC    \T4, rax       
+    ADD    \T3, \T5            // T3 <- C3_final  
+    ADC    \T4, rax            // T4 <- C4_final 
+.endm
+
+  
+//**************************************************************************************
+//  Montgomery reduction, shifted
+//  Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015  
+//  Operation: c [reg_p2] = reduction of a [reg_p1]; reads 12 words of a, writes 6 words of c (c[0-2] also serve as scratch)
+//************************************************************************************** 
+.global fmt(rdc377_asm)
+fmt(rdc377_asm):
+    // fmt() applies the platform symbol prefix to the constant p377p1x2, as it does for the function labels.
+    // a[0-1] x 2xp377p1_nz --> result: r8:r12
+    mov    rdx, [reg_p1]
+    mov    rcx, [reg_p1+8]
+    mulx   r9, r8, [rip+fmt(p377p1x2)]   // result r8 
+    push   r15
+    push   rbp
+    push   r14 
+    push   r12
+    mulx   r10, r12, [rip+fmt(p377p1x2)+8] 
+    push   rbx 
+    push   r13
+    MUL128x192_SCHOOL rdx, rcx, [rip+fmt(p377p1x2)], r8, r9, r10, r11, r12, r13     
+        
+    xor    rdx, rdx
+    shrd   rdx, r8, 1                   // Shift the 5-word product right by one bit; the bit shifted out lands in the top of rdx
+    shrd   r8, r9, 1 
+    shrd   r9, r10, 1 
+    shrd   r10, r11, 1 
+    shrd   r11, r12, 1 
+    shr    r12, 1
+    add    rdx, [reg_p1+16]
+    adc    r8, [reg_p1+24]  
+    adc    r9, [reg_p1+32] 
+    adc    r10, [reg_p1+40]      
+    mulx   rbx, rcx, [rip+fmt(p377p1x2)]   // result rcx 
+    adc    r11, [reg_p1+48] 
+    adc    r12, [reg_p1+56]  
+    mov    [reg_p2], r9                 // Output buffer used as scratch; mov keeps the pending carry intact
+    mov    [reg_p2+8], r10  
+    mov    [reg_p2+16], r11
+    mov    r9, [reg_p1+64]  
+    mov    r10, [reg_p1+72]
+    mov    r11, [reg_p1+80]   
+    mov    rdi, [reg_p1+88]             // Last read through reg_p1; rdi now holds a[11]
+    mulx   rbp, r15, [rip+fmt(p377p1x2)+8] 
+    adc    r9, 0
+    adc    r10, 0
+    adc    r11, 0
+    adc    rdi, 0 
+
+    // a[2-3] x 2xp377p1_nz --> result: rcx, rbx, rbp, r14:r15
+    MUL128x192_SCHOOL rdx, r8, [rip+fmt(p377p1x2)], rcx, rbx, rbp, r14, r15, r13
+
+    xor    rdx, rdx
+    shrd   rdx, rcx, 1 
+    shrd   rcx, rbx, 1 
+    shrd   rbx, rbp, 1 
+    shrd   rbp, r14, 1 
+    shrd   r14, r15, 1 
+    shr    r15, 1
+    add    rdx, [reg_p2] 
+    adc    rcx, [reg_p2+8]  
+    adc    rbx, [reg_p2+16] 
+    mov    [reg_p2+16], rbx  
+    adc    r12, rbp
+    mulx   rbp, rbx, [rip+fmt(p377p1x2)]   // result rbx   
+    adc    r14, r9  
+    adc    r15, r10  
+    mulx   r10, r8, [rip+fmt(p377p1x2)+8] 
+    adc    r11, 0
+    adc    rdi, 0
+
+    // a[4-5] x 2xp377p1_nz --> result: rbx, rbp, r10:r8
+    MUL128x192_SCHOOL rdx, rcx, [rip+fmt(p377p1x2)], rbx, rbp, r10, r9, r8, r13  
+
+    xor    rdx, rdx
+    pop    r13                          // Callee-saved registers are restored interleaved with the final additions
+    shrd   rdx, rbx, 1 
+    shrd   rbx, rbp, 1 
+    shrd   rbp, r10, 1 
+    shrd   r10, r9, 1 
+    shrd   r9, r8, 1 
+    shr    r8, 1
+    add    rdx, [reg_p2+16]
+    adc    rbx, r12 
+    mov    [reg_p2+8], rbx  
+    pop    rbx  
+    pop    r12          
+    adc    rbp, r14    
+    pop    r14  
+    mov    [reg_p2+16], rbp 
+    pop    rbp               
+    adc    r10, r15   
+    pop    r15      
+    mov    [reg_p2+24], r10          
+    adc    r9, r11  
+    adc    r8, rdi
+    mov    [reg_p2], rdx       // Final result c0-c5 
+    mov    [reg_p2+32], r9  
+    mov    [reg_p2+40], r8
+    ret
+
+  #else
+
+  # error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE"
+
+  #endif
+
+
+//***********************************************************************
+//  377-bit multiprecision addition
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2], six 64-bit words; the final carry is not returned
+//*********************************************************************** 
+.global fmt(mp_add377_asm)
+fmt(mp_add377_asm): 
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  add    r8, [reg_p2] 
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  // mov does not modify flags, so the carry from word 3 is still pending for words 4 and 5.
+  mov    r8, [reg_p1+32]
+  mov    r9, [reg_p1+40]
+  adc    r8, [reg_p2+32] 
+  adc    r9, [reg_p2+40] 
+  mov    [reg_p3+32], r8
+  mov    [reg_p3+40], r9
+  ret
+
+
+//***************************************************************************
+//  2x377-bit multiprecision subtraction/addition
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + p377*p377*2^16
+//*************************************************************************** 
+.global fmt(mp_subadd377x2_asm)
+fmt(mp_subadd377x2_asm):
+  push   r12
+  push   r13
+  // Low six words: add the constant p377x16p first, then subtract b. fmt() applies the platform symbol prefix.
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    rcx, [rip+fmt(p377x16p)] 
+  add    r8, rcx 
+  adc    r9, 0                          // Words 1 and 2 of the constant are never loaded; only the carry is propagated
+  adc    r10, 0 
+  mov    rcx, [rip+fmt(p377x16p)+24] 
+  adc    r11, rcx 
+  mov    rcx, [rip+fmt(p377x16p)+32] 
+  adc    r12, rcx 
+  mov    rcx, [rip+fmt(p377x16p)+40] 
+  adc    r13, rcx 
+  setc   al                             // Save the carry of the addition for the high half
+  sub    r8, [reg_p2] 
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  setc   cl                             // Save the borrow of the subtraction for the high half
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+  mov    [reg_p3+40], r13
+  // High six words: same two steps, seeded with the saved carry and borrow.
+  mov    r8, [reg_p1+48]
+  mov    r9, [reg_p1+56] 
+  mov    r10, [reg_p1+64]
+  mov    r11, [reg_p1+72] 
+  mov    r12, [reg_p1+80]
+  mov    r13, [reg_p1+88]
+  bt     rax, 0                         // CF <- saved carry (bit 0 of al)
+  adc    r8, [rip+fmt(p377x16p)+48] 
+  adc    r9, [rip+fmt(p377x16p)+56]
+  adc    r10, [rip+fmt(p377x16p)+64] 
+  adc    r11, [rip+fmt(p377x16p)+72]  
+  adc    r12, [rip+fmt(p377x16p)+80]
+  adc    r13, [rip+fmt(p377x16p)+88]  
+  bt     rcx, 0                         // CF <- saved borrow (bit 0 of cl)
+  sbb    r8, [reg_p2+48] 
+  sbb    r9, [reg_p2+56]
+  sbb    r10, [reg_p2+64] 
+  sbb    r11, [reg_p2+72]  
+  sbb    r12, [reg_p2+80]
+  sbb    r13, [reg_p2+88]
+  
+  mov    [reg_p3+48], r8
+  mov    [reg_p3+56], r9
+  mov    [reg_p3+64], r10
+  mov    [reg_p3+72], r11
+  mov    [reg_p3+80], r12
+  mov    [reg_p3+88], r13
+  
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Double 2x377-bit multiprecision subtraction
+//  Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2], twelve 64-bit words, updated in place
+//*********************************************************************** 
+.global fmt(mp_dblsub377x2_asm)
+fmt(mp_dblsub377x2_asm):
+  push   r12
+  push   r13
+  // Low six words: subtract a, then b; each borrow is saved for the high half.
+  mov    r8, [reg_p3]
+  mov    r9, [reg_p3+8]
+  mov    r10, [reg_p3+16]
+  mov    r11, [reg_p3+24]
+  mov    r12, [reg_p3+32]
+  mov    r13, [reg_p3+40]
+  sub    r8, [reg_p1]
+  sbb    r9, [reg_p1+8] 
+  sbb    r10, [reg_p1+16] 
+  sbb    r11, [reg_p1+24] 
+  sbb    r12, [reg_p1+32] 
+  sbb    r13, [reg_p1+40] 
+  setc   al                             // Save the borrow of c - a
+  sub    r8, [reg_p2]
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40]
+  setc   cl                             // Save the borrow of the second subtraction
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+  mov    [reg_p3+40], r13
+  // High six words: same two subtractions, seeded with the saved borrows.
+  mov    r8, [reg_p3+48]
+  mov    r9, [reg_p3+56]
+  mov    r10, [reg_p3+64]
+  mov    r11, [reg_p3+72]
+  mov    r12, [reg_p3+80]
+  mov    r13, [reg_p3+88]
+  bt     rax, 0                         // CF <- saved borrow (bit 0 of al)
+  sbb    r8, [reg_p1+48] 
+  sbb    r9, [reg_p1+56] 
+  sbb    r10, [reg_p1+64] 
+  sbb    r11, [reg_p1+72] 
+  sbb    r12, [reg_p1+80] 
+  sbb    r13, [reg_p1+88]
+  bt     rcx, 0                         // CF <- saved borrow (bit 0 of cl)
+  sbb    r8, [reg_p2+48] 
+  sbb    r9, [reg_p2+56] 
+  sbb    r10, [reg_p2+64] 
+  sbb    r11, [reg_p2+72] 
+  sbb    r12, [reg_p2+80] 
+  sbb    r13, [reg_p2+88] 
+  mov    [reg_p3+48], r8
+  mov    [reg_p3+56], r9
+  mov    [reg_p3+64], r10
+  mov    [reg_p3+72], r11
+  mov    [reg_p3+80], r12
+  mov    [reg_p3+88], r13
+  
+  pop    r13
+  pop    r12
+  ret
\ No newline at end of file
diff --git a/SIKE_sw/src/P377/P377.c b/SIKE_sw/src/P377/P377.c
new file mode 100644
index 0000000..17a688e
--- /dev/null
+++ b/SIKE_sw/src/P377/P377.c
@@ -0,0 +1,114 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: supersingular isogeny parameters and generation of functions for P377
+*********************************************************************************************/ 
+
+#include "P377_api.h" 
+#include "P377_internal.h"
+#include "../internal.h"
+
+
+// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points:
+// --------------------------------------------------------------------------------------------------
+// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). 
+// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position.
+// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. 
+// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32.
+// For example, a 377-bit field element is represented with Ceil(377 / 64) = 6 64-bit digits or Ceil(377 / 32) = 12 32-bit digits.
+
+//
+// Curve isogeny system "SIDHp377". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p377^2), where A=6, B=1, C=1 and p377 = 2^191*3^117-1
+//
+         
+const uint64_t p377[NWORDS64_FIELD]              = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x0B46D546BC2A5699, 0xA879CC6988CE7CF5, 0x015B702E0C542196 };
+const uint64_t p377x2[NWORDS64_FIELD]            = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x168DAA8D7854AD32, 0x50F398D3119CF9EA, 0x02B6E05C18A8432D }; 
+const uint64_t p377x4[NWORDS64_FIELD]            = { 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x2D1B551AF0A95A65, 0xA1E731A62339F3D4, 0x056DC0B83150865A };  
+const uint64_t p377p1[NWORDS64_FIELD]            = { 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x0B46D546BC2A5699, 0xA879CC6988CE7CF5, 0x015B702E0C542196 };
+const uint64_t p377p1x2[NWORDS64_FIELD/2]        = { 0x168DAA8D7854AD33, 0x50F398D3119CF9EA, 0x02B6E05C18A8432D };
+const uint64_t p377x16p[2*NWORDS64_FIELD]        = { 0x0000000000000010, 0x0000000000000000, 0x0000000000000000, 0x972557287AB52CD0, 0xF0C672CEE630615E, 0xD491FA3E757BCD2A, 
+                                                     0x2830123FBA97E0A3, 0x44E67AC0C81C9117, 0x942C5A8EFDDE690C, 0x63BDE5C206F0021D, 0xAA49E8B73CCD899E, 0x001D7894DFDBF251 }; 
+// Order of Alice's subgroup
+const uint64_t Alice_order[NWORDS64_ORDER]       = { 0x0000000000000000, 0x0000000000000000, 0x8000000000000000 };
+// Order of Bob's subgroup
+const uint64_t Bob_order[NWORDS64_ORDER]         = { 0x168DAA8D7854AD33, 0x50F398D3119CF9EA, 0x02B6E05C18A8432D };
+// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i} in GF(p377^2), expressed in Montgomery representation
+const uint64_t A_gen[6*NWORDS64_FIELD]           = { 0x8AE392AA8312F880, 0xDB7F6BA38CC56011, 0x896F67240AD52C67, 0x21B9C0BD6C0584FF, 0xF064B97DDD0B2BD4, 0x0102EA98B786D4CC,   // XPA0
+                                                     0x583DE90ED3D09845, 0x131B1BDFBBE25620, 0x054B16A62F3D59F1, 0x1C3A458EEFFD4A0B, 0x1FBC000608BE1F7A, 0x00225F4BEEF34209,   // XPA1
+                                                     0x8AA130E98FE00DE5, 0x6B54CC5A0A538778, 0x46D96D4F04F6605D, 0x069A3CAB971973AE, 0x8923D0F2112DA219, 0x0085C1C47AD21A2A,   // XQA0
+                                                     0x50981EA202812D84, 0x61883F048CF1682A, 0x2DBC9EC88567E391, 0xD5E238E99DD189E7, 0x1BFE095BC910EA7D, 0x00203E87957453EB,   // XQA1
+                                                     0x296CA63890082DB3, 0x02E16D4D70C2C55A, 0xD4B8FE9CB9481E99, 0xF95F9798C3BECDFB, 0x71B3A2D8A38CB84B, 0x0118DD7682525B04,   // XRA0
+                                                     0xF64DD26CEC6E9DF5, 0xBC02B5979FF4F94C, 0x5D8B16849129DE49, 0xE44435C64BEFB9E9, 0x1077D183B5A4727B, 0x0019A2DF755CF268 }; // XRA1
+// Bob's generator values {XPB0, XQB0, XRB0 + XRB1*i} in GF(p377^2), expressed in Montgomery representation
+const uint64_t B_gen[6*NWORDS64_FIELD]           = { 0x436424EE3C9446F8, 0xB013A914D96E976D, 0x30C376697D926658, 0xE99792AFAA115E68, 0x935421EF522A946B, 0x0032474AECB8799E,   // XPB0 
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,   // XPB1
+                                                     0x5EDE445E538850BC, 0x5BA7DAD976595394, 0xF01F46B8519CD118, 0x9DFA5CB5B40775A1, 0xC7E535F99811B56B, 0x0025BF8D8B00A170,   // XQB0
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,   // XQB1
+                                                     0xA35AA9C8EA887C42, 0xE5A1BF165361C81A, 0x719BB1C6D6C727C7, 0x348590861EB46882, 0xB57273062A50C238, 0x002C53E0163A1C34,   // XRB0
+                                                     0xF12E87A9F00803D8, 0x49C966997253584C, 0x58BBD82219B363ED, 0x6232DFE1A85929F5, 0xC85434A71BF3CC30, 0x005DE7FAB257510D }; // XRB1
+// Montgomery constant Montgomery_R2 = (2^384)^2 mod p377
+const uint64_t Montgomery_R2[NWORDS64_FIELD]     = { 0x826E131D3839C923, 0x54892C7B7D73E7F7, 0x3F8957D221B867A3, 0xD1217CD71D03BB94, 0xDCCBFB71E3AE5457, 0x00FCC56B6CD4B219 };                                                   
+// Value one in Montgomery representation 
+const uint64_t Montgomery_one[NWORDS64_FIELD]    = { 0x00000000000000BC, 0x0000000000000000, 0x0000000000000000, 0xB7FB600DD0E86746, 0x468DE27F885C3C0B, 0x00D99E2EF237555C };
+
+
+// Fixed parameters for isogeny tree computation
+const unsigned int strat_Alice[MAX_Alice-1] = { 
+38, 26, 15, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 11, 7, 4, 2, 1, 1, 2,
+1, 1, 3, 2, 1, 1, 1, 1, 4, 3, 2, 1, 1, 1, 1, 2, 1, 1, 17, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 
+4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 };
+
+const unsigned int strat_Bob[MAX_Bob-1] = { 
+54, 31, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 15, 8, 4, 2, 1, 1,
+2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 23, 15, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 
+1, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 9, 6, 4, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1 };
+           
+// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions
+#define fpcopy                        fpcopy377
+#define fpzero                        fpzero377
+#define fpadd                         fpadd377
+#define fpsub                         fpsub377
+#define fpneg                         fpneg377
+#define fpdiv2                        fpdiv2_377
+#define fpcorrection                  fpcorrection377
+#define fpmul_mont                    fpmul377_mont
+#define fpsqr_mont                    fpsqr377_mont
+#define fpinv_mont                    fpinv377_mont
+#define fpinv_chain_mont              fpinv377_chain_mont
+#define fp2copy                       fp2copy377
+#define fp2zero                       fp2zero377
+#define fp2add                        fp2add377
+#define fp2sub                        fp2sub377
+#define mp_sub_p2                     mp_sub377_p2
+#define mp_sub_p4                     mp_sub377_p4
+#define sub_p4                        mp_sub_p4
+#define fp2neg                        fp2neg377
+#define fp2div2                       fp2div2_377
+#define fp2correction                 fp2correction377
+#define fp2mul_mont                   fp2mul377_mont
+#define fp2sqr_mont                   fp2sqr377_mont
+#define fp2inv_mont                   fp2inv377_mont
+#define fp2inv_mont_ct                fp2inv377_mont_ct
+#define fp2inv_mont_bingcd            fp2inv377_mont_bingcd
+#define fpequal_non_constant_time     fpequal377_non_constant_time
+#define mp_add_asm                    mp_add377_asm
+#define mp_subaddx2_asm               mp_subadd377x2_asm
+#define mp_dblsubx2_asm               mp_dblsub377x2_asm
+#define crypto_kem_keypair            crypto_kem_keypair_SIKEp377
+#define crypto_kem_enc                crypto_kem_enc_SIKEp377
+#define crypto_kem_dec                crypto_kem_dec_SIKEp377
+#define random_mod_order_A            random_mod_order_A_SIDHp377
+#define random_mod_order_B            random_mod_order_B_SIDHp377
+#define EphemeralKeyGeneration_A      EphemeralKeyGeneration_A_SIDHp377
+#define EphemeralKeyGeneration_B      EphemeralKeyGeneration_B_SIDHp377
+#define EphemeralSecretAgreement_A    EphemeralSecretAgreement_A_SIDHp377
+#define EphemeralSecretAgreement_B    EphemeralSecretAgreement_B_SIDHp377
+
+#include "../fpx.c"
+#include "../ec_isogeny.c"
+#include "../sidh.c"    
+#include "../sike.c"
\ No newline at end of file
diff --git a/SIKE_sw/src/P377/P377_api.h b/SIKE_sw/src/P377/P377_api.h
new file mode 100644
index 0000000..e564224
--- /dev/null
+++ b/SIKE_sw/src/P377/P377_api.h
@@ -0,0 +1,112 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: API header file for P377
+*********************************************************************************************/  
+
+#ifndef P377_API_H
+#define P377_API_H
+    
+
+/*********************** Key encapsulation mechanism API ***********************/
+
+#define CRYPTO_SECRETKEYBYTES     328    // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes
+#define CRYPTO_PUBLICKEYBYTES     288
+#define CRYPTO_BYTES               16    
+#define CRYPTO_CIPHERTEXTBYTES    304    // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes  
+
+// Algorithm name
+#define CRYPTO_ALGNAME "SIKEp377"  
+
+// SIKE's key generation
+// It produces a private key sk and computes the public key pk.
+// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 328 bytes)
+//          public key pk (CRYPTO_PUBLICKEYBYTES = 288 bytes) 
+int crypto_kem_keypair_SIKEp377(unsigned char *pk, unsigned char *sk);
+
+// SIKE's encapsulation
+// Input:   public key pk         (CRYPTO_PUBLICKEYBYTES = 288 bytes)
+// Outputs: shared secret ss      (CRYPTO_BYTES = 16 bytes)
+//          ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 304 bytes)
+int crypto_kem_enc_SIKEp377(unsigned char *ct, unsigned char *ss, const unsigned char *pk);
+
+// SIKE's decapsulation
+// Input:   secret key sk         (CRYPTO_SECRETKEYBYTES = 328 bytes)
+//          ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 304 bytes) 
+// Outputs: shared secret ss      (CRYPTO_BYTES = 16 bytes)
+int crypto_kem_dec_SIKEp377(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);
+
+
+// Encoding of keys for KEM-based isogeny system "SIKEp377" (wire format):
+// ----------------------------------------------------------------------
+// Elements over GF(p377) are encoded in 48 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). 
+// Elements (a+b*i) over GF(p377^2), where a and b are defined over GF(p377), are encoded as {a, b}, with a in the lowest memory portion.
+//
+// Private keys sk consist of the concatenation of a 16-byte random value, a value in the range [0, 2^Floor(Log(2,3^117))-1] and the public key pk. In the SIKE API, 
+// private keys are encoded in 328 octets in little endian format. 
+// Public keys pk consist of 3 elements in GF(p377^2). In the SIKE API, pk is encoded in 288 octets. 
+// Ciphertexts ct consist of the concatenation of a public key value and a 16-byte value. In the SIKE API, ct is encoded in 288 + 16 = 304 octets.  
+// Shared keys ss consist of a value of 16 octets.
+
+
+/*********************** Ephemeral key exchange API ***********************/
+
+#define SIDH_SECRETKEYBYTES_A    24
+#define SIDH_SECRETKEYBYTES_B    24
+#define SIDH_PUBLICKEYBYTES     288
+#define SIDH_BYTES               96
+
+// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys.
+// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016.
+// Extended version available at: http://eprint.iacr.org/2016/859  
+
+// Generation of Alice's secret key 
+// Outputs random value in [0, 2^191 - 1] to be used as Alice's private key
+void random_mod_order_A_SIDHp377(unsigned char* random_digits);
+
+// Generation of Bob's secret key 
+// Outputs random value in [0, 2^Floor(Log(2,3^117)) - 1] to be used as Bob's private key
+void random_mod_order_B_SIDHp377(unsigned char* random_digits);
+
+// Alice's ephemeral public key generation
+// Input:  a private key PrivateKeyA in the range [0, 2^191 - 1], stored in 24 bytes. 
+// Output: the public key PublicKeyA consisting of 3 GF(p377^2) elements encoded in 288 bytes.
+int EphemeralKeyGeneration_A_SIDHp377(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA);
+
+// Bob's ephemeral public key generation
+// It computes the public key PublicKeyB from the private key PrivateKeyB, which is an input (it is passed as const and is not generated here).
+// The private key is an integer in the range [0, 2^Floor(Log(2,3^117)) - 1], stored in 24 bytes. 
+// The public key consists of 3 GF(p377^2) elements encoded in 288 bytes.
+int EphemeralKeyGeneration_B_SIDHp377(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB);
+
+// Alice's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB
+// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^191 - 1], stored in 24 bytes. 
+//         Bob's PublicKeyB consists of 3 GF(p377^2) elements encoded in 288 bytes.
+// Output: a shared secret SharedSecretA that consists of one element in GF(p377^2) encoded in 96 bytes.
+int EphemeralSecretAgreement_A_SIDHp377(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA);
+
+// Bob's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA
+// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^117)) - 1], stored in 24 bytes. 
+//         Alice's PublicKeyA consists of 3 GF(p377^2) elements encoded in 288 bytes.
+// Output: a shared secret SharedSecretB that consists of one element in GF(p377^2) encoded in 96 bytes.
+int EphemeralSecretAgreement_B_SIDHp377(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB);
+
+
+// Encoding of keys for KEX-based isogeny system "SIDHp377" (wire format):
+// ----------------------------------------------------------------------
+// Elements over GF(p377) are encoded in 48 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). 
+// Elements (a+b*i) over GF(p377^2), where a and b are defined over GF(p377), are encoded as {a, b}, with a in the lowest memory portion.
+//
+// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^191-1] and [0, 2^Floor(Log(2,3^117)) - 1], resp. In the SIDH API, 
+// Alice's and Bob's private keys are encoded in 24 octets in little endian format. 
+// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p377^2). In the SIDH API, they are encoded in 288 octets. 
+// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p377^2). In the SIDH API, they are encoded in 96 octets.
+
+
+#endif
\ No newline at end of file
diff --git a/SIKE_sw/src/P377/P377_internal.h b/SIKE_sw/src/P377/P377_internal.h
new file mode 100644
index 0000000..896dbfb
--- /dev/null
+++ b/SIKE_sw/src/P377/P377_internal.h
@@ -0,0 +1,165 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: internal header file for P377
+*********************************************************************************************/  
+
+#ifndef P377_INTERNAL_H
+#define P377_INTERNAL_H
+
+#include "../config.h"
+ 
+
+#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_ARM64)
+    #define NWORDS_FIELD    6               // Number of words of a 377-bit field element
+    #define p377_ZERO_WORDS 2               // Number of "0" digits in the least significant part of p377 + 1     
+#elif (TARGET == TARGET_x86)
+    #define NWORDS_FIELD    12 
+    #define p377_ZERO_WORDS 5
+#endif
+    
+
+// Basic constants
+
+#define NBITS_FIELD             377  
+#define MAXBITS_FIELD           384                
+#define MAXWORDS_FIELD          ((MAXBITS_FIELD+RADIX-1)/RADIX)     // Max. number of words to represent field elements
+#define NWORDS64_FIELD          ((NBITS_FIELD+63)/64)               // Number of 64-bit words of a 377-bit field element 
+#define NBITS_ORDER             192
+#define NWORDS_ORDER            ((NBITS_ORDER+RADIX-1)/RADIX)       // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp.
+#define NWORDS64_ORDER          ((NBITS_ORDER+63)/64)               // Number of 64-bit words of a 192-bit element 
+#define MAXBITS_ORDER           NBITS_ORDER                         
+#define ALICE                   0
+#define BOB                     1 
+#define OALICE_BITS             191  
+#define OBOB_BITS               186     
+#define OBOB_EXPON              117    
+#define MASK_ALICE              0x7F
+#define MASK_BOB                0x01 
+#define PRIME                   p377 
+#define PARAM_A                 6  
+#define PARAM_C                 1
+// Fixed parameters for isogeny tree computation
+#define MAX_INT_POINTS_ALICE    7                 
+#define MAX_INT_POINTS_BOB      8      
+#define MAX_Alice               95
+#define MAX_Bob                 117
+#define MSG_BYTES               16
+#define SECRETKEY_A_BYTES       ((OALICE_BITS + 7) / 8)
+#define SECRETKEY_B_BYTES       ((OBOB_BITS - 1 + 7) / 8)
+#define FP2_ENCODED_BYTES       2*((NBITS_FIELD + 7) / 8)
+
+
+// SIDH's basic element definitions and point representations
+
+typedef digit_t felm_t[NWORDS_FIELD];                                 // Datatype for representing 377-bit field elements (384-bit max.)
+typedef digit_t dfelm_t[2*NWORDS_FIELD];                              // Datatype for representing double-precision 2x377-bit field elements (2x384-bit max.) 
+typedef felm_t  f2elm_t[2];                                           // Datatype for representing quadratic extension field elements GF(p377^2)
+        
+typedef struct { f2elm_t X; f2elm_t Z; } point_proj;                  // Point representation in projective XZ Montgomery coordinates.
+typedef point_proj point_proj_t[1];
+
+
+
+/**************** Function prototypes ****************/
+/************* Multiprecision functions **************/
+
+// 377-bit multiprecision addition, c = a+b
+void mp_add377(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_add377_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// 377-bit multiprecision subtraction, c = a-b+2p or c = a-b+4p
+extern void mp_sub377_p2(const digit_t* a, const digit_t* b, digit_t* c);
+extern void mp_sub377_p4(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_sub377_p2_asm(const digit_t* a, const digit_t* b, digit_t* c); 
+void mp_sub377_p4_asm(const digit_t* a, const digit_t* b, digit_t* c); 
+
+// 2x377-bit multiprecision subtraction followed by addition with p377*2^384, c = a-b+(p377*2^384) if a-b < 0, otherwise c=a-b 
+void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_subadd377x2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Double 2x377-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
+void mp_dblsub377x2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+/************ Field arithmetic functions *************/
+
+// Copy of a field element, c = a
+void fpcopy377(const digit_t* a, digit_t* c);
+
+// Zeroing a field element, a = 0
+void fpzero377(digit_t* a);
+
+// Non constant-time comparison of two field elements. If a = b return TRUE, otherwise, return FALSE
+bool fpequal377_non_constant_time(const digit_t* a, const digit_t* b); 
+
+// Modular addition, c = a+b mod p377
+extern void fpadd377(const digit_t* a, const digit_t* b, digit_t* c);
+extern void fpadd377_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Modular subtraction, c = a-b mod p377
+extern void fpsub377(const digit_t* a, const digit_t* b, digit_t* c);
+extern void fpsub377_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Modular negation, a = -a mod p377        
+extern void fpneg377(digit_t* a);  
+
+// Modular division by two, c = a/2 mod p377.
+void fpdiv2_377(const digit_t* a, digit_t* c);
+
+// Modular correction to reduce field element a in [0, 2*p377-1] to [0, p377-1].
+void fpcorrection377(digit_t* a);
+
+// 377-bit Montgomery reduction, c = a mod p
+void rdc377_asm(digit_t* ma, digit_t* mc);
+            
+// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p377, where R=2^384
+void fpmul377_mont(const digit_t* a, const digit_t* b, digit_t* c);
+void mul377_asm(const digit_t* a, const digit_t* b, digit_t* c);
+   
+// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p377, where R=2^384
+void fpsqr377_mont(const digit_t* ma, digit_t* mc);
+
+// Field inversion, a = a^-1 in GF(p377)
+void fpinv377_mont(digit_t* a);
+
+// Chain to compute a^((p377-3)/4) using Montgomery arithmetic
+void fpinv377_chain_mont(digit_t* a);
+
+/************ GF(p^2) arithmetic functions *************/
+    
+// Copy of a GF(p377^2) element, c = a
+void fp2copy377(const f2elm_t a, f2elm_t c);
+
+// Zeroing a GF(p377^2) element, a = 0
+void fp2zero377(f2elm_t a);
+
+// GF(p377^2) negation, a = -a in GF(p377^2)
+void fp2neg377(f2elm_t a);
+
+// GF(p377^2) addition, c = a+b in GF(p377^2)
+extern void fp2add377(const f2elm_t a, const f2elm_t b, f2elm_t c);           
+
+// GF(p377^2) subtraction, c = a-b in GF(p377^2)
+extern void fp2sub377(const f2elm_t a, const f2elm_t b, f2elm_t c); 
+
+// GF(p377^2) division by two, c = a/2  in GF(p377^2) 
+void fp2div2_377(const f2elm_t a, f2elm_t c);
+
+// Modular correction, a = a in GF(p377^2)
+void fp2correction377(f2elm_t a);
+            
+// GF(p377^2) squaring using Montgomery arithmetic, c = a^2 in GF(p377^2)
+void fp2sqr377_mont(const f2elm_t a, f2elm_t c);
+ 
+// GF(p377^2) multiplication using Montgomery arithmetic, c = a*b in GF(p377^2)
+void fp2mul377_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
+
+// GF(p377^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
+void fp2inv377_mont(f2elm_t a);
+
+
+#endif
diff --git a/SIKE_sw/src/P377/generic/fp_generic.c b/SIKE_sw/src/P377/generic/fp_generic.c
new file mode 100644
index 0000000..57d1dc7
--- /dev/null
+++ b/SIKE_sw/src/P377/generic/fp_generic.c
@@ -0,0 +1,259 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: portable modular arithmetic for P377
+*********************************************************************************************/
+
+#include "../P377_internal.h"
+#include "../../internal.h"
+
+// Global constants
+extern const uint64_t p377[NWORDS64_FIELD];
+extern const uint64_t p377p1[NWORDS64_FIELD]; 
+extern const uint64_t p377x2[NWORDS64_FIELD]; 
+extern const uint64_t p377x4[NWORDS64_FIELD];
+
+
+__inline void mp_sub377_p2(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. 
+    unsigned int i, borrow = 0;
+    // Pass 1: c = a - b over NWORDS_FIELD digits (SUBC is defined elsewhere; assumed order: borrow-in, x, y, borrow-out, result).
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    // Pass 2: c = c + p377x2. The borrow left by pass 1 is discarded and so is the carry out of this pass.
+    borrow = 0;                                                     // reused as the carry of the addition
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p377x2)[i], borrow, c[i]);    // p377x2 is declared as uint64_t[] and read here as digit_t words
+    }
+} 
+
+
+__inline void mp_sub377_p4(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p. 
+    unsigned int i, borrow = 0;
+    // Pass 1: c = a - b over NWORDS_FIELD digits (SUBC is defined elsewhere; assumed order: borrow-in, x, y, borrow-out, result).
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    // Pass 2: c = c + p377x4. The borrow left by pass 1 is discarded and so is the carry out of this pass.
+    borrow = 0;                                                     // reused as the carry of the addition
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p377x4)[i], borrow, c[i]);    // p377x4 is declared as uint64_t[] and read here as digit_t words
+    }
+}  
+
+
+__inline void fpadd377(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular addition, c = a+b mod p377.
+  // Inputs: a, b in [0, 2*p377-1] 
+  // Output: c in [0, 2*p377-1] 
+    unsigned int i, carry = 0;
+    digit_t mask;
+    // Step 1: c = a + b (the carry out of the top digit is discarded).
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], b[i], carry, c[i]); 
+    }
+    // Step 2: c = c - 2*p377; "carry" now acts as the borrow of the subtraction.
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(carry, c[i], ((digit_t*)p377x2)[i], carry, c[i]); 
+    }
+    mask = 0 - (digit_t)carry;          // all ones if step 2 borrowed (a+b < 2*p377), zero otherwise
+    // Step 3: add back (2*p377 & mask), undoing step 2 only when it borrowed; no data-dependent branch is used.
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, c[i], ((digit_t*)p377x2)[i] & mask, carry, c[i]); 
+    }
+} 
+
+
+__inline void fpsub377(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular subtraction, c = a-b mod p377.
+  // Inputs: a, b in [0, 2*p377-1] 
+  // Output: c in [0, 2*p377-1] 
+    unsigned int i, borrow = 0;
+    digit_t mask;
+    // Step 1: c = a - b, keeping the final borrow.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    mask = 0 - (digit_t)borrow;         // all ones if a < b (step 1 borrowed), zero otherwise
+    // Step 2: add (2*p377 & mask) so a negative difference is brought back into range; no data-dependent branch is used.
+    borrow = 0;                         // reused as the carry of the addition
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p377x2)[i] & mask, borrow, c[i]); 
+    }
+}
+
+
+__inline void fpneg377(digit_t* a)
+{ // Modular negation, a = -a mod p377.
+  // Input/output: a in [0, 2*p377-1] 
+    unsigned int i, borrow = 0;
+    // Computes a = 2*p377 - a in place, digit by digit; the final borrow is not used.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, ((digit_t*)p377x2)[i], a[i], borrow, a[i]); 
+    }
+}
+
+
+void fpdiv2_377(const digit_t* a, digit_t* c)
+{ // Modular division by two, c = a/2 mod p377.
+  // Input : a in [0, 2*p377-1] 
+  // Output: c in [0, 2*p377-1] 
+    unsigned int i, carry = 0;
+    digit_t mask;
+    // mask is all ones when the lowest bit of a is set, zero otherwise, so the addition below is branch-free.
+    mask = 0 - (digit_t)(a[0] & 1);    // If a is odd compute a+p377
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], ((digit_t*)p377)[i] & mask, carry, c[i]);   // c = a + (p377 & mask); the final carry is not used
+    }
+    // mp_shiftr1 is defined elsewhere; presumably it shifts the NWORDS_FIELD-digit value right by one bit (the division by 2).
+    mp_shiftr1(c, NWORDS_FIELD);
+} 
+
+
+void fpcorrection377(digit_t* a)
+{ // Modular correction to reduce field element a in [0, 2*p377-1] to [0, p377-1].
+    unsigned int i, borrow = 0;
+    digit_t mask;
+    // Step 1: a = a - p377 in place, keeping the final borrow.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], ((digit_t*)p377)[i], borrow, a[i]); 
+    }
+    mask = 0 - (digit_t)borrow;         // all ones if a was below p377 (step 1 borrowed), zero otherwise
+    // Step 2: add back (p377 & mask), restoring a only when step 1 borrowed; no data-dependent branch is used.
+    borrow = 0;                         // reused as the carry of the addition
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, a[i], ((digit_t*)p377)[i] & mask, borrow, a[i]); 
+    }
+}
+
+
+void digit_x_digit(const digit_t a, const digit_t b, digit_t* c)
+{ // Digit multiplication, digit * digit -> 2-digit result    
+    register digit_t al, ah, bl, bh, temp;
+    digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
+    digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4);
+    // sizeof(digit_t)*4 is half the bit width of digit_t: a and b are split into half-digits and multiplied schoolbook-style.
+    al = a & mask_low;                        // Low part
+    ah = a >> (sizeof(digit_t) * 4);          // High part
+    bl = b & mask_low;
+    bh = b >> (sizeof(digit_t) * 4);
+    // Four partial products of half-digits; each one fits in a full digit.
+    albl = al*bl;
+    albh = al*bh;
+    ahbl = ah*bl;
+    ahbh = ah*bh;
+    c[0] = albl & mask_low;                   // C00
+    // Second half-digit column: high half of al*bl plus the low halves of the two cross products.
+    res1 = albl >> (sizeof(digit_t) * 4);
+    res2 = ahbl & mask_low;
+    res3 = albh & mask_low;  
+    temp = res1 + res2 + res3;
+    carry = temp >> (sizeof(digit_t) * 4);    // overflow of this column, carried into the next one
+    c[0] ^= temp << (sizeof(digit_t) * 4);    // C01   
+    // Third half-digit column: high halves of the cross products, low half of ah*bh, and the incoming carry.
+    res1 = ahbl >> (sizeof(digit_t) * 4);
+    res2 = albh >> (sizeof(digit_t) * 4);
+    res3 = ahbh & mask_low;
+    temp = res1 + res2 + res3 + carry;
+    c[1] = temp & mask_low;                   // C10 
+    carry = temp & mask_high;                 // overflow of this column, already positioned in the high half
+    c[1] ^= (ahbh & mask_high) + carry;       // C11
+}
+
+
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords.   
+    unsigned int i, j;
+    digit_t t = 0, u = 0, v = 0, UV[2];       // (t,u,v) is a three-digit column accumulator, v being the least significant digit
+    unsigned int carry = 0;
+    // Lower columns i = 0..nwords-1: accumulate every a[j]*b[i-j] with 0 <= j <= i.
+    for (i = 0; i < nwords; i++) {
+        for (j = 0; j <= i; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]);   // MUL is defined elsewhere; assumed to leave the high digit in UV[1] and the low digit in UV[0]
+            ADDC(0, UV[0], v, carry, v); 
+            ADDC(carry, UV[1], u, carry, u); 
+            t += carry;
+        }
+        c[i] = v;                             // emit the finished column, then shift the accumulator down by one digit
+        v = u; 
+        u = t;
+        t = 0;
+    }
+    // Upper columns i = nwords..2*nwords-2: j starts at i-nwords+1 so that b[i-j] stays within b.
+    for (i = nwords; i < 2*nwords-1; i++) {
+        for (j = i-nwords+1; j < nwords; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]); 
+            ADDC(0, UV[0], v, carry, v); 
+            ADDC(carry, UV[1], u, carry, u); 
+            t += carry;
+        }
+        c[i] = v;
+        v = u; 
+        u = t;
+        t = 0;
+    }
+    c[2*nwords-1] = v;                        // most significant digit of the 2*nwords-digit product
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p377.
+  // mc = ma*R^-1 mod p377x2, where R = 2^384.
+  // If ma < 2^384*p377, the output mc is in the range [0, 2*p377-1].
+  // ma is assumed to be in Montgomery representation.
+    unsigned int i, j, carry, count = p377_ZERO_WORDS;
+    digit_t UV[2], t = 0, u = 0, v = 0;       // (t,u,v) is a three-digit column accumulator, v being the least significant digit
+    // mc is cleared first; during the first pass mc[j] holds the low digit of column j and is reused as a multiplier.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        mc[i] = 0;
+    }
+    // First pass, columns 0..NWORDS_FIELD-1: accumulate mc[j]*p377p1[i-j], then add ma[i].
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        for (j = 0; j < i; j++) {
+            if (j < (i-p377_ZERO_WORDS+1)) {  // keeps only terms with i-j >= p377_ZERO_WORDS; NOTE(review): unsigned, wraps when i < p377_ZERO_WORDS-1 - confirm harmless
+                MUL(mc[j], ((digit_t*)p377p1)[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v); 
+                ADDC(carry, UV[1], u, carry, u); 
+                t += carry; 
+            }
+        }
+        ADDC(0, v, ma[i], carry, v); 
+        ADDC(carry, u, 0, carry, u); 
+        t += carry; 
+        mc[i] = v;                            // low digit of this column, used as a multiplier in later columns
+        v = u;
+        u = t;
+        t = 0;
+    }    
+    // Second pass, columns NWORDS_FIELD..2*NWORDS_FIELD-2: result digits overwrite mc[] from index 0 upwards.
+    for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {
+        if (count > 0) {
+            count -= 1;                       // count steps from p377_ZERO_WORDS down to 0 and narrows the guard below
+        }
+        for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
+            if (j < (NWORDS_FIELD-count)) {   // skips the terms cut off by count (presumably those hitting zero low words of p377p1)
+                MUL(mc[j], ((digit_t*)p377p1)[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v); 
+                ADDC(carry, UV[1], u, carry, u); 
+                t += carry;
+            }
+        }
+        ADDC(0, v, ma[i], carry, v); 
+        ADDC(carry, u, 0, carry, u); 
+        t += carry; 
+        mc[i-NWORDS_FIELD] = v;               // this index is below every mc[j] still to be read (j >= i-NWORDS_FIELD+1)
+        v = u;
+        u = t;
+        t = 0;
+    }
+    ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v);   // top digit: add the last input word; the carry out is not used
+    mc[NWORDS_FIELD-1] = v;
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/P434/AMD64/fp_x64.c b/SIKE_sw/src/P434/AMD64/fp_x64.c
new file mode 100644
index 0000000..8b18850
--- /dev/null
+++ b/SIKE_sw/src/P434/AMD64/fp_x64.c
@@ -0,0 +1,491 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: modular arithmetic optimized for x64 platforms for P434
+*********************************************************************************************/
+
+#include "../P434_internal.h"
+#include "../../internal.h"
+
+// Global constants (defined in another translation unit; NWORDS_FIELD digits each)
+extern const uint64_t p434[NWORDS_FIELD];      // The prime p434 (used by fpdiv2_434 and fpcorrection434)
+extern const uint64_t p434p1[NWORDS_FIELD];    // Presumably p434 + 1; used by rdc_mont -- confirm at the definition
+extern const uint64_t p434x2[NWORDS_FIELD];    // 2*p434 (used as the 2p correction term)
+extern const uint64_t p434x4[NWORDS_FIELD];    // 4*p434 (used as the 4p correction term)
+
+
+__inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Computes c = a - b + 2*p434 over NWORDS_FIELD digits; no conditional correction is applied.
+#if (OS_TARGET == OS_WIN)
+    const digit_t* twop = (const digit_t*)p434x2;
+    unsigned int k, flag = 0;
+    // Pass 1: c = a - b, with the borrow rippling upwards from digit 0.
+    for (k = 0; k != NWORDS_FIELD; ++k) {
+        SUBC(flag, a[k], b[k], flag, c[k]);
+    }
+    // Pass 2: c = c + 2*p434, restarting the chain with a clear carry.
+    flag = 0;
+    for (k = 0; k != NWORDS_FIELD; ++k) {
+        ADDC(flag, c[k], twop[k], flag, c[k]);
+    }
+#elif (OS_TARGET == OS_LINUX)
+    // Linux targets defer to the assembly routine.
+    mp_sub434_p2_asm(a, b, c);
+
+#endif
+}
+
+
+__inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Computes c = a - b + 4*p434 over NWORDS_FIELD digits; no conditional correction is applied.
+#if (OS_TARGET == OS_WIN)
+    const digit_t* fourp = (const digit_t*)p434x4;
+    unsigned int k, flag = 0;
+    // Pass 1: c = a - b, with the borrow rippling upwards from digit 0.
+    for (k = 0; k != NWORDS_FIELD; ++k) {
+        SUBC(flag, a[k], b[k], flag, c[k]);
+    }
+    // Pass 2: c = c + 4*p434, restarting the chain with a clear carry.
+    flag = 0;
+    for (k = 0; k != NWORDS_FIELD; ++k) {
+        ADDC(flag, c[k], fourp[k], flag, c[k]);
+    }
+#elif (OS_TARGET == OS_LINUX)
+    // Linux targets defer to the assembly routine.
+    mp_sub434_p4_asm(a, b, c);
+
+#endif
+}
+
+
+__inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Field addition, c = a+b mod p434, with the result kept below 2*p434.
+  // Inputs: a, b in [0, 2*p434-1] 
+  // Output: c in [0, 2*p434-1] 
+  // The correction step is selected with a mask instead of a branch.
+#if (OS_TARGET == OS_WIN)
+    const digit_t* twop = (const digit_t*)p434x2;
+    unsigned int k, flag = 0;
+    digit_t sel;
+    // Step 1: c = a + b
+    for (k = 0; k != NWORDS_FIELD; ++k) {
+        ADDC(flag, a[k], b[k], flag, c[k]);
+    }
+    // Step 2: c = c - 2*p434; the final borrow records whether this went negative
+    flag = 0;
+    for (k = 0; k != NWORDS_FIELD; ++k) {
+        SUBC(flag, c[k], twop[k], flag, c[k]);
+    }
+    sel = (digit_t)0 - (digit_t)flag;    // all ones on borrow, zero otherwise
+    // Step 3: c = c + (2*p434 & sel), i.e. undo step 2 only when it borrowed
+    flag = 0;
+    for (k = 0; k != NWORDS_FIELD; ++k) {
+        ADDC(flag, c[k], twop[k] & sel, flag, c[k]);
+    }
+#elif (OS_TARGET == OS_LINUX)
+    // Linux targets defer to the assembly routine.
+    fpadd434_asm(a, b, c);
+
+#endif
+}
+
+
+__inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Field subtraction, c = a-b mod p434: 2*p434 is added back when a-b is negative.
+  // Inputs: a, b in [0, 2*p434-1] 
+  // Output: c in [0, 2*p434-1] 
+  // The correction step is selected with a mask instead of a branch.
+#if (OS_TARGET == OS_WIN)
+    const digit_t* twop = (const digit_t*)p434x2;
+    unsigned int k, flag = 0;
+    digit_t sel;
+    // Step 1: c = a - b; the final borrow records whether the difference is negative
+    for (k = 0; k != NWORDS_FIELD; ++k) {
+        SUBC(flag, a[k], b[k], flag, c[k]);
+    }
+    sel = (digit_t)0 - (digit_t)flag;    // all ones on borrow, zero otherwise
+    // Step 2: c = c + (2*p434 & sel)
+    flag = 0;
+    for (k = 0; k != NWORDS_FIELD; ++k) {
+        ADDC(flag, c[k], twop[k] & sel, flag, c[k]);
+    }
+#elif (OS_TARGET == OS_LINUX)
+    // Linux targets defer to the assembly routine.
+    fpsub434_asm(a, b, c);
+
+#endif
+}
+
+
+__inline void fpneg434(digit_t* a)
+{ // In-place modular negation, computed as a = 2*p434 - a.
+  // Input/output: a in [0, 2*p434-1] 
+    const digit_t* twop = (const digit_t*)p434x2;
+    unsigned int k, flag = 0;
+    for (k = 0; k != NWORDS_FIELD; ++k) {
+        SUBC(flag, twop[k], a[k], flag, a[k]);    // borrow ripples upwards from digit 0
+    }
+}
+
+
+void fpdiv2_434(const digit_t* a, digit_t* c)
+{ // Halving modulo p434, c = a/2 mod p434.
+  // Input : a in [0, 2*p434-1] 
+  // Output: c in [0, 2*p434-1] 
+    const digit_t* prime = (const digit_t*)p434;
+    const digit_t odd = (digit_t)0 - (digit_t)(a[0] & 1);    // all ones when a is odd, zero otherwise
+    unsigned int k, flag = 0;
+    // c = a + (p434 & odd): adds p434 only when a is odd, selected by mask rather than branch
+    for (k = 0; k != NWORDS_FIELD; ++k) {
+        ADDC(flag, a[k], prime[k] & odd, flag, c[k]);
+    }
+    // Halve the sum (mp_shiftr1 is presumably a one-bit right shift over NWORDS_FIELD digits)
+    mp_shiftr1(c, NWORDS_FIELD);
+}
+
+
+void fpcorrection434(digit_t* a)
+{ // In-place final reduction of a field element a from [0, 2*p434-1] to [0, p434-1].
+    const digit_t* prime = (const digit_t*)p434;
+    unsigned int k, flag = 0;
+    digit_t sel;
+    // a = a - p434; the final borrow records whether a was already below p434
+    for (k = 0; k != NWORDS_FIELD; ++k) {
+        SUBC(flag, a[k], prime[k], flag, a[k]);
+    }
+    sel = (digit_t)0 - (digit_t)flag;    // all ones on borrow, zero otherwise
+    flag = 0;
+    for (k = 0; k != NWORDS_FIELD; ++k) {
+        ADDC(flag, a[k], prime[k] & sel, flag, a[k]);    // add p434 back only when the subtraction borrowed
+    }
+}
+
+
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords. Reads a[0..6], b[0..6] and writes c[0..13].
+        
+    (void)nwords;    // Unused: both code paths below are fixed-size
+
+#if (OS_TARGET == OS_WIN)
+    digit_t t = 0;              // Third (top) digit of the column accumulator: counts carries out of uv
+    uint128_t uv = {0};         // Two low digits of the column accumulator
+    unsigned int carry = 0;
+        
+    MULADD128(a[0], b[0], uv, carry, uv);
+    t += carry;
+    c[0] = uv[0];               // Column 0 done; shift accumulator (t,uv[1],uv[0]) down one digit
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[0], uv, carry, uv);
+    t += carry;
+    c[1] = uv[0];               // Column 1: products a[i]*b[j] with i+j = 1
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[0], uv, carry, uv);
+    t += carry;
+    c[2] = uv[0];               // Column 2
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[0], uv, carry, uv);
+    t += carry;
+    c[3] = uv[0];               // Column 3
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[0], uv, carry, uv);
+    t += carry;
+    c[4] = uv[0];               // Column 4
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[0], uv, carry, uv);
+    t += carry;
+    c[5] = uv[0];               // Column 5
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[0], uv, carry, uv);
+    t += carry;
+    c[6] = uv[0];               // Column 6: the widest column (7 products)
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[6], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[6], uv, carry, uv);
+    t += carry;
+    c[7] = uv[0];               // Column 7: from here on the columns shrink by one product each
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[6], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[6], uv, carry, uv);
+    t += carry;
+    c[8] = uv[0];               // Column 8
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[6], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[6], uv, carry, uv);
+    t += carry;
+    c[9] = uv[0];               // Column 9
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[6], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[6], uv, carry, uv);
+    t += carry;
+    c[10] = uv[0];              // Column 10
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[6], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[6], uv, carry, uv);
+    t += carry;
+    c[11] = uv[0];              // Column 11
+    uv[0] = uv[1];
+    uv[1] = t;
+    
+    MULADD128(a[6], b[6], uv, carry, uv);    // Last product; the carry out is not used
+    c[12] = uv[0];
+    c[13] = uv[1];
+
+#elif (OS_TARGET == OS_LINUX)
+    
+    mul434_asm(a, b, c);        // Linux targets defer to the assembly routine
+
+#endif
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Montgomery reduction exploiting special form of the prime.
+  // mc = ma*R^-1 mod p434x2, where R = 2^448.
+  // If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
+  // ma is assumed to be in Montgomery representation. Reads ma[0..13] and writes mc[0..6].
+        
+#if (OS_TARGET == OS_WIN)
+    unsigned int carry;
+    digit_t t = 0;              // Third (top) digit of the column accumulator
+    uint128_t uv = {0};         // Two low digits of the column accumulator
+    
+    mc[0] = ma[0];              // Columns 0-2 are copied unchanged: only p434p1[3..6] is ever used below
+    mc[1] = ma[1];
+    mc[2] = ma[2];
+    MUL128(mc[0], ((digit_t*)p434p1)[3], uv);
+    ADDC(0, uv[0], ma[3], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    mc[3] = uv[0];              // Column 3
+    uv[0] = uv[1];
+    uv[1] = 0;
+
+    MULADD128(mc[0], ((digit_t*)p434p1)[4], uv, carry, uv);    // NOTE(review): this carry is not added to t; presumably none can occur since uv[1] == 0 on entry -- confirm against MULADD128
+    MULADD128(mc[1], ((digit_t*)p434p1)[3], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[4], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[4] = uv[0];              // Column 4
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p434p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p434p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p434p1)[3], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[5], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[5] = uv[0];              // Column 5
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p434p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p434p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p434p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p434p1)[3], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[6], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[6] = uv[0];              // Column 6: last digit of the intermediate values
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[1], ((digit_t*)p434p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p434p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p434p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p434p1)[3], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[7], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[0] = uv[0];              // Column 7 -> result digit 0; mc[0] is not read again, so it is reused in place
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[2], ((digit_t*)p434p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p434p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p434p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p434p1)[3], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[8], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[1] = uv[0];              // Column 8 -> result digit 1 (mc[1] no longer needed as an intermediate)
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[3], ((digit_t*)p434p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p434p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p434p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p434p1)[3], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[9], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[2] = uv[0];              // Column 9 -> result digit 2
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[4], ((digit_t*)p434p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p434p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p434p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[10], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[3] = uv[0];              // Column 10 -> result digit 3
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[5], ((digit_t*)p434p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p434p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[11], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[4] = uv[0];              // Column 11 -> result digit 4
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[6], ((digit_t*)p434p1)[6], uv, carry, uv);
+    t += carry;                 // t is not read after this point
+    ADDC(0, uv[0], ma[12], carry, mc[5]);        // Columns 12 and 13 -> result digits 5 and 6
+    ADDC(carry, uv[1], ma[13], carry, mc[6]);    // The final carry out is not used
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    
+    rdc434_asm(ma, mc);         // Linux targets defer to the assembly routine
+
+#endif
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/P434/AMD64/fp_x64_asm.S b/SIKE_sw/src/P434/AMD64/fp_x64_asm.S
new file mode 100644
index 0000000..b04c0e5
--- /dev/null
+++ b/SIKE_sw/src/P434/AMD64/fp_x64_asm.S
@@ -0,0 +1,1024 @@
+//*******************************************************************************************
+// SIDH: an efficient supersingular isogeny cryptography library 
+// Copyright (c) Microsoft Corporation
+//
+// Website: https://github.com/microsoft/PQCrypto-SIDH
+// Released under MIT license 
+//
+// Abstract: field arithmetic in x64 assembly for P434 on Linux
+//*******************************************************************************************  
+
+.intel_syntax noprefix 
+
+// Format function and variable names for Mac OS X: fmt(f) prepends an underscore when __APPLE__ is defined, otherwise it expands to f unchanged
+#if defined(__APPLE__)
+    #define fmt(f)    _##f
+#else
+    #define fmt(f)    f
+#endif
+
+// Registers that are used for parameter passing (first, second and third pointer argument of each routine):
+#define reg_p1  rdi
+#define reg_p2  rsi
+#define reg_p3  rdx
+
+// Define addition instructions (only when _MULX_ is defined). With _ADX_: ADD1/ADC1 -> adox (OF chain), ADD2/ADC2 -> adcx (CF chain). Without _ADX_: plain add/adc.
+#ifdef _MULX_
+#ifdef _ADX_
+
+#define ADD1    adox
+#define ADC1    adox
+#define ADD2    adcx
+#define ADC2    adcx
+
+#else
+
+#define ADD1    add
+#define ADC1    adc
+#define ADD2    add
+#define ADC2    adc
+
+#endif    
+#endif
+
+
+.text
+//***********************************************************************
+//  Field addition: computes a + b - 2*p434, then adds 2*p434 back (masked) if that subtraction borrowed
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
+//*********************************************************************** 
+.global fmt(fpadd434_asm)
+fmt(fpadd434_asm):
+  push   r12                          // Save the registers restored at the end of the routine
+  push   r13
+  push   r14
+  push   r15
+  push   rbx
+  push   rbp
+  
+  xor    rax, rax                     // rax will become the correction mask
+  mov    r8, [reg_p1]                 // r8-r14 <- a (7 words)
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  add    r8, [reg_p2]                 // r8-r14 <- a + b
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  adc    r12, [reg_p2+32] 
+  adc    r13, [reg_p2+40] 
+  adc    r14, [reg_p2+48]
+
+  mov    rbx, [rip+fmt(p434x2)]       // r8-r14 <- a + b - 2*p434 (mov does not alter the borrow chain)
+  sub    r8, rbx
+  mov    rcx, [rip+fmt(p434x2)+8]
+  sbb    r9, rcx
+  sbb    r10, rcx                     // Word 1 is reused for word 2 (word 2 of p434x2 is never loaded; assumes both are equal)
+  mov    rdi, [rip+fmt(p434x2)+24]    // reg_p1 (rdi) is overwritten here; a has been fully loaded
+  sbb    r11, rdi
+  mov    rsi, [rip+fmt(p434x2)+32]    // reg_p2 (rsi) is overwritten here; b has been fully consumed
+  sbb    r12, rsi
+  mov    rbp, [rip+fmt(p434x2)+40]
+  sbb    r13, rbp
+  mov    r15, [rip+fmt(p434x2)+48]
+  sbb    r14, r15
+  sbb    rax, 0                       // rax <- all ones if the subtraction borrowed, else 0
+  
+  and    rbx, rax                     // Mask the words of 2*p434 with rax
+  and    rcx, rax
+  and    rdi, rax
+  and    rsi, rax
+  and    rbp, rax
+  and    r15, rax
+  
+  add    r8, rbx                      // r8-r14 <- result + (2*p434 & mask)
+  adc    r9, rcx  
+  adc    r10, rcx  
+  adc    r11, rdi 
+  adc    r12, rsi 
+  adc    r13, rbp   
+  adc    r14, r15
+  mov    [reg_p3], r8                 // Store c (7 words)
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12 
+  mov    [reg_p3+40], r13 
+  mov    [reg_p3+48], r14
+  
+  pop    rbp                          // Restore in reverse push order
+  pop    rbx
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Field subtraction: computes a - b and adds 2*p434 back (masked) if the subtraction borrowed
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
+//*********************************************************************** 
+.global fmt(fpsub434_asm)
+fmt(fpsub434_asm):
+  push   r12
+  push   r13
+  push   r14
+  
+  xor    rax, rax                     // rax will become the correction mask
+  mov    r8, [reg_p1]                 // r8-r14 <- a (7 words)
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  sub    r8, [reg_p2]                 // r8-r14 <- a - b
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  sbb    r14, [reg_p2+48]
+  sbb    rax, 0                       // rax <- all ones if a - b borrowed, else 0
+  
+  mov    rcx, [rip+fmt(p434x2)]       // Low part of 2*p434 (words 0, 1, 3); reg_p1/reg_p2 are overwritten from here on
+  mov    rdi, [rip+fmt(p434x2)+8]
+  mov    rsi, [rip+fmt(p434x2)+24]
+  and    rcx, rax
+  and    rdi, rax
+  and    rsi, rax  
+  add    r8, rcx  
+  adc    r9, rdi  
+  adc    r10, rdi                     // Word 1 is reused for word 2 (word 2 of p434x2 is never loaded; assumes both are equal)
+  adc    r11, rsi 
+  mov    [reg_p3], r8                 // Store c words 0-3
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11 
+  setc   cl                           // Save the carry: the and instructions below clear CF
+
+  mov    r8, [rip+fmt(p434x2)+32]     // High part of 2*p434 (words 4-6)
+  mov    rdi, [rip+fmt(p434x2)+40]
+  mov    rsi, [rip+fmt(p434x2)+48]
+  and    r8, rax
+  and    rdi, rax
+  and    rsi, rax  
+  bt     rcx, 0                       // Restore the saved carry into CF
+  adc    r12, r8 
+  adc    r13, rdi   
+  adc    r14, rsi
+  mov    [reg_p3+32], r12             // Store c words 4-6
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+///////////////////////////////////////////////////////////////// MACRO: c [reg_p3] = a [reg_p1] - b [reg_p2] + P0, where P0 is the symbol of a 7-word constant; no final ret
+.macro SUB434_PX  P0
+  push   r12
+  push   r13
+  
+  mov    r8, [reg_p1]                 // r8-r13, rcx <- a (7 words)
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    rcx, [reg_p1+48]
+  sub    r8, [reg_p2]                 // r8-r13, rcx <- a - b (the final borrow is not used)
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  sbb    rcx, [reg_p2+48]
+
+  mov    rax, [rip+\P0]               // Add the constant unconditionally; reg_p1/reg_p2 are overwritten from here on
+  mov    rdi, [rip+\P0+8]
+  mov    rsi, [rip+\P0+24]
+  add    r8, rax
+  mov    rax, [rip+\P0+32]            // mov does not alter the carry chain
+  adc    r9, rdi  
+  adc    r10, rdi                     // Word 1 is reused for word 2 (word 2 of P0 is never loaded; assumes both are equal)
+  adc    r11, rsi 
+  mov    rdi, [rip+\P0+40]
+  mov    rsi, [rip+\P0+48]
+  adc    r12, rax   
+  adc    r13, rdi  
+  adc    rcx, rsi
+  mov    [reg_p3], r8                 // Store c (7 words)
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12 
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], rcx
+  
+  pop    r13
+  pop    r12
+  .endm
+
+
+//***********************************************************************
+//  Multiprecision subtraction with correction with 2*p434 (no conditional reduction; the constant is always added)
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p434
+//*********************************************************************** 
+.global fmt(mp_sub434_p2_asm)
+fmt(mp_sub434_p2_asm):
+
+  SUB434_PX  fmt(p434x2)              // Macro body does all the work and restores r12/r13
+  ret
+
+
+//***********************************************************************
+//  Multiprecision subtraction with correction with 4*p434 (no conditional reduction; the constant is always added)
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p434
+//*********************************************************************** 
+.global fmt(mp_sub434_p4_asm)
+fmt(mp_sub434_p4_asm):
+
+  SUB434_PX  fmt(p434x4)              // Macro body does all the work and restores r12/r13
+  ret
+
+
+#ifdef _MULX_
+    
+///////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication
+// Inputs:  memory pointers M0 and M1
+// Outputs: memory pointer C and regs T1, T3, rax
+// Temps:   regs T0:T6
+/////////////////////////////////////////////////////////////////
+
+#ifdef _ADX_
+.macro MUL192_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6
+    mov    rdx, \M0          // rdx is the implicit multiplicand of mulx: A0
+    mulx   \T0, \T1, \M1     // T0:T1 = A0*B0
+    mov    \C, \T1           // C0_final
+    mulx   \T1, \T2, 8\M1    // T1:T2 = A0*B1
+    xor    rax, rax          // rax = 0; also clears CF and OF to start the carry chains
+    adox   \T0, \T2        
+    mulx   \T2, \T3, 16\M1   // T2:T3 = A0*B2
+    adox   \T1, \T3
+           
+    mov    rdx, 8\M0         // Second row: A1
+    mulx   \T3, \T4, \M1     // T3:T4 = A1*B0
+    adox   \T2, rax          // Fold the pending OF carry into the top word of row 0
+    xor    rax, rax          // Restart both carry chains
+    mulx   \T5, \T6, 8\M1    // T5:T6 = A1*B1
+    adox   \T4, \T0
+    mov    8\C, \T4          // C1_final  
+    adcx   \T3, \T6      
+    mulx   \T6, \T0, 16\M1   // T6:T0 = A1*B2 
+    adox   \T3, \T1  
+    adcx   \T5, \T0     
+    adcx   \T6, rax 
+    adox   \T5, \T2	
+    
+    mov    rdx, 16\M0        // Third row: A2
+    mulx   \T1, \T0, \M1     // T1:T0 = A2*B0
+    adox   \T6, rax          // Fold the pending OF carry into the top word of row 1
+    xor    rax, rax          // Restart both carry chains
+    mulx   \T4, \T2, 8\M1    // T4:T2 = A2*B1
+    adox   \T0, \T3   
+    mov    16\C, \T0         // C2_final 
+    adcx   \T1, \T5    
+    mulx   \T0, \T3, 16\M1   // T0:T3 = A2*B2
+    adcx   \T4, \T6  
+    adcx   \T0, rax
+    adox   \T1, \T2          // T1 <- C3 (left in a register, not stored)
+    adox   \T3, \T4          // T3 <- C4 (left in a register, not stored)
+    adox   rax, \T0          // rax <- C5 (rax was 0)
+.endm 
+    
+///////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication
+// Inputs:  memory pointers M0 and M1
+// Outputs: memory pointer C
+// Temps:   regs T0:T9
+/////////////////////////////////////////////////////////////////
+
+.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
+    mov    rdx, \M0          // rdx is the implicit multiplicand of mulx: A0
+    mulx   \T0, \T1, \M1     // T0:T1 = A0*B0
+    mov    \C, \T1           // C0_final
+    mulx   \T1, \T2, 8\M1    // T1:T2 = A0*B1
+    xor    rax, rax          // rax = 0; also clears CF and OF to start the carry chains
+    adox   \T0, \T2        
+    mulx   \T2, \T3, 16\M1   // T2:T3 = A0*B2
+    adox   \T1, \T3        
+    mulx   \T3, \T4, 24\M1   // T3:T4 = A0*B3
+    adox   \T2, \T4 
+           
+    mov    rdx, 8\M0         // Second row: A1
+    mulx   \T5, \T4, \M1     // T5:T4 = A1*B0
+    adox   \T3, rax          // Fold the pending OF carry into the top word of row 0
+    xor    rax, rax          // Restart both carry chains
+    mulx   \T6, \T7, 8\M1    // T6:T7 = A1*B1
+    adox   \T4, \T0
+    mov    8\C, \T4          // C1_final  
+    adcx   \T5, \T7      
+    mulx   \T7, \T8, 16\M1   // T7:T8 = A1*B2
+    adcx   \T6, \T8  
+    adox   \T5, \T1      
+    mulx   \T8, \T9, 24\M1   // T8:T9 = A1*B3
+    adcx   \T7, \T9        
+    adcx   \T8, rax   
+    adox   \T6, \T2
+    
+    mov    rdx, 16\M0        // Third row: A2
+    mulx   \T1, \T0, \M1     // T1:T0 = A2*B0
+    adox   \T7, \T3
+    adox   \T8, rax
+    xor    rax, rax          // Restart both carry chains
+    mulx   \T2, \T3, 8\M1    // T2:T3 = A2*B1
+    adox   \T0, \T5   
+    mov    16\C, \T0         // C2_final 
+    adcx   \T1, \T3    
+    mulx   \T3, \T4, 16\M1   // T3:T4 = A2*B2
+    adcx   \T2, \T4 
+    adox   \T1, \T6       
+    mulx   \T4,\T9, 24\M1    // T4:T9 = A2*B3
+    adcx   \T3, \T9        
+    mov    rdx, 24\M0        // Fourth row: A3 (mov does not affect the flags)
+    adcx   \T4, rax         
+
+    adox   \T2, \T7
+    adox   \T3, \T8
+    adox   \T4, rax
+
+    mulx   \T5, \T0, \M1     // T5:T0 = A3*B0
+    xor    rax, rax          // Restart both carry chains
+    mulx   \T6, \T7, 8\M1    // T6:T7 = A3*B1
+    adcx   \T5, \T7 
+    adox   \T1, \T0       
+    mulx   \T7, \T8, 16\M1   // T7:T8 = A3*B2
+    adcx   \T6, \T8  
+    adox   \T2, \T5      
+    mulx   \T8, \T9, 24\M1   // T8:T9 = A3*B3
+    adcx   \T7, \T9        
+    adcx   \T8, rax         
+
+    adox   \T3, \T6
+    adox   \T4, \T7
+    adox   \T8, rax
+    mov    24\C, \T1         // C3_final
+    mov    32\C, \T2         // C4_final
+    mov    40\C, \T3         // C5_final
+    mov    48\C, \T4         // C6_final
+    mov    56\C, \T8         // C7_final
+.endm 
+
+#else
+
+.macro MUL192_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6
+    mov    rdx, \M0          // rdx is the implicit multiplicand of mulx: A0
+    mulx   \T0, \T1, \M1     // T0:T1 = A0*B0
+    mov    \C, \T1           // C0_final
+    mulx   \T1, \T2, 8\M1    // T1:T2 = A0*B1
+    add    \T0, \T2        
+    mulx   \T2, \T3, 16\M1   // T2:T3 = A0*B2
+    adc    \T1, \T3
+           
+    mov    rdx, 8\M0         // Second row: A1 (mov and mulx leave the carry flag untouched)
+    mulx   \T3, \T4, \M1     // T3:T4 = A1*B0
+    adc    \T2, 0            // Finish row 0: fold the pending carry into its top word
+    mulx   \T5, \T6, 8\M1    // T5:T6 = A1*B1
+    add    \T4, \T0
+    mov    8\C, \T4          // C1_final
+    adc    \T3, \T1  
+    adc    \T5, \T2	    
+    mulx   \T2, \T1, 16\M1   // T2:T1 = A1*B2
+    adc    \T2, 0    
+
+    add    \T3, \T6          // Second chain: add the low halves of A1*B1 and A1*B2
+    adc    \T5, \T1     
+    adc    \T2, 0
+    
+    mov    rdx, 16\M0        // Third row: A2
+    mulx   \T1, \T0, \M1     // T1:T0 = A2*B0
+    add    \T0, \T3   
+    mov    16\C, \T0         // C2_final 
+    mulx   \T4, \T6, 8\M1    // T4:T6 = A2*B1
+    adc    \T1, \T5    
+    adc    \T2, \T4 
+    mulx   rax, \T3, 16\M1   // rax:T3 = A2*B2 
+    adc    rax, 0
+    add    \T1, \T6          // T1 <- C3 (left in a register, not stored)
+    adc    \T3, \T2          // T3 <- C4 (left in a register, not stored)
+    adc    rax, 0            // rax <- C5
+.endm 
+
+.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
+    mov    rdx, \M0          // rdx is the implicit multiplicand of mulx: A0
+    mulx   \T0, \T1, \M1     // T0:T1 = A0*B0
+    mov    \C, \T1           // C0_final
+    mulx   \T1, \T2, 8\M1    // T1:T2 = A0*B1
+    add    \T0, \T2        
+    mulx   \T2, \T3, 16\M1   // T2:T3 = A0*B2
+    adc    \T1, \T3         
+    mulx   \T3, \T4, 24\M1   // T3:T4 = A0*B3
+    adc    \T2, \T4        
+    mov    rdx, 8\M0         // Second row: A1 (mov does not affect the flags)
+    adc    \T3, 0         
+
+    mulx   \T5, \T4, \M1     // T5:T4 = A1*B0
+    mulx   \T6, \T7, 8\M1    // T6:T7 = A1*B1
+    add    \T5, \T7        
+    mulx   \T7, \T8, 16\M1   // T7:T8 = A1*B2
+    adc    \T6, \T8        
+    mulx   \T8, \T9, 24\M1   // T8:T9 = A1*B3
+    adc    \T7, \T9        
+    adc    \T8, 0         
+
+    add    \T4, \T0          // Accumulate row 0 into row 1
+    mov    8\C, \T4          // C1_final
+    adc    \T5, \T1
+    adc    \T6, \T2
+    adc    \T7, \T3
+    mov    rdx, 16\M0        // Third row: A2
+    adc    \T8, 0
+
+    mulx   \T1, \T0, \M1     // T1:T0 = A2*B0
+    mulx   \T2, \T3, 8\M1    // T2:T3 = A2*B1
+    add    \T1, \T3        
+    mulx   \T3, \T4, 16\M1   // T3:T4 = A2*B2
+    adc    \T2, \T4        
+    mulx   \T4,\T9, 24\M1    // T4:T9 = A2*B3
+    adc    \T3, \T9        
+    mov    rdx, 24\M0        // Fourth row: A3
+    adc    \T4, 0          
+
+    add    \T0, \T5          // Accumulate the running sum into row 2
+    mov    16\C, \T0         // C2_final
+    adc    \T1, \T6
+    adc    \T2, \T7
+    adc    \T3, \T8
+    adc    \T4, 0
+
+    mulx   \T5, \T0, \M1     // T5:T0 = A3*B0
+    mulx   \T6, \T7, 8\M1    // T6:T7 = A3*B1
+    add    \T5, \T7        
+    mulx   \T7, \T8, 16\M1   // T7:T8 = A3*B2
+    adc    \T6, \T8        
+    mulx   \T8, \T9, 24\M1   // T8:T9 = A3*B3
+    adc    \T7, \T9         
+    adc    \T8, 0         
+
+    add    \T1, \T0          // Accumulate row 3; stores are interleaved because mov does not affect the flags
+    mov    24\C, \T1         // C3_final
+    adc    \T2, \T5
+    mov    32\C, \T2         // C4_final
+    adc    \T3, \T6
+    mov    40\C, \T3         // C5_final
+    adc    \T4, \T7
+    mov    48\C, \T4         // C6_final
+    adc    \T8, 0
+    mov    56\C, \T8         // C7_final
+.endm
+#endif
+
+
+//*****************************************************************************
+//  434-bit multiplication using Karatsuba (one level), schoolbook (one level). c [reg_p3] = a [reg_p1] * b [reg_p2]; a = AL (words 0-3) + AH (words 4-6), same for b; c has 14 words
+//***************************************************************************** 
+.global fmt(mul434_asm)
+fmt(mul434_asm):    
+    push   r12
+    push   r13 
+    push   r14 
+    push   r15
+    mov    rcx, reg_p3       // Keep the output pointer in rcx: rdx (reg_p3) is needed as the mulx multiplicand
+
+    // r8-r11 <- AH + AL, rax <- mask
+    xor    rax, rax
+    mov    r8, [reg_p1]
+    mov    r9, [reg_p1+8]
+    mov    r10, [reg_p1+16]
+    mov    r11, [reg_p1+24] 
+    push   rbx 
+    push   rbp
+    sub    rsp, 96           // 96 bytes of scratch: [rsp] AH+AL, [rsp+32] BH+BL, [rsp+64] masked sum
+    add    r8, [reg_p1+32]
+    adc    r9, [reg_p1+40]
+    adc    r10, [reg_p1+48]
+    adc    r11, 0
+    sbb    rax, 0            // rax <- all ones if AH + AL overflowed 256 bits, else 0
+    mov    [rsp], r8
+    mov    [rsp+8], r9
+    mov    [rsp+16], r10
+    mov    [rsp+24], r11
+
+    // r12-r15 <- BH + BL, rbx <- mask
+    xor    rbx, rbx
+    mov    r12, [reg_p2]
+    mov    r13, [reg_p2+8]
+    mov    r14, [reg_p2+16]
+    mov    r15, [reg_p2+24]
+    add    r12, [reg_p2+32]
+    adc    r13, [reg_p2+40]
+    adc    r14, [reg_p2+48]
+    adc    r15, 0
+    sbb    rbx, 0            // rbx <- all ones if BH + BL overflowed 256 bits, else 0
+    mov    [rsp+32], r12
+    mov    [rsp+40], r13
+    mov    [rsp+48], r14
+    mov    [rsp+56], r15
+    
+    // r12-r15 <- masked (BH + BL), using the mask from the A sum
+    and    r12, rax
+    and    r13, rax
+    and    r14, rax
+    and    r15, rax
+
+    // r8-r11 <- masked (AH + AL), using the mask from the B sum
+    and    r8, rbx
+    and    r9, rbx
+    and    r10, rbx
+    and    r11, rbx
+
+    // r8-r11 <- masked (AH + AL) + masked (BH + BL)
+    add    r8, r12
+    adc    r9, r13
+    adc    r10, r14
+    adc    r11, r15
+    mov    [rsp+64], r8
+    mov    [rsp+72], r9
+    mov    [rsp+80], r10
+    mov    [rsp+88], r11
+
+    // [rsp] <- (AH+AL) x (BH+BL), low part (the 8-word product overwrites both operands at [rsp] and [rsp+32])
+    MUL256_SCHOOL  [rsp], [rsp+32], [rsp], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rbp 
+
+    // [rcx] <- AL x BL
+    MUL256_SCHOOL  [reg_p1], [reg_p2], [rcx], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rbp     // Result C0-C3
+
+    // [rcx+64], rbx, rbp, rax <- AH x BH 
+    MUL192_SCHOOL  [reg_p1+32], [reg_p2+32], [rcx+64], r8, rbx, r10, rbp, r12, r13, r14
+    
+    // r8-r11 <- (AH+AL) x (BH+BL), final step: add the masked sum to the upper half of the product
+    mov    r8, [rsp+64]
+    mov    r9, [rsp+72]
+    mov    r10, [rsp+80]
+    mov    r11, [rsp+88]
+    mov    rdx, [rsp+32]
+    add    r8, rdx
+    mov    rdx, [rsp+40]
+    adc    r9, rdx
+    mov    rdx, [rsp+48]
+    adc    r10, rdx
+    mov    rdx, [rsp+56]
+    adc    r11, rdx
+    
+    // r12-r15, r8-r11 <- (AH+AL) x (BH+BL) - ALxBL
+    mov    r12, [rsp]
+    mov    r13, [rsp+8]
+    mov    r14, [rsp+16]
+    mov    r15, [rsp+24]
+    sub    r12, [rcx]
+    sbb    r13, [rcx+8]
+    sbb    r14, [rcx+16]
+    sbb    r15, [rcx+24]
+    sbb    r8, [rcx+32]
+    sbb    r9, [rcx+40]
+    sbb    r10, [rcx+48]
+    sbb    r11, [rcx+56]
+    
+    // r12-r15, r8-r11 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH (AHxBH has six words: three in memory, then rbx, rbp, rax)
+    sub    r12, [rcx+64]
+    sbb    r13, [rcx+72]
+    sbb    r14, [rcx+80]
+    sbb    r15, rbx
+    sbb    r8, rbp
+    sbb    r9, rax
+    sbb    r10, 0
+    sbb    r11, 0
+    
+    add    r12, [rcx+32]     // Add the middle term into the result starting at word 4
+    mov    [rcx+32], r12    // Result C4-C7
+    adc    r13, [rcx+40]
+    mov    [rcx+40], r13 
+    adc    r14, [rcx+48]
+    mov    [rcx+48], r14 
+    adc    r15, [rcx+56]
+    mov    [rcx+56], r15
+    adc    r8, [rcx+64] 
+    mov    [rcx+64], r8    // Result C8-C13
+    adc    r9, [rcx+72]
+    mov    [rcx+72], r9 
+    adc    r10, [rcx+80]
+    mov    [rcx+80], r10
+    adc    r11, rbx
+    mov    [rcx+88], r11
+    adc    rbp, 0
+    mov    [rcx+96], rbp 
+    adc    rax, 0
+    mov    [rcx+104], rax
+    
+    add    rsp, 96           // Release the scratch area and restore the saved registers
+    pop    rbp  
+    pop    rbx
+    pop    r15
+    pop    r14
+    pop    r13
+    pop    r12
+    ret
+
+#else
+
+# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE"
+
+#endif
+
+
+#ifdef _MULX_
+
+///////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication (64 x 256 bits); the multiplier is taken from rdx, the implicit mulx operand
+// Inputs:  reg I0 and memory pointer M1 (I0 and T0 are not referenced in the body; the word-0 product is presumably already in T1:T0 -- confirm at call sites)
+// Outputs: regs T0:T4
+// Temps:   regs T0:T5 (rax is also zeroed)
+/////////////////////////////////////////////////////////////////
+.macro MUL64x256_SCHOOL I0, M1, T0, T1, T2, T3, T4, T5 
+    mulx   \T2, \T4, 8\M1      // T2:T4 = rdx * word 1 of M1
+    xor    rax, rax            // rax = 0; also clears CF and OF before the addition chain
+    mulx   \T3, \T5, 16\M1     // T3:T5 = rdx * word 2 of M1
+    ADD1   \T1, \T4            // T1 <- C1_final   
+    ADC1   \T2, \T5            // T2 <- C2_final 
+    mulx   \T4, \T5, 24\M1     // T4:T5 = rdx * word 3 of M1
+    ADC1   \T3, \T5            // T3 <- C3_final
+    ADC1   \T4, rax            // T4 <- C4_final
+.endm
+
+///////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication
+// Inputs:  regs I0 and I1, and memory pointer M1
+// Outputs: regs T0:T5
+// Temps:   regs T0:T5
+/////////////////////////////////////////////////////////////////
+
+#ifdef _ADX_
+.macro MUL128x256_SCHOOL I0, I1, M1, T0, T1, T2, T3, T4, T5
+    mulx   \T2, \T4, 8\M1      // First multiplier word is already in rdx; T2:T4 = rdx * word 1 of M1
+    xor    rax, rax            // rax = 0; also clears CF and OF
+    mulx   \T3, \T5, 16\M1     // T3:T5 = rdx * word 2 of M1
+    ADD1   \T1, \T4            // T1 is expected to hold the high half of rdx * word 0 (see the call in rdc434_asm)
+    ADC1   \T2, \T5     
+    mulx   \T4, \T5, 24\M1     // T4:T5 = rdx * word 3 of M1
+    ADC1   \T3, \T5 
+    ADC1   \T4, rax   
+    
+    xor    rax, rax            // Restart both carry chains for the second multiplier word
+    mov    rdx, \I1            // rdx <- second multiplier word
+    mulx   \I1, \T5, \M1       // I1:T5 = I1 * word 0 of M1 (I1 is overwritten and reused as scratch below)
+    ADD2   \T1, \T5            // T1 <- C1_final 
+    ADC2   \T2, \I1     
+    mulx   \T5, \I1, 8\M1      // T5:I1 = rdx * word 1 of M1
+    ADC2   \T3, \T5 
+    ADD1   \T2, \I1            // T2 <- C2_final
+    mulx   \T5, \I1, 16\M1     // T5:I1 = rdx * word 2 of M1
+    ADC2   \T4, \T5 
+    ADC1   \T3, \I1            // T3 <- C3_final
+    mulx   \T5, \I1, 24\M1     // T5:I1 = rdx * word 3 of M1
+    ADC2   \T5, rax         
+    ADC1   \T4, \I1            // T4 <- C4_final
+    ADC1   \T5, rax            // T5 <- C5_final
+.endm
+
+#else
+
+.macro MUL128x256_SCHOOL I0, I1, M1, T0, T1, T2, T3, T4, T5 
+    mulx   \T2, \T4, 8\M1      // First multiplier word is already in rdx; T2:T4 = rdx * word 1 of M1
+    mulx   \T3, \T5, 16\M1     // T3:T5 = rdx * word 2 of M1
+    add    \T1, \T4            // T1 is expected to hold the high half of rdx * word 0 (see the call in rdc434_asm)
+    adc    \T2, \T5     
+    mulx   \T4, \T5, 24\M1     // T4:T5 = rdx * word 3 of M1
+    adc    \T3, \T5 
+    adc    \T4, 0   
+    
+    mov    rdx, \I1            // rdx <- second multiplier word
+    mulx   \I1, \T5, \M1       // I1:T5 = I1 * word 0 of M1 (I1 is overwritten and reused as scratch below)
+    add    \T1, \T5            // T1 <- C1_final 
+    adc    \T2, \I1     
+    mulx   \T5, \I1, 8\M1      // T5:I1 = rdx * word 1 of M1
+    adc    \T3, \T5       
+    mulx   \T5, rax, 16\M1     // T5:rax = rdx * word 2 of M1
+    adc    \T4, \T5     
+    mulx   \T5, rdx, 24\M1     // T5:rdx = rdx * word 3 of M1 (rdx is overwritten by the low half)
+    adc    \T5, 0
+    add    \T2, \I1            // Second chain: add the low halves; T2 <- C2_final
+    adc    \T3, rax            // T3 <- C3_final
+    adc    \T4, rdx            // T4 <- C4_final
+    adc    \T5, 0              // T5 <- C5_final
+.endm
+#endif
+
+  
+//**************************************************************************************
+//  Montgomery reduction
+//  Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015
+//  Operation: c [reg_p2] = a [reg_p1]
+//************************************************************************************** 
+.global fmt(rdc434_asm)
+fmt(rdc434_asm):
+    push   r14                         // saved before it is loaded with a[1] below
+    // Only the four upper words of p434p1 (byte offsets +24..+48) are multiplied; the comments below call them p434p1_nz.
+    // a[0-1] x p434p1_nz --> result: r8:r13 
+    mov    rdx, [reg_p1]               // rdx <- a[0], the implicit mulx multiplier
+    mov    r14, [reg_p1+8]             // r14 <- a[1]
+    mulx   r9, r8, [rip+fmt(p434p1)+24]   // result r8    
+    push   r12                         // remaining registers saved here; the pops at the end mirror this order
+    push   r13
+    push   r15
+    push   rbp
+    push   rbx 
+    MUL128x256_SCHOOL rdx, r14, [rip+fmt(p434p1)+24], r8, r9, r10, r11, r12, r13     
+    // Accumulate the 6-word product r8:r13 onto a[3..8] and ripple the carry through a[9..13].
+    mov    rdx, [reg_p1+16]            // rdx <- a[2], first multiplier of the next round
+    mov    rcx, [reg_p1+72]            // rcx <- a[9]
+    add    r8, [reg_p1+24]             // r8 <- updated word 3; stays in a register as second multiplier of the next round
+    adc    r9, [reg_p1+32]  
+    adc    r10, [reg_p1+40]   
+    adc    r11, [reg_p1+48]   
+    adc    r12, [reg_p1+56]   
+    adc    r13, [reg_p1+64] 
+    adc    rcx, 0                      // rcx <- a[9] + carry
+    mulx   rbp, rbx, [rip+fmt(p434p1)+24]   // result rbx
+    mov    [reg_p2], r9                // c[0..4] serve as scratch for the updated words 4..8
+    mov    [reg_p2+8], r10  
+    mov    [reg_p2+16], r11  
+    mov    [reg_p2+24], r12  
+    mov    [reg_p2+32], r13 
+    mov    r9, [reg_p1+80]             // r9, r10, r11, rdi <- a[10..13]
+    mov    r10, [reg_p1+88]  
+    mov    r11, [reg_p1+96]
+    mov    rdi, [reg_p1+104]           // last access through reg_p1; rdi is plain scratch from here on
+    adc    r9, 0                       // the carry out of adc rcx above is still live (mulx and mov do not modify flags)
+    adc    r10, 0
+    adc    r11, 0
+    adc    rdi, 0
+
+    // a[2-3] x p434p1_nz --> result: rbx, rbp, r12:r15
+    MUL128x256_SCHOOL rdx, r8, [rip+fmt(p434p1)+24], rbx, rbp, r12, r13, r14, r15
+    // Accumulate onto the running words 5..10; word 4 (parked at c[0]) becomes the next multiplier.
+    mov    rdx, [reg_p2]
+    add    rbx, [reg_p2+8]             // rbx <- updated word 5; second multiplier of the next round
+    adc    rbp, [reg_p2+16]  
+    adc    r12, [reg_p2+24]   
+    adc    r13, [reg_p2+32]  
+    adc    r14, rcx  
+    mov    rcx, 0                      // mov, unlike xor, keeps CF intact
+    adc    r15, r9
+    adc    rcx, r10                    // rcx <- running word 11 + carry
+    mulx   r9, r8, [rip+fmt(p434p1)+24]   // result r8
+    mov    [reg_p2], rbp               // park the running words 6..8 in c[0..2]
+    mov    [reg_p2+8], r12  
+    mov    [reg_p2+16], r13 
+    adc    r11, 0
+    adc    rdi, 0 
+
+    // a[4-5] x p434p1_nz --> result: r8:r13
+    MUL128x256_SCHOOL rdx, rbx, [rip+fmt(p434p1)+24], r8, r9, r10, rbp, r12, r13  
+    // Accumulate onto the running words 7..12; word 6 (parked at c[0]) becomes the last multiplier.
+    mov    rdx, [reg_p2]
+    add    r8, [reg_p2+8]  
+    adc    r9, [reg_p2+16]  
+    adc    r10, r14   
+    adc    rbp, r15 
+    adc    r12, rcx 
+    adc    r13, r11   
+    adc    rdi, 0  
+    mulx   r15, r14, [rip+fmt(p434p1)+24]  // result r14 
+    mov    [reg_p2], r8        // Final result c0-c1
+    mov    [reg_p2+8], r9    
+
+    // a[6] x p434p1_nz (single-word multiply) --> result: r14:r15, r8:r9, r11
+    MUL64x256_SCHOOL rdx, [rip+fmt(p434p1)+24], r14, r15, r8, r9, r11, rcx  
+    
+    // Final result c2:c6
+    add    r14, r10  
+    adc    r15, rbp 
+    pop    rbx                         // pop does not modify flags, so restoring registers inside the adc chain is safe
+    pop    rbp 
+    adc    r8, r12   
+    adc    r9, r13  
+    adc    r11, rdi 
+    mov    [reg_p2+16], r14  
+    mov    [reg_p2+24], r15  
+    pop    r15
+    pop    r13
+    mov    [reg_p2+32], r8  
+    mov    [reg_p2+40], r9  
+    mov    [reg_p2+48], r11
+
+    pop    r12
+    pop    r14
+    ret
+
+  #else
+
+  # error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE"
+
+  #endif
+
+
+//***********************************************************************
+//  434-bit multiprecision addition
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
+//*********************************************************************** 
+.global fmt(mp_add434_asm)
+fmt(mp_add434_asm): 
+  mov    r8, [reg_p1]                  // load words 0..3 of a; only r8-r11 are used, so nothing is pushed
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  add    r8, [reg_p2]                  // add words 0..3 of b, carry rippling upwards
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  // mov does not modify flags, so the carry out of word 3 is still in CF for the adc chain below.
+  mov    r8, [reg_p1+32]               // words 4..6 of a
+  mov    r9, [reg_p1+40]
+  mov    r10, [reg_p1+48]
+  adc    r8, [reg_p2+32] 
+  adc    r9, [reg_p2+40] 
+  adc    r10, [reg_p2+48]              // the carry out of word 6 is not returned to the caller
+  mov    [reg_p3+32], r8
+  mov    [reg_p3+40], r9
+  mov    [reg_p3+48], r10
+  ret
+
+
+//***************************************************************************
+//  2x434-bit multiprecision subtraction/addition
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. If c < 0, add p434*2^448
+//*************************************************************************** 
+.global fmt(mp_subadd434x2_asm)
+fmt(mp_subadd434x2_asm):
+  push   r12
+  push   r13 
+  push   r14 
+  push   r15 
+  xor    rax, rax                      // rax <- 0; becomes the borrow mask at the sbb rax, 0 below
+  mov    r8, [reg_p1]                  // words 0..4 of a
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  sub    r8, [reg_p2]                  // start of one 14-word borrow chain; the mov instructions in between do not modify flags
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+
+  mov    r8, [reg_p1+40]               // words 5..9 of a
+  mov    r9, [reg_p1+48]
+  mov    r10, [reg_p1+56] 
+  mov    r11, [reg_p1+64]
+  mov    r12, [reg_p1+72] 
+  sbb    r8, [reg_p2+40] 
+  sbb    r9, [reg_p2+48] 
+  sbb    r10, [reg_p2+56]
+  sbb    r11, [reg_p2+64] 
+  sbb    r12, [reg_p2+72]
+  mov    [reg_p3+40], r8
+  mov    [reg_p3+48], r9
+  mov    [reg_p3+56], r10              // word 7 is parked in c; words 8 and 9 stay in r11 and r12 for the correction below
+  
+  mov    r13, [reg_p1+80]              // words 10..13 of a
+  mov    r14, [reg_p1+88] 
+  mov    r15, [reg_p1+96]
+  mov    rcx, [reg_p1+104]
+  sbb    r13, [reg_p2+80]
+  sbb    r14, [reg_p2+88]
+  sbb    r15, [reg_p2+96] 
+  sbb    rcx, [reg_p2+104] 
+  sbb    rax, 0                        // rax <- 0 if there was no final borrow, all ones if a - b went negative
+  
+  // Add p434 anded with the mask in rax 
+  mov    r8, [rip+fmt(p434)]           // p434[0]; reused below for p434[1] and p434[2], which hold the same value
+  mov    r9, [rip+fmt(p434)+24]        // p434[3]
+  mov    r10, [rip+fmt(p434)+32]       // p434[4]
+  mov    rdi, [rip+fmt(p434)+40]       // p434[5]; rdi and rsi are scratch here, reg_p1 and reg_p2 are not referenced again
+  mov    rsi, [rip+fmt(p434)+48]       // p434[6]
+  and    r8, rax
+  and    r9, rax
+  and    r10, rax
+  and    rdi, rax
+  and    rsi, rax
+  mov    rax, [reg_p3+56]              // reload word 7; the mask itself is no longer needed
+  add    rax, r8                       // words 7..13 += masked p434, i.e. the masked prime is added at weight 2^448
+  adc    r11, r8
+  adc    r12, r8
+  adc    r13, r9
+  adc    r14, r10
+  adc    r15, rdi
+  adc    rcx, rsi
+  
+  mov    [reg_p3+56], rax
+  mov    [reg_p3+64], r11
+  mov    [reg_p3+72], r12
+  mov    [reg_p3+80], r13
+  mov    [reg_p3+88], r14
+  mov    [reg_p3+96], r15
+  mov    [reg_p3+104], rcx
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Double 2x434-bit multiprecision subtraction
+//  Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
+//*********************************************************************** 
+.global fmt(mp_dblsub434x2_asm)
+fmt(mp_dblsub434x2_asm):
+  push   r12
+  push   r13
+  push   r14
+  // Lower half: words 0..6 of c, minus words 0..6 of a, then minus words 0..6 of b.
+  mov    r8, [reg_p3]
+  mov    r9, [reg_p3+8]
+  mov    r10, [reg_p3+16]
+  mov    r11, [reg_p3+24]
+  mov    r12, [reg_p3+32]
+  mov    r13, [reg_p3+40]
+  mov    r14, [reg_p3+48]
+  sub    r8, [reg_p1]
+  sbb    r9, [reg_p1+8] 
+  sbb    r10, [reg_p1+16] 
+  sbb    r11, [reg_p1+24] 
+  sbb    r12, [reg_p1+32] 
+  sbb    r13, [reg_p1+40] 
+  sbb    r14, [reg_p1+48]
+  setc   al                            // save the borrow of (c - a) on the lower half; only bit 0 of rax is consulted later
+  sub    r8, [reg_p2]
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  sbb    r14, [reg_p2+48]
+  setc   cl                            // save the borrow of the second subtraction (minus b) on the lower half
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  // Upper half: words 7..13, each subtraction chain resuming from its own saved borrow.
+  mov    r8, [reg_p3+56]
+  mov    r9, [reg_p3+64]
+  mov    r10, [reg_p3+72]
+  mov    r11, [reg_p3+80]
+  mov    r12, [reg_p3+88]
+  mov    r13, [reg_p3+96]
+  mov    r14, [reg_p3+104]
+  bt     rax, 0                        // CF <- borrow saved in al
+  sbb    r8, [reg_p1+56] 
+  sbb    r9, [reg_p1+64] 
+  sbb    r10, [reg_p1+72] 
+  sbb    r11, [reg_p1+80] 
+  sbb    r12, [reg_p1+88] 
+  sbb    r13, [reg_p1+96] 
+  sbb    r14, [reg_p1+104]             // the borrow out of this chain is overwritten by the bt below
+  bt     rcx, 0                        // CF <- borrow saved in cl
+  sbb    r8, [reg_p2+56] 
+  sbb    r9, [reg_p2+64] 
+  sbb    r10, [reg_p2+72] 
+  sbb    r11, [reg_p2+80] 
+  sbb    r12, [reg_p2+88] 
+  sbb    r13, [reg_p2+96] 
+  sbb    r14, [reg_p2+104]             // the final borrow is discarded
+  mov    [reg_p3+56], r8
+  mov    [reg_p3+64], r9
+  mov    [reg_p3+72], r10
+  mov    [reg_p3+80], r11
+  mov    [reg_p3+88], r12
+  mov    [reg_p3+96], r13
+  mov    [reg_p3+104], r14
+  
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
\ No newline at end of file
diff --git a/SIKE_sw/src/P434/P434.c b/SIKE_sw/src/P434/P434.c
new file mode 100644
index 0000000..761713d
--- /dev/null
+++ b/SIKE_sw/src/P434/P434.c
@@ -0,0 +1,133 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: supersingular isogeny parameters and generation of functions for P434
+*********************************************************************************************/  
+
+#include "P434_api.h" 
+#include "P434_internal.h"
+#include "../internal.h"
+
+
+// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points:
+// --------------------------------------------------------------------------------------------------
+// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). 
+// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position.
+// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. 
+// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32.
+// For example, a 434-bit field element is represented with Ceil(434 / 64) = 7 64-bit digits or Ceil(434 / 32) = 14 32-bit digits.
+
+//
+// Curve isogeny system "SIDHp434". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p434^2), where A=6, B=1, C=1 and p434 = 2^216*3^137-1
+//
+         
+const uint64_t p434[NWORDS64_FIELD]              = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFDC1767AE2FFFFFF, 
+                                                     0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344 };   // p434 = 2^216*3^137 - 1; words 0..2 are identical, which mp_subadd434x2_asm exploits by loading only word 0
+const uint64_t p434p1[NWORDS64_FIELD]            = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFDC1767AE3000000,
+                                                     0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344 };   // p434 + 1; words 0..2 are zero, so rdc434_asm reads the table from byte offset 24 only
+const uint64_t p434x2[NWORDS64_FIELD]            = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFB82ECF5C5FFFFFF,
+                                                     0xF78CB8F062B15D47, 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688 };   // 2*p434
+const uint64_t p434x4[NWORDS64_FIELD]            = { 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xF705D9EB8BFFFFFF, 
+                                                     0xEF1971E0C562BA8F, 0xB3F17F5A07148159, 0x0008D07C9C5DCD11 };   // 4*p434
+const uint64_t p434x16p[2*NWORDS64_FIELD]        = { 0x0000000000000010, 0x0000000000000000, 0x0000000000000000, 0x47D130A3A0000000, 
+                                                     0x873470F9D4EA2B80, 0x6074052FC75BF530, 0x54497C1B1D119772, 0xC55F373D2CDCA412, 
+                                                     0x732CA2221C664B96, 0x6445AB96AF6359A5, 0x221708AB42ABE1B4, 0xAE3D3D0063244F01, 
+                                                     0x18B920F2ECF68816, 0x0000004DB194809D };
+// Order of Alice's subgroup
+const uint64_t Alice_order[NWORDS64_ORDER]       = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000001000000 }; 
+// Order of Bob's subgroup
+const uint64_t Bob_order[NWORDS64_ORDER]         = { 0x58AEA3FDC1767AE3, 0xC520567BC65C7831, 0x1773446CFC5FD681, 0x0000000002341F27 };
+// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i} in GF(p434^2), expressed in Montgomery representation
+const uint64_t A_gen[6*NWORDS64_FIELD]           = { 0x05ADF455C5C345BF, 0x91935C5CC767AC2B, 0xAFE4E879951F0257, 0x70E792DC89FA27B1, 
+                                                     0xF797F526BB48C8CD, 0x2181DB6131AF621F, 0x00000A1C08B1ECC4,    // XPA0
+                                                     0x74840EB87CDA7788, 0x2971AA0ECF9F9D0B, 0xCB5732BDF41715D5, 0x8CD8E51F7AACFFAA, 
+                                                     0xA7F424730D7E419F, 0xD671EB919A179E8C, 0x0000FFA26C5A924A,    // XPA1
+                                                     0xFEC6E64588B7273B, 0xD2A626D74CBBF1C6, 0xF8F58F07A78098C7, 0xE23941F470841B03, 
+                                                     0x1B63EDA2045538DD, 0x735CFEB0FFD49215, 0x0001C4CB77542876,    // XQA0
+                                                     0xADB0F733C17FFDD6, 0x6AFFBD037DA0A050, 0x680EC43DB144E02F, 0x1E2E5D5FF524E374,
+                                                     0xE2DDA115260E2995, 0xA6E4B552E2EDE508, 0x00018ECCDDF4B53E,    // XQA1
+                                                     0x01BA4DB518CD6C7D, 0x2CB0251FE3CC0611, 0x259B0C6949A9121B, 0x60E17AC16D2F82AD, 
+                                                     0x3AA41F1CE175D92D, 0x413FBE6A9B9BC4F3, 0x00022A81D8D55643,    // XRA0
+                                                     0xB8ADBC70FC82E54A, 0xEF9CDDB0D5FADDED, 0x5820C734C80096A0, 0x7799994BAA96E0E4, 
+                                                     0x044961599E379AF8, 0xDB2B94FBF09F27E2, 0x0000B87FC716C0C6 };  // XRA1
+// Bob's generator values {XPB0, XQB0, XRB0 + XRB1*i} in GF(p434^2), expressed in Montgomery representation
+const uint64_t B_gen[6*NWORDS64_FIELD]           = { 0x6E5497556EDD48A3, 0x2A61B501546F1C05, 0xEB919446D049887D, 0x5864A4A69D450C4F, 
+                                                     0xB883F276A6490D2B, 0x22CC287022D5F5B9, 0x0001BED4772E551F,    // XPB0 
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000,    // XPB1
+                                                     0xFAE2A3F93D8B6B8E, 0x494871F51700FE1C, 0xEF1A94228413C27C, 0x498FF4A4AF60BD62, 
+                                                     0xB00AD2A708267E8A, 0xF4328294E017837F, 0x000034080181D8AE,    // XQB0
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000,    // XQB1
+                                                     0x283B34FAFEFDC8E4, 0x9208F44977C3E647, 0x7DEAE962816F4E9A, 0x68A2BA8AA262EC9D, 
+                                                     0x8176F112EA43F45B, 0x02106D022634F504, 0x00007E8A50F02E37,    // XRB0
+                                                     0xB378B7C1DA22CCB1, 0x6D089C99AD1D9230, 0xEBE15711813E2369, 0x2B35A68239D48A53, 
+                                                     0x445F6FD138407C93, 0xBEF93B29A3F6B54B, 0x000173FA910377D3 };  // XRB1
+// Montgomery constant Montgomery_R2 = (2^448)^2 mod p434
+const uint64_t Montgomery_R2[NWORDS64_FIELD]     = { 0x28E55B65DCD69B30, 0xACEC7367768798C2, 0xAB27973F8311688D, 0x175CC6AF8D6C7C0B,
+                                                     0xABCD92BF2DDE347E, 0x69E16A61C7686D9A, 0x000025A89BCDD12A };                                                   
+// Value one in Montgomery representation 
+const uint64_t Montgomery_one[NWORDS64_FIELD]    = { 0x000000000000742C, 0x0000000000000000, 0x0000000000000000, 0xB90FF404FC000000, 
+                                                     0xD801A4FB559FACD4, 0xE93254545F77410C, 0x0000ECEEA7BD2EDA };
+
+
+// Fixed parameters for isogeny tree computation
+const unsigned int strat_Alice[MAX_Alice-1] = { 
+48, 28, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 13, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 
+1, 1, 5, 4, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 21, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 
+1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1 };
+
+const unsigned int strat_Bob[MAX_Bob-1] = { 
+66, 33, 17, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, 
+2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 32, 16, 8, 4, 3, 1, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 
+1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 };
+           
+// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions
+#define fpcopy                        fpcopy434
+#define fpzero                        fpzero434
+#define fpadd                         fpadd434
+#define fpsub                         fpsub434
+#define fpneg                         fpneg434
+#define fpdiv2                        fpdiv2_434
+#define fpcorrection                  fpcorrection434
+#define fpmul_mont                    fpmul434_mont
+#define fpsqr_mont                    fpsqr434_mont
+#define fpinv_mont                    fpinv434_mont
+#define fpinv_chain_mont              fpinv434_chain_mont
+#define fp2copy                       fp2copy434
+#define fp2zero                       fp2zero434
+#define fp2add                        fp2add434
+#define fp2sub                        fp2sub434
+#define mp_sub_p2                     mp_sub434_p2
+#define mp_sub_p4                     mp_sub434_p4
+#define sub_p4                        mp_sub_p4
+#define fp2neg                        fp2neg434
+#define fp2div2                       fp2div2_434
+#define fp2correction                 fp2correction434
+#define fp2mul_mont                   fp2mul434_mont
+#define fp2sqr_mont                   fp2sqr434_mont
+#define fp2inv_mont                   fp2inv434_mont
+#define fp2inv_mont_ct                fp2inv434_mont_ct
+#define fp2inv_mont_bingcd            fp2inv434_mont_bingcd
+#define fpequal_non_constant_time     fpequal434_non_constant_time
+#define mp_add_asm                    mp_add434_asm
+#define mp_subaddx2_asm               mp_subadd434x2_asm
+#define mp_dblsubx2_asm               mp_dblsub434x2_asm
+#define crypto_kem_keypair            crypto_kem_keypair_SIKEp434
+#define crypto_kem_enc                crypto_kem_enc_SIKEp434
+#define crypto_kem_dec                crypto_kem_dec_SIKEp434
+#define random_mod_order_A            random_mod_order_A_SIDHp434
+#define random_mod_order_B            random_mod_order_B_SIDHp434
+#define EphemeralKeyGeneration_A      EphemeralKeyGeneration_A_SIDHp434
+#define EphemeralKeyGeneration_B      EphemeralKeyGeneration_B_SIDHp434
+#define EphemeralSecretAgreement_A    EphemeralSecretAgreement_A_SIDHp434
+#define EphemeralSecretAgreement_B    EphemeralSecretAgreement_B_SIDHp434
+
+#include "../fpx.c"
+#include "../ec_isogeny.c"
+#include "../sidh.c"    
+#include "../sike.c"
\ No newline at end of file
diff --git a/SIKE_sw/src/P434/P434_api.h b/SIKE_sw/src/P434/P434_api.h
new file mode 100644
index 0000000..ba78408
--- /dev/null
+++ b/SIKE_sw/src/P434/P434_api.h
@@ -0,0 +1,112 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: API header file for P434
+*********************************************************************************************/  
+
+#ifndef P434_API_H
+#define P434_API_H
+    
+
+/*********************** Key encapsulation mechanism API ***********************/
+
+#define CRYPTO_SECRETKEYBYTES     374    // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes
+#define CRYPTO_PUBLICKEYBYTES     330
+#define CRYPTO_BYTES               16
+#define CRYPTO_CIPHERTEXTBYTES    346    // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes  
+
+// Algorithm name
+#define CRYPTO_ALGNAME "SIKEp434"  
+
+// SIKE's key generation
+// It produces a private key sk and computes the public key pk.
+// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 374 bytes)
+//          public key pk (CRYPTO_PUBLICKEYBYTES = 330 bytes) 
+int crypto_kem_keypair_SIKEp434(unsigned char *pk, unsigned char *sk);
+
+// SIKE's encapsulation
+// Input:   public key pk         (CRYPTO_PUBLICKEYBYTES = 330 bytes)
+// Outputs: shared secret ss      (CRYPTO_BYTES = 16 bytes)
+//          ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 346 bytes)
+int crypto_kem_enc_SIKEp434(unsigned char *ct, unsigned char *ss, const unsigned char *pk);
+
+// SIKE's decapsulation
+// Input:   secret key sk         (CRYPTO_SECRETKEYBYTES = 374 bytes)
+//          ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 346 bytes) 
+// Outputs: shared secret ss      (CRYPTO_BYTES = 16 bytes)
+int crypto_kem_dec_SIKEp434(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);
+
+
+// Encoding of keys for KEM-based isogeny system "SIKEp434" (wire format):
+// ----------------------------------------------------------------------
+// Elements over GF(p434) are encoded in 55 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). 
+// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are encoded as {a, b}, with a in the lowest memory portion.
+//
+// Private keys sk consist of the concatenation of a 16-byte random value, a value in the range [0, 2^Floor(Log(2,3^137))-1] and the public key pk. In the SIKE API, 
+// private keys are encoded in 374 octets in little endian format. 
+// Public keys pk consist of 3 elements in GF(p434^2). In the SIKE API, pk is encoded in 330 octets. 
+// Ciphertexts ct consist of the concatenation of a public key value and a 16-byte value. In the SIKE API, ct is encoded in 330 + 16 = 346 octets.  
+// Shared keys ss consist of a value of 16 octets.
+
+
+/*********************** Ephemeral key exchange API ***********************/
+
+#define SIDH_SECRETKEYBYTES_A    27
+#define SIDH_SECRETKEYBYTES_B    28
+#define SIDH_PUBLICKEYBYTES     330
+#define SIDH_BYTES              110
+
+// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys.
+// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016.
+// Extended version available at: http://eprint.iacr.org/2016/859  
+
+// Generation of Alice's secret key 
+// Outputs random value in [0, 2^216 - 1] to be used as Alice's private key
+void random_mod_order_A_SIDHp434(unsigned char* random_digits);
+
+// Generation of Bob's secret key 
+// Outputs random value in [0, 2^Floor(Log(2,3^137)) - 1] to be used as Bob's private key
+void random_mod_order_B_SIDHp434(unsigned char* random_digits);
+
+// Alice's ephemeral public key generation
+// Input:  a private key PrivateKeyA in the range [0, 2^216 - 1], stored in 27 bytes. 
+// Output: the public key PublicKeyA consisting of 3 GF(p434^2) elements encoded in 330 bytes.
+int EphemeralKeyGeneration_A_SIDHp434(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA);
+
+// Bob's ephemeral public key generation
+// It computes the public key PublicKeyB from the caller-supplied private key PrivateKeyB (e.g., one obtained from random_mod_order_B_SIDHp434).
+// Input:  a private key PrivateKeyB in the range [0, 2^Floor(Log(2,3^137)) - 1], stored in 28 bytes. 
+// Output: the public key PublicKeyB consisting of 3 GF(p434^2) elements encoded in 330 bytes.
+int EphemeralKeyGeneration_B_SIDHp434(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB);
+
+// Alice's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB
+// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^216 - 1], stored in 27 bytes. 
+//         Bob's PublicKeyB consists of 3 GF(p434^2) elements encoded in 330 bytes.
+// Output: a shared secret SharedSecretA that consists of one element in GF(p434^2) encoded in 110 bytes.
+int EphemeralSecretAgreement_A_SIDHp434(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA);
+
+// Bob's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA
+// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^137)) - 1], stored in 28 bytes. 
+//         Alice's PublicKeyA consists of 3 GF(p434^2) elements encoded in 330 bytes.
+// Output: a shared secret SharedSecretB that consists of one element in GF(p434^2) encoded in 110 bytes.
+int EphemeralSecretAgreement_B_SIDHp434(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB);
+
+
+// Encoding of keys for KEX-based isogeny system "SIDHp434" (wire format):
+// ----------------------------------------------------------------------
+// Elements over GF(p434) are encoded in 55 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). 
+// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are encoded as {a, b}, with a in the lowest memory portion.
+//
+// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^216-1] and [0, 2^Floor(Log(2,3^137)) - 1], resp. In the SIDH API, 
+// Alice's and Bob's private keys are encoded in 27 and 28 octets, resp., in little endian format. 
+// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p434^2). In the SIDH API, they are encoded in 330 octets. 
+// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p434^2). In the SIDH API, they are encoded in 110 octets.
+
+
+#endif
\ No newline at end of file
diff --git a/SIKE_sw/src/P434/P434_internal.h b/SIKE_sw/src/P434/P434_internal.h
new file mode 100644
index 0000000..357a487
--- /dev/null
+++ b/SIKE_sw/src/P434/P434_internal.h
@@ -0,0 +1,175 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: internal header file for P434
+*********************************************************************************************/  
+
+#ifndef P434_INTERNAL_H
+#define P434_INTERNAL_H
+
+#include "../config.h"
+
+
+#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_ARM64)
+    #define NWORDS_FIELD    7               // Number of words of a 434-bit field element
+    #define p434_ZERO_WORDS 3               // Number of "0" digits in the least significant part of p434 + 1      
+#elif (TARGET == TARGET_x86)
+    #define NWORDS_FIELD    14 
+    #define p434_ZERO_WORDS 6
+#endif
+    
+
+// Basic constants
+
+#define NBITS_FIELD             434  
+#define MAXBITS_FIELD           448                
+#define MAXWORDS_FIELD          ((MAXBITS_FIELD+RADIX-1)/RADIX)     // Max. number of words to represent field elements
+#define NWORDS64_FIELD          ((NBITS_FIELD+63)/64)               // Number of 64-bit words of a 434-bit field element 
+#define NBITS_ORDER             256
+#define NWORDS_ORDER            ((NBITS_ORDER+RADIX-1)/RADIX)       // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp.
+#define NWORDS64_ORDER          ((NBITS_ORDER+63)/64)               // Number of 64-bit words of an NBITS_ORDER-bit (256-bit) element 
+#define MAXBITS_ORDER           NBITS_ORDER
+#define ALICE                   0
+#define BOB                     1 
+#define OALICE_BITS             216                                 // Alice's subgroup order is 2^216
+#define OBOB_BITS               218     
+#define OBOB_EXPON              137                                 // Bob's subgroup order is 3^137
+#define MASK_ALICE              0xFF 
+#define MASK_BOB                0x01 
+#define PRIME                   p434 
+#define PARAM_A                 6  
+#define PARAM_C                 1
+// Fixed parameters for isogeny tree computation
+#define MAX_INT_POINTS_ALICE    7        
+#define MAX_INT_POINTS_BOB      8      
+#define MAX_Alice               108
+#define MAX_Bob                 137
+#define MSG_BYTES               16
+#define SECRETKEY_A_BYTES       ((OALICE_BITS + 7) / 8)             // 27 bytes
+#define SECRETKEY_B_BYTES       ((OBOB_BITS - 1 + 7) / 8)           // 28 bytes
+#define FP2_ENCODED_BYTES       (2*((NBITS_FIELD + 7) / 8))         // 110 bytes; fully parenthesized so the macro expands safely inside any expression
+
+
+// SIDH's basic element definitions and point representations
+
+typedef digit_t felm_t[NWORDS_FIELD];                                 // Datatype for representing 434-bit field elements (448-bit max.)
+typedef digit_t dfelm_t[2*NWORDS_FIELD];                              // Datatype for representing double-precision 2x434-bit field elements (2x448-bit max.) 
+typedef felm_t  f2elm_t[2];                                           // Datatype for representing quadratic extension field elements GF(p434^2)
+        
+typedef struct { f2elm_t X; f2elm_t Z; } point_proj;                  // Point representation in projective XZ Montgomery coordinates.
+typedef point_proj point_proj_t[1]; 
+
+#ifdef COMPRESS
+    typedef struct { f2elm_t X; f2elm_t Y; f2elm_t Z; } point_full_proj;  // Point representation in full projective XYZ Montgomery coordinates 
+    typedef point_full_proj point_full_proj_t[1]; 
+
+    typedef struct { f2elm_t x; f2elm_t y; } point_affine;                // Point representation in affine coordinates.
+    typedef point_affine point_t[1]; 
+
+    typedef f2elm_t publickey_t[3];      
+#endif
+
+
+
+/**************** Function prototypes ****************/
+/************* Multiprecision functions **************/
+
+// 434-bit multiprecision addition, c = a+b
+void mp_add434(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_add434_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// 434-bit multiprecision subtraction, c = a-b+2p or c = a-b+4p
+extern void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c);
+extern void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_sub434_p2_asm(const digit_t* a, const digit_t* b, digit_t* c); 
+void mp_sub434_p4_asm(const digit_t* a, const digit_t* b, digit_t* c); 
+
+// 2x434-bit multiprecision subtraction followed by addition with p434*2^448, c = a-b+(p434*2^448) if a-b < 0, otherwise c=a-b 
+void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_subadd434x2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Double 2x434-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
+void mp_dblsub434x2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+/************ Field arithmetic functions *************/
+
+// Copy of a field element, c = a
+void fpcopy434(const digit_t* a, digit_t* c);
+
+// Zeroing a field element, a = 0
+void fpzero434(digit_t* a);
+
+// Non constant-time comparison of two field elements. If a = b return TRUE, otherwise, return FALSE
+bool fpequal434_non_constant_time(const digit_t* a, const digit_t* b); 
+
+// Modular addition, c = a+b mod p434
+extern void fpadd434(const digit_t* a, const digit_t* b, digit_t* c);
+extern void fpadd434_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Modular subtraction, c = a-b mod p434
+extern void fpsub434(const digit_t* a, const digit_t* b, digit_t* c);
+extern void fpsub434_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Modular negation, a = -a mod p434        
+extern void fpneg434(digit_t* a);  
+
+// Modular division by two, c = a/2 mod p434.
+void fpdiv2_434(const digit_t* a, digit_t* c);
+
+// Modular correction to reduce field element a in [0, 2*p434-1] to [0, p434-1].
+void fpcorrection434(digit_t* a);
+
+// 434-bit Montgomery reduction, c = a*R^-1 mod p434, where R=2^448 (a is a double-precision 2x434-bit input)
+void rdc434_asm(digit_t* ma, digit_t* mc);
+            
+// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^448
+void fpmul434_mont(const digit_t* a, const digit_t* b, digit_t* c);
+void mul434_asm(const digit_t* a, const digit_t* b, digit_t* c);
+   
+// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p434, where R=2^448
+void fpsqr434_mont(const digit_t* ma, digit_t* mc);
+
+// Field inversion, a = a^-1 in GF(p434)
+void fpinv434_mont(digit_t* a);
+
+// Chain to compute (p434-3)/4 using Montgomery arithmetic
+void fpinv434_chain_mont(digit_t* a);
+
+/************ GF(p^2) arithmetic functions *************/
+    
+// Copy of a GF(p434^2) element, c = a
+void fp2copy434(const f2elm_t a, f2elm_t c);
+
+// Zeroing a GF(p434^2) element, a = 0
+void fp2zero434(f2elm_t a);
+
+// GF(p434^2) negation, a = -a in GF(p434^2)
+void fp2neg434(f2elm_t a);
+
+// GF(p434^2) addition, c = a+b in GF(p434^2)
+extern void fp2add434(const f2elm_t a, const f2elm_t b, f2elm_t c);           
+
+// GF(p434^2) subtraction, c = a-b in GF(p434^2)
+extern void fp2sub434(const f2elm_t a, const f2elm_t b, f2elm_t c); 
+
+// GF(p434^2) division by two, c = a/2  in GF(p434^2) 
+void fp2div2_434(const f2elm_t a, f2elm_t c);
+
+// Modular correction, a = a in GF(p434^2)
+void fp2correction434(f2elm_t a);
+            
+// GF(p434^2) squaring using Montgomery arithmetic, c = a^2 in GF(p434^2)
+void fp2sqr434_mont(const f2elm_t a, f2elm_t c);
+ 
+// GF(p434^2) multiplication using Montgomery arithmetic, c = a*b in GF(p434^2)
+void fp2mul434_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
+
+// GF(p434^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
+void fp2inv434_mont(f2elm_t a);
+
+
+#endif
diff --git a/SIKE_sw/src/P434/generic/fp_generic.c b/SIKE_sw/src/P434/generic/fp_generic.c
new file mode 100644
index 0000000..5d585c5
--- /dev/null
+++ b/SIKE_sw/src/P434/generic/fp_generic.c
@@ -0,0 +1,259 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: portable modular arithmetic for P434
+*********************************************************************************************/
+
+#include "../P434_internal.h"
+#include "../../internal.h"
+
+// Global constants
+extern const uint64_t p434[NWORDS64_FIELD];
+extern const uint64_t p434p1[NWORDS64_FIELD]; 
+extern const uint64_t p434x2[NWORDS64_FIELD];
+extern const uint64_t p434x4[NWORDS64_FIELD];
+
+
+__inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision c = a - b + 2*p434, digit by digit; no final reduction is applied.
+    const digit_t* twice_p = (const digit_t*)p434x2;
+    unsigned int k, flag = 0;
+
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // First pass: c = a - b, borrow rippling upwards
+        SUBC(flag, a[k], b[k], flag, c[k]);
+    }
+
+    for (k = 0, flag = 0; k < NWORDS_FIELD; k++) {       // Second pass: c = c + 2*p434 on a fresh carry chain
+        ADDC(flag, c[k], twice_p[k], flag, c[k]);
+    }
+}
+
+
+__inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision c = a - b + 4*p434, digit by digit; no final reduction is applied.
+    const digit_t* four_p = (const digit_t*)p434x4;
+    unsigned int k, flag = 0;
+
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // First pass: c = a - b, borrow rippling upwards
+        SUBC(flag, a[k], b[k], flag, c[k]);
+    }
+
+    for (k = 0, flag = 0; k < NWORDS_FIELD; k++) {       // Second pass: c = c + 4*p434 on a fresh carry chain
+        ADDC(flag, c[k], four_p[k], flag, c[k]);
+    }
+}
+
+
+__inline void fpadd434(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Field addition on the redundant representation, c = a+b mod p434.
+  // Inputs: a, b in [0, 2*p434-1]
+  // Output: c in [0, 2*p434-1]
+    const digit_t* twice_p = (const digit_t*)p434x2;
+    unsigned int k, flag = 0;
+    digit_t fix;
+
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // Step 1: c = a + b
+        ADDC(flag, a[k], b[k], flag, c[k]);
+    }
+
+    for (k = 0, flag = 0; k < NWORDS_FIELD; k++) {       // Step 2: c = c - 2*p434, keeping the final borrow
+        SUBC(flag, c[k], twice_p[k], flag, c[k]);
+    }
+    fix = 0 - (digit_t)flag;                             // All-ones mask if step 2 borrowed, zero otherwise
+
+    flag = 0;
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // Step 3: add 2*p434 back under the mask (no branch)
+        ADDC(flag, c[k], twice_p[k] & fix, flag, c[k]);
+    }
+}
+
+
+__inline void fpsub434(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Field subtraction on the redundant representation, c = a-b mod p434.
+  // Inputs: a, b in [0, 2*p434-1]
+  // Output: c in [0, 2*p434-1]
+    const digit_t* twice_p = (const digit_t*)p434x2;
+    unsigned int k, flag = 0;
+    digit_t fix;
+
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // Step 1: c = a - b, keeping the final borrow
+        SUBC(flag, a[k], b[k], flag, c[k]);
+    }
+    fix = 0 - (digit_t)flag;                             // All-ones mask if a < b, zero otherwise
+
+    for (k = 0, flag = 0; k < NWORDS_FIELD; k++) {       // Step 2: add 2*p434 under the mask (no branch)
+        ADDC(flag, c[k], twice_p[k] & fix, flag, c[k]);
+    }
+}
+
+
+__inline void fpneg434(digit_t* a)
+{ // In-place modular negation, a = -a mod p434, computed as 2*p434 - a.
+  // Input/output: a in [0, 2*p434-1] 
+    const digit_t* twice_p = (const digit_t*)p434x2;
+    unsigned int k, flag = 0;
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // Single borrow chain over all digits
+        SUBC(flag, twice_p[k], a[k], flag, a[k]);
+    }
+}
+
+
+void fpdiv2_434(const digit_t* a, digit_t* c)
+{ // Halving modulo p434, c = a/2 mod p434.
+  // Input : a in [0, 2*p434-1]
+  // Output: c in [0, 2*p434-1]
+    const digit_t* prime = (const digit_t*)p434;
+    const digit_t odd_mask = 0 - (digit_t)(a[0] & 1);    // All ones when a is odd, zero when a is even
+    unsigned int k, cy = 0;
+
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // c = a + (p434 if a is odd, else 0), without branching
+        ADDC(cy, a[k], prime[k] & odd_mask, cy, c[k]);
+    }
+
+    mp_shiftr1(c, NWORDS_FIELD);                         // Final halving step on the (now even) sum; helper is defined elsewhere
+}
+
+
+void fpcorrection434(digit_t* a)
+{ // Final correction: brings a field element a from [0, 2*p434-1] down to [0, p434-1], in place.
+    const digit_t* prime = (const digit_t*)p434;
+    unsigned int k, flag = 0;
+    digit_t fix;
+
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // Step 1: a = a - p434, keeping the final borrow
+        SUBC(flag, a[k], prime[k], flag, a[k]);
+    }
+    fix = 0 - (digit_t)flag;                             // All-ones mask if a was already below p434
+
+    for (k = 0, flag = 0; k < NWORDS_FIELD; k++) {       // Step 2: undo the subtraction under the mask (no branch)
+        ADDC(flag, a[k], prime[k] & fix, flag, a[k]);
+    }
+}
+
+
+void digit_x_digit(const digit_t a, const digit_t b, digit_t* c)
+{ // Digit multiplication, digit * digit -> 2-digit result: c[0] receives the low digit and c[1] the high digit of a*b.
+    register digit_t al, ah, bl, bh, temp;
+    digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
+    digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4);   // Select the lower / upper half of a digit
+
+    al = a & mask_low;                        // Low part
+    ah = a >> (sizeof(digit_t) * 4);          // High part
+    bl = b & mask_low;
+    bh = b >> (sizeof(digit_t) * 4);
+
+    albl = al*bl;                             // Four half-digit by half-digit partial products; each fits in one digit
+    albh = al*bh;
+    ahbl = ah*bl;
+    ahbh = ah*bh;
+    c[0] = albl & mask_low;                   // C00
+
+    res1 = albl >> (sizeof(digit_t) * 4);     // Second quarter of the result: high half of al*bl plus low halves of both cross products
+    res2 = ahbl & mask_low;
+    res3 = albh & mask_low;  
+    temp = res1 + res2 + res3;
+    carry = temp >> (sizeof(digit_t) * 4);    // Overflow of the second quarter, carried into the third
+    c[0] ^= temp << (sizeof(digit_t) * 4);    // C01 (upper half of c[0] is still zero here, so XOR just inserts the bits)
+
+    res1 = ahbl >> (sizeof(digit_t) * 4);     // Third quarter: high halves of both cross products plus low half of ah*bh
+    res2 = albh >> (sizeof(digit_t) * 4);
+    res3 = ahbh & mask_low;
+    temp = res1 + res2 + res3 + carry;
+    c[1] = temp & mask_low;                   // C10 
+    carry = temp & mask_high;                 // Overflow of the third quarter, left in place in the upper half
+    c[1] ^= (ahbh & mask_high) + carry;       // C11
+}
+
+
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. Writes c[0 .. 2*nwords-1].
+    unsigned int i, j;
+    digit_t t = 0, u = 0, v = 0, UV[2];        // (t, u, v) is the three-digit column accumulator, v being the lowest digit
+    unsigned int carry = 0;
+    // Lower half: output column i sums all products a[j]*b[i-j] with 0 <= j <= i.
+    for (i = 0; i < nwords; i++) {
+        for (j = 0; j <= i; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]);    // Presumably leaves the product as UV[1]:UV[0] (high:low) - confirm against the MUL macro
+            ADDC(0, UV[0], v, carry, v); 
+            ADDC(carry, UV[1], u, carry, u); 
+            t += carry;
+        }
+        c[i] = v;                              // Column finished: emit its lowest digit
+        v = u;                                 // ... and shift the accumulator down by one digit
+        u = t;
+        t = 0;
+    }
+
+    for (i = nwords; i < 2*nwords-1; i++) {    // Upper half: columns nwords .. 2*nwords-2
+        for (j = i-nwords+1; j < nwords; j++) {   // Only index pairs with i-j <= nwords-1 exist in these columns
+            MUL(a[j], b[i-j], UV+1, UV[0]); 
+            ADDC(0, UV[0], v, carry, v); 
+            ADDC(carry, UV[1], u, carry, u); 
+            t += carry;
+        }
+        c[i] = v;
+        v = u; 
+        u = t;
+        t = 0;
+    }
+    c[2*nwords-1] = v;                         // Top digit is whatever remains in the accumulator
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
+  // mc = ma*R^-1 mod p434x2, where R = 2^448.
+  // If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
+  // ma is assumed to be in Montgomery representation. Reads ma[0 .. 2*NWORDS_FIELD-1], writes mc[0 .. NWORDS_FIELD-1].
+    unsigned int i, j, carry, count = p434_ZERO_WORDS;
+    digit_t UV[2], t = 0, u = 0, v = 0;        // (t, u, v) is the three-digit column accumulator, v being the lowest digit
+
+    for (i = 0; i < NWORDS_FIELD; i++) {       // mc is cleared before ma is read, so mc must not overlap ma
+        mc[i] = 0;
+    }
+
+    for (i = 0; i < NWORDS_FIELD; i++) {       // First half: columns 0 .. NWORDS_FIELD-1; mc[i] is stored as each column completes
+        for (j = 0; j < i; j++) {
+            if (j < (i-p434_ZERO_WORDS+1)) {   // NOTE(review): i is unsigned, so if p434_ZERO_WORDS is a plain int constant this bound wraps for i < p434_ZERO_WORDS-1 and the guard filters nothing; presumably harmless because the low words of p434p1 are zero - confirm
+                MUL(mc[j], ((digit_t*)p434p1)[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v); 
+                ADDC(carry, UV[1], u, carry, u); 
+                t += carry; 
+            }
+        }
+        ADDC(0, v, ma[i], carry, v);           // Add the matching input digit to the column sum
+        ADDC(carry, u, 0, carry, u); 
+        t += carry; 
+        mc[i] = v;
+        v = u;                                 // Shift the accumulator down by one digit
+        u = t;
+        t = 0;
+    }    
+
+    for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {   // Second half: columns NWORDS_FIELD .. 2*NWORDS_FIELD-2
+        if (count > 0) {                       // count drops by one per column until it reaches zero, widening the j range used below
+            count -= 1;
+        }
+        for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
+            if (j < (NWORDS_FIELD-count)) { 
+                MUL(mc[j], ((digit_t*)p434p1)[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v); 
+                ADDC(carry, UV[1], u, carry, u); 
+                t += carry;
+            }
+        }
+        ADDC(0, v, ma[i], carry, v); 
+        ADDC(carry, u, 0, carry, u); 
+        t += carry; 
+        mc[i-NWORDS_FIELD] = v;                // Overwrites a low entry of mc that later columns no longer read (their j starts above it)
+        v = u;
+        u = t;
+        t = 0;
+    }
+    ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v);   // Top digit: remaining accumulator plus the last input digit; the carry out is discarded
+    mc[NWORDS_FIELD-1] = v;
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/P503/AMD64/fp_x64.c b/SIKE_sw/src/P503/AMD64/fp_x64.c
new file mode 100644
index 0000000..ead9e6c
--- /dev/null
+++ b/SIKE_sw/src/P503/AMD64/fp_x64.c
@@ -0,0 +1,572 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: modular arithmetic optimized for x64 platforms for P503
+*********************************************************************************************/
+
+#include "../P503_internal.h"
+#include "../../internal.h"
+
+// Global constants
+extern const uint64_t p503[NWORDS_FIELD];
+extern const uint64_t p503p1[NWORDS_FIELD];
+extern const uint64_t p503x2[NWORDS_FIELD];
+extern const uint64_t p503x4[NWORDS_FIELD];
+
+
+__inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision c = a - b + 2*p503; no final reduction is applied.
+#if (OS_TARGET == OS_WIN)
+    const digit_t* twice_p = (const digit_t*)p503x2;
+    unsigned int k, flag = 0;
+
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // First pass: c = a - b, borrow rippling upwards
+        SUBC(flag, a[k], b[k], flag, c[k]);
+    }
+
+    for (k = 0, flag = 0; k < NWORDS_FIELD; k++) {       // Second pass: c = c + 2*p503 on a fresh carry chain
+        ADDC(flag, c[k], twice_p[k], flag, c[k]);
+    }
+
+#elif (OS_TARGET == OS_LINUX)
+
+    mp_sub503_p2_asm(a, b, c);                           // Hand-written assembly routine for the same operation
+
+#endif
+}
+
+
+__inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision c = a - b + 4*p503; no final reduction is applied.
+#if (OS_TARGET == OS_WIN)
+    const digit_t* four_p = (const digit_t*)p503x4;
+    unsigned int k, flag = 0;
+
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // First pass: c = a - b, borrow rippling upwards
+        SUBC(flag, a[k], b[k], flag, c[k]);
+    }
+
+    for (k = 0, flag = 0; k < NWORDS_FIELD; k++) {       // Second pass: c = c + 4*p503 on a fresh carry chain
+        ADDC(flag, c[k], four_p[k], flag, c[k]);
+    }
+
+#elif (OS_TARGET == OS_LINUX)
+
+    mp_sub503_p4_asm(a, b, c);                           // Hand-written assembly routine for the same operation
+
+#endif
+}
+
+
+__inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Field addition on the redundant representation, c = a+b mod p503.
+  // Inputs: a, b in [0, 2*p503-1]
+  // Output: c in [0, 2*p503-1]
+
+#if (OS_TARGET == OS_WIN)
+    const digit_t* twice_p = (const digit_t*)p503x2;
+    unsigned int k, flag = 0;
+    digit_t fix;
+
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // Step 1: c = a + b
+        ADDC(flag, a[k], b[k], flag, c[k]);
+    }
+
+    for (k = 0, flag = 0; k < NWORDS_FIELD; k++) {       // Step 2: c = c - 2*p503, keeping the final borrow
+        SUBC(flag, c[k], twice_p[k], flag, c[k]);
+    }
+    fix = 0 - (digit_t)flag;                             // All-ones mask if step 2 borrowed, zero otherwise
+
+    flag = 0;
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // Step 3: add 2*p503 back under the mask (no branch)
+        ADDC(flag, c[k], twice_p[k] & fix, flag, c[k]);
+    }
+
+#elif (OS_TARGET == OS_LINUX)
+
+    fpadd503_asm(a, b, c);                               // Hand-written assembly routine for the same operation
+
+#endif
+}
+
+
+__inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Field subtraction on the redundant representation, c = a-b mod p503.
+  // Inputs: a, b in [0, 2*p503-1]
+  // Output: c in [0, 2*p503-1]
+
+#if (OS_TARGET == OS_WIN)
+    const digit_t* twice_p = (const digit_t*)p503x2;
+    unsigned int k, flag = 0;
+    digit_t fix;
+
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // Step 1: c = a - b, keeping the final borrow
+        SUBC(flag, a[k], b[k], flag, c[k]);
+    }
+    fix = 0 - (digit_t)flag;                             // All-ones mask if a < b, zero otherwise
+
+    for (k = 0, flag = 0; k < NWORDS_FIELD; k++) {       // Step 2: add 2*p503 under the mask (no branch)
+        ADDC(flag, c[k], twice_p[k] & fix, flag, c[k]);
+    }
+
+#elif (OS_TARGET == OS_LINUX)
+
+    fpsub503_asm(a, b, c);                               // Hand-written assembly routine for the same operation
+
+#endif
+}
+
+
+__inline void fpneg503(digit_t* a)
+{ // In-place modular negation, a = -a mod p503, computed as 2*p503 - a.
+  // Input/output: a in [0, 2*p503-1] 
+    const digit_t* twice_p = (const digit_t*)p503x2;
+    unsigned int k, flag = 0;
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // Single borrow chain over all digits
+        SUBC(flag, twice_p[k], a[k], flag, a[k]);
+    }
+}
+
+
+void fpdiv2_503(const digit_t* a, digit_t* c)
+{ // Halving modulo p503, c = a/2 mod p503.
+  // Input : a in [0, 2*p503-1]
+  // Output: c in [0, 2*p503-1]
+    const digit_t* prime = (const digit_t*)p503;
+    const digit_t odd_mask = 0 - (digit_t)(a[0] & 1);    // All ones when a is odd, zero when a is even
+    unsigned int k, cy = 0;
+
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // c = a + (p503 if a is odd, else 0), without branching
+        ADDC(cy, a[k], prime[k] & odd_mask, cy, c[k]);
+    }
+
+    mp_shiftr1(c, NWORDS_FIELD);                         // Final halving step on the (now even) sum; helper is defined elsewhere
+}
+
+
+void fpcorrection503(digit_t* a)
+{ // Final correction: brings a field element a from [0, 2*p503-1] down to [0, p503-1], in place.
+    const digit_t* prime = (const digit_t*)p503;
+    unsigned int k, flag = 0;
+    digit_t fix;
+
+    for (k = 0; k < NWORDS_FIELD; k++) {                 // Step 1: a = a - p503, keeping the final borrow
+        SUBC(flag, a[k], prime[k], flag, a[k]);
+    }
+    fix = 0 - (digit_t)flag;                             // All-ones mask if a was already below p503
+
+    for (k = 0, flag = 0; k < NWORDS_FIELD; k++) {       // Step 2: undo the subtraction under the mask (no branch)
+        ADDC(flag, a[k], prime[k] & fix, flag, a[k]);
+    }
+}
+
+
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords.
+  // nwords is ignored: the unrolled code below reads a[0..7], b[0..7] and writes c[0..15], and mul503_asm receives no length.
+    (void)nwords;
+
+#if (OS_TARGET == OS_WIN)
+    digit_t t = 0;
+    uint128_t uv = {0};
+    unsigned int carry = 0;
+    // Column 0. Presumably MULADD128(x, y, uv, carry, uv) performs uv += x*y with carry out - confirm against the macro. (t, uv[1], uv[0]) is the column accumulator.
+    MULADD128(a[0], b[0], uv, carry, uv);
+    t += carry;
+    c[0] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 1: products a[i]*b[j] with i+j = 1
+    MULADD128(a[0], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[0], uv, carry, uv);
+    t += carry;
+    c[1] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 2: i+j = 2
+    MULADD128(a[0], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[0], uv, carry, uv);
+    t += carry;
+    c[2] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 3: i+j = 3
+    MULADD128(a[0], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[0], uv, carry, uv);
+    t += carry;
+    c[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 4: i+j = 4
+    MULADD128(a[0], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[0], uv, carry, uv);
+    t += carry;
+    c[4] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 5: i+j = 5
+    MULADD128(a[0], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[0], uv, carry, uv);
+    t += carry;
+    c[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 6: i+j = 6
+    MULADD128(a[0], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[0], uv, carry, uv);
+    t += carry;
+    c[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 7: i+j = 7 (the widest column, eight products)
+    MULADD128(a[0], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[0], uv, carry, uv);
+    t += carry;
+    c[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 8: i+j = 8
+    MULADD128(a[7], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[7], uv, carry, uv);
+    t += carry;
+    c[8] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 9: i+j = 9
+    MULADD128(a[7], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[7], uv, carry, uv);
+    t += carry;
+    c[9] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 10: i+j = 10
+    MULADD128(a[7], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[7], uv, carry, uv);
+    t += carry;
+    c[10] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 11: i+j = 11
+    MULADD128(a[7], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[7], uv, carry, uv);
+    t += carry;
+    c[11] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;    
+    // Column 12: i+j = 12
+    MULADD128(a[7], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[7], uv, carry, uv);
+    t += carry;
+    c[12] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 13: i+j = 13
+    MULADD128(a[7], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[7], uv, carry, uv);
+    t += carry;
+    c[13] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    // Column 14 and the top digit: t is not read again, so it is neither updated nor reset here
+    MULADD128(a[7], b[7], uv, carry, uv);
+    c[14] = uv[0];
+    c[15] = uv[1];
+
+#elif (OS_TARGET == OS_LINUX)
+    
+    mul503_asm(a, b, c);
+
+#endif
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Montgomery reduction exploiting special form of the prime.
+  // mc = ma*R^-1 mod p503x2, where R = 2^512.
+  // If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1].
+  // ma is assumed to be in Montgomery representation.
+  // The unrolled path reads ma[0..15] and writes mc[0..7]; only p503p1[3..7] is used, presumably because words 0..2 of p503+1 are zero - confirm.
+#if (OS_TARGET == OS_WIN)
+    unsigned int carry;
+    digit_t t = 0;
+    uint128_t uv = {0};
+    // Columns 0-2 have no products, so the first three digits are copied straight from ma.
+    mc[0] = ma[0];
+    mc[1] = ma[1];
+    mc[2] = ma[2];
+    MUL128(mc[0], ((digit_t*)p503p1)[3], uv);   // Column 3
+    ADDC(0, uv[0], ma[3], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    mc[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = 0;
+    // Column 4
+    MULADD128(mc[0], ((digit_t*)p503p1)[4], uv, carry, uv);   // Carry of this first step is not added to t; presumably it cannot occur because uv[1] is 0 here - confirm against MULADD128
+    MULADD128(mc[1], ((digit_t*)p503p1)[3], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[4], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[4] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 5
+    MULADD128(mc[0], ((digit_t*)p503p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p503p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p503p1)[3], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[5], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 6
+    MULADD128(mc[0], ((digit_t*)p503p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p503p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p503p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p503p1)[3], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[6], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 7
+    MULADD128(mc[0], ((digit_t*)p503p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p503p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p503p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p503p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p503p1)[3], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[7], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 8: from here on each result digit overwrites a low entry of mc that later columns no longer read
+    MULADD128(mc[1], ((digit_t*)p503p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p503p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p503p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p503p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p503p1)[3], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[8], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[0] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 9
+    MULADD128(mc[2], ((digit_t*)p503p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p503p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p503p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p503p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p503p1)[3], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[9], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[1] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 10
+    MULADD128(mc[3], ((digit_t*)p503p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p503p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p503p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p503p1)[4], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p503p1)[3], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[10], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[2] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 11
+    MULADD128(mc[4], ((digit_t*)p503p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p503p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p503p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p503p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[11], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 12
+    MULADD128(mc[5], ((digit_t*)p503p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p503p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p503p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[12], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[4] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 13
+    MULADD128(mc[6], ((digit_t*)p503p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p503p1)[6], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[13], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 14 and the top digit; t is not read after this point
+    MULADD128(mc[7], ((digit_t*)p503p1)[7], uv, carry, uv);   // Reads the old mc[7] before it is overwritten below
+    t += carry;
+    ADDC(0, uv[0], ma[14], carry, mc[6]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    ADDC(0, uv[1], ma[15], carry, mc[7]);       // The final carry out is discarded
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    
+    rdc503_asm(ma, mc);    
+
+#endif
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/P503/AMD64/fp_x64_asm.S b/SIKE_sw/src/P503/AMD64/fp_x64_asm.S
new file mode 100644
index 0000000..82ccd72
--- /dev/null
+++ b/SIKE_sw/src/P503/AMD64/fp_x64_asm.S
@@ -0,0 +1,1824 @@
+//*******************************************************************************************
+// SIDH: an efficient supersingular isogeny cryptography library 
+// Copyright (c) Microsoft Corporation
+//
+// Website: https://github.com/microsoft/PQCrypto-SIDH
+// Released under MIT license 
+//
+// Abstract: field arithmetic in x64 assembly for P503 on Linux
+//*******************************************************************************************  
+
+.intel_syntax noprefix
+
+// Format function and variable names for Mac OS X: fmt(f) prepends an underscore to f on Apple targets and leaves f unchanged elsewhere
+#if defined(__APPLE__)
+    #define fmt(f)    _##f
+#else
+    #define fmt(f)    f
+#endif
+
+// Registers that are used for parameter passing (reg_p1, reg_p2, reg_p3 hold the three pointer arguments of the routines below):
+#define reg_p1  rdi
+#define reg_p2  rsi
+#define reg_p3  rdx
+
+// Define addition instructions (only when _MULX_ is defined): with _ADX_, ADD1/ADC1 map to adox and ADD2/ADC2 to adcx; otherwise all four map to plain add/adc
+#ifdef _MULX_
+#ifdef _ADX_
+
+#define ADD1    adox
+#define ADC1    adox
+#define ADD2    adcx
+#define ADC2    adcx
+
+#else
+
+#define ADD1    add
+#define ADC1    adc
+#define ADD2    add
+#define ADC2    adc
+
+#endif   
+#endif
+
+
+.text
+//***********************************************************************
+//  Field addition with a branch-free correction by 2*p503
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2], minus 2*p503 unless that subtraction borrows
+//*********************************************************************** 
+.global fmt(fpadd503_asm)
+fmt(fpadd503_asm):
+  push   r12                        // r12-r15 are used as scratch and restored before returning
+  push   r13
+  push   r14
+  push   r15
+  
+  xor    rax, rax                   // rax = 0; it becomes the correction mask below
+  mov    r8, [reg_p1]               // r8..r15 = the eight 64-bit words of a
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56] 
+  add    r8, [reg_p2]               // r8..r15 = a + b (the carry out of the top word is not used)
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  adc    r12, [reg_p2+32] 
+  adc    r13, [reg_p2+40] 
+  adc    r14, [reg_p2+48] 
+  adc    r15, [reg_p2+56]
+
+  mov    rcx, [rip+fmt(p503x2)]     // Subtract 2*p503 word by word; mov leaves the flags untouched, so the borrow chain survives the loads
+  sub    r8, rcx
+  mov    rcx, [rip+fmt(p503x2)+8]
+  sbb    r9, rcx
+  sbb    r10, rcx                   // Reuses the word from offset 8 for word 2 (offset 16 is never loaded); presumably both words of p503x2 are equal - confirm against the constant
+  mov    rcx, [rip+fmt(p503x2)+24]
+  sbb    r11, rcx
+  mov    rcx, [rip+fmt(p503x2)+32]
+  sbb    r12, rcx
+  mov    rcx, [rip+fmt(p503x2)+40]
+  sbb    r13, rcx
+  mov    rcx, [rip+fmt(p503x2)+48]
+  sbb    r14, rcx
+  mov    rcx, [rip+fmt(p503x2)+56]
+  sbb    r15, rcx
+  sbb    rax, 0                     // rax = all ones if the subtraction borrowed, 0 otherwise
+  
+  mov    rdi, [rip+fmt(p503x2)]     // From here on rdi/rsi no longer hold the a/b pointers
+  and    rdi, rax                   // Masked words of 2*p503: either the constant or zero
+  mov    rsi, [rip+fmt(p503x2)+8]
+  and    rsi, rax
+  mov    rcx, [rip+fmt(p503x2)+24]
+  and    rcx, rax
+  
+  add    r8, rdi                    // Add the masked 2*p503 back, low four words first
+  adc    r9, rsi  
+  adc    r10, rsi                   // Same word reuse as above for word 2
+  adc    r11, rcx 
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11 
+  setc   cl                         // Save the carry of the low half: the and instructions below overwrite the flags
+
+  mov    r8, [rip+fmt(p503x2)+32]   // r8..r11 are free again and hold the masked upper words
+  and    r8, rax
+  mov    r9, [rip+fmt(p503x2)+40]
+  and    r9, rax
+  mov    r10, [rip+fmt(p503x2)+48]
+  and    r10, rax
+  mov    r11, [rip+fmt(p503x2)+56]
+  and    r11, rax
+  
+  bt     rcx, 0                     // Restore the saved carry into CF
+  adc    r12, r8   
+  adc    r13, r9  
+  adc    r14, r10  
+  adc    r15, r11  
+  mov    [reg_p3+32], r12 
+  mov    [reg_p3+40], r13 
+  mov    [reg_p3+48], r14 
+  mov    [reg_p3+56], r15 
+  
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Field subtraction with a branch-free correction by 2*p503
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2], plus 2*p503 if the subtraction borrows
+//*********************************************************************** 
+.global fmt(fpsub503_asm)
+fmt(fpsub503_asm):
+  push   r12                        // r12-r15 are used as scratch and restored before returning
+  push   r13
+  push   r14
+  push   r15
+  
+  xor    rax, rax                   // rax = 0; it becomes the correction mask below
+  mov    r8, [reg_p1]               // r8..r15 = the eight 64-bit words of a
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56]
+  sub    r8, [reg_p2]               // r8..r15 = a - b
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  sbb    r14, [reg_p2+48] 
+  sbb    r15, [reg_p2+56]
+  sbb    rax, 0                     // rax = all ones if a - b borrowed, 0 otherwise
+  
+  mov    rdi, [rip+fmt(p503x2)]     // From here on rdi/rsi no longer hold the a/b pointers
+  and    rdi, rax                   // Masked words of 2*p503: either the constant or zero
+  mov    rsi, [rip+fmt(p503x2)+8]
+  and    rsi, rax
+  mov    rcx, [rip+fmt(p503x2)+24]
+  and    rcx, rax
+  
+  add    r8, rdi                    // Add the masked 2*p503, low four words first
+  adc    r9, rsi  
+  adc    r10, rsi                   // Reuses the word from offset 8 for word 2 (offset 16 is never loaded); presumably both words of p503x2 are equal - confirm against the constant
+  adc    r11, rcx 
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11 
+  setc   cl                         // Save the carry of the low half: the and instructions below overwrite the flags
+
+  mov    r8, [rip+fmt(p503x2)+32]   // r8..r11 are free again and hold the masked upper words
+  and    r8, rax
+  mov    r9, [rip+fmt(p503x2)+40]
+  and    r9, rax
+  mov    r10, [rip+fmt(p503x2)+48]
+  and    r10, rax
+  mov    r11, [rip+fmt(p503x2)+56]
+  and    r11, rax
+  
+  bt     rcx, 0                     // Restore the saved carry into CF
+  adc    r12, r8   
+  adc    r13, r9  
+  adc    r14, r10  
+  adc    r15, r11  
+  mov    [reg_p3+32], r12 
+  mov    [reg_p3+40], r13 
+  mov    [reg_p3+48], r14 
+  mov    [reg_p3+56], r15 
+  
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+///////////////////////////////////////////////////////////////// MACRO: c [reg_p3] = a [reg_p1] - b [reg_p2] + P0, where P0 names an 8-word constant; contains no ret
+.macro SUB503_PX  P0
+  push   r12                        // r12-r14 are used as scratch and restored at the end of the macro
+  push   r13
+  push   r14
+  
+  mov    r8, [reg_p1]               // r8..r14, rcx = the eight 64-bit words of a
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    rcx, [reg_p1+56]
+  sub    r8, [reg_p2]               // a - b; the final borrow is not kept
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  sbb    r14, [reg_p2+48] 
+  sbb    rcx, [reg_p2+56]
+
+  mov    rax, [rip+\P0]             // Unconditionally add the constant P0; rdi/rsi are reused as temporaries from here on
+  mov    rdi, [rip+\P0+8]
+  mov    rsi, [rip+\P0+24]
+  add    r8, rax
+  mov    rax, [rip+\P0+32]          // mov leaves the flags untouched, so the carry chain continues across the loads
+  adc    r9, rdi  
+  adc    r10, rdi                   // Reuses the word from offset 8 for word 2 (offset 16 is never loaded); presumably both words of P0 are equal - confirm for every constant passed in
+  adc    r11, rsi 
+  adc    r12, rax
+  mov    rdi, [rip+\P0+40]
+  mov    rsi, [rip+\P0+48]
+  mov    rax, [rip+\P0+56]
+  adc    r13, rdi  
+  adc    r14, rsi
+  adc    rcx, rax   
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12 
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], rcx
+  
+  pop    r14
+  pop    r13
+  pop    r12
+  .endm
+
+
+//***********************************************************************
+//  Multiprecision subtraction with correction with 2*p503 (SUB503_PX expanded with the p503x2 constant)
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p503
+//*********************************************************************** 
+.global fmt(mp_sub503_p2_asm)
+fmt(mp_sub503_p2_asm):
+
+  SUB503_PX  fmt(p503x2)
+  ret
+
+
+//***********************************************************************
+//  Multiprecision subtraction with correction with 4*p503 (SUB503_PX expanded with the p503x4 constant)
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p503
+//*********************************************************************** 
+.global fmt(mp_sub503_p4_asm)
+fmt(mp_sub503_p4_asm):
+
+  SUB503_PX  fmt(p503x4)
+  ret
+
+
+#ifdef _MULX_
+    
+///////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication
+// Inputs:  memory pointers M0 and M1
+// Outputs: memory pointer C and regs T1, T2, T3, T4, T8
+// Temps:   regs T0:T9
+/////////////////////////////////////////////////////////////////
+
+#ifdef _ADX_
+.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
+    mov    rdx, \M0          // Row 0: rdx = A0 (mulx takes one factor from rdx)
+    mulx   \T0, \T1, \M1     // T0:T1 = A0*B0
+    mov    \C, \T1           // C0_final
+    mulx   \T1, \T2, 8\M1    // T1:T2 = A0*B1
+    xor    rax, rax          // rax = 0; also clears CF and OF before the carry chains start
+    adox   \T0, \T2          // Row 0 is summed on the OF chain (adox)
+    mulx   \T2, \T3, 16\M1   // T2:T3 = A0*B2
+    adox   \T1, \T3        
+    mulx   \T3, \T4, 24\M1   // T3:T4 = A0*B3
+    adox   \T2, \T4 
+           
+    mov    rdx, 8\M0         // Row 1: rdx = A1
+    mulx   \T5, \T4, \M1     // T5:T4 = A1*B0
+    adox   \T3, rax          // Fold the last row-0 carry into T3
+    xor    rax, rax          // Reset CF and OF for row 1
+    mulx   \T6, \T7, 8\M1    // T6:T7 = A1*B1
+    adox   \T4, \T0          // OF chain adds the previous row, CF chain (adcx) sums the new row
+    mov    8\C, \T4          // C1_final  
+    adcx   \T5, \T7      
+    mulx   \T7, \T8, 16\M1   // T7:T8 = A1*B2
+    adcx   \T6, \T8  
+    adox   \T5, \T1      
+    mulx   \T8, \T9, 24\M1   // T8:T9 = A1*B3
+    adcx   \T7, \T9        
+    adcx   \T8, rax          // rax is zero: only the pending CF carry is added
+    adox   \T6, \T2
+    
+    mov    rdx, 16\M0        // Row 2: rdx = A2
+    mulx   \T1, \T0, \M1     // T1:T0 = A2*B0
+    adox   \T7, \T3
+    adox   \T8, rax          // rax is zero: only the pending OF carry is added
+    xor    rax, rax          // Reset CF and OF for row 2
+    mulx   \T2, \T3, 8\M1    // T2:T3 = A2*B1
+    adox   \T0, \T5   
+    mov    16\C, \T0         // C2_final 
+    adcx   \T1, \T3    
+    mulx   \T3, \T4, 16\M1   // T3:T4 = A2*B2
+    adcx   \T2, \T4 
+    adox   \T1, \T6       
+    mulx   \T4,\T9, 24\M1    // T4:T9 = A2*B3
+    adcx   \T3, \T9        
+    mov    rdx, 24\M0        // Row 3: rdx = A3 (mov does not disturb the flag chains)
+    adcx   \T4, rax         
+
+    adox   \T2, \T7
+    adox   \T3, \T8
+    adox   \T4, rax
+
+    mulx   \T5, \T0, \M1     // T5:T0 = A3*B0
+    xor    rax, rax          // Reset CF and OF for row 3
+    mulx   \T6, \T7, 8\M1    // T6:T7 = A3*B1
+    adcx   \T5, \T7 
+    adox   \T1, \T0          // T1 now holds result word 3, which stays in a register
+    mulx   \T7, \T8, 16\M1   // T7:T8 = A3*B2
+    adcx   \T6, \T8  
+    adox   \T2, \T5      
+    mulx   \T8, \T9, 24\M1   // T8:T9 = A3*B3
+    adcx   \T7, \T9        
+    adcx   \T8, rax         
+
+    adox   \T3, \T6          // Result words 3..7 end up in T1, T2, T3, T4, T8
+    adox   \T4, \T7
+    adox   \T8, rax
+.endm 
+
+#else
+
+.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
+    mov    rdx, \M0
+    mulx   \T0, \T1, \M1     // T0:T1 = A0*B0
+    mov    \C, \T1           // C0_final
+    mulx   \T1, \T2, 8\M1    // T1:T2 = A0*B1
+    add    \T0, \T2        
+    mulx   \T2, \T3, 16\M1   // T2:T3 = A0*B2
+    adc    \T1, \T3         
+    mulx   \T3, \T4, 24\M1   // T3:T4 = A0*B3
+    adc    \T2, \T4        
+    mov    rdx, 8\M0
+    adc    \T3, 0         
+
+    mulx   \T5, \T4, \M1     // T5:T4 = A1*B0
+    mulx   \T6, \T7, 8\M1    // T6:T7 = A1*B1
+    add    \T5, \T7        
+    mulx   \T7, \T8, 16\M1   // T7:T8 = A1*B2
+    adc    \T6, \T8        
+    mulx   \T8, \T9, 24\M1   // T8:T9 = A1*B3
+    adc    \T7, \T9        
+    adc    \T8, 0         
+
+    add    \T4, \T0
+    mov    8\C, \T4          // C1_final
+    adc    \T5, \T1
+    adc    \T6, \T2
+    adc    \T7, \T3
+    mov    rdx, 16\M0
+    adc    \T8, 0
+
+    mulx   \T1, \T0, \M1     // T1:T0 = A2*B0
+    mulx   \T2, \T3, 8\M1    // T2:T3 = A2*B1
+    add    \T1, \T3        
+    mulx   \T3, \T4, 16\M1   // T3:T4 = A2*B2
+    adc    \T2, \T4        
+    mulx   \T4,\T9, 24\M1    // T3:T4 = A2*B3
+    adc    \T3, \T9        
+    mov    rdx, 24\M0
+    adc    \T4, 0          
+
+    add    \T0, \T5
+    mov    16\C, \T0         // C2_final
+    adc    \T1, \T6
+    adc    \T2, \T7
+    adc    \T3, \T8
+    adc    \T4, 0
+
+    mulx   \T5, \T0, \M1     // T5:T0 = A3*B0
+    mulx   \T6, \T7, 8\M1    // T6:T7 = A3*B1
+    add    \T5, \T7        
+    mulx   \T7, \T8, 16\M1   // T7:T8 = A3*B2
+    adc    \T6, \T8        
+    mulx   \T8, \T9, 24\M1   // T8:T9 = A3*B3
+    adc    \T7, \T9         
+    adc    \T8, 0         
+
+    add    \T1, \T0
+    adc    \T2, \T5
+    adc    \T3, \T6
+    adc    \T4, \T7
+    adc    \T8, 0
+.endm
+#endif
+
+
+//*****************************************************************************
+//  503-bit multiplication using Karatsuba (one level), schoolbook (one level)
+//***************************************************************************** 
+.global fmt(mul503_asm)
+fmt(mul503_asm):             // reads a at [reg_p1] and b at [reg_p2] (8 words each), writes the 16-word product at [reg_p3]
+    push   r12
+    push   r13 
+    push   r14 
+    push   r15
+    mov    rcx, reg_p3       // output pointer kept in rcx; MUL256_SCHOOL overwrites rdx (NOTE(review): presumably reg_p3 is rdx - confirm)
+
+    // r8-r11 <- AH + AL, rax <- mask
+    xor    rax, rax
+    mov    r8, [reg_p1]
+    mov    r9, [reg_p1+8]
+    mov    r10, [reg_p1+16]
+    mov    r11, [reg_p1+24] 
+    push   rbx 
+    push   rbp
+    sub    rsp, 96           // scratch: [rsp..+24] AH+AL, [rsp+32..+56] BH+BL, [rsp+64..+88] masked correction term
+    add    r8, [reg_p1+32]
+    adc    r9, [reg_p1+40]
+    adc    r10, [reg_p1+48]
+    adc    r11, [reg_p1+56]
+    sbb    rax, 0            // rax = all ones if AH+AL carried out of 256 bits, else 0
+    mov    [rsp], r8
+    mov    [rsp+8], r9
+    mov    [rsp+16], r10
+    mov    [rsp+24], r11
+
+    // r12-r15 <- BH + BL, rbx <- mask
+    xor    rbx, rbx
+    mov    r12, [reg_p2]
+    mov    r13, [reg_p2+8]
+    mov    r14, [reg_p2+16]
+    mov    r15, [reg_p2+24]
+    add    r12, [reg_p2+32]
+    adc    r13, [reg_p2+40]
+    adc    r14, [reg_p2+48]
+    adc    r15, [reg_p2+56]
+    sbb    rbx, 0            // rbx = all ones if BH+BL carried out of 256 bits, else 0
+    mov    [rsp+32], r12
+    mov    [rsp+40], r13
+    mov    [rsp+48], r14
+    mov    [rsp+56], r15
+    
+    // r12-r15 <- masked (BH + BL), i.e. kept only if AH+AL carried
+    and    r12, rax
+    and    r13, rax
+    and    r14, rax
+    and    r15, rax
+
+    // r8-r11 <- masked (AH + AL), i.e. kept only if BH+BL carried
+    and    r8, rbx
+    and    r9, rbx
+    and    r10, rbx
+    and    r11, rbx
+
+    // r8-r11 <- masked (AH + AL) + masked (BH + BL)
+    add    r8, r12
+    adc    r9, r13
+    adc    r10, r14
+    adc    r11, r15
+    mov    [rsp+64], r8
+    mov    [rsp+72], r9
+    mov    [rsp+80], r10
+    mov    [rsp+88], r11
+
+    // [rcx+64], r9-r12, rbx <- (AH+AL) x (BH+BL), low part 
+    MUL256_SCHOOL  [rsp], [rsp+32], [rcx+64], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rbp 
+    mov    [rcx+88], r9      // product words 3-7 parked in the upper half of the output buffer
+    mov    [rcx+96], r10 
+    mov    [rcx+104], r11
+    mov    [rcx+112], r12
+    mov    [rcx+120], rbx
+
+    // [rcx], r9-r12, rbx <- AL x BL
+    MUL256_SCHOOL  [reg_p1], [reg_p2], [rcx], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rbp     // Result C0-C3 
+    mov    [rcx+24], r9  
+    mov    [rcx+32], r10 
+    mov    [rcx+40], r11
+    mov    [rcx+48], r12
+    mov    [rcx+56], rbx
+
+    // [rsp], rbx, rbp, r13-r15 <- AH x BH (words 0-2 overwrite the AH+AL scratch, which is no longer needed)
+    MUL256_SCHOOL  [reg_p1+32], [reg_p2+32], [rsp], r8, rbx, rbp, r13, r14, r9, r10, r11, r15, r12
+    
+    // r8-r11 <- (AH+AL) x (BH+BL), final step: add the masked correction to product words 4-7
+    mov    r8, [rsp+64]
+    mov    r9, [rsp+72]
+    mov    r10, [rsp+80]
+    mov    r11, [rsp+88]
+    mov    rax, [rcx+96]
+    add    r8, rax
+    mov    rax, [rcx+104]
+    adc    r9, rax
+    mov    rax, [rcx+112]
+    adc    r10, rax
+    mov    rax, [rcx+120]
+    adc    r11, rax
+    
+    // r12, rdi, rsi, rdx, r8-r11 <- (AH+AL) x (BH+BL) - ALxBL  (rdi/rsi are free: the input pointers are not read again)
+    mov    r12, [rcx+64]
+    mov    rdi, [rcx+72]
+    mov    rsi, [rcx+80]
+    mov    rdx, [rcx+88]
+    sub    r12, [rcx]
+    sbb    rdi, [rcx+8]
+    sbb    rsi, [rcx+16]
+    sbb    rdx, [rcx+24]
+    sbb    r8, [rcx+32]
+    sbb    r9, [rcx+40]
+    sbb    r10, [rcx+48]
+    sbb    r11, [rcx+56]
+    
+    // r12, rdi, rsi, rdx, r8-r11 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
+    sub    r12, [rsp]
+    sbb    rdi, [rsp+8]
+    sbb    rsi, [rsp+16]
+    sbb    rdx, rbx
+    sbb    r8, rbp
+    sbb    r9, r13
+    sbb    r10, r14
+    sbb    r11, r15
+    
+    add    r12, [rcx+32]    // add the middle term at word offset 4, on top of the high half of ALxBL
+    mov    [rcx+32], r12    // Result C4-C7
+    adc    rdi, [rcx+40]
+    mov    [rcx+40], rdi 
+    adc    rsi, [rcx+48]
+    mov    [rcx+48], rsi 
+    adc    rdx, [rcx+56]
+    mov    [rcx+56], rdx 
+    mov    rax, [rsp]
+    adc    r8, rax          // upper middle words are added to AHxBH words 0-3
+    mov    [rcx+64], r8    // Result C8-C15
+    mov    rax, [rsp+8]
+    adc    r9, rax
+    mov    [rcx+72], r9 
+    mov    rax, [rsp+16]
+    adc    r10, rax
+    mov    [rcx+80], r10 
+    adc    r11, rbx
+    mov    [rcx+88], r11
+    adc    rbp, 0           // propagate the carry through AHxBH words 4-7
+    mov    [rcx+96], rbp
+    adc    r13, 0
+    mov    [rcx+104], r13
+    adc    r14, 0
+    mov    [rcx+112], r14
+    adc    r15, 0
+    mov    [rcx+120], r15  
+    
+    add    rsp, 96          // release the scratch area
+    pop    rbp  
+    pop    rbx
+    pop    r15
+    pop    r14
+    pop    r13
+    pop    r12
+    ret
+
+#else
+
+//***********************************************************************
+//  Integer multiplication
+//  Based on Karatsuba method (one level over 4-word halves; each half product is built column by column with mul/add/adc)
+//  Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
+//  NOTE: a=c or b=c are not allowed
+//***********************************************************************
+.global fmt(mul503_asm)
+fmt(mul503_asm):
+  push   r12
+  push   r13
+  push   r14
+  mov    rcx, reg_p3       // output pointer kept in rcx; rdx is overwritten by every mul below (NOTE(review): presumably reg_p3 is rdx - confirm)
+  
+  // rcx[0-3] <- AH+AL  (the output buffer doubles as scratch)
+  xor    rax, rax
+  mov    r8, [reg_p1+32]
+  mov    r9, [reg_p1+40]
+  mov    r10, [reg_p1+48]
+  mov    r11, [reg_p1+56]
+  add    r8, [reg_p1] 
+  adc    r9, [reg_p1+8] 
+  adc    r10, [reg_p1+16] 
+  adc    r11, [reg_p1+24] 
+  push   r15  
+  mov    [rcx], r8
+  mov    [rcx+8], r9
+  mov    [rcx+16], r10
+  mov    [rcx+24], r11
+  sbb    rax, 0            // rax = all ones if AH+AL carried out, else 0 (push/mov above leave CF intact)
+  sub    rsp, 80           // Allocating space in stack
+       
+  // r12-r15 <- BH+BL
+  xor    rdx, rdx
+  mov    r12, [reg_p2+32]
+  mov    r13, [reg_p2+40]
+  mov    r14, [reg_p2+48]
+  mov    r15, [reg_p2+56]
+  add    r12, [reg_p2] 
+  adc    r13, [reg_p2+8] 
+  adc    r14, [reg_p2+16] 
+  adc    r15, [reg_p2+24] 
+  sbb    rdx, 0            // rdx = all ones if BH+BL carried out, else 0
+  mov    [rsp+64], rax     // saved carry mask of AH+AL
+  mov    [rsp+72], rdx     // saved carry mask of BH+BL
+  
+  // (rsp[0-3],r8,r9,r10,r11) <- (AH+AL)*(BH+BL)
+  mov    rax, [rcx]
+  mul    r12
+  mov    [rsp], rax        // c0
+  mov    r8, rdx
+  
+  xor    r9, r9
+  mov    rax, [rcx]
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx
+  
+  xor    r10, r10
+  mov    rax, [rcx+8] 
+  mul    r12
+  add    r8, rax
+  mov    [rsp+8], r8       // c1 
+  adc    r9, rdx
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [rcx] 
+  mul    r14
+  add    r9, rax 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+16] 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+8] 
+  mul    r13
+  add    r9, rax
+  mov    [rsp+16], r9      // c2 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [rcx] 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [rcx+24] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [rcx+8] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [rcx+16] 
+  mul    r13
+  add    r10, rax
+  mov    [rsp+24], r10     // c3 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [rcx+8] 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [rcx+24] 
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [rcx+16] 
+  mul    r14
+  add    r8, rax
+  mov    [rsp+32], r8      // c4 (r8 keeps c4 too; this slot is rewritten after the carry correction below)
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r11, r11
+  mov    rax, [rcx+16]
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx
+  adc    r11, 0
+
+  mov    rax, [rcx+24] 
+  mul    r14
+  add    r9, rax          // c5 
+  adc    r10, rdx
+  adc    r11, 0
+
+  mov    rax, [rcx+24] 
+  mul    r15
+  add    r10, rax         // c6 
+  adc    r11, rdx         // c7 
+  // Carry correction on words c4-c7: add (BH+BL) if AH+AL carried, then (AH+AL) if BH+BL carried
+  mov    rax, [rsp+64]
+  and    r12, rax
+  and    r13, rax
+  and    r14, rax
+  and    r15, rax
+  add    r12, r8
+  adc    r13, r9
+  adc    r14, r10
+  adc    r15, r11
+
+  mov    rax, [rsp+72]  
+  mov    r8, [rcx]
+  mov    r9, [rcx+8]
+  mov    r10, [rcx+16]
+  mov    r11, [rcx+24]
+  and    r8, rax
+  and    r9, rax
+  and    r10, rax
+  and    r11, rax
+  add    r8, r12
+  adc    r9, r13
+  adc    r10, r14
+  adc    r11, r15
+  mov    [rsp+32], r8      // corrected c4-c7 of (AH+AL)*(BH+BL)
+  mov    [rsp+40], r9
+  mov    [rsp+48], r10
+  mov    [rsp+56], r11
+  
+  // rcx[0-7] <- AL*BL
+  mov    r11, [reg_p1]
+  mov    rax, [reg_p2] 
+  mul    r11
+  xor    r9, r9
+  mov    [rcx], rax        // c0
+  mov    r8, rdx
+  
+  mov    r14, [reg_p1+16] 
+  mov    rax, [reg_p2+8]
+  mul    r11
+  xor    r10, r10
+  add    r8, rax
+  adc    r9, rdx
+
+  mov    r12, [reg_p1+8] 
+  mov    rax, [reg_p2] 
+  mul    r12
+  add    r8, rax
+  mov    [rcx+8], r8       // c1 
+  adc    r9, rdx
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+16] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    r13, [reg_p2] 
+  mov    rax, r14 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+8] 
+  mul    r12
+  add    r9, rax
+  mov    [rcx+16], r9      // c2 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [reg_p2+24] 
+  mul    r11
+  mov    r15, [reg_p1+24] 
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, r15 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+16] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+8] 
+  mul    r14
+  add    r10, rax
+  mov    [rcx+24], r10     // c3 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [reg_p2+24] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+8] 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+16] 
+  mul    r14
+  add    r8, rax
+  mov    [rcx+32], r8     // c4 
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+24]
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+
+  mov    rax, [reg_p2+16] 
+  mul    r15
+  add    r9, rax
+  mov    [rcx+40], r9      // c5 
+  adc    r10, rdx
+  adc    r8, 0
+
+  mov    rax, [reg_p2+24] 
+  mul    r15
+  add    r10, rax
+  mov    [rcx+48], r10     // c6 
+  adc    r8, rdx   
+  mov    [rcx+56], r8      // c7 
+
+  // rcx[8-15] <- AH*BH  (same column schedule as AL*BL, with every offset shifted by 32 bytes)
+  mov    r11, [reg_p1+32]
+  mov    rax, [reg_p2+32] 
+  mul    r11
+  xor    r9, r9
+  mov    [rcx+64], rax     // c0
+  mov    r8, rdx
+  
+  mov    r14, [reg_p1+48] 
+  mov    rax, [reg_p2+40]
+  mul    r11
+  xor    r10, r10
+  add    r8, rax
+  adc    r9, rdx
+
+  mov    r12, [reg_p1+40] 
+  mov    rax, [reg_p2+32] 
+  mul    r12
+  add    r8, rax
+  mov    [rcx+72], r8      // c1 
+  adc    r9, rdx
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+48] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    r13, [reg_p2+32] 
+  mov    rax, r14 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+40] 
+  mul    r12
+  add    r9, rax
+  mov    [rcx+80], r9      // c2 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [reg_p2+56] 
+  mul    r11
+  mov    r15, [reg_p1+56] 
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, r15 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+48] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+40] 
+  mul    r14
+  add    r10, rax
+  mov    [rcx+88], r10     // c3 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [reg_p2+56] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+40] 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+48] 
+  mul    r14
+  add    r8, rax
+  mov    [rcx+96], r8      // c4 
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+56]
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+
+  mov    rax, [reg_p2+48] 
+  mul    r15
+  add    r9, rax
+  mov    [rcx+104], r9     // c5 
+  adc    r10, rdx
+  adc    r8, 0
+
+  mov    rax, [reg_p2+56] 
+  mul    r15
+  add    r10, rax
+  mov    [rcx+112], r10    // c6 
+  adc    r8, rdx   
+  mov    [rcx+120], r8     // c7 
+      
+  // [r8-r15] <- (AH+AL)*(BH+BL) - AL*BL 
+  mov    r8,  [rsp]
+  sub    r8,  [rcx] 
+  mov    r9,  [rsp+8]
+  sbb    r9,  [rcx+8]
+  mov    r10, [rsp+16]
+  sbb    r10, [rcx+16]
+  mov    r11, [rsp+24]
+  sbb    r11, [rcx+24] 
+  mov    r12, [rsp+32]
+  sbb    r12, [rcx+32]
+  mov    r13, [rsp+40]
+  sbb    r13, [rcx+40] 
+  mov    r14, [rsp+48]
+  sbb    r14, [rcx+48] 
+  mov    r15, [rsp+56]
+  sbb    r15, [rcx+56]
+      
+  // [r8-r15] <- (AH+AL)*(BH+BL) - AL*BL - AH*BH
+  mov    rax, [rcx+64]
+  sub    r8,  rax 
+  mov    rax, [rcx+72]
+  sbb    r9,  rax
+  mov    rax, [rcx+80]
+  sbb    r10, rax
+  mov    rax, [rcx+88]
+  sbb    r11, rax 
+  mov    rax, [rcx+96]     // AH*BH words 4-7 stay in rax, rdx, rdi, rsi for the final carry propagation
+  sbb    r12, rax
+  mov    rdx, [rcx+104]
+  sbb    r13, rdx
+  mov    rdi, [rcx+112]    // rdi/rsi are free here: the input pointers are not read again
+  sbb    r14, rdi 
+  mov    rsi, [rcx+120]
+  sbb    r15, rsi 
+      
+  // Final result: add the middle term at word offset 4 and propagate the carry to the top
+  add    r8,  [rcx+32] 
+  mov    [rcx+32], r8
+  adc    r9,  [rcx+40]
+  mov    [rcx+40], r9
+  adc    r10, [rcx+48]
+  mov    [rcx+48], r10
+  adc    r11, [rcx+56]
+  mov    [rcx+56], r11
+  adc    r12, [rcx+64]
+  mov    [rcx+64], r12
+  adc    r13, [rcx+72]
+  mov    [rcx+72], r13
+  adc    r14, [rcx+80] 
+  mov    [rcx+80], r14
+  adc    r15, [rcx+88] 
+  mov    [rcx+88], r15
+  adc    rax, 0            // rax, rdx, rdi, rsi = AH*BH words 4-7 loaded above
+  mov    [rcx+96], rax
+  adc    rdx, 0
+  mov    [rcx+104], rdx
+  adc    rdi, 0
+  mov    [rcx+112], rdi
+  adc    rsi, 0
+  mov    [rcx+120], rsi
+    
+  add    rsp, 80           // Restoring space in stack
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+#endif
+
+
+#ifdef _MULX_
+
+///////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication of a 2-word value (I0, I1) by the 4-word value at M1, 6-word result
+// Inputs:  regs I0 and I1, and memory pointer M1. Preconditions: rdx already holds I0 (the I0 argument is never referenced) and T1:T0 already holds rdx * M1[0]
+// Outputs: regs T0:T5 (T0 is the caller-computed low word and is not touched here)
+// Temps:   regs T0:T5; I1, rax and rdx are also overwritten
+/////////////////////////////////////////////////////////////////
+// NOTE(review): ADD1/ADC1/ADD2/ADC2 are defined outside this excerpt; presumably two independent carry-chain adds (adox/adcx) - confirm.
+#ifdef _ADX_
+.macro MUL128x256_SCHOOL I0, I1, M1, T0, T1, T2, T3, T4, T5
+    mulx   \T2, \T4, 8\M1         // T2:T4 = rdx * M1[1]
+    xor    rax, rax               // rax = 0; also clears CF and OF
+    mulx   \T3, \T5, 16\M1        // T3:T5 = rdx * M1[2]
+    ADD1   \T1, \T4               
+    ADC1   \T2, \T5     
+    mulx   \T4, \T5, 24\M1        // T4:T5 = rdx * M1[3]
+    ADC1   \T3, \T5 
+    ADC1   \T4, rax   
+    
+    xor    rax, rax               // clear the flags again before the second row
+    mov    rdx, \I1               // second multiplier word
+    mulx   \I1, \T5, \M1          // I1:T5 = I1 * M1[0] (I1 is reused as a temporary from here on)
+    ADD2   \T1, \T5            // T1 <- C1_final 
+    ADC2   \T2, \I1     
+    mulx   \T5, \I1, 8\M1         // T5:I1 = rdx * M1[1]
+    ADC2   \T3, \T5 
+    ADD1   \T2, \I1        
+    mulx   \T5, \I1, 16\M1        // T5:I1 = rdx * M1[2]
+    ADC2   \T4, \T5 
+    ADC1   \T3, \I1     
+    mulx   \T5, \I1, 24\M1        // T5:I1 = rdx * M1[3]
+    ADC2   \T5, rax         
+    ADC1   \T4, \I1  
+    ADC1   \T5, rax 
+.endm
+
+#else
+
+.macro MUL128x256_SCHOOL I0, I1, M1, T0, T1, T2, T3, T4, T5 
+    mulx   \T2, \T4, 8\M1         // T2:T4 = rdx * M1[1]
+    mulx   \T3, \T5, 16\M1        // T3:T5 = rdx * M1[2]
+    add    \T1, \T4               
+    adc    \T2, \T5     
+    mulx   \T4, \T5, 24\M1        // T4:T5 = rdx * M1[3]
+    adc    \T3, \T5 
+    adc    \T4, 0   
+    
+    mov    rdx, \I1               // second multiplier word
+    mulx   \I1, \T5, \M1          // I1:T5 = I1 * M1[0] (I1 is reused as a temporary from here on)
+    add    \T1, \T5            // T1 <- C1_final 
+    adc    \T2, \I1     
+    mulx   \T5, \I1, 8\M1         // T5:I1 = rdx * M1[1]
+    adc    \T3, \T5       
+    mulx   \T5, rax, 16\M1        // T5:rax = rdx * M1[2]
+    adc    \T4, \T5     
+    mulx   \T5, rdx, 24\M1        // T5:rdx = rdx * M1[3]; the multiplier is overwritten by the low half on this last multiply
+    adc    \T5, 0
+    add    \T2, \I1               // second pass: add the low halves held in I1, rax, rdx
+    adc    \T3, rax        
+    adc    \T4, rdx  
+    adc    \T5, 0 
+.endm
+#endif
+
+  
+//**************************************************************************************
+//  Montgomery reduction
+//  Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015  
+//  Operation: c [reg_p2] = a [reg_p1]   (a is read as 16 words, c is written as 8 words; c is also used as scratch between rounds)
+//************************************************************************************** 
+.global fmt(rdc503_asm)
+fmt(rdc503_asm):
+    // Four rounds; each multiplies two words by the 4-word constant p503p1x64 (defined outside this excerpt) and adds the 6-bit-shifted product back in
+    // a[0-1] x 64xp503p1_nz --> result: r8:r13  
+    mov    rdx, [reg_p1]
+    mov    rcx, [reg_p1+8]  
+    mulx   r9, r8, [rip+fmt(p503p1x64)]   // result r8  
+    push   rbx
+    push   rbp
+    push   r12
+    push   r13 
+    push   r14 
+    push   r15
+    MUL128x256_SCHOOL rdx, rcx, [rip+fmt(p503p1x64)], r8, r9, r10, r11, r12, r13     
+
+    xor    r15, r15
+    shrd   r15, r8, 6            // r15 = r8 << 58: receives the 6 bits shifted out of r8
+    shrd   r8, r9, 6             // r13:...:r8 shifted right by 6 bits as one multiword value
+    shrd   r9, r10, 6 
+    shrd   r10, r11, 6 
+    shrd   r11, r12, 6 
+    shrd   r12, r13, 6 
+    shr    r13, 6
+    mov    rdx, [reg_p1+16]      // multiplier for the next round
+    mov    r14, [reg_p1+80] 
+    add    r15, [reg_p1+24]      // shifted product is added starting at word 3; r15 is the second multiplier of the next round
+    adc    r8, [reg_p1+32]  
+    adc    r9, [reg_p1+40]  
+    adc    r10, [reg_p1+48]   
+    adc    r11, [reg_p1+56]   
+    adc    r12, [reg_p1+64]   
+    adc    r13, [reg_p1+72]  
+    mulx   rbx, rcx, [rip+fmt(p503p1x64)]   // result rcx (mulx does not touch the flags, so the adc chain continues)
+    adc    r14, 0
+    mov    [reg_p2], r8          // running words 4-10 are parked in the output buffer
+    mov    [reg_p2+8], r9  
+    mov    [reg_p2+16], r10  
+    mov    [reg_p2+24], r11  
+    mov    [reg_p2+32], r12   
+    mov    [reg_p2+40], r13    
+    mov    [reg_p2+48], r14 
+    mov    r9, [reg_p1+88]       // words 11-15 of a stay in r9-r12, rdi for the rest of the routine
+    mov    r10, [reg_p1+96]  
+    mov    r11, [reg_p1+104]  
+    mov    r12, [reg_p1+112]
+    mov    rdi, [reg_p1+120]     // last read through reg_p1 (NOTE(review): rdi is presumably reg_p1 itself - confirm)
+    adc    r9, 0
+    adc    r10, 0
+    adc    r11, 0
+    adc    r12, 0
+    adc    rdi, 0
+
+    // a[2-3] x 64xp503p1_nz --> result: rcx, rbx, rbp, r14, r8, r13
+    MUL128x256_SCHOOL rdx, r15, [rip+fmt(p503p1x64)], rcx, rbx, rbp, r14, r8, r13 
+
+    xor    r15, r15
+    shrd   r15, rcx, 6           // same 6-bit right shift as in the first round
+    shrd   rcx, rbx, 6 
+    shrd   rbx, rbp, 6 
+    shrd   rbp, r14, 6 
+    shrd   r14, r8, 6  
+    shrd   r8, r13, 6 
+    shr    r13, 6
+    mov    rdx, [reg_p2]         // multiplier for the next round (running word 4)
+    add    r15, [reg_p2+8]       // r15 becomes running word 5, the second multiplier of the next round
+    adc    rcx, [reg_p2+16]  
+    adc    rbx, [reg_p2+24]  
+    adc    rbp, [reg_p2+32]   
+    adc    r14, [reg_p2+40]  
+    adc    r8, [reg_p2+48]
+    mov    [reg_p2+16], rcx  
+    mov    [reg_p2+24], rbx  
+    mov    [reg_p2+32], rbp   
+    mov    [reg_p2+40], r14 
+    mov    [reg_p2+48], r8 
+    mulx   rbp, rbx, [rip+fmt(p503p1x64)]   // result rbx    
+    adc    r9, r13 
+    adc    r10, 0
+    adc    r11, 0
+    adc    r12, 0
+    adc    rdi, 0
+
+    // a[4-5] x 64xp503p1_nz --> result: rbx, rbp, r14, r8, r13, rcx
+    MUL128x256_SCHOOL rdx, r15, [rip+fmt(p503p1x64)], rbx, rbp, r14, r8, r13, rcx  
+
+    xor    r15, r15
+    shrd   r15, rbx, 6 
+    shrd   rbx, rbp, 6 
+    shrd   rbp, r14, 6 
+    shrd   r14, r8, 6 
+    shrd   r8, r13, 6 
+    shrd   r13, rcx, 6 
+    shr    rcx, 6
+    mov    rdx, [reg_p2+16]      // multiplier for the last round (running word 6)
+    add    r15, [reg_p2+24]      // r15 becomes running word 7, the second multiplier of the last round
+    adc    rbx, [reg_p2+32]  
+    adc    rbp, [reg_p2+40]  
+    adc    r14, [reg_p2+48]  
+    mov    [reg_p2], rbx              // Final result c0
+    mov    [reg_p2+8], rbp   
+    mov    [reg_p2+16], r14
+    adc    r9, r8 
+    adc    r10, r13  
+    mulx   r14, rbp, [rip+fmt(p503p1x64)]   // result rbp  
+    adc    r11, rcx
+    adc    r12, 0
+    adc    rdi, 0
+
+    // a[6-7] x 64xp503p1_nz --> result: rbp, r14, r8, r13, rcx, rbx
+    MUL128x256_SCHOOL rdx, r15, [rip+fmt(p503p1x64)], rbp, r14, r8, r13, rcx, rbx  
+    
+    xor    r15, r15
+    shrd   r15, rbp, 6 
+    shrd   rbp, r14, 6 
+    shrd   r14, r8, 6 
+    shrd   r8, r13, 6 
+    shrd   r13, rcx, 6 
+    shrd   rcx, rbx, 6 
+    shr    rbx, 6 
+    add    r15, [reg_p2+8]
+    adc    rbp, [reg_p2+16] 
+    mov    [reg_p2+8], r15       // Final result c1-c7
+    mov    [reg_p2+16], rbp  
+    adc    r9, r14 
+    adc    r10, r8 
+    adc    r11, r13
+    adc    r12, rcx
+    adc    rdi, rbx    
+    mov    [reg_p2+24], r9  
+    mov    [reg_p2+32], r10  
+    mov    [reg_p2+40], r11   
+    mov    [reg_p2+48], r12
+    mov    [reg_p2+56], rdi
+
+    pop    r15
+    pop    r14
+    pop    r13
+    pop    r12
+    pop    rbp
+    pop    rbx
+    ret
+    
+  #else
+  
+//***********************************************************************
+//  Montgomery reduction
+//  Based on comba method (column-wise accumulation in the rotating register triple r8, r9, r10)
+//  Operation: c [reg_p2] = a [reg_p1]   (a is read as 16 words, c is written as 8 words)
+//  NOTE: a=c is not allowed
+//*********************************************************************** 
+.global fmt(rdc503_asm)
+fmt(rdc503_asm):
+  push   r12
+  push   r13 
+  push   r14 
+  push   r15 
+  // Only words 3-7 of p503p1 (offsets +24..+56) are read in this routine; offsets +0..+16 are never used
+  mov    r11, [reg_p1]       // r11 = a[0]
+  mov    rax, [rip+fmt(p503p1)+24] 
+  mul    r11
+  xor    r8, r8
+  add    rax, [reg_p1+24]
+  mov    [reg_p2+24], rax    // z3
+  adc    r8, rdx
+  
+  xor    r9, r9
+  mov    rax, [rip+fmt(p503p1)+32] 
+  mul    r11
+  xor    r10, r10
+  add    r8, rax
+  adc    r9, rdx
+
+  mov    r12, [reg_p1+8]     // r12 = a[1]
+  mov    rax, [rip+fmt(p503p1)+24] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+32]
+  mov    [reg_p2+32], r8    // z4
+  adc    r9, 0
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [rip+fmt(p503p1)+40] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p503p1)+32] 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    r13, [reg_p1+16]    // r13 = a[2]
+  mov    rax, [rip+fmt(p503p1)+24] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  add    r9, [reg_p1+40]
+  mov    [reg_p2+40], r9    // z5
+  adc    r10, 0
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [rip+fmt(p503p1)+48] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p503p1)+40] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p503p1)+32]
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    r14, [reg_p2+24]    // r14 = z3, read back from the output buffer
+  mov    rax, [rip+fmt(p503p1)+24] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  add    r10, [reg_p1+48]
+  mov    [reg_p2+48], r10   // z6
+  adc    r8, 0
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [rip+fmt(p503p1)+56] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p503p1)+48] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p503p1)+40] 
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p503p1)+32] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    r15, [reg_p2+32]    // r15 = z4, read back from the output buffer
+  mov    rax, [rip+fmt(p503p1)+24] 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+56]
+  mov    [reg_p2+56], r8    // z7
+  adc    r9, 0
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [rip+fmt(p503p1)+56] 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p503p1)+48] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p503p1)+40] 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p503p1)+32] 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rcx, [reg_p2+40]    // rcx = z5, read back from the output buffer
+  mov    rax, [rip+fmt(p503p1)+24] 
+  mul    rcx
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  add    r9, [reg_p1+64]
+  mov    [reg_p2], r9        // z0
+  adc    r10, 0
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [rip+fmt(p503p1)+56] 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+
+  mov    rax, [rip+fmt(p503p1)+48] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+
+  mov    rax, [rip+fmt(p503p1)+40]
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+
+  mov    rax, [rip+fmt(p503p1)+32]
+  mul    rcx
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    r13, [reg_p2+48]    // r13 is reused: now z6 (a[2] is not needed after this point)
+  mov    rax, [rip+fmt(p503p1)+24]
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  add    r10, [reg_p1+72]
+  mov    [reg_p2+8], r10     // z1
+  adc    r8, 0
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [rip+fmt(p503p1)+56] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p503p1)+48] 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p503p1)+40] 
+  mul    rcx
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p503p1)+32] 
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    r14, [reg_p2+56]    // r14 is reused: now z7 (the earlier z3 value is not needed after this point)
+  mov    rax, [rip+fmt(p503p1)+24] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+80]
+  mov    [reg_p2+16], r8     // z2
+  adc    r9, 0
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [rip+fmt(p503p1)+56] 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p503p1)+48] 
+  mul    rcx
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p503p1)+40] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p503p1)+32] 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  add    r9, [reg_p1+88]
+  mov    [reg_p2+24], r9     // z3 (overwrites the intermediate z3 stored at the start)
+  adc    r10, 0
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [rip+fmt(p503p1)+56] 
+  mul    rcx
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p503p1)+48] 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p503p1)+40] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  add    r10, [reg_p1+96]
+  mov    [reg_p2+32], r10    // z4
+  adc    r8, 0
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [rip+fmt(p503p1)+56] 
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+
+  mov    rax, [rip+fmt(p503p1)+48] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+104]    // z5
+  mov    [reg_p2+40], r8     // z5
+  adc    r9, 0
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p503p1)+56] 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx
+  add    r9, [reg_p1+112]    // z6
+  mov    [reg_p2+48], r9     // z6
+  adc    r10, 0  
+  add    r10, [reg_p1+120]   // z7
+  mov    [reg_p2+56], r10    // z7
+
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+  #endif
+
+
+//***********************************************************************
+//  503-bit multiprecision addition
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]   (8 words; the carry out of the top word is not returned)
+//*********************************************************************** 
+.global fmt(mp_add503_asm)
+fmt(mp_add503_asm): 
+  mov    r8, [reg_p1]        // words 0-3 of a
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  add    r8, [reg_p2] 
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  // mov does not modify the flags, so the carry from word 3 feeds the adc chain below
+  mov    r8, [reg_p1+32]     // words 4-7 of a
+  mov    r9, [reg_p1+40]
+  mov    r10, [reg_p1+48]
+  mov    r11, [reg_p1+56]
+  adc    r8, [reg_p2+32] 
+  adc    r9, [reg_p2+40] 
+  adc    r10, [reg_p2+48] 
+  adc    r11, [reg_p2+56]
+  mov    [reg_p3+32], r8
+  mov    [reg_p3+40], r9
+  mov    [reg_p3+48], r10
+  mov    [reg_p3+56], r11
+  ret
+
+
+//***********************************************************************
+//  2x503-bit multiprecision subtraction/addition
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. If c < 0, add p503*2^512
+//*********************************************************************** 
+.global fmt(mp_subadd503x2_asm)
+fmt(mp_subadd503x2_asm):
+  push   r12
+  push   r13 
+  push   r14 
+  push   r15
+  xor    rax, rax            // rax becomes the borrow mask after the 16-word subtraction
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  sub    r8, [reg_p2] 
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+
+  mov    r8, [reg_p1+32]
+  mov    r9, [reg_p1+40]
+  mov    r10, [reg_p1+48] 
+  mov    r11, [reg_p1+56]
+  sbb    r8, [reg_p2+32] 
+  sbb    r9, [reg_p2+40] 
+  sbb    r10, [reg_p2+48]
+  sbb    r11, [reg_p2+56] 
+  mov    [reg_p3+32], r8
+  mov    [reg_p3+40], r9
+  mov    [reg_p3+48], r10
+  mov    [reg_p3+56], r11
+
+  mov    r8, [reg_p1+64]
+  mov    r9, [reg_p1+72]
+  mov    r10, [reg_p1+80] 
+  mov    r11, [reg_p1+88]
+  sbb    r8, [reg_p2+64] 
+  sbb    r9, [reg_p2+72] 
+  sbb    r10, [reg_p2+80]
+  sbb    r11, [reg_p2+88] 
+  mov    [reg_p3+64], r8
+  mov    [reg_p3+72], r9
+  mov    [reg_p3+80], r10
+  mov    [reg_p3+88], r11
+  
+  mov    r12, [reg_p1+96]    // top four words stay in r12-r15 until the correction is added
+  mov    r13, [reg_p1+104] 
+  mov    r14, [reg_p1+112]
+  mov    r15, [reg_p1+120]
+  sbb    r12, [reg_p2+96]
+  sbb    r13, [reg_p2+104]
+  sbb    r14, [reg_p2+112]  
+  sbb    r15, [reg_p2+120] 
+  sbb    rax, 0              // rax = all ones if the subtraction borrowed (c < 0), else 0
+  
+  // Add p503 anded with the mask in rax (only words 0 and 3-7 of p503 are loaded; r8 stands in for words 0-2)
+  mov    r8, [rip+fmt(p503)]
+  mov    r9, [rip+fmt(p503)+24]
+  mov    r10, [rip+fmt(p503)+32]
+  mov    r11, [rip+fmt(p503)+40]
+  mov    rdi, [rip+fmt(p503)+48]   // rdi/rsi are used as scratch: the input pointers are not read again
+  mov    rsi, [rip+fmt(p503)+56]
+  and    r8, rax
+  and    r9, rax
+  and    r10, rax
+  and    r11, rax
+  and    rdi, rax
+  and    rsi, rax
+  mov    rax, [reg_p3+64]    // the mask is no longer needed; rax is reused as a temporary
+  add    rax, r8             // the masked p503 is added starting at word 8, i.e. scaled by 2^512
+  mov    [reg_p3+64], rax
+  mov    rax, [reg_p3+72]
+  adc    rax, r8
+  mov    [reg_p3+72], rax
+  mov    rax, [reg_p3+80]
+  adc    rax, r8
+  mov    [reg_p3+80], rax
+  mov    rax, [reg_p3+88]
+  adc    rax, r9
+  mov    [reg_p3+88], rax
+  adc    r12, r10
+  adc    r13, r11
+  adc    r14, rdi
+  adc    r15, rsi
+  
+  mov    [reg_p3+96], r12
+  mov    [reg_p3+104], r13
+  mov    [reg_p3+112], r14
+  mov    [reg_p3+120], r15
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Double 2x503-bit multiprecision subtraction
+//  Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]   (16 words, processed in chunks of 6, 6 and 4 words)
+//*********************************************************************** 
+.global fmt(mp_dblsub503x2_asm)
+fmt(mp_dblsub503x2_asm):
+  push   r12
+  push   r13
+  // Two borrow chains run side by side: the borrow of the "- a" chain is kept in al, that of the "- b" chain in cl
+  mov    r8, [reg_p3]
+  mov    r9, [reg_p3+8]
+  mov    r10, [reg_p3+16]
+  mov    r11, [reg_p3+24]
+  mov    r12, [reg_p3+32]
+  mov    r13, [reg_p3+40]
+  sub    r8, [reg_p1]
+  sbb    r9, [reg_p1+8] 
+  sbb    r10, [reg_p1+16] 
+  sbb    r11, [reg_p1+24] 
+  sbb    r12, [reg_p1+32] 
+  sbb    r13, [reg_p1+40]
+  setc   al                  // save the borrow of the "- a" chain
+  sub    r8, [reg_p2]
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  setc   cl                  // save the borrow of the "- b" chain
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+  mov    [reg_p3+40], r13
+  
+  mov    r8, [reg_p3+48]
+  mov    r9, [reg_p3+56]
+  mov    r10, [reg_p3+64]
+  mov    r11, [reg_p3+72]
+  mov    r12, [reg_p3+80]
+  mov    r13, [reg_p3+88]
+  bt     rax, 0              // CF <- bit 0 of rax, i.e. the borrow saved by setc al
+  sbb    r8, [reg_p1+48] 
+  sbb    r9, [reg_p1+56]
+  sbb    r10, [reg_p1+64] 
+  sbb    r11, [reg_p1+72] 
+  sbb    r12, [reg_p1+80] 
+  sbb    r13, [reg_p1+88]
+  setc   al 
+  bt     rcx, 0              // CF <- bit 0 of rcx, i.e. the borrow saved by setc cl
+  sbb    r8, [reg_p2+48] 
+  sbb    r9, [reg_p2+56]
+  sbb    r10, [reg_p2+64] 
+  sbb    r11, [reg_p2+72] 
+  sbb    r12, [reg_p2+80] 
+  sbb    r13, [reg_p2+88] 
+  setc   cl
+  mov    [reg_p3+48], r8
+  mov    [reg_p3+56], r9
+  mov    [reg_p3+64], r10
+  mov    [reg_p3+72], r11
+  mov    [reg_p3+80], r12
+  mov    [reg_p3+88], r13
+    
+  mov    r8, [reg_p3+96]
+  mov    r9, [reg_p3+104]
+  mov    r10, [reg_p3+112]
+  mov    r11, [reg_p3+120]
+  bt     rax, 0              // restore the "- a" borrow for the top four words
+  sbb    r8, [reg_p1+96] 
+  sbb    r9, [reg_p1+104] 
+  sbb    r10, [reg_p1+112] 
+  sbb    r11, [reg_p1+120]
+  bt     rcx, 0              // restore the "- b" borrow; the final borrows of both chains are discarded
+  sbb    r8, [reg_p2+96] 
+  sbb    r9, [reg_p2+104] 
+  sbb    r10, [reg_p2+112] 
+  sbb    r11, [reg_p2+120]
+  mov    [reg_p3+96], r8
+  mov    [reg_p3+104], r9
+  mov    [reg_p3+112], r10
+  mov    [reg_p3+120], r11
+  
+  pop    r13
+  pop    r12
+  ret
\ No newline at end of file
diff --git a/SIKE_sw/src/P503/P503.c b/SIKE_sw/src/P503/P503.c
new file mode 100644
index 0000000..dd30b4b
--- /dev/null
+++ b/SIKE_sw/src/P503/P503.c
@@ -0,0 +1,138 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: supersingular isogeny parameters and generation of functions for P503
+*********************************************************************************************/  
+
+#include "P503_api.h" 
+#include "P503_internal.h"
+#include "../internal.h"
+
+
+// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points:
+// --------------------------------------------------------------------------------------------------
+// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). 
+// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position.
+// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. 
+// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32.
+// For example, a 503-bit field element is represented with Ceil(503 / 64) = 8 64-bit digits or Ceil(503 / 32) = 16 32-bit digits.
+
+//
+// Curve isogeny system "SIDHp503". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p503^2), where A=6, B=1, C=1 and p503 = 2^250*3^159-1
+//
+         
+const uint64_t p503[NWORDS64_FIELD]              = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xABFFFFFFFFFFFFFF, 
+                                                     0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E };   // p503 = 2^250*3^159 - 1, least significant 64-bit word first
+const uint64_t p503p1[NWORDS64_FIELD]            = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xAC00000000000000,
+                                                     0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E };   // p503 + 1; its three least significant 64-bit words are zero
+const uint64_t p503x2[NWORDS64_FIELD]            = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x57FFFFFFFFFFFFFF,
+                                                     0x2610B7B44423CF41, 0x3737ED90F6FCFB5E, 0xC08B8D7BB4EF49A0, 0x0080CDEA83023C3C };   // 2*p503
+const uint64_t p503x4[NWORDS64_FIELD]            = { 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xAFFFFFFFFFFFFFFF, 
+                                                     0x4C216F6888479E82, 0x6E6FDB21EDF9F6BC, 0x81171AF769DE9340, 0x01019BD506047879 };   // 4*p503
+const uint64_t p503p1x64[NWORDS64_FIELD/2]       = { 0xC216F6888479E82B, 0xE6FDB21EDF9F6BC4, 0x1171AF769DE93406, 0x1019BD5060478798 };  
+const uint64_t p503x16p[2*NWORDS64_FIELD]        = { 0x0000000000000010, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 
+                                                     0x9EF484BBBDC30BEA, 0x8C8126F090304A1D, 0xF7472844B10B65FC, 0x30F32157CFDC3C33, 
+                                                     0x1463AB4329A333F7, 0xDFC933977C47D3A4, 0x338A3767F6F2520B, 0x4F8CB7565CCC13FA, 
+                                                     0xDE43B73AACD2189B, 0xBCF845CAC5405FBD, 0x516D02A09E684B7A, 0x0001033A4091BB86 };
+// Order of Alice's subgroup
+const uint64_t Alice_order[NWORDS64_ORDER]       = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0400000000000000 }; 
+// Order of Bob's subgroup
+const uint64_t Bob_order[NWORDS64_ORDER]         = { 0xC216F6888479E82B, 0xE6FDB21EDF9F6BC4, 0x1171AF769DE93406, 0x1019BD5060478798 };
+// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i} in GF(p503^2), expressed in Montgomery representation
+const uint64_t A_gen[6*NWORDS64_FIELD]           = { 0x5D083011589AD893, 0xADFD8D2CB67D0637, 0x330C9AC34FFB6361, 0xF0D47489A2E805A2,
+                                                     0x27E2789259C6B8DC, 0x63866A2C121931B9, 0x8D4C65A7137DCF44, 0x003A183AE5967B3F,   // XPA0
+                                                     0x7E3541B8C96D1519, 0xD3ADAEEC0D61A26C, 0xC0A2219CE7703DD9, 0xFF3E46658FCDBC52,
+                                                     0xD5B38DEAE6E196FF, 0x1AAC826364956D58, 0xEC9F4875B9A5F27A, 0x001B0B475AB99843,   // XPA1
+                                                     0x4D83695107D03BAD, 0x221F3299005E2FCF, 0x78E6AE22F30DECF2, 0x6D982DB5111253E4,
+                                                     0x504C80A8AB4526A8, 0xEFD0C3AA210BB024, 0xCB77483501DC6FCF, 0x001052544A96BDF3,   // XQA0
+                                                     0x0D74FE3402BCAE47, 0xDF5B8CDA832D8AED, 0xB86BCF06E4BD837E, 0x892A2933A0FA1F63,
+                                                     0x9F88FC67B6CCB461, 0x822926EA9DDA3AC8, 0xEAC8DDE5855425ED, 0x000618FE6DA37A80,   // XQA1
+                                                     0x1D9D32D2DC877C17, 0x5517CD8F71D5B02B, 0x395AFB8F6B60C117, 0x3AE31AC85F9098C8,
+                                                     0x5F5341C198450848, 0xF8C609DBEA435C6A, 0xD832BC7EDC7BA5E4, 0x002AD98AA6968BF5,   // XRA0
+                                                     0xC466CAB0F73C2E5B, 0x7B1817148FB2CF9C, 0x873E87C099E470A0, 0xBB17AC6D17A7BAC1,
+                                                     0xA146FDCD0F2E2A58, 0x88B311E9CEAB6201, 0x37604CF5C7951757, 0x0006804071C74BF9 }; // XRA1
+// Bob's generator values {XPB0, XQB0, XRB0 + XRB1*i} in GF(p503^2), expressed in Montgomery representation
+const uint64_t B_gen[6*NWORDS64_FIELD]           = { 0xDF630FC5FB2468DB, 0xC30C5541C102040E, 0x3CDC9987B76511FC, 0xF54B5A09353D0CDD, 
+                                                     0x3ADBA8E00703C42F, 0x8253F9303DDC95D0, 0x62D30778763ABFD7, 0x001CD00FB581CD55,   // XPB0
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,   // XPB1
+                                                     0x2E3457A12B429261, 0x311F94E89627DCF8, 0x5B71C98FD1DB73F6, 0x3671DB7DCFC21541, 
+                                                     0xB6D1484C9FE0CF4F, 0x19CD110717356E35, 0xF4F9FB00AC9919DF, 0x0035BC124D38A70B,   // XQB0
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,   // XQB1
+                                                     0x2E08BB99413D2952, 0xD3021467CD088D72, 0x21017AF859752245, 0x26314ED8FFD9DE5C, 
+                                                     0x4AF43C73344B6686, 0xCFA1F91149DF0993, 0xF327A95365587A89, 0x000DBF54E03D3906,   // XRB0
+                                                     0x03E03FF342F5F304, 0x993D604D7B4B6E56, 0x80412F4D9280E71F, 0x0FFDC9EF990B3982,
+                                                     0xE584E64C51604931, 0x1374F42AC8B0BBD7, 0x07D5BC37DFA41A5F, 0x00396CCFD61FD34C }; // XRB1
+// Montgomery constant Montgomery_R2 = (2^512)^2 mod p503
+const uint64_t Montgomery_R2[NWORDS64_FIELD]     = { 0x5289A0CF641D011F, 0x9B88257189FED2B9, 0xA3B365D58DC8F17A, 0x5BC57AB6EFF168EC,
+                                                     0x9E51998BD84D4423, 0xBF8999CBAC3B5695, 0x46E9127BCE14CDB6, 0x003F6CFCE8B81771 };                                                   
+// Value one in Montgomery representation 
+const uint64_t Montgomery_one[NWORDS64_FIELD]    = { 0x00000000000003F9, 0x0000000000000000, 0x0000000000000000, 0xB400000000000000, 
+                                                     0x63CB1A6EA6DED2B4, 0x51689D8D667EB37D, 0x8ACD77C71AB24142, 0x0026FBAEC60F5953 };
+
+
+// Fixed parameters for isogeny tree computation
+const unsigned int strat_Alice[MAX_Alice-1] = { 
+61, 32, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 
+4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 
+1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 29, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 
+1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 13, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 
+1, 1, 2, 1, 1, 5, 4, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1 };
+
+const unsigned int strat_Bob[MAX_Bob-1] = { 
+71, 38, 21, 13, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 5, 4, 2, 1, 1, 2, 1, 
+1, 2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 17, 9, 
+5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 
+1, 4, 2, 1, 1, 2, 1, 1, 33, 17, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 
+2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, 2, 
+1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 };
+           
+// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions
+#define fpcopy                        fpcopy503
+#define fpzero                        fpzero503
+#define fpadd                         fpadd503
+#define fpsub                         fpsub503
+#define fpneg                         fpneg503
+#define fpdiv2                        fpdiv2_503
+#define fpcorrection                  fpcorrection503
+#define fpmul_mont                    fpmul503_mont
+#define fpsqr_mont                    fpsqr503_mont
+#define fpinv_mont                    fpinv503_mont
+#define fpinv_chain_mont              fpinv503_chain_mont
+#define fp2copy                       fp2copy503
+#define fp2zero                       fp2zero503
+#define fp2add                        fp2add503
+#define fp2sub                        fp2sub503
+#define mp_sub_p2                     mp_sub503_p2
+#define mp_sub_p4                     mp_sub503_p4
+#define sub_p4                        mp_sub_p4
+#define fp2neg                        fp2neg503
+#define fp2div2                       fp2div2_503
+#define fp2correction                 fp2correction503
+#define fp2mul_mont                   fp2mul503_mont
+#define fp2sqr_mont                   fp2sqr503_mont
+#define fp2inv_mont                   fp2inv503_mont
+#define fp2inv_mont_ct                fp2inv503_mont_ct
+#define fp2inv_mont_bingcd            fp2inv503_mont_bingcd
+#define mp_add_asm                    mp_add503_asm
+#define mp_subaddx2_asm               mp_subadd503x2_asm
+#define mp_dblsubx2_asm               mp_dblsub503x2_asm
+#define crypto_kem_keypair            crypto_kem_keypair_SIKEp503
+#define crypto_kem_enc                crypto_kem_enc_SIKEp503
+#define crypto_kem_dec                crypto_kem_dec_SIKEp503
+#define random_mod_order_A            random_mod_order_A_SIDHp503
+#define random_mod_order_B            random_mod_order_B_SIDHp503
+#define EphemeralKeyGeneration_A      EphemeralKeyGeneration_A_SIDHp503
+#define EphemeralKeyGeneration_B      EphemeralKeyGeneration_B_SIDHp503
+#define EphemeralSecretAgreement_A    EphemeralSecretAgreement_A_SIDHp503
+#define EphemeralSecretAgreement_B    EphemeralSecretAgreement_B_SIDHp503
+
+#include "../fpx.c"
+#include "../ec_isogeny.c"
+#include "../sidh.c"    
+#include "../sike.c"
\ No newline at end of file
diff --git a/SIKE_sw/src/P503/P503_api.h b/SIKE_sw/src/P503/P503_api.h
new file mode 100644
index 0000000..a4bc296
--- /dev/null
+++ b/SIKE_sw/src/P503/P503_api.h
@@ -0,0 +1,112 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: API header file for P503
+*********************************************************************************************/  
+
+#ifndef P503_API_H
+#define P503_API_H
+    
+
+/*********************** Key encapsulation mechanism API ***********************/
+
+#define CRYPTO_SECRETKEYBYTES     434    // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes
+#define CRYPTO_PUBLICKEYBYTES     378
+#define CRYPTO_BYTES               24
+#define CRYPTO_CIPHERTEXTBYTES    402    // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes  
+
+// Algorithm name
+#define CRYPTO_ALGNAME "SIKEp503"  
+
+// SIKE's key generation
+// It produces a private key sk and computes the public key pk.
+// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 434 bytes)
+//          public key pk (CRYPTO_PUBLICKEYBYTES = 378 bytes) 
+int crypto_kem_keypair_SIKEp503(unsigned char *pk, unsigned char *sk);
+
+// SIKE's encapsulation
+// Input:   public key pk         (CRYPTO_PUBLICKEYBYTES = 378 bytes)
+// Outputs: shared secret ss      (CRYPTO_BYTES = 24 bytes)
+//          ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 402 bytes)
+int crypto_kem_enc_SIKEp503(unsigned char *ct, unsigned char *ss, const unsigned char *pk);
+
+// SIKE's decapsulation
+// Input:   secret key sk         (CRYPTO_SECRETKEYBYTES = 434 bytes)
+//          ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 402 bytes) 
+// Outputs: shared secret ss      (CRYPTO_BYTES = 24 bytes)
+int crypto_kem_dec_SIKEp503(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);
+
+
+// Encoding of keys for KEM-based isogeny system "SIKEp503" (wire format):
+// ----------------------------------------------------------------------
+// Elements over GF(p503) are encoded in 63 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). 
+// Elements (a+b*i) over GF(p503^2), where a and b are defined over GF(p503), are encoded as {a, b}, with a in the lowest memory portion.
+//
+// Private keys sk consist of the concatenation of a 24-byte random value, a value in the range [0, 2^Floor(Log(2,3^159))-1] and the public key pk. In the SIKE API, 
+// private keys are encoded in 434 octets in little endian format. 
+// Public keys pk consist of 3 elements in GF(p503^2). In the SIKE API, pk is encoded in 378 octets. 
+// Ciphertexts ct consist of the concatenation of a public key value and a 24-byte value. In the SIKE API, ct is encoded in 378 + 24 = 402 octets.  
+// Shared keys ss consist of a value of 24 octets.
+
+
+/*********************** Ephemeral key exchange API ***********************/
+
+#define SIDH_SECRETKEYBYTES_A    32
+#define SIDH_SECRETKEYBYTES_B    32
+#define SIDH_PUBLICKEYBYTES     378
+#define SIDH_BYTES              126
+
+// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys.
+// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016.
+// Extended version available at: http://eprint.iacr.org/2016/859  
+
+// Generation of Alice's secret key 
+// Outputs random value in [0, 2^250 - 1] to be used as Alice's private key
+void random_mod_order_A_SIDHp503(unsigned char* random_digits);
+
+// Generation of Bob's secret key 
+// Outputs random value in [0, 2^Floor(Log(2,3^159)) - 1] to be used as Bob's private key
+void random_mod_order_B_SIDHp503(unsigned char* random_digits);
+
+// Alice's ephemeral public key generation
+// Input:  a private key PrivateKeyA in the range [0, 2^250 - 1], stored in 32 bytes. 
+// Output: the public key PublicKeyA consisting of 3 GF(p503^2) elements encoded in 378 bytes.
+int EphemeralKeyGeneration_A_SIDHp503(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA);
+
+// Bob's ephemeral public key generation
+// It computes the public key PublicKeyB from the caller-supplied private key PrivateKeyB (e.g., one produced by random_mod_order_B_SIDHp503).
+// Input:  a private key PrivateKeyB in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. 
+// Output: the public key PublicKeyB consisting of 3 GF(p503^2) elements encoded in 378 bytes.
+int EphemeralKeyGeneration_B_SIDHp503(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB);
+
+// Alice's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB
+// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^250 - 1], stored in 32 bytes. 
+//         Bob's PublicKeyB consists of 3 GF(p503^2) elements encoded in 378 bytes.
+// Output: a shared secret SharedSecretA that consists of one element in GF(p503^2) encoded in 126 bytes.
+int EphemeralSecretAgreement_A_SIDHp503(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA);
+
+// Bob's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA
+// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. 
+//         Alice's PublicKeyA consists of 3 GF(p503^2) elements encoded in 378 bytes.
+// Output: a shared secret SharedSecretB that consists of one element in GF(p503^2) encoded in 126 bytes.
+int EphemeralSecretAgreement_B_SIDHp503(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB);
+
+
+// Encoding of keys for KEX-based isogeny system "SIDHp503" (wire format):
+// ----------------------------------------------------------------------
+// Elements over GF(p503) are encoded in 63 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). 
+// Elements (a+b*i) over GF(p503^2), where a and b are defined over GF(p503), are encoded as {a, b}, with a in the lowest memory portion.
+//
+// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^250-1] and [0, 2^Floor(Log(2,3^159)) - 1], resp. In the SIDH API, 
+// Alice's and Bob's private keys are encoded in 32 octets in little endian format. 
+// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p503^2). In the SIDH API, they are encoded in 378 octets. 
+// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p503^2). In the SIDH API, they are encoded in 126 octets.
+
+
+#endif
\ No newline at end of file
diff --git a/SIKE_sw/src/P503/P503_internal.h b/SIKE_sw/src/P503/P503_internal.h
new file mode 100644
index 0000000..5a42c0f
--- /dev/null
+++ b/SIKE_sw/src/P503/P503_internal.h
@@ -0,0 +1,175 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: internal header file for P503
+*********************************************************************************************/  
+
+#ifndef P503_INTERNAL_H
+#define P503_INTERNAL_H
+
+#include "../config.h"
+ 
+
+#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_ARM64)
+    #define NWORDS_FIELD    8               // Number of words of a 503-bit field element
+    #define p503_ZERO_WORDS 3               // Number of "0" digits in the least significant part of p503 + 1    
+#elif (TARGET == TARGET_x86)
+    #define NWORDS_FIELD    16 
+    #define p503_ZERO_WORDS 7
+#endif
+    
+
+// Basic constants
+
+#define NBITS_FIELD             503  
+#define MAXBITS_FIELD           512                
+#define MAXWORDS_FIELD          ((MAXBITS_FIELD+RADIX-1)/RADIX)     // Max. number of words to represent field elements
+#define NWORDS64_FIELD          ((NBITS_FIELD+63)/64)               // Number of 64-bit words of a 503-bit field element 
+#define NBITS_ORDER             256
+#define NWORDS_ORDER            ((NBITS_ORDER+RADIX-1)/RADIX)       // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp.
+#define NWORDS64_ORDER          ((NBITS_ORDER+63)/64)               // Number of 64-bit words of a 256-bit element 
+#define MAXBITS_ORDER           NBITS_ORDER
+#define ALICE                   0
+#define BOB                     1 
+#define OALICE_BITS             250  
+#define OBOB_BITS               253     
+#define OBOB_EXPON              159    
+#define MASK_ALICE              0x03 
+#define MASK_BOB                0x0F 
+#define PRIME                   p503 
+#define PARAM_A                 6  
+#define PARAM_C                 1
+// Fixed parameters for isogeny tree computation
+#define MAX_INT_POINTS_ALICE    7        
+#define MAX_INT_POINTS_BOB      8      
+#define MAX_Alice               125
+#define MAX_Bob                 159
+#define MSG_BYTES               24
+#define SECRETKEY_A_BYTES       ((OALICE_BITS + 7) / 8)             // Bytes needed to hold an OALICE_BITS-bit secret key
+#define SECRETKEY_B_BYTES       ((OBOB_BITS - 1 + 7) / 8)           // Bytes needed to hold an (OBOB_BITS-1)-bit secret key
+#define FP2_ENCODED_BYTES       (2*((NBITS_FIELD + 7) / 8))         // Two byte-encoded field elements; outer parentheses keep uses such as x/FP2_ENCODED_BYTES correct
+
+
+// SIDH's basic element definitions and point representations
+
+typedef digit_t felm_t[NWORDS_FIELD];                                 // Datatype for representing 503-bit field elements (512-bit max.)
+typedef digit_t dfelm_t[2*NWORDS_FIELD];                              // Datatype for representing double-precision 2x503-bit field elements (2x512-bit max.) 
+typedef felm_t  f2elm_t[2];                                           // Datatype for representing quadratic extension field elements GF(p503^2)
+        
+typedef struct { f2elm_t X; f2elm_t Z; } point_proj;                  // Point representation in projective XZ Montgomery coordinates.
+typedef point_proj point_proj_t[1]; 
+
+#ifdef COMPRESS
+    typedef struct { f2elm_t X; f2elm_t Y; f2elm_t Z; } point_full_proj;  // Point representation in full projective XYZ Montgomery coordinates 
+    typedef point_full_proj point_full_proj_t[1]; 
+
+    typedef struct { f2elm_t x; f2elm_t y; } point_affine;                // Point representation in affine coordinates.
+    typedef point_affine point_t[1]; 
+
+    typedef f2elm_t publickey_t[3];      
+#endif
+
+
+
+/**************** Function prototypes ****************/
+/************* Multiprecision functions **************/
+
+// 503-bit multiprecision addition, c = a+b
+void mp_add503(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_add503_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// 503-bit multiprecision subtraction, c = a-b+2p or c = a-b+4p
+extern void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c);
+extern void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_sub503_p2_asm(const digit_t* a, const digit_t* b, digit_t* c); 
+void mp_sub503_p4_asm(const digit_t* a, const digit_t* b, digit_t* c); 
+
+// 2x503-bit multiprecision subtraction followed by addition with p503*2^512, c = a-b+(p503*2^512) if a-b < 0, otherwise c=a-b 
+void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_subadd503x2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Double 2x503-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
+void mp_dblsub503x2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+/************ Field arithmetic functions *************/
+
+// Copy of a field element, c = a
+void fpcopy503(const digit_t* a, digit_t* c);
+
+// Zeroing a field element, a = 0
+void fpzero503(digit_t* a);
+
+// Non constant-time comparison of two field elements. If a = b return TRUE, otherwise, return FALSE
+bool fpequal503_non_constant_time(const digit_t* a, const digit_t* b); 
+
+// Modular addition, c = a+b mod p503
+extern void fpadd503(const digit_t* a, const digit_t* b, digit_t* c);
+extern void fpadd503_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Modular subtraction, c = a-b mod p503
+extern void fpsub503(const digit_t* a, const digit_t* b, digit_t* c);
+extern void fpsub503_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Modular negation, a = -a mod p503        
+extern void fpneg503(digit_t* a);  
+
+// Modular division by two, c = a/2 mod p503.
+void fpdiv2_503(const digit_t* a, digit_t* c);
+
+// Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1].
+void fpcorrection503(digit_t* a);
+
+// 503-bit Montgomery reduction, c = a*R^-1 mod p503, where R=2^512
+void rdc503_asm(digit_t* ma, digit_t* mc);
+            
+// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^512
+void fpmul503_mont(const digit_t* a, const digit_t* b, digit_t* c);
+void mul503_asm(const digit_t* a, const digit_t* b, digit_t* c);
+   
+// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p503, where R=2^512
+void fpsqr503_mont(const digit_t* ma, digit_t* mc);
+
+// Field inversion, a = a^-1 in GF(p503)
+void fpinv503_mont(digit_t* a);
+
+// Exponentiation chain to compute a^((p503-3)/4) using Montgomery arithmetic
+void fpinv503_chain_mont(digit_t* a);
+
+/************ GF(p^2) arithmetic functions *************/
+    
+// Copy of a GF(p503^2) element, c = a
+void fp2copy503(const f2elm_t a, f2elm_t c);
+
+// Zeroing a GF(p503^2) element, a = 0
+void fp2zero503(f2elm_t a);
+
+// GF(p503^2) negation, a = -a in GF(p503^2)
+void fp2neg503(f2elm_t a);
+
+// GF(p503^2) addition, c = a+b in GF(p503^2)
+extern void fp2add503(const f2elm_t a, const f2elm_t b, f2elm_t c);           
+
+// GF(p503^2) subtraction, c = a-b in GF(p503^2)
+extern void fp2sub503(const f2elm_t a, const f2elm_t b, f2elm_t c); 
+
+// GF(p503^2) division by two, c = a/2  in GF(p503^2) 
+void fp2div2_503(const f2elm_t a, f2elm_t c);
+
+// Modular correction, a = a in GF(p503^2)
+void fp2correction503(f2elm_t a);
+            
+// GF(p503^2) squaring using Montgomery arithmetic, c = a^2 in GF(p503^2)
+void fp2sqr503_mont(const f2elm_t a, f2elm_t c);
+ 
+// GF(p503^2) multiplication using Montgomery arithmetic, c = a*b in GF(p503^2)
+void fp2mul503_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
+
+// GF(p503^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
+void fp2inv503_mont(f2elm_t a);
+
+
+#endif
diff --git a/SIKE_sw/src/P503/generic/fp_generic.c b/SIKE_sw/src/P503/generic/fp_generic.c
new file mode 100644
index 0000000..1215ed7
--- /dev/null
+++ b/SIKE_sw/src/P503/generic/fp_generic.c
@@ -0,0 +1,259 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: portable modular arithmetic for P503
+*********************************************************************************************/
+
+#include "../P503_internal.h"
+#include "../../internal.h"
+
+// Global constants
+extern const uint64_t p503[NWORDS64_FIELD];
+extern const uint64_t p503p1[NWORDS64_FIELD]; 
+extern const uint64_t p503x2[NWORDS64_FIELD];
+extern const uint64_t p503x4[NWORDS64_FIELD];
+
+
+__inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p, computed on NWORDS_FIELD digits.
+    const digit_t* twice_p = (const digit_t*)p503x2;    // 2*p503 viewed as digits; the table is read-only, so the const qualifier is kept
+    unsigned int i, borrow = 0;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {                // c = a - b; the borrow out of the top digit is dropped
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    borrow = 0;                                         // Reused as the carry chain of the addition below
+    for (i = 0; i < NWORDS_FIELD; i++) {                // c = c + 2*p503; the carry out of the top digit is dropped
+        ADDC(borrow, c[i], twice_p[i], borrow, c[i]); 
+    }
+} 
+
+
+__inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p, computed on NWORDS_FIELD digits.
+    const digit_t* four_p = (const digit_t*)p503x4;     // 4*p503 viewed as digits; the table is read-only, so the const qualifier is kept
+    unsigned int i, borrow = 0;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {                // c = a - b; the borrow out of the top digit is dropped
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    borrow = 0;                                         // Reused as the carry chain of the addition below
+    for (i = 0; i < NWORDS_FIELD; i++) {                // c = c + 4*p503; the carry out of the top digit is dropped
+        ADDC(borrow, c[i], four_p[i], borrow, c[i]); 
+    }
+}
+
+
+__inline void fpadd503(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular addition, c = a+b mod p503.
+  // Inputs: a, b in [0, 2*p503-1] 
+  // Output: c in [0, 2*p503-1] 
+    const digit_t* twice_p = (const digit_t*)p503x2;    // 2*p503 viewed as digits; the table is read-only, so the const qualifier is kept
+    unsigned int i, carry = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {                // c = a + b
+        ADDC(carry, a[i], b[i], carry, c[i]); 
+    }
+
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {                // c = c - 2*p503; carry ends up holding the final borrow
+        SUBC(carry, c[i], twice_p[i], carry, c[i]); 
+    }
+    mask = 0 - (digit_t)carry;                          // All ones if the subtraction borrowed, zero otherwise
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {                // Branch-free add-back: c = c + (2*p503 & mask)
+        ADDC(carry, c[i], twice_p[i] & mask, carry, c[i]); 
+    }
+} 
+
+
+__inline void fpsub503(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular subtraction, c = a-b mod p503.
+  // Inputs: a, b in [0, 2*p503-1] 
+  // Output: c in [0, 2*p503-1] 
+    const digit_t* twice_p = (const digit_t*)p503x2;    // 2*p503 viewed as digits; the table is read-only, so the const qualifier is kept
+    unsigned int i, borrow = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {                // c = a - b
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    mask = 0 - (digit_t)borrow;                         // All ones if a - b borrowed, zero otherwise
+    borrow = 0;                                         // Reused as the carry chain of the addition below
+    for (i = 0; i < NWORDS_FIELD; i++) {                // Branch-free correction: c = c + (2*p503 & mask)
+        ADDC(borrow, c[i], twice_p[i] & mask, borrow, c[i]); 
+    }
+}
+
+
+__inline void fpneg503(digit_t* a)
+{ // Modular negation, a = -a mod p503, computed in place as 2*p503 - a.
+  // Input/output: a in [0, 2*p503-1] 
+    const digit_t* twice_p = (const digit_t*)p503x2;    // 2*p503 viewed as digits; the table is read-only, so the const qualifier is kept
+    unsigned int i, borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {                // a = 2*p503 - a, digit by digit with borrow propagation
+        SUBC(borrow, twice_p[i], a[i], borrow, a[i]); 
+    }
+}
+
+
+void fpdiv2_503(const digit_t* a, digit_t* c)
+{ // Modular division by two, c = a/2 mod p503.
+  // Input : a in [0, 2*p503-1] 
+  // Output: c in [0, 2*p503-1] 
+    const digit_t* prime = (const digit_t*)p503;        // p503 viewed as digits; the table is read-only, so the const qualifier is kept
+    unsigned int i, carry = 0;
+    digit_t mask;
+        
+    mask = 0 - (digit_t)(a[0] & 1);                     // If a is odd compute a+p503 (all-ones mask), otherwise add zero
+    for (i = 0; i < NWORDS_FIELD; i++) {                // c = a + (p503 & mask)
+        ADDC(carry, a[i], prime[i] & mask, carry, c[i]); 
+    }
+    mp_shiftr1(c, NWORDS_FIELD);                        // Halve the sum computed above
+} 
+
+
+void fpcorrection503(digit_t* a)
+{ // Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1].
+    const digit_t* prime = (const digit_t*)p503;        // p503 viewed as digits; the table is read-only, so the const qualifier is kept
+    unsigned int i, borrow = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {                // a = a - p503
+        SUBC(borrow, a[i], prime[i], borrow, a[i]); 
+    }
+    mask = 0 - (digit_t)borrow;                         // All ones if the subtraction borrowed, zero otherwise
+    borrow = 0;                                         // Reused as the carry chain of the addition below
+    for (i = 0; i < NWORDS_FIELD; i++) {                // Branch-free add-back: a = a + (p503 & mask)
+        ADDC(borrow, a[i], prime[i] & mask, borrow, a[i]); 
+    }
+}
+
+
+void digit_x_digit(const digit_t a, const digit_t b, digit_t* c)
+{ // Digit multiplication, digit * digit -> 2-digit result: c[0] receives the low digit and c[1] the high digit of a*b.
+    register digit_t al, ah, bl, bh, temp;
+    digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
+    digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4);   // Lower-half / upper-half masks; sizeof(digit_t)*4 is half the digit width in bits
+
+    al = a & mask_low;                        // Low part
+    ah = a >> (sizeof(digit_t) * 4);          // High part
+    bl = b & mask_low;
+    bh = b >> (sizeof(digit_t) * 4);
+
+    albl = al*bl;                             // Four half-digit partial products; each fits in one digit
+    albh = al*bh;
+    ahbl = ah*bl;
+    ahbh = ah*bh;
+    c[0] = albl & mask_low;                   // C00
+
+    res1 = albl >> (sizeof(digit_t) * 4);     // Second quarter: high half of al*bl plus the low halves of both cross products
+    res2 = ahbl & mask_low;
+    res3 = albh & mask_low;  
+    temp = res1 + res2 + res3;
+    carry = temp >> (sizeof(digit_t) * 4);    // Overflow of the quarter sum, carried into the next quarter
+    c[0] ^= temp << (sizeof(digit_t) * 4);    // C01   
+
+    res1 = ahbl >> (sizeof(digit_t) * 4);     // Third quarter: high halves of both cross products plus the low half of ah*bh
+    res2 = albh >> (sizeof(digit_t) * 4);
+    res3 = ahbh & mask_low;
+    temp = res1 + res2 + res3 + carry;
+    c[1] = temp & mask_low;                   // C10 
+    carry = temp & mask_high;                 // Overflow kept in the upper half, already aligned with the top quarter
+    c[1] ^= (ahbh & mask_high) + carry;       // C11
+}
+
+
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. The product written to c is 2*nwords digits long.
+    unsigned int i, j;
+    digit_t t = 0, u = 0, v = 0, UV[2];       // (t, u, v) is the running column accumulator, v being the least significant digit
+    unsigned int carry = 0;
+    
+    for (i = 0; i < nwords; i++) {            // Columns 0 .. nwords-1: sum of a[j]*b[i-j] for j = 0 .. i
+        for (j = 0; j <= i; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]);   // NOTE(review): MUL is defined elsewhere; presumably leaves the low digit in UV[0] and the high digit in UV[1] - confirm
+            ADDC(0, UV[0], v, carry, v); 
+            ADDC(carry, UV[1], u, carry, u); 
+            t += carry;
+        }
+        c[i] = v;                             // Emit the finished column and shift the accumulator down by one digit
+        v = u; 
+        u = t;
+        t = 0;
+    }
+
+    for (i = nwords; i < 2*nwords-1; i++) {   // Columns nwords .. 2*nwords-2: sum of a[j]*b[i-j] for j = i-nwords+1 .. nwords-1
+        for (j = i-nwords+1; j < nwords; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]); 
+            ADDC(0, UV[0], v, carry, v); 
+            ADDC(carry, UV[1], u, carry, u); 
+            t += carry;
+        }
+        c[i] = v;
+        v = u; 
+        u = t;
+        t = 0;
+    }
+    c[2*nwords-1] = v;                        // The remaining accumulator digit is the most significant digit of the product
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p503.
+  // mc = ma*R^-1 mod p503x2, where R = 2^512.
+  // If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1].
+  // ma is assumed to be in Montgomery representation. It is read as 2*NWORDS_FIELD digits; mc receives NWORDS_FIELD digits.
+    unsigned int i, j, carry, count = p503_ZERO_WORDS;   // carry has no initializer: it is always written as an ADDC carry-out before it is read
+    digit_t UV[2], t = 0, u = 0, v = 0;                  // (t, u, v) is the running column accumulator, v being the least significant digit
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        mc[i] = 0;
+    }
+
+    for (i = 0; i < NWORDS_FIELD; i++) {                 // Lower columns: each one produces a digit of mc that later columns multiply by p503+1
+        for (j = 0; j < i; j++) {
+            if (j < (i-p503_ZERO_WORDS+1)) {             // Skips products with the zero low digits of p503+1. NOTE(review): i is unsigned, so this wraps for i < p503_ZERO_WORDS-1; the extra products then appear to hit only zero digits of p503p1 - confirm
+                MUL(mc[j], ((digit_t*)p503p1)[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v); 
+                ADDC(carry, UV[1], u, carry, u); 
+                t += carry; 
+            }
+        }
+        ADDC(0, v, ma[i], carry, v);                     // Add the matching input digit to the column sum
+        ADDC(carry, u, 0, carry, u); 
+        t += carry; 
+        mc[i] = v;
+        v = u;
+        u = t;
+        t = 0;
+    }    
+
+    for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {  // Upper columns: their sums overwrite mc from digit 0 upwards with the final result
+        if (count > 0) {                                 // count steps down from p503_ZERO_WORDS to 0, widening the bound on j below by one digit per column
+            count -= 1;
+        }
+        for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
+            if (j < (NWORDS_FIELD-count)) { 
+                MUL(mc[j], ((digit_t*)p503p1)[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v); 
+                ADDC(carry, UV[1], u, carry, u); 
+                t += carry;
+            }
+        }
+        ADDC(0, v, ma[i], carry, v); 
+        ADDC(carry, u, 0, carry, u); 
+        t += carry; 
+        mc[i-NWORDS_FIELD] = v;
+        v = u;
+        u = t;
+        t = 0;
+    }
+    ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v);          // Top digit: add the last input digit; the carry-out is not used
+    mc[NWORDS_FIELD-1] = v;
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/P546/AMD64/fp_x64.c b/SIKE_sw/src/P546/AMD64/fp_x64.c
new file mode 100644
index 0000000..f8fe479
--- /dev/null
+++ b/SIKE_sw/src/P546/AMD64/fp_x64.c
@@ -0,0 +1,634 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: modular arithmetic optimized for x64 platforms for P546
+*********************************************************************************************/
+
+#include "../P546_internal.h"
+#include "../../internal.h"
+
+// Global constants
+extern const uint64_t p546[NWORDS_FIELD];
+extern const uint64_t p546p1[NWORDS_FIELD];
+extern const uint64_t p546x2[NWORDS_FIELD];
+extern const uint64_t p546x4[NWORDS_FIELD];
+
+
+__inline void mp_sub546_p2(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p, over NWORDS_FIELD words. The addition of 2p is unconditional.
+#if (OS_TARGET == OS_WIN)
+    unsigned int i, borrow = 0;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]);  // c = a - b, word by word with borrow propagation
+    }
+
+    borrow = 0;  // the borrow out of a-b is dropped; the variable is reused as the carry of the addition
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p546x2)[i], borrow, c[i]);  // c = c + p546x2
+    }
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    
+    mp_sub546_p2_asm(a, b, c);    
+
+#endif  // NOTE(review): there is no #else branch, so for any other OS_TARGET the body is empty and c is left untouched
+} 
+
+
+__inline void mp_sub546_p4(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p, over NWORDS_FIELD words. The addition of 4p is unconditional.
+#if (OS_TARGET == OS_WIN)
+    unsigned int i, borrow = 0;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]);  // c = a - b, word by word with borrow propagation
+    }
+
+    borrow = 0;  // the borrow out of a-b is dropped; the variable is reused as the carry of the addition
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p546x4)[i], borrow, c[i]);  // c = c + p546x4
+    }
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    
+    mp_sub546_p4_asm(a, b, c);    
+
+#endif  // NOTE(review): there is no #else branch, so for any other OS_TARGET the body is empty and c is left untouched
+}
+
+
+__inline void fpadd546(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular addition, c = a+b mod p546.
+  // Inputs: a, b in [0, 2*p546-1] 
+  // Output: c in [0, 2*p546-1] 
+    
+#if (OS_TARGET == OS_WIN)
+    unsigned int i, carry = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], b[i], carry, c[i]);  // c = a + b
+    }
+
+    carry = 0;  // the carry out of a+b is dropped; the variable now tracks the borrow of the subtraction
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(carry, c[i], ((digit_t*)p546x2)[i], carry, c[i]);  // c = c - 2*p546
+    }
+    mask = 0 - (digit_t)carry;  // all ones if the subtraction borrowed, zero otherwise
+
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, c[i], ((digit_t*)p546x2)[i] & mask, carry, c[i]);  // add 2*p546 back only when mask is set; no branch is taken on the data
+    } 
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    
+    fpadd546_asm(a, b, c);    
+
+#endif  // NOTE(review): there is no #else branch, so for any other OS_TARGET the body is empty and c is left untouched
+} 
+
+
+__inline void fpsub546(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular subtraction, c = a-b mod p546.
+  // Inputs: a, b in [0, 2*p546-1] 
+  // Output: c in [0, 2*p546-1] 
+    
+#if (OS_TARGET == OS_WIN)
+    unsigned int i, borrow = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]);  // c = a - b
+    }
+    mask = 0 - (digit_t)borrow;  // all ones if a < b (the subtraction borrowed), zero otherwise
+
+    borrow = 0;  // reused as the carry of the correction below
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p546x2)[i] & mask, borrow, c[i]);  // add 2*p546 only when mask is set; no branch is taken on the data
+    }
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    
+    fpsub546_asm(a, b, c);    
+
+#endif  // NOTE(review): there is no #else branch, so for any other OS_TARGET the body is empty and c is left untouched
+}
+
+
+__inline void fpneg546(digit_t* a)
+{ // Modular negation, a = -a mod p546, computed in place as 2*p546 - a.
+  // Input/output: a in [0, 2*p546-1]. NOTE(review): a = 0 yields 2*p546 itself, which is outside that range; confirm callers tolerate it.
+    unsigned int i, borrow = 0;
+    
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, ((digit_t*)p546x2)[i], a[i], borrow, a[i]);  // a[i] = p546x2[i] - a[i] - borrow; the final borrow is not used
+    }
+}
+
+
+void fpdiv2_546(const digit_t* a, digit_t* c)
+{ // Modular division by two, c = a/2 mod p546.
+  // Input : a in [0, 2*p546-1] 
+  // Output: c in [0, 2*p546-1] 
+    unsigned int i, carry = 0;
+    digit_t mask;
+        
+    mask = 0 - (digit_t)(a[0] & 1);    // If a is odd compute a+p546 (mask is all ones for odd a, zero for even a)
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], ((digit_t*)p546)[i] & mask, carry, c[i]);  // c = a + (p546 & mask); the carry out of the top word is not used
+    }
+
+    mp_shiftr1(c, NWORDS_FIELD);  // presumably shifts the NWORDS_FIELD-word value right by one bit - defined elsewhere, confirm
+}  
+
+
+void fpcorrection546(digit_t* a)
+{ // Modular correction to reduce field element a in [0, 2*p546-1] to [0, p546-1]. Works in place and takes no branch on the data.
+    unsigned int i, borrow = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], ((digit_t*)p546)[i], borrow, a[i]);  // a = a - p546
+    }
+    mask = 0 - (digit_t)borrow;  // all ones if a was smaller than p546 (the subtraction borrowed), zero otherwise
+
+    borrow = 0;  // reused as the carry of the addition below
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, a[i], ((digit_t*)p546)[i] & mask, borrow, a[i]);  // undo the subtraction only when it borrowed
+    }
+}
+
+
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords.
+        
+    (void)nwords;  // nwords is ignored: both branches are fixed-size; the unrolled code reads a[0..8], b[0..8] and writes c[0..17]
+
+#if (OS_TARGET == OS_WIN)
+    digit_t t = 0;  // third (top) word of the column accumulator (t, uv[1], uv[0])
+    uint128_t uv = {0};
+    unsigned int carry = 0;
+        
+    MULADD128(a[0], b[0], uv, carry, uv);  // column 0: products a[i]*b[j] with i+j = 0
+    t += carry;
+    c[0] = uv[0];  // the low accumulator word is the finished product word
+    uv[0] = uv[1];  // shift the accumulator down by one word for the next column
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[1], uv, carry, uv);  // column 1
+    t += carry;
+    MULADD128(a[1], b[0], uv, carry, uv);
+    t += carry;
+    c[1] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[2], uv, carry, uv);  // column 2
+    t += carry;
+    MULADD128(a[1], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[0], uv, carry, uv);
+    t += carry;
+    c[2] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[3], uv, carry, uv);  // column 3
+    t += carry;
+    MULADD128(a[2], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[0], uv, carry, uv);
+    t += carry;
+    c[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[4], uv, carry, uv);  // column 4
+    t += carry;
+    MULADD128(a[3], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[0], uv, carry, uv);
+    t += carry;
+    c[4] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[5], uv, carry, uv);  // column 5
+    t += carry;
+    MULADD128(a[4], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[0], uv, carry, uv);
+    t += carry;
+    c[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[6], uv, carry, uv);  // column 6
+    t += carry;
+    MULADD128(a[5], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[0], uv, carry, uv);
+    t += carry;
+    c[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[7], uv, carry, uv);  // column 7
+    t += carry;
+    MULADD128(a[6], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[0], uv, carry, uv);
+    t += carry;
+    c[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[8], uv, carry, uv);  // column 8: the widest column, nine products
+    t += carry;
+    MULADD128(a[7], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[0], uv, carry, uv);
+    t += carry;
+    c[8] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[8], b[1], uv, carry, uv);  // column 9: from here on each column has one product fewer
+    t += carry;
+    MULADD128(a[7], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[8], uv, carry, uv);
+    t += carry;
+    c[9] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[8], b[2], uv, carry, uv);  // column 10
+    t += carry;
+    MULADD128(a[7], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[8], uv, carry, uv);
+    t += carry;
+    c[10] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[8], b[3], uv, carry, uv);  // column 11
+    t += carry;
+    MULADD128(a[7], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[8], uv, carry, uv);
+    t += carry;
+    c[11] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;    
+    
+    MULADD128(a[8], b[4], uv, carry, uv);  // column 12
+    t += carry;
+    MULADD128(a[7], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[8], uv, carry, uv);
+    t += carry;
+    c[12] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[8], b[5], uv, carry, uv);  // column 13
+    t += carry;
+    MULADD128(a[7], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[8], uv, carry, uv);
+    t += carry;
+    c[13] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[8], b[6], uv, carry, uv);  // column 14
+    t += carry;
+    MULADD128(a[7], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[8], uv, carry, uv);
+    t += carry;
+    c[14] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[8], b[7], uv, carry, uv);  // column 15
+    t += carry;
+    MULADD128(a[7], b[8], uv, carry, uv);
+    t += carry;
+    c[15] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    
+    MULADD128(a[8], b[8], uv, carry, uv);  // column 16: last product; its carry is not accumulated
+    c[16] = uv[0];
+    c[17] = uv[1];  // the top word of the product comes straight from the high accumulator word
+
+#elif (OS_TARGET == OS_LINUX)
+    
+    mul546_asm(a, b, c);
+
+#endif  // NOTE(review): there is no #else branch, so for any other OS_TARGET the body is empty and c is left untouched
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Montgomery reduction exploiting special form of the prime.
+  // mc = ma*R^-1 mod p546x2, where R = 2^576.
+  // If ma < 2^576*p546, the output mc is in the range [0, 2*p546-1].
+  // ma is assumed to be in Montgomery representation. The unrolled code reads ma[0..17] and writes mc[0..8].
+        
+#if (OS_TARGET == OS_WIN)
+    unsigned int carry;
+    digit_t t = 0;  // third (top) word of the column accumulator (t, uv[1], uv[0])
+    uint128_t uv = {0};
+    
+    mc[0] = ma[0];  // columns 0-3 are copied unchanged: only p546p1[4..8] is ever multiplied (words 0-3 are presumably zero - confirm)
+    mc[1] = ma[1];
+    mc[2] = ma[2];
+    mc[3] = ma[3];
+    MUL128(mc[0], ((digit_t*)p546p1)[4], uv);  // column 4
+    ADDC(0, uv[0], ma[4], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    mc[4] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = 0;
+
+    MULADD128(mc[0], ((digit_t*)p546p1)[5], uv, carry, uv);  // column 5. NOTE(review): this carry is not added to t; uv[1] is 0 on entry so the sum presumably fits in 128 bits - confirm against MULADD128
+    MULADD128(mc[1], ((digit_t*)p546p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[5], carry, uv[0]);  // add the input word of this column
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[5] = uv[0];  // the low accumulator word is stored and reused as a multiplier in later columns
+    uv[0] = uv[1];  // shift the accumulator down by one word
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p546p1)[6], uv, carry, uv);  // column 6
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p546p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p546p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[6], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p546p1)[7], uv, carry, uv);  // column 7
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p546p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p546p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p546p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[7], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p546p1)[8], uv, carry, uv);  // column 8
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p546p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p546p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p546p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p546p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[8], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[8] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[1], ((digit_t*)p546p1)[8], uv, carry, uv);  // column 9: first output column
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p546p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p546p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p546p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p546p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[9], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[0] = uv[0];  // result word 0 overwrites mc[0], which is not read as a multiplier after this point
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[2], ((digit_t*)p546p1)[8], uv, carry, uv);  // column 10
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p546p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p546p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p546p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p546p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[10], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[1] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[3], ((digit_t*)p546p1)[8], uv, carry, uv);  // column 11
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p546p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p546p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p546p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p546p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[11], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[2] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[4], ((digit_t*)p546p1)[8], uv, carry, uv);  // column 12
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p546p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p546p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p546p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p546p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[12], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[5], ((digit_t*)p546p1)[8], uv, carry, uv);  // column 13
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p546p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p546p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p546p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[13], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[4] = uv[0];  // mc[4] was last read as a multiplier in column 12, so it can now hold result word 4
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[6], ((digit_t*)p546p1)[8], uv, carry, uv);  // column 14
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p546p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p546p1)[6], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[14], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[7], ((digit_t*)p546p1)[8], uv, carry, uv);  // column 15
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p546p1)[7], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[15], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[8], ((digit_t*)p546p1)[8], uv, carry, uv);  // column 16: last product; mc[8] is read here before being overwritten below
+    t += carry;  // t is not read after this point
+    ADDC(0, uv[0], ma[16], carry, mc[7]); 
+    ADDC(carry, uv[1], ma[17], carry, mc[8]);  // the carry out of the top word is not used
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    
+    rdc546_asm(ma, mc);    
+
+#endif  // NOTE(review): there is no #else branch, so for any other OS_TARGET the body is empty and mc is left untouched
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/P546/AMD64/fp_x64_asm.S b/SIKE_sw/src/P546/AMD64/fp_x64_asm.S
new file mode 100644
index 0000000..26ea865
--- /dev/null
+++ b/SIKE_sw/src/P546/AMD64/fp_x64_asm.S
@@ -0,0 +1,1353 @@
+//*******************************************************************************************
+// SIDH: an efficient supersingular isogeny cryptography library 
+// Copyright (c) Microsoft Corporation
+//
+// Website: https://github.com/microsoft/PQCrypto-SIDH
+// Released under MIT license 
+//
+// Abstract: field arithmetic in x64 assembly for P546 on Linux 
+//*******************************************************************************************  
+
+.intel_syntax noprefix 
+
+// Format function and variable names for Mac OS X
+#if defined(__APPLE__)
+    #define fmt(f)    _##f
+#else
+    #define fmt(f)    f
+#endif
+
+// Registers that are used for parameter passing:
+#define reg_p1  rdi
+#define reg_p2  rsi
+#define reg_p3  rdx
+
+
+.text
+//***********************************************************************
+//  Field addition with a branch-free correction by 2*p546 (nine 64-bit words per operand)
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] - 2*p546, with 2*p546 added back if that subtraction borrowed
+//*********************************************************************** 
+.global fmt(fpadd546_asm)
+fmt(fpadd546_asm):
+  push   r12                          // r12-r15 are used as scratch below and restored before ret
+  push   r13
+  push   r14
+  push   r15
+  
+  mov    r8, [reg_p1]                 // load the nine words of a into r8-r15 and rcx
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56] 
+  mov    rcx, [reg_p1+64]
+  add    r8, [reg_p2]                 // a + b, carry chain across all nine words
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  adc    r12, [reg_p2+32] 
+  adc    r13, [reg_p2+40] 
+  adc    r14, [reg_p2+48] 
+  adc    r15, [reg_p2+56]
+  adc    rcx, [reg_p2+64]
+
+  mov    rax, [rip+fmt(p546x2)]       // subtract 2*p546 word by word; mov leaves the borrow flag intact between sbb steps
+  sub    r8, rax
+  mov    rax, [rip+fmt(p546x2)+8]     // this word is reused for words 1, 2 and 3 - assumes those words of p546x2 are equal, TODO confirm
+  sbb    r9, rax
+  sbb    r10, rax
+  sbb    r11, rax
+  mov    rax, [rip+fmt(p546x2)+32]
+  sbb    r12, rax
+  mov    rax, [rip+fmt(p546x2)+40]
+  sbb    r13, rax
+  mov    rax, [rip+fmt(p546x2)+48]
+  sbb    r14, rax
+  mov    rax, [rip+fmt(p546x2)+56]
+  sbb    r15, rax
+  mov    rax, [rip+fmt(p546x2)+64]
+  sbb    rcx, rax
+  mov    [reg_p3+64], rcx             // park the top word in the output buffer; rcx is reused below
+  mov    rax, 0                       // mov (not xor) so that the borrow flag survives
+  sbb    rax, 0                       // rax = mask: all ones if the subtraction borrowed, zero otherwise
+  
+  mov    rsi, [rip+fmt(p546x2)]       // rsi and rdi (the input pointers) are overwritten from here on
+  and    rsi, rax
+  mov    rdi, [rip+fmt(p546x2)+8]
+  and    rdi, rax
+  
+  add    r8, rsi                      // add back (2*p546 AND mask), low four words first
+  adc    r9, rdi 
+  adc    r10, rdi
+  adc    r11, rdi
+  mov    [reg_p3], r8 
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11 
+  setc   cl                           // save the carry out of word 3; the and instructions below overwrite the flags
+  
+  mov    rdi, [rip+fmt(p546x2)+32]    // masked words 4-8 of 2*p546
+  and    rdi, rax
+  mov    rsi, [rip+fmt(p546x2)+40]
+  and    rsi, rax
+  mov    r8, [rip+fmt(p546x2)+48]
+  and    r8, rax
+  mov    r9, [rip+fmt(p546x2)+56]
+  and    r9, rax
+  mov    r10, [rip+fmt(p546x2)+64]
+  and    r10, rax
+  
+  bt     rcx, 0                       // move the saved carry (bit 0 of rcx) back into the carry flag
+  adc    r12, rdi
+  adc    r13, rsi  
+  adc    r14, r8
+  adc    r15, r9
+  mov    rsi, [reg_p3+64]             // reload the parked top word
+  adc    rsi, r10
+  mov    [reg_p3+32], r12  
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15  
+  mov    [reg_p3+64], rsi
+
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Field subtraction with a branch-free correction by 2*p546 (nine 64-bit words per operand)
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2], with 2*p546 added if the subtraction borrowed
+//*********************************************************************** 
+.global fmt(fpsub546_asm)
+fmt(fpsub546_asm):
+  push   r12                          // r12-r15 are used as scratch below and restored before ret
+  push   r13
+  push   r14
+  push   r15
+  
+  mov    r8, [reg_p1]                 // load the nine words of a into r8-r15 and rcx
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56] 
+  mov    rcx, [reg_p1+64]
+  sub    r8, [reg_p2]                 // a - b, borrow chain across all nine words
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  sbb    r14, [reg_p2+48] 
+  sbb    r15, [reg_p2+56]
+  sbb    rcx, [reg_p2+64]
+  mov    [reg_p3+64], rcx             // park the top word in the output buffer; rcx is reused below
+  mov    rax, 0                       // mov (not xor) so that the borrow flag survives
+  sbb    rax, 0                       // rax = mask: all ones if a - b borrowed, zero otherwise
+    
+  mov    rsi, [rip+fmt(p546x2)]       // rsi and rdi (the input pointers) are overwritten from here on
+  and    rsi, rax
+  mov    rdi, [rip+fmt(p546x2)+8]     // reused for words 1, 2 and 3 - assumes those words of p546x2 are equal, TODO confirm
+  and    rdi, rax
+  
+  add    r8, rsi                      // add (2*p546 AND mask), low four words first
+  adc    r9, rdi 
+  adc    r10, rdi 
+  adc    r11, rdi
+  mov    [reg_p3], r8 
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11 
+  setc   cl                           // save the carry out of word 3; the and instructions below overwrite the flags
+  
+  mov    rdi, [rip+fmt(p546x2)+32]    // masked words 4-8 of 2*p546
+  and    rdi, rax
+  mov    rsi, [rip+fmt(p546x2)+40]
+  and    rsi, rax
+  mov    r8, [rip+fmt(p546x2)+48]
+  and    r8, rax
+  mov    r9, [rip+fmt(p546x2)+56]
+  and    r9, rax
+  mov    r10, [rip+fmt(p546x2)+64]
+  and    r10, rax
+  
+  bt     rcx, 0                       // move the saved carry (bit 0 of rcx) back into the carry flag
+  adc    r12, rdi
+  adc    r13, rsi  
+  adc    r14, r8
+  adc    r15, r9
+  mov    rsi, [reg_p3+64]             // reload the parked top word
+  adc    rsi, r10
+  mov    [reg_p3+32], r12  
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15  
+  mov    [reg_p3+64], rsi
+  
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret 
+
+
+///////////////////////////////////////////////////////////////// MACRO
+.macro SUB546_PX  P0
+  push   r12                          // Macro body: c [reg_p3] = a [reg_p1] - b [reg_p2] + P0, where P0 names a nine-word constant; no ret is emitted
+  push   r13
+  push   r14
+  push   r15
+  
+  mov    r8, [reg_p1]                 // load the nine words of a into r8-r15 and rax
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56]
+  mov    rax, [reg_p1+64]
+  sub    r8, [reg_p2]                 // a - b; the borrow out of the top word is not used
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40]
+  sbb    r14, [reg_p2+48] 
+  sbb    r15, [reg_p2+56] 
+  sbb    rax, [reg_p2+64]
+
+  mov    rdi, [rip+\P0]               // rdi and rsi (the input pointers) are overwritten from here on
+  mov    rsi, [rip+\P0+8]             // reused for words 1, 2 and 3 - assumes those words of the constant are equal, TODO confirm
+  add    r8, rdi                      // unconditional addition of the constant, carry chain across all nine words
+  adc    r9, rsi  
+  adc    r10, rsi 
+  adc    r11, rsi 
+  mov    rcx, [rip+\P0+32]            // the mov instructions interleaved with the adc chain leave the carry flag intact
+  mov    rdi, [rip+\P0+40]
+  mov    rsi, [rip+\P0+48]
+  adc    r12, rcx   
+  adc    r13, rdi     
+  adc    r14, rsi 
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12 
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    rdi, [rip+\P0+56]
+  mov    rsi, [rip+\P0+64]
+  adc    r15, rdi  
+  adc    rax, rsi                     // the carry out of the top word is not used
+  mov    [reg_p3+56], r15 
+  mov    [reg_p3+64], rax
+  
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  .endm
+
+
+//***********************************************************************
+//  Multiprecision subtraction with correction with 2*p546 (expands SUB546_PX with the constant p546x2)
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p546
+//*********************************************************************** 
+.global fmt(mp_sub546_p2_asm)
+fmt(mp_sub546_p2_asm):
+
+  SUB546_PX  fmt(p546x2)
+  ret
+
+
+//***********************************************************************
+//  Multiprecision subtraction with correction with 4*p546 (expands SUB546_PX with the constant p546x4)
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p546
+//*********************************************************************** 
+.global fmt(mp_sub546_p4_asm)
+fmt(mp_sub546_p4_asm):
+
+  SUB546_PX  fmt(p546x4)
+  ret
+
+
+#ifdef _MULX_
+
+/////////////////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication
+// Inputs:  memory pointers M0 and M1
+// Outputs: memory pointer C
+// Temps:   regs T0:T7 (MUL320_SCHOOL) or T0:T9 (MUL256_SCHOOL); rdx and, in most variants, rax are also overwritten
+///////////////////////////////////////////////////////////////////////////
+
+#ifdef _ADX_
+.macro MUL320_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7 
+    mov    rdx, \M0            // ADX variant, 5x5 words: row 0, rdx = word 0 of M0 (implicit mulx operand)
+    mulx   \T0, \T1, \M1    
+    mulx   \T2, \T3, 8\M1
+    mov    \C, \T1             // C0_final 
+    xor    rax, rax            // rax = 0; also clears CF and OF before the adox chain
+    mulx   \T4, \T5, 16\M1 
+    adox   \T0, \T3               
+    adox   \T2, \T5     
+    mulx   \T1, \T3, 24\M1
+    adox   \T4, \T3         
+    mulx   \T5, \T6, 32\M1 
+    adox   \T1, \T6     
+    adox   \T5, rax            // fold the last overflow into the top word (rax is zero)
+    
+    mov    rdx, 8\M0           // row 1: word 1 of M0
+    mulx   \T6, \T7, \M1 
+    xor    rax, rax            // restart both carry chains (adcx uses CF, adox uses OF)
+    adcx   \T0, \T7 
+    mov    8\C, \T0            // C1_final 
+    adcx   \T2, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T2, \T7 
+    adcx   \T4, \T6        
+    mulx   \T0, \T6, 16\M1 
+    adox   \T4, \T6  
+    adcx   \T0, \T1     
+    mulx   \T1, \T7, 24\M1   
+    adcx   \T1, \T5 
+    adox   \T0, \T7   
+    mulx   \T5, \T6, 32\M1 
+    adcx   \T5, rax         
+    adox   \T1, \T6  
+    adox   \T5, rax         
+    
+    mov    rdx, 16\M0          // row 2: word 2 of M0
+    mulx   \T6, \T7, \M1
+    xor    rax, rax 
+    adcx   \T2, \T7 
+    mov    16\C, \T2           // C2_final 
+    adcx   \T4, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T4, \T7 
+    adcx   \T0, \T6        
+    mulx   \T2, \T6, 16\M1
+    adox   \T0, \T6 
+    adcx   \T1, \T2     
+    mulx   \T2, \T7, 24\M1   
+    adcx   \T5, \T2          
+    adox   \T1, \T7   
+    mulx   \T2, \T6, 32\M1   
+    adcx   \T2, rax 
+    adox   \T5, \T6 
+    adox   \T2, rax           
+    
+    mov    rdx, 24\M0          // row 3: word 3 of M0
+    mulx   \T6, \T7, \M1
+    xor    rax, rax 
+    adcx   \T4, \T7 
+    mov    24\C, \T4           // C3_final 
+    adcx   \T0, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T0, \T7
+    adcx   \T1, \T6        
+    mulx   \T4, \T6, 16\M1
+    adox   \T1, \T6  
+    adcx   \T5, \T4     
+    mulx   \T4, \T7, 24\M1   
+    adcx   \T2, \T4        
+    adox   \T5, \T7   
+    mulx   \T4, \T6, 32\M1   
+    adcx   \T4, rax 
+    adox   \T2, \T6  
+    adox   \T4, rax         
+    
+    mov    rdx, 32\M0          // row 4: word 4 of M0
+    mulx   \T6, \T7, \M1 
+    xor    rax, rax
+    adcx   \T0, \T7 
+    mov    32\C, \T0           // C4_final 
+    adcx   \T1, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T1, \T7 
+    adcx   \T5, \T6        
+    mulx   \T0, \T6, 16\M1 
+    adox   \T5, \T6 
+    adcx   \T2, \T0     
+    mulx   \T0, \T7, 24\M1   
+    adcx   \T4, \T0 
+    adox   \T2, \T7  
+    mulx   \T0, \T6, 32\M1   
+    adcx   \T0, rax           
+    adox   \T4, \T6 
+    adox   \T0, rax 
+
+    mov    40\C, \T1           // C5_final
+    mov    48\C, \T5           // C6_final
+    mov    56\C, \T2           // C7_final
+    mov    64\C, \T4           // C8_final
+    mov    72\C, \T0           // C9_final
+.endm
+
+.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
+    mov    rdx, \M0          // ADX variant, 4x4 words: rdx = A0 (implicit mulx operand)
+    mulx   \T0, \T1, \M1     // T0:T1 = A0*B0
+    mov    \C, \T1           // C0_final
+    mulx   \T1, \T2, 8\M1    // T1:T2 = A0*B1
+    xor    rax, rax          // rax = 0; also clears CF and OF before the adox chain
+    adox   \T0, \T2        
+    mulx   \T2, \T3, 16\M1   // T2:T3 = A0*B2
+    adox   \T1, \T3        
+    mulx   \T3, \T4, 24\M1   // T3:T4 = A0*B3
+    adox   \T2, \T4 
+           
+    mov    rdx, 8\M0         // rdx = A1
+    mulx   \T5, \T4, \M1     // T5:T4 = A1*B0
+    adox   \T3, rax          // finish row 0: fold the last overflow into its top word
+    xor    rax, rax          // restart both carry chains (adcx uses CF, adox uses OF)
+    mulx   \T6, \T7, 8\M1    // T6:T7 = A1*B1
+    adox   \T4, \T0
+    mov    8\C, \T4          // C1_final  
+    adcx   \T5, \T7      
+    mulx   \T7, \T8, 16\M1   // T7:T8 = A1*B2
+    adcx   \T6, \T8  
+    adox   \T5, \T1      
+    mulx   \T8, \T9, 24\M1   // T8:T9 = A1*B3
+    adcx   \T7, \T9        
+    adcx   \T8, rax   
+    adox   \T6, \T2
+    
+    mov    rdx, 16\M0        // rdx = A2
+    mulx   \T1, \T0, \M1     // T1:T0 = A2*B0
+    adox   \T7, \T3
+    adox   \T8, rax
+    xor    rax, rax 
+    mulx   \T2, \T3, 8\M1    // T2:T3 = A2*B1
+    adox   \T0, \T5   
+    mov    16\C, \T0         // C2_final 
+    adcx   \T1, \T3    
+    mulx   \T3, \T4, 16\M1   // T3:T4 = A2*B2
+    adcx   \T2, \T4 
+    adox   \T1, \T6       
+    mulx   \T4,\T9, 24\M1    // T4:T9 = A2*B3
+    adcx   \T3, \T9        
+    mov    rdx, 24\M0        // rdx = A3
+    adcx   \T4, rax         
+
+    adox   \T2, \T7
+    adox   \T3, \T8
+    adox   \T4, rax
+
+    mulx   \T5, \T0, \M1     // T5:T0 = A3*B0
+    xor    rax, rax 
+    mulx   \T6, \T7, 8\M1    // T6:T7 = A3*B1
+    adcx   \T5, \T7 
+    adox   \T1, \T0       
+    mulx   \T7, \T8, 16\M1   // T7:T8 = A3*B2
+    adcx   \T6, \T8  
+    adox   \T2, \T5      
+    mulx   \T8, \T9, 24\M1   // T8:T9 = A3*B3
+    adcx   \T7, \T9        
+    adcx   \T8, rax         
+
+    adox   \T3, \T6
+    adox   \T4, \T7
+    adox   \T8, rax
+    mov    24\C, \T1         // C3_final
+    mov    32\C, \T2         // C4_final
+    mov    40\C, \T3         // C5_final
+    mov    48\C, \T4         // C6_final
+    mov    56\C, \T8         // C7_final
+.endm
+
+#else
+
+.macro MUL320_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7
+    mov    rdx, \M0            // non-ADX variant, 5x5 words: row 0, rdx = word 0 of M0 (implicit mulx operand)
+    mulx   \T0, \T1, \M1    
+    mulx   \T2, \T3, 8\M1
+    mov    \C, \T1             // C0_final 
+    mulx   \T4, \T5, 16\M1 
+    add    \T0, \T3            // single add/adc chain; mulx and mov leave the carry flag intact in between
+    adc    \T2, \T5     
+    mulx   \T1, \T3, 24\M1
+    adc    \T3, \T4         
+    mulx   \T5, \T6, 32\M1 
+    adc    \T1, \T6     
+    adc    \T5, 0        
+    
+    mov    rdx, 8\M0           // row 1: word 1 of M0
+    mulx   \T6, \T7, \M1 
+    add    \T0, \T7            // first chain of the row
+    mov    8\C, \T0            // C1_final 
+    adc    \T2, \T6     
+    mulx   \T6, \T7, 8\M1
+    adc    \T3, \T6        
+    mulx   \T0, \T4, 16\M1
+    adc    \T0, \T1     
+    mulx   \T1, \T6, 24\M1   
+    adc    \T5, \T1  
+    mulx   \T1, rax, 32\M1     
+    adc    \T1, 0 
+        
+    add    \T2, \T7            // second chain of the row: add the low product halves kept in T7, T4, T6 and rax
+    adc    \T3, \T4  
+    adc    \T0, \T6  
+    adc    \T5, rax  
+    adc    \T1, 0         
+    
+    mov    rdx, 16\M0          // row 2: word 2 of M0
+    mulx   \T4, \T6, \M1 
+    add    \T2, \T6 
+    mov    16\C, \T2           // C2_final 
+    adc    \T3, \T4     
+    mulx   \T6, \T7, 8\M1
+    adc    \T0, \T6        
+    mulx   \T2, \T4, 16\M1 
+    adc    \T2, \T5     
+    mulx   \T5, \T6, 24\M1   
+    adc    \T1, \T5 
+    mulx   \T5, rax, 32\M1     
+    adc    \T5, 0 
+        
+    add    \T3, \T7
+    adc    \T0, \T4  
+    adc    \T2, \T6  
+    adc    \T1, rax 
+    adc    \T5, 0          
+    
+    mov    rdx, 24\M0          // row 3: word 3 of M0
+    mulx   \T4, \T6, \M1 
+    add    \T3, \T6 
+    mov    24\C, \T3           // C3_final 
+    adc    \T0, \T4     
+    mulx   \T6, \T7, 8\M1
+    adc    \T2, \T6        
+    mulx   \T3, \T4, 16\M1 
+    adc    \T1, \T3     
+    mulx   \T3, \T6, 24\M1   
+    adc    \T3, \T5 
+    mulx   \T5, rax, 32\M1     
+    adc    \T5, 0
+        
+    add    \T0, \T7
+    adc    \T2, \T4  
+    adc    \T1, \T6  
+    adc    \T3, rax 
+    adc    \T5, 0       
+    
+    mov    rdx, 32\M0          // row 4: word 4 of M0
+    mulx   \T4, \T6, \M1 
+    add    \T0, \T6 
+    mov    32\C, \T0           // C4_final 
+    adc    \T2, \T4     
+    mulx   \T6, \T7, 8\M1
+    adc    \T1, \T6        
+    mulx   \T0, \T4, 16\M1 
+    adc    \T3, \T0     
+    mulx   \T0, \T6, 24\M1   
+    adc    \T0, \T5 
+    mulx   \T5, rax, 32\M1     
+    adc    \T5, 0
+        
+    add    \T2, \T7 
+    adc    \T1, \T4  
+    adc    \T3, \T6 
+    adc    \T0, rax 
+    adc    \T5, 0 
+    mov    40\C, \T2           // C5_final
+    mov    48\C, \T1           // C6_final
+    mov    56\C, \T3           // C7_final
+    mov    64\C, \T0           // C8_final
+    mov    72\C, \T5           // C9_final
+.endm 
+
+.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
+    mov    rdx, \M0          // non-ADX variant, 4x4 words: rdx = A0 (implicit mulx operand)
+    mulx   \T0, \T1, \M1     // T0:T1 = A0*B0
+    mov    \C, \T1           // C0_final
+    mulx   \T1, \T2, 8\M1    // T1:T2 = A0*B1
+    add    \T0, \T2          // add/adc chain; mulx and mov leave the carry flag intact in between
+    mulx   \T2, \T3, 16\M1   // T2:T3 = A0*B2
+    adc    \T1, \T3         
+    mulx   \T3, \T4, 24\M1   // T3:T4 = A0*B3
+    adc    \T2, \T4        
+    mov    rdx, 8\M0         // rdx = A1
+    adc    \T3, 0         
+
+    mulx   \T5, \T4, \M1     // T5:T4 = A1*B0
+    mulx   \T6, \T7, 8\M1    // T6:T7 = A1*B1
+    add    \T5, \T7        
+    mulx   \T7, \T8, 16\M1   // T7:T8 = A1*B2
+    adc    \T6, \T8        
+    mulx   \T8, \T9, 24\M1   // T8:T9 = A1*B3
+    adc    \T7, \T9        
+    adc    \T8, 0         
+
+    add    \T4, \T0          // accumulate row 1 onto the running sum of row 0
+    mov    8\C, \T4          // C1_final
+    adc    \T5, \T1
+    adc    \T6, \T2
+    adc    \T7, \T3
+    mov    rdx, 16\M0        // rdx = A2
+    adc    \T8, 0
+
+    mulx   \T1, \T0, \M1     // T1:T0 = A2*B0
+    mulx   \T2, \T3, 8\M1    // T2:T3 = A2*B1
+    add    \T1, \T3        
+    mulx   \T3, \T4, 16\M1   // T3:T4 = A2*B2
+    adc    \T2, \T4        
+    mulx   \T4,\T9, 24\M1    // T4:T9 = A2*B3
+    adc    \T3, \T9        
+    mov    rdx, 24\M0        // rdx = A3
+    adc    \T4, 0          
+
+    add    \T0, \T5          // accumulate row 2 onto the running sum
+    mov    16\C, \T0         // C2_final
+    adc    \T1, \T6
+    adc    \T2, \T7
+    adc    \T3, \T8
+    adc    \T4, 0
+
+    mulx   \T5, \T0, \M1     // T5:T0 = A3*B0
+    mulx   \T6, \T7, 8\M1    // T6:T7 = A3*B1
+    add    \T5, \T7        
+    mulx   \T7, \T8, 16\M1   // T7:T8 = A3*B2
+    adc    \T6, \T8        
+    mulx   \T8, \T9, 24\M1   // T8:T9 = A3*B3
+    adc    \T7, \T9         
+    adc    \T8, 0         
+
+    add    \T1, \T0          // accumulate row 3 onto the running sum
+    mov    24\C, \T1         // C3_final
+    adc    \T2, \T5
+    mov    32\C, \T2         // C4_final
+    adc    \T3, \T6
+    mov    40\C, \T3         // C5_final
+    adc    \T4, \T7
+    mov    48\C, \T4         // C6_final
+    adc    \T8, 0
+    mov    56\C, \T8         // C7_final
+.endm
+#endif
+
+
+//*****************************************************************************
+//  546-bit multiplication using Karatsuba (one level), schoolbook (two levels)
+//***************************************************************************** 
+.global fmt(mul546_asm)
+fmt(mul546_asm):    
+    push   r12
+    push   r13 
+    push   r14 
+    push   r15
+    mov    rcx, reg_p3           // keep c in rcx: the MUL macros overwrite rdx
+
+    // [rsp] <- AH + AL, rax <- mask (AL = words 0-4, AH = words 5-8 of a)
+    xor    rax, rax
+    mov    r8, [reg_p1]
+    mov    r9, [reg_p1+8]
+    mov    r10, [reg_p1+16]
+    mov    r11, [reg_p1+24] 
+    mov    r12, [reg_p1+32] 
+    push   rbx  
+    push   rbp
+    sub    rsp, 160              // 160-byte scratch area, released before the final stores
+    add    r8, [reg_p1+40]
+    adc    r9, [reg_p1+48]
+    adc    r10, [reg_p1+56]
+    adc    r11, [reg_p1+64]
+    adc    r12, 0
+    sbb    rax, 0                // rax <- all ones if AH + AL carried out of 320 bits, else 0
+    mov    [rsp], r8
+    mov    [rsp+8], r9
+    mov    [rsp+16], r10
+    mov    [rsp+24], r11
+    mov    [rsp+32], r12
+
+    // [rsp+40] <- BH + BL, rdx <- mask
+    xor    rdx, rdx
+    mov    r8, [reg_p2]
+    mov    r15, [reg_p2+8]
+    mov    rbx, [reg_p2+16]
+    mov    r13, [reg_p2+24] 
+    mov    r14, [reg_p2+32]    
+    add    r8, [reg_p2+40]
+    adc    r15, [reg_p2+48]
+    adc    rbx, [reg_p2+56]
+    adc    r13, [reg_p2+64]
+    adc    r14, 0
+    sbb    rdx, 0                // rdx <- all ones if BH + BL carried out of 320 bits, else 0
+    mov    [rsp+40], r8
+    mov    [rsp+48], r15
+    mov    [rsp+56], rbx
+    mov    [rsp+64], r13
+    mov    [rsp+72], r14     
+    
+    // (BH + BL) masked with rax (carry of AH + AL): word 0 -> [rcx], words 1-4 stay in r15, rbx, r13, r14
+    and    r8, rax
+    and    r15, rax
+    and    rbx, rax
+    and    r13, rax
+    and    r14, rax    
+    mov    [rcx], r8
+
+    // r8-r12 <- masked (AH + AL)
+    mov    r8, [rsp]
+    and    r8, rdx
+    and    r9, rdx
+    and    r10, rdx
+    and    r11, rdx
+    and    r12, rdx
+
+    // [rcx+80] <- masked (AH + AL) + masked (BH + BL)
+    mov    rax, [rcx]
+    add    r8, rax
+    adc    r9, r15
+    adc    r10, rbx
+    adc    r11, r13
+    adc    r12, r14        
+    mov    [rcx+80], r8
+    mov    [rcx+88], r9
+    mov    [rcx+96], r10
+    mov    [rcx+104], r11
+    mov    [rcx+112], r12
+
+    // [rcx] <- AL x BL
+    MUL320_SCHOOL  [reg_p1], [reg_p2], [rcx], r8, r9, r10, r11, r12, r13, r14, r15     // Result C0-C4 
+
+    // [rsp+80] <- (AH+AL) x (BH+BL), low part 
+    MUL320_SCHOOL  [rsp], [rsp+40], [rsp+80], r8, r9, r10, r11, r12, r13, r14, r15
+
+    // [rsp] <- AH x BH 
+    MUL256_SCHOOL  [reg_p1+40], [reg_p2+40], [rsp], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rbp
+    
+    // r8-r12 <- (AH+AL) x (BH+BL), final step
+    mov    r8, [rcx+80]
+    mov    r9, [rcx+88]
+    mov    r10, [rcx+96]
+    mov    r11, [rcx+104]
+    mov    r12, [rcx+112]
+    mov    rax, [rsp+120]
+    add    r8, rax
+    mov    rax, [rsp+128]
+    adc    r9, rax
+    mov    rax, [rsp+136]
+    adc    r10, rax
+    mov    rax, [rsp+144]
+    adc    r11, rax
+    mov    rax, [rsp+152]
+    adc    r12, rax
+    
+    // rdi,rdx,rbx,r13,r14,r8-r12 <- (AH+AL) x (BH+BL) - ALxBL
+    mov    rdi, [rsp+80]
+    sub    rdi, [rcx]
+    mov    rdx, [rsp+88]
+    sbb    rdx, [rcx+8]
+    mov    rbx, [rsp+96]
+    sbb    rbx, [rcx+16]
+    mov    r13, [rsp+104]
+    sbb    r13, [rcx+24]
+    mov    r14, [rsp+112]     
+    sbb    r14, [rcx+32]  
+    sbb    r8, [rcx+40]
+    sbb    r9, [rcx+48]
+    sbb    r10, [rcx+56]
+    sbb    r11, [rcx+64]
+    sbb    r12, [rcx+72]
+    
+    // rdi,rdx,rbx,r13,r14,r8-r12 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
+    sub    rdi, [rsp]
+    sbb    rdx, [rsp+8]
+    sbb    rbx, [rsp+16]
+    sbb    r13, [rsp+24]
+    sbb    r14, [rsp+32]  
+    sbb    r8, [rsp+40]
+    sbb    r9, [rsp+48]
+    sbb    r10, [rsp+56]
+    sbb    r11, 0                // AHxBH has only 8 words ([rsp]..[rsp+56])
+    sbb    r12, 0
+    
+    mov    rax, [rcx+40]
+    add    rax, rdi
+    mov    [rcx+40], rax    // Result C5-C9
+    mov    rax, [rcx+48]
+    adc    rax, rdx
+    mov    [rcx+48], rax 
+    mov    rax, [rcx+56]
+    adc    rax, rbx
+    mov    [rcx+56], rax 
+    mov    rax, [rcx+64]
+    adc    rax, r13
+    mov    [rcx+64], rax 
+    mov    rax, [rcx+72]
+    adc    rax, r14           
+    mov    [rcx+72], rax 
+    mov    rax, [rsp]
+    adc    r8, rax 
+    mov    [rcx+80], r8    // Result C10-C17
+    mov    rax, [rsp+8]
+    adc    r9, rax
+    mov    [rcx+88], r9 
+    mov    rax, [rsp+16]
+    adc    r10, rax
+    mov    [rcx+96], r10 
+    mov    rax, [rsp+24]
+    adc    r11, rax
+    mov    [rcx+104], r11 
+    mov    rax, [rsp+32]
+    adc    r12, rax
+    mov    [rcx+112], r12 
+    mov    r8, [rsp+40]
+    mov    r9, [rsp+48]
+    mov    r10, [rsp+56]
+    adc    r8, 0
+    adc    r9, 0
+    adc    r10, 0
+    add    rsp, 160              // carry chain is finished, so clobbering the flags here is fine
+    mov    [rcx+120], r8 
+    mov    [rcx+128], r9 
+    mov    [rcx+136], r10 
+      
+    pop    rbp
+    pop    rbx
+    pop    r15
+    pop    r14
+    pop    r13
+    pop    r12
+    ret
+
+#else
+
+//***********************************************************************
+//  Integer multiplication
+//  Based on Karatsuba method
+//  Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
+//  NOTE: a=c or b=c are not allowed
+//***********************************************************************
+.global fmt(mul546_asm)
+fmt(mul546_asm):
+
+  ret                            // placeholder body only: the #error below rejects this configuration at build time
+
+# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE"
+
+#endif
+
+#ifdef _MULX_
+
+///////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication (128x320-bit and 64x320-bit variants)
+// Inputs:  memory pointers M0 and M1; TT = carry-in word (128x320 only)
+// Outputs: regs T0:T6 (128x320) or T0:T5 (64x320)
+// Temps:   remaining T regs, plus rax and rdx
+/////////////////////////////////////////////////////////////////
+
+#ifdef _ADX_
+.macro MUL128x320_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7, TT
+    mov    rdx, \M0            // rdx <- first word at M0 (implicit mulx multiplicand)
+    mulx   \T1, \T0, \M1       // T0 <- C0_final    
+    mulx   \T2, \T4, 8\M1
+    xor    rax, rax            // rax <- 0; also clears CF and OF for the adcx/adox chains
+    mulx   \T3, \T5, 16\M1 
+    adox   \T1, \T4               
+    adox   \T2, \T5     
+    mulx   \T4, \T7, 24\M1
+    adox   \T3, \T7         
+    mulx   \T5, \T6, 32\M1 
+    adox   \T4, \T6           
+    adox   \T5, \TT            // fold the carry-in word TT into word 5 (together with OF)
+    
+    mov    rdx, 8\M0           // rdx <- second word at M0
+    mulx   \T7, \T6, \M1 
+    adcx   \T1, \T6            // T1 <- C1_final 
+    adcx   \T2, \T7    
+    mulx   \T6, \T7, 8\M1
+    adox   \T2, \T7  
+    adcx   \T3, \T6        
+    mulx   \T6, \T7, 16\M1
+    adox   \T3, \T7
+    adcx   \T4, \T6     
+    mulx   \T6, \T7, 24\M1
+    adox   \T4, \T7     
+    adcx   \T5, \T6            // NOTE(review): CF produced here is never added into T6 - presumably safe for the operand passed as M1; confirm
+    mulx   \T6, \T7, 32\M1 
+    adox   \T5, \T7          
+    adox   \T6, rax            // T6 <- top word plus the last OF carry; result is T0:T6
+.endm
+
+.macro MUL64x320_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7
+    mov    rdx, \M0            // rdx <- the single word at M0
+    mulx   \T1, \T0, \M1       // T0 <- C0_final    
+    mulx   \T2, \T4, 8\M1
+    xor    rax, rax            // rax <- 0; also clears OF before the adox chain
+    mulx   \T3, \T5, 16\M1 
+    adox   \T1, \T4               
+    adox   \T2, \T5     
+    mulx   \T4, \T7, 24\M1
+    adox   \T3, \T7         
+    mulx   \T5, \T6, 32\M1 
+    adox   \T4, \T6           
+    adox   \T5, rax            // absorb the last carry; result is T0:T5
+.endm
+
+#else
+
+.macro MUL128x320_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7, TT
+    mov    rdx, \M0            // rdx <- A0 (first word at M0)
+    mulx   \T1, \T0, \M1       // T0 <- C0_final 
+    mulx   \T2, \T3, 8\M1
+    add    \T1, \T3               
+    adc    \T2, 0  
+
+    mov    rdx, 8\M0           // rdx <- A1 (second word at M0)
+    xor    \T5, \T5
+    mulx   \T3, \T4, \M1 
+    add    \T1, \T4            // T1 <- C1_final
+    adc    \T2, \T3  
+    adc    \T5, 0              // T5 <- carry into word 3
+      
+    xor    \T6, \T6
+    mulx   \T3, \T4, 8\M1
+    add    \T2, \T4  
+    adc    \T3, \T5           
+    adc    \T6, 0              // T6 <- carry into word 4
+        
+    mov    rdx, \M0            // back to A0
+    mulx   \T4, \T5, 16\M1 
+    add    \T2, \T5            // T2 <- C2_final
+    adc    \T3, \T4           
+    adc    \T6, 0  
+        
+    xor    \T7, \T7        
+    mulx   \T4, \T5, 24\M1 
+    add    \T3, \T5  
+    adc    \T4, \T6           
+    adc    \T7, 0              // T7 <- carry into word 5
+
+    mov    rdx, 8\M0           // back to A1
+    mulx   \T5, \T6, 16\M1 
+    add    \T3, \T6            // T3 <- C3_final
+    adc    \T4, \T5  
+    adc    \T7, 0    
+        
+    xor    \T6, \T6        
+    mulx   \T5, rax, 24\M1 
+    add    \T4, rax  
+    adc    \T5, \T7           
+    adc    \T6, 0  
+        
+    mov    rdx, 32\M1          // rdx <- top word at M1; the last two products multiply it by the words at M0
+    mulx   \T7, rax, \M0 
+    add    \T4, rax            // T4 <- C4_final
+    adc    \T5, \T7           
+    adc    \T6, 0      
+               
+    mulx   rax, \T7, 8\M0 
+    add    \T5, \T7  
+    adc    \T6, rax
+
+    add    \T5, \TT            // fold the carry-in word TT into word 5
+    adc    \T6, 0              // result is T0:T6
+.endm
+
+.macro MUL64x320_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7
+    mov    rdx, \M0            // rdx <- the single word at M0
+    mulx   \T1, \T0, \M1       // T0 <- C0_final 
+    mulx   \T2, \T3, 8\M1
+    add    \T1, \T3            // T1 <- C1_final
+    adc    \T2, 0  
+  
+    mulx   \T3, \T4, 16\M1 
+    add    \T2, \T4            // T2 <- C2_final
+    adc    \T3, 0  
+      
+    mulx   \T4, \T5, 24\M1
+    add    \T3, \T5            // T3 <- C3_final
+    adc    \T4, 0 
+                
+    mulx   \T5, \T6, 32\M1 
+    add    \T4, \T6            // T4 <- C4_final
+    adc    \T5, 0              // T5 <- C5_final; T7 is not used by this variant
+.endm  
+#endif
+
+  
+//**************************************************************************************
+//  Montgomery reduction
+//  Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015  
+//  Operation: c [reg_p2] = a [reg_p1]
+//  NOTE: a=c is not allowed; words of a from [reg_p1+32] upwards are overwritten as scratch
+//************************************************************************************** 
+.global fmt(rdc546_asm)
+fmt(rdc546_asm):
+    push   r12
+    push   r13 
+    push   r14 
+    push   r15 	
+    xor    rcx, rcx             // carry-in (TT) for the first step is 0
+
+    // a[0-1] x p546p1_nz --> result: r8:r14 
+    MUL128x320_SCHOOL [reg_p1], [rip+fmt(p546p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx     
+	
+    xor    rcx, rcx
+    add    r8, [reg_p1+32]  
+    adc    r9, [reg_p1+40]  
+    adc    r10, [reg_p1+48]   
+    adc    r11, [reg_p1+56]   
+    adc    r12, [reg_p1+64]   
+    adc    r13, [reg_p1+72]   
+    adc    r14, [reg_p1+80]
+	adc    rcx, 0               // rcx <- carry out, passed as TT to the next multiplication
+    mov    [reg_p1+32], r8  
+    mov    [reg_p1+40], r9  
+    mov    [reg_p1+48], r10  
+    mov    [reg_p1+56], r11  
+    mov    [reg_p1+64], r12  
+    mov    [reg_p1+72], r13  
+    mov    [reg_p1+80], r14
+
+    // a[2-3] x p546p1_nz --> result: r8:r14
+    MUL128x320_SCHOOL [reg_p1+16], [rip+fmt(p546p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx 
+
+    xor    rcx, rcx
+    add    r8, [reg_p1+48]  
+    adc    r9, [reg_p1+56]  
+    adc    r10, [reg_p1+64]   
+    adc    r11, [reg_p1+72]  
+    adc    r12, [reg_p1+80]   
+    adc    r13, [reg_p1+88]   
+    adc    r14, [reg_p1+96] 
+    adc    rcx, 0               // rcx <- carry out, passed as TT to the next multiplication
+    mov    [reg_p1+48], r8  
+    mov    [reg_p1+56], r9  
+    mov    [reg_p1+64], r10  
+    mov    [reg_p1+72], r11   
+    mov    [reg_p1+80], r12  
+    mov    [reg_p1+88], r13  
+    mov    [reg_p1+96], r14
+
+    // a[4-5] x p546p1_nz --> result: r8:r14
+    MUL128x320_SCHOOL [reg_p1+32], [rip+fmt(p546p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx 
+
+    xor    rcx, rcx
+    add    r8, [reg_p1+64]  
+    adc    r9, [reg_p1+72]  
+    adc    r10, [reg_p1+80]   
+    adc    r11, [reg_p1+88]  
+    adc    r12, [reg_p1+96]   
+    adc    r13, [reg_p1+104]   
+    adc    r14, [reg_p1+112] 
+    adc    rcx, 0               // rcx <- carry out, passed as TT to the next multiplication
+    mov    [reg_p1+64], r8 
+    mov    [reg_p2], r9         // C0_final 
+    mov    [reg_p1+80], r10  
+    mov    [reg_p1+88], r11   
+    mov    [reg_p1+96], r12  
+    mov    [reg_p1+104], r13  
+    mov    [reg_p1+112], r14
+
+    // a[6-7] x p546p1_nz --> result: r8:r14
+    MUL128x320_SCHOOL [reg_p1+48], [rip+fmt(p546p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx 
+
+    xor    rcx, rcx
+    add    r8, [reg_p1+80]  
+    adc    r9, [reg_p1+88]  
+    adc    r10, [reg_p1+96]   
+    adc    r11, [reg_p1+104]  
+    adc    r12, [reg_p1+112]   
+    adc    r13, [reg_p1+120]   
+    adc    r14, [reg_p1+128]
+    adc    rcx, [reg_p1+136]    // rcx <- a[17] + carry out; added directly in the final step below
+    mov    [reg_p2+8], r8       // C1_final
+    mov    [reg_p2+16], r9      // C2_final
+    mov    [reg_p1+96], r10  
+    mov    [reg_p1+104], r11   
+    mov    [reg_p1+112], r12  
+    mov    [reg_p1+120], r13  
+    mov    [reg_p1+128], r14
+
+    // a[8] x p546p1_nz --> result: r8:r13
+    MUL64x320_SCHOOL [reg_p1+64], [rip+fmt(p546p1)+32], r8, r9, r10, r11, r12, r13, r14, r15
+    
+    // Final result C3:C8
+    add    r8, [reg_p1+96]  
+    adc    r9, [reg_p1+104]  
+    adc    r10, [reg_p1+112]   
+    adc    r11, [reg_p1+120]  
+    adc    r12, [reg_p1+128]   
+    adc    r13, rcx
+    mov    [reg_p2+24], r8
+    mov    [reg_p2+32], r9  
+    mov    [reg_p2+40], r10   
+    mov    [reg_p2+48], r11  
+    mov    [reg_p2+56], r12  
+    mov    [reg_p2+64], r13
+
+    pop    r15
+    pop    r14
+    pop    r13
+    pop    r12
+    ret
+
+  #else
+  
+//***********************************************************************
+//  Montgomery reduction
+//  Based on comba method
+//  Operation: c [reg_p2] = a [reg_p1]
+//  NOTE: a=c is not allowed
+//*********************************************************************** 
+.global fmt(rdc546_asm)
+fmt(rdc546_asm):
+
+  ret                            // placeholder body only: the #error below rejects builds without _MULX_
+
+# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE"
+
+  #endif
+
+//***********************************************************************
+//  546-bit multiprecision addition
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
+//*********************************************************************** 
+.global fmt(mp_add546_asm)
+fmt(mp_add546_asm):  
+  mov    r8, [reg_p1]              // words 0-4 of a
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    rax, [reg_p1+32]
+  add    r8, [reg_p2]              // + words 0-4 of b
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  adc    rax, [reg_p2+32] 
+  mov    [reg_p3], r8              // mov does not touch CF, so the carry survives the stores/loads below
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], rax
+
+  mov    r8, [reg_p1+40]           // words 5-8 of a
+  mov    r9, [reg_p1+48] 
+  mov    r10, [reg_p1+56]
+  mov    r11, [reg_p1+64] 
+  // Operands are 9 words (72 bytes): [reg_p1+72] is past the end of a and must not be read
+  adc    r8, [reg_p2+40]           // continue the carry chain from word 4
+  adc    r9, [reg_p2+48]
+  adc    r10, [reg_p2+56] 
+  adc    r11, [reg_p2+64]
+  mov    [reg_p3+40], r8
+  mov    [reg_p3+48], r9
+  mov    [reg_p3+56], r10
+  mov    [reg_p3+64], r11
+  ret
+
+
+//***********************************************************************
+//  2x546-bit multiprecision subtraction/addition
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. If c < 0, add p546*2^576
+//*********************************************************************** 
+.global fmt(mp_subadd546x2_asm)
+fmt(mp_subadd546x2_asm):
+  push   r12
+  push   r13 
+  push   r14 
+  push   r15
+  xor    rax, rax                  // rax becomes the borrow mask after the subtraction
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    rcx, [reg_p1+32]
+  sub    r8, [reg_p2] 
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    rcx, [reg_p2+32] 
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], rcx
+
+  mov    r8, [reg_p1+40]
+  mov    r9, [reg_p1+48]
+  mov    r10, [reg_p1+56] 
+  mov    r11, [reg_p1+64]
+  sbb    r8, [reg_p2+40] 
+  sbb    r9, [reg_p2+48] 
+  sbb    r10, [reg_p2+56]
+  sbb    r11, [reg_p2+64]
+  mov    [reg_p3+40], r8
+  mov    [reg_p3+48], r9
+  mov    [reg_p3+56], r10
+  mov    [reg_p3+64], r11
+  
+  mov    r8, [reg_p1+72]
+  mov    r9, [reg_p1+80] 
+  mov    r10, [reg_p1+88]
+  mov    r11, [reg_p1+96]
+  mov    rcx, [reg_p1+104]
+  sbb    r8, [reg_p2+72]
+  sbb    r9, [reg_p2+80]
+  sbb    r10, [reg_p2+88] 
+  sbb    r11, [reg_p2+96] 
+  sbb    rcx, [reg_p2+104]
+  mov    [reg_p3+72], r8 
+  mov    [reg_p3+80], r9
+  mov    [reg_p3+88], r10
+  mov    [reg_p3+96], r11
+  mov    [reg_p3+104], rcx
+  
+  mov    r8, [reg_p1+112]          // top four words stay in r8-r11 until the correction is added
+  mov    r9, [reg_p1+120]
+  mov    r10, [reg_p1+128]
+  mov    r11, [reg_p1+136]
+  sbb    r8, [reg_p2+112] 
+  sbb    r9, [reg_p2+120] 
+  sbb    r10, [reg_p2+128] 
+  sbb    r11, [reg_p2+136]
+  sbb    rax, 0                    // rax <- all ones if the subtraction borrowed, else 0
+  
+  // Add p546 anded with the mask in rax 
+  mov    r12, [rip+fmt(p546)]      // p546 word 0; the same register is reused below for words 1-3
+  mov    r13, [rip+fmt(p546)+32]
+  mov    r14, [rip+fmt(p546)+40]
+  mov    r15, [rip+fmt(p546)+48]
+  mov    rdi, [rip+fmt(p546)+56]   // rdi and rsi serve as scratch from here on
+  mov    rsi, [rip+fmt(p546)+64]
+  and    r12, rax
+  and    r13, rax
+  and    r14, rax
+  and    r15, rax
+  and    rdi, rax
+  and    rsi, rax
+  mov    rax, [reg_p3+72]          // the correction starts at word 9, i.e. p546 is scaled by 2^576
+  add    rax, r12
+  mov    [reg_p3+72], rax
+  mov    rax, [reg_p3+80]
+  adc    rax, r12
+  mov    [reg_p3+80], rax
+  mov    rax, [reg_p3+88]
+  adc    rax, r12
+  mov    [reg_p3+88], rax
+  adc    r12, [reg_p3+96]          // last use of r12 as a p546 word; the sum is formed in r12 itself
+  adc    r13, [reg_p3+104]
+  mov    [reg_p3+96], r12
+  mov    [reg_p3+104], r13
+  adc    r8, r14
+  adc    r9, r15
+  adc    r10, rdi
+  adc    r11, rsi
+  
+  mov    [reg_p3+112], r8
+  mov    [reg_p3+120], r9
+  mov    [reg_p3+128], r10
+  mov    [reg_p3+136], r11
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Double 2x546-bit multiprecision subtraction
+//  Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
+//*********************************************************************** 
+.global fmt(mp_dblsub546x2_asm)
+fmt(mp_dblsub546x2_asm):
+  push   r12
+  push   r13
+  
+  mov    r8, [reg_p3]              // words 0-5 of c
+  mov    r9, [reg_p3+8]
+  mov    r10, [reg_p3+16]
+  mov    r11, [reg_p3+24]
+  mov    r12, [reg_p3+32]
+  mov    r13, [reg_p3+40]
+  sub    r8, [reg_p1]
+  sbb    r9, [reg_p1+8] 
+  sbb    r10, [reg_p1+16] 
+  sbb    r11, [reg_p1+24] 
+  sbb    r12, [reg_p1+32] 
+  sbb    r13, [reg_p1+40]
+  setc   al                        // al <- borrow of the "- a" chain
+  sub    r8, [reg_p2]
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  setc   cl                        // cl <- borrow of the "- b" chain
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+  mov    [reg_p3+40], r13
+    
+  mov    r8, [reg_p3+48]           // words 6-11 of c
+  mov    r9, [reg_p3+56]
+  mov    r10, [reg_p3+64]
+  mov    r11, [reg_p3+72]
+  mov    r12, [reg_p3+80]
+  mov    r13, [reg_p3+88]
+  bt     rax, 0                    // CF <- saved borrow of the "- a" chain (bit 0 of al)
+  sbb    r8, [reg_p1+48] 
+  sbb    r9, [reg_p1+56]
+  sbb    r10, [reg_p1+64] 
+  sbb    r11, [reg_p1+72] 
+  sbb    r12, [reg_p1+80] 
+  sbb    r13, [reg_p1+88]
+  setc   al 
+  bt     rcx, 0                    // CF <- saved borrow of the "- b" chain (bit 0 of cl)
+  sbb    r8, [reg_p2+48] 
+  sbb    r9, [reg_p2+56]
+  sbb    r10, [reg_p2+64] 
+  sbb    r11, [reg_p2+72] 
+  sbb    r12, [reg_p2+80] 
+  sbb    r13, [reg_p2+88]
+  setc   cl 
+  mov    [reg_p3+48], r8
+  mov    [reg_p3+56], r9
+  mov    [reg_p3+64], r10
+  mov    [reg_p3+72], r11
+  mov    [reg_p3+80], r12
+  mov    [reg_p3+88], r13
+  
+  mov    r8, [reg_p3+96]           // words 12-17 of c
+  mov    r9, [reg_p3+104]
+  mov    r10, [reg_p3+112]
+  mov    r11, [reg_p3+120]
+  mov    r12, [reg_p3+128]
+  mov    r13, [reg_p3+136]
+  bt     rax, 0  
+  sbb    r8, [reg_p1+96] 
+  sbb    r9, [reg_p1+104] 
+  sbb    r10, [reg_p1+112] 
+  sbb    r11, [reg_p1+120]
+  sbb    r12, [reg_p1+128] 
+  sbb    r13, [reg_p1+136]         // the final borrows of both chains are discarded
+  bt     rcx, 0  
+  sbb    r8, [reg_p2+96] 
+  sbb    r9, [reg_p2+104] 
+  sbb    r10, [reg_p2+112] 
+  sbb    r11, [reg_p2+120]
+  sbb    r12, [reg_p2+128] 
+  sbb    r13, [reg_p2+136]
+  mov    [reg_p3+96], r8
+  mov    [reg_p3+104], r9
+  mov    [reg_p3+112], r10
+  mov    [reg_p3+120], r11
+  mov    [reg_p3+128], r12
+  mov    [reg_p3+136], r13
+  
+  pop    r13
+  pop    r12
+  ret
\ No newline at end of file
diff --git a/SIKE_sw/src/P546/P546.c b/SIKE_sw/src/P546/P546.c
new file mode 100644
index 0000000..953afe7
--- /dev/null
+++ b/SIKE_sw/src/P546/P546.c
@@ -0,0 +1,135 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: supersingular isogeny parameters and generation of functions for P546
+*********************************************************************************************/  
+
+#include "P546_api.h" 
+#include "P546_internal.h"
+#include "../internal.h"
+
+
+// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points:
+// --------------------------------------------------------------------------------------------------
+// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). 
+// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position.
+// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. 
+// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32.
+// For example, a 546-bit field element is represented with Ceil(546 / 64) = 9 64-bit digits or Ceil(546 / 32) = 18 32-bit digits.
+
+//
+// Curve isogeny system "SIDHp546". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p546^2), where A=6, B=1, C=1 and p546 = 2^273*3^172-1
+//
+         
+const uint64_t p546[NWORDS64_FIELD]              = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xC1CCF59098E1FFFF, 
+                                                     0x91CA3591A0810F4F, 0xC3A747738CBAAD7D, 0x3E568459654D5F6B, 0x000000030F5EBA42 };
+const uint64_t p546p1[NWORDS64_FIELD]            = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xC1CCF59098E20000,
+                                                     0x91CA3591A0810F4F, 0xC3A747738CBAAD7D, 0x3E568459654D5F6B, 0x000000030F5EBA42 };
+const uint64_t p546x2[NWORDS64_FIELD]            = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x8399EB2131C3FFFF,
+                                                     0x23946B2341021E9F, 0x874E8EE719755AFB, 0x7CAD08B2CA9ABED7, 0x000000061EBD7484 }; 
+const uint64_t p546x4[NWORDS64_FIELD]            = { 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0733D6426387FFFF,
+                                                     0x4728D64682043D3F, 0x0E9D1DCE32EAB5F6, 0xF95A116595357DAF, 0x0000000C3D7AE908 };
+// Order of Alice's subgroup (2^273: bit 17 of the fifth 64-bit word)
+const uint64_t Alice_order[NWORDS64_ORDER]       = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000020000 }; 
+// Order of Bob's subgroup
+const uint64_t Bob_order[NWORDS64_ORDER]         = { 0x87A7E0E67AC84C71, 0x56BEC8E51AC8D040, 0xAFB5E1D3A3B9C65D, 0x5D211F2B422CB2A6, 0x00000000000187AF };
+// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i} in GF(p546^2), expressed in Montgomery representation
+const uint64_t A_gen[6*NWORDS64_FIELD]           = { 0x8BF8B5CDA3529A11, 0x920F7AF8D8EDA1CE, 0x6A4FD6F4E65D2601, 0xAA5FDD88E6C8C053, 0x2DDFECC4564DD092,
+                                                     0xB5AE8E8B63CDD2EB, 0xF5530B1581D37EFC, 0xBB69799BE0974397, 0x000000029E924174,   // XPA0
+                                                     0x02BAA3F5AA08FBA0, 0xDF5E66F9718B1DB3, 0x7AAD305C4C16B9B5, 0xEFC538F7C899EC44, 0xB2B7A11B88589305,
+                                                     0xF4C2FE11D652F55A, 0x45F5A4010B37F36F, 0x68C0BE35B4414691, 0x00000002974A76B9,   // XPA1
+                                                     0x6655849EE4AD62B0, 0xA7B09BDA24F18E3D, 0xD9DC9DF1EFE6D4E3, 0x5618AE214D22122F, 0x35CE7CD8878AB07,
+                                                     0xDFBE3687D874F305, 0x0FFAC636361A0289, 0x732304C3E314E9F3, 0x00000002D4829F4D,   // XQA0  NOTE(review): 0x35CE7CD8878AB07 above has only 15 hex digits - confirm no digit was dropped
+                                                     0xD433C9386F41F07B, 0x591D74E6B6E16886, 0x1E91924E4D82BEA1, 0xE9ED0654FE5D746F, 0x95029EF76C0961D9,
+                                                     0x9C5798078846CCA8, 0xB8AD7EC5421DCE49, 0xEBEF3DD3098146F8, 0x000000010E9A2BCA,   // XQA1
+                                                     0xC218DF11E1FCA67A, 0x8C622C3530976AAF, 0xC5A558DA88A028C1, 0x5B0E218835EB3EEA, 0x63B412D6B77F6E5F,
+                                                     0x44265EEA17A1F58C, 0xD7A6BD5FE291AA13, 0xC0918F65ED8D3D23, 0x000000005562DBCD,   // XRA0
+                                                     0x071F4177BDD2E021, 0xDC8F3873504C93E7, 0x77038B491A006DB7, 0x9E205A8C15B8F717, 0x701734570E79CC07,
+                                                     0x0790455A85462B3D, 0x19AC9F7FC32A9F20, 0x04B599768492F2D5, 0x0000000248379BC7 }; // XRA1
+// Bob's generator values {XPB0 + XPB1*i, XQB0 + XQB1*i, XRB0 + XRB1*i} in GF(p546^2), expressed in Montgomery representation; XPB1 and XQB1 are stored explicitly as zero
+const uint64_t B_gen[6*NWORDS64_FIELD]           = { 0xC60DC8B9DD8A126C, 0x2841B16BD9C550AB, 0x33EB13E27326D027, 0xB4E345D7318893D7, 0x4F7BD19633EAA269,
+                                                     0xA93049DB038741F4, 0x93222D9F331C2848, 0x15FFBA19339361F0, 0x0000000089E90060,   // XPB0
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,   // XPB1
+                                                     0xA07EEF3334ACF340, 0x417F1E66A839DFCB, 0x45C32C88DAA25A10, 0x563B27FA6991C6BF, 0x4BE0CC5C10D513A9,
+                                                     0xE4E1756C009BD03E, 0xAFDFBF640F2717AC, 0xDC5EE9B702D8E56C, 0x0000000182A09EB0,   // XQB0
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,   // XQB1
+                                                     0x74937ACDD796D6EE, 0x7C5E906509CE108B, 0xDA57EBEF8BA73940, 0x1E5CB85A8E1C9A4C, 0xD4EBE9C3A955BB62,
+                                                     0xBA4C02A05B39742F, 0x21A4B5BCACC33156, 0xE96E8BD54B98A20C, 0x0000000104B99E73,   // XRB0
+                                                     0xEEFADB5C4965D7A8, 0xE653CE9D2DB5CD75, 0xB511FF5416DEAB7C, 0xA5D5B131D1112DEF, 0x72D33ED20BB3EB46,
+                                                     0x96809017849D85DF, 0x00BA691C5F526CFF, 0x9B384D1CF1873823, 0x0000000152691238 }; // XRB1                                                     
+// Montgomery constant Montgomery_R2 = (2^576)^2 mod p546
+const uint64_t Montgomery_R2[NWORDS64_FIELD]     = { 0x52EB0249395B3348, 0x984F8851AEFDB3F3, 0x913744158E52803C, 0x1EC818C9E0CA0DA3, 0x4C2396C7E7350E87,
+                                                     0x75D4E9F73AC13B39, 0x1640A26835D93C44, 0x5D441830B61AD042, 0x00000001357E298F };                                                    
+// Value one in Montgomery representation 
+const uint64_t Montgomery_one[NWORDS64_FIELD]    = { 0x0000000053A8B821, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xAB9ED029DADE0000,
+                                                     0x7FD34034A42F114D, 0x319FDC331E9125F5, 0xF1361EF3C5499C8A, 0x00000001393B6AF7 };
+
+
+// Fixed parameters for isogeny tree computation (one table per party; sizes come from MAX_Alice and MAX_Bob, defined outside this file)
+const unsigned int strat_Alice[MAX_Alice-1] = { 
+65, 33, 17, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4,
+2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2,
+1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 32, 16, 8, 4, 2, 1,
+1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1,
+16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1,
+1, 2, 1, 1 };
+
+const unsigned int strat_Bob[MAX_Bob-1] = { 
+71, 43, 27, 15, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1,
+1, 3, 2, 1, 1, 1, 1, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1,
+1, 1, 2, 1, 1, 1, 17, 11, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 4, 3, 2, 1, 1,
+1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 33, 17, 9, 5, 3, 2,
+1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1,
+1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1,
+1, 4, 2, 1, 1, 2, 1, 1 };
+           
+// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions
+#define fpcopy                        fpcopy546
+#define fpzero                        fpzero546
+#define fpadd                         fpadd546
+#define fpsub                         fpsub546
+#define fpneg                         fpneg546
+#define fpdiv2                        fpdiv2_546
+#define fpcorrection                  fpcorrection546
+#define fpmul_mont                    fpmul546_mont
+#define fpsqr_mont                    fpsqr546_mont
+#define fpinv_mont                    fpinv546_mont
+#define fpinv_chain_mont              fpinv546_chain_mont
+#define fp2copy                       fp2copy546
+#define fp2zero                       fp2zero546
+#define fp2add                        fp2add546
+#define fp2sub                        fp2sub546
+#define mp_sub_p2                     mp_sub546_p2
+#define mp_sub_p4                     mp_sub546_p4
+#define sub_p4                        mp_sub_p4    // alias: expands through the define above to mp_sub546_p4
+#define fp2neg                        fp2neg546
+#define fp2div2                       fp2div2_546
+#define fp2correction                 fp2correction546
+#define fp2mul_mont                   fp2mul546_mont
+#define fp2sqr_mont                   fp2sqr546_mont
+#define fp2inv_mont                   fp2inv546_mont
+#define fp2inv_mont_ct                fp2inv546_mont_ct
+#define fp2inv_mont_bingcd            fp2inv546_mont_bingcd
+#define mp_add_asm                    mp_add546_asm         // assembly routines exported by the P546 .S file
+#define mp_subaddx2_asm               mp_subadd546x2_asm
+#define mp_dblsubx2_asm               mp_dblsub546x2_asm
+#define crypto_kem_keypair            crypto_kem_keypair_SIKEp546
+#define crypto_kem_enc                crypto_kem_enc_SIKEp546
+#define crypto_kem_dec                crypto_kem_dec_SIKEp546
+#define random_mod_order_A            random_mod_order_A_SIDHp546
+#define random_mod_order_B            random_mod_order_B_SIDHp546
+#define EphemeralKeyGeneration_A      EphemeralKeyGeneration_A_SIDHp546
+#define EphemeralKeyGeneration_B      EphemeralKeyGeneration_B_SIDHp546
+#define EphemeralSecretAgreement_A    EphemeralSecretAgreement_A_SIDHp546
+#define EphemeralSecretAgreement_B    EphemeralSecretAgreement_B_SIDHp546
+
+#include "../fpx.c"
+#include "../ec_isogeny.c"
+#include "../sidh.c"    
+#include "../sike.c"
\ No newline at end of file
diff --git a/SIKE_sw/src/P546/P546_api.h b/SIKE_sw/src/P546/P546_api.h
new file mode 100644
index 0000000..af461af
--- /dev/null
+++ b/SIKE_sw/src/P546/P546_api.h
@@ -0,0 +1,112 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: API header file for P546
+*********************************************************************************************/  
+
+#ifndef P546_API_H
+#define P546_API_H
+    
+
+/*********************** Key encapsulation mechanism API ***********************/
+
+#define CRYPTO_SECRETKEYBYTES     472    // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes
+#define CRYPTO_PUBLICKEYBYTES     414
+#define CRYPTO_BYTES               24
+#define CRYPTO_CIPHERTEXTBYTES    438    // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes  
+
+// Algorithm name
+#define CRYPTO_ALGNAME "SIKEp546"  
+
+// SIKE's key generation
+// It produces a private key sk and computes the public key pk.
+// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 472 bytes)
+//          public key pk (CRYPTO_PUBLICKEYBYTES = 414 bytes) 
+int crypto_kem_keypair_SIKEp546(unsigned char *pk, unsigned char *sk);
+
+// SIKE's encapsulation
+// Input:   public key pk         (CRYPTO_PUBLICKEYBYTES = 414 bytes)
+// Outputs: shared secret ss      (CRYPTO_BYTES = 24 bytes)
+//          ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 438 bytes)
+int crypto_kem_enc_SIKEp546(unsigned char *ct, unsigned char *ss, const unsigned char *pk);
+
+// SIKE's decapsulation
+// Input:   secret key sk         (CRYPTO_SECRETKEYBYTES = 472 bytes)
+//          ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 438 bytes) 
+// Outputs: shared secret ss      (CRYPTO_BYTES = 24 bytes)
+int crypto_kem_dec_SIKEp546(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);
+
+
+// Encoding of keys for KEM-based isogeny system "SIKEp546" (wire format):
+// ----------------------------------------------------------------------
+// Elements over GF(p546) are encoded in 69 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). 
+// Elements (a+b*i) over GF(p546^2), where a and b are defined over GF(p546), are encoded as {a, b}, with a in the lowest memory portion.
+//
+// Private keys sk consist of the concatenation of a 24-byte random value, a value in the range [0, 2^Floor(Log(2,3^172))-1] and the public key pk. In the SIKE API, 
+// private keys are encoded in 472 octets in little endian format. 
+// Public keys pk consist of 3 elements in GF(p546^2). In the SIKE API, pk is encoded in 414 octets. 
+// Ciphertexts ct consist of the concatenation of a public key value and a 24-byte value. In the SIKE API, ct is encoded in 414 + 24 = 438 octets.  
+// Shared keys ss consist of a value of 24 octets.
+
+
+/*********************** Ephemeral key exchange API ***********************/
+
+#define SIDH_SECRETKEYBYTES_A    35
+#define SIDH_SECRETKEYBYTES_B    34
+#define SIDH_PUBLICKEYBYTES     414
+#define SIDH_BYTES              138
+
+// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys.
+// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016.
+// Extended version available at: http://eprint.iacr.org/2016/859  
+
+// Generation of Alice's secret key 
+// Outputs random value in [0, 2^273 - 1] to be used as Alice's private key
+void random_mod_order_A_SIDHp546(unsigned char* random_digits);
+
+// Generation of Bob's secret key 
+// Outputs random value in [0, 2^Floor(Log(2,3^172)) - 1] to be used as Bob's private key
+void random_mod_order_B_SIDHp546(unsigned char* random_digits);
+
+// Alice's ephemeral public key generation
+// Input:  a private key PrivateKeyA in the range [0, 2^273 - 1], stored in 35 bytes. 
+// Output: the public key PublicKeyA consisting of 3 GF(p546^2) elements encoded in 414 bytes.
+int EphemeralKeyGeneration_A_SIDHp546(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA);
+
+// Bob's ephemeral public key generation
+// It computes the public key PublicKeyB from the caller-supplied private key PrivateKeyB (the private key is an input; it is not generated here).
+// Input:  a private key PrivateKeyB in the range [0, 2^Floor(Log(2,3^172)) - 1], stored in 34 bytes. 
+// Output: the public key PublicKeyB consisting of 3 GF(p546^2) elements encoded in 414 bytes.
+int EphemeralKeyGeneration_B_SIDHp546(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB);
+
+// Alice's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB
+// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^273 - 1], stored in 35 bytes. 
+//         Bob's PublicKeyB consists of 3 GF(p546^2) elements encoded in 414 bytes.
+// Output: a shared secret SharedSecretA that consists of one element in GF(p546^2) encoded in 138 bytes.
+int EphemeralSecretAgreement_A_SIDHp546(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA);
+
+// Bob's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA
+// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^172)) - 1], stored in 34 bytes. 
+//         Alice's PublicKeyA consists of 3 GF(p546^2) elements encoded in 414 bytes.
+// Output: a shared secret SharedSecretB that consists of one element in GF(p546^2) encoded in 138 bytes.
+int EphemeralSecretAgreement_B_SIDHp546(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB);
+
+
+// Encoding of keys for KEX-based isogeny system "SIDHp546" (wire format):
+// ----------------------------------------------------------------------
+// Elements over GF(p546) are encoded in 69 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). 
+// Elements (a+b*i) over GF(p546^2), where a and b are defined over GF(p546), are encoded as {a, b}, with a in the lowest memory portion.
+//
+// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^273-1] and [0, 2^Floor(Log(2,3^172)) - 1], resp. In the SIDH API, 
+// Alice's and Bob's private keys are encoded in 35 and 34 octets, resp., in little endian format. 
+// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p546^2). In the SIDH API, they are encoded in 414 octets. 
+// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p546^2). In the SIDH API, they are encoded in 138 octets.
+
+
+#endif
\ No newline at end of file
diff --git a/SIKE_sw/src/P546/P546_internal.h b/SIKE_sw/src/P546/P546_internal.h
new file mode 100644
index 0000000..6888b9a
--- /dev/null
+++ b/SIKE_sw/src/P546/P546_internal.h
@@ -0,0 +1,175 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: internal header file for P546
+*********************************************************************************************/  
+
+#ifndef P546_INTERNAL_H
+#define P546_INTERNAL_H
+
+#include "../config.h"
+ 
+
+#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_ARM64)
+    #define NWORDS_FIELD    9               // Number of words of a 546-bit field element
+    #define p546_ZERO_WORDS 4               // Number of "0" digits in the least significant part of p546 + 1    
+#elif (TARGET == TARGET_x86)
+    #define NWORDS_FIELD    18
+    #define p546_ZERO_WORDS 8
+#endif
+    
+
+// Basic constants
+
+#define NBITS_FIELD             546                                 // Bit length of the field prime p546
+#define MAXBITS_FIELD           576                                 // NBITS_FIELD rounded up to a multiple of 64 bits (9*64)
+#define MAXWORDS_FIELD          ((MAXBITS_FIELD+RADIX-1)/RADIX)     // Max. number of words to represent field elements
+#define NWORDS64_FIELD          ((NBITS_FIELD+63)/64)               // Number of 64-bit words of a 546-bit field element 
+#define NBITS_ORDER             320                                 // Bit budget reserved for the subgroup orders oA and oB (5*64)
+#define NWORDS_ORDER            ((NBITS_ORDER+RADIX-1)/RADIX)       // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp.
+#define NWORDS64_ORDER          ((NBITS_ORDER+63)/64)               // Number of 64-bit words of a 320-bit (NBITS_ORDER) element 
+#define MAXBITS_ORDER           NBITS_ORDER
+#define ALICE                   0
+#define BOB                     1 
+#define OALICE_BITS             273                                 // Alice's private keys lie in [0, 2^273 - 1]
+#define OBOB_BITS               273     
+#define OBOB_EXPON              172                                 // Exponent of 3 in Bob's order, 3^172
+#define MASK_ALICE              0x01 
+#define MASK_BOB                0xFF 
+#define PRIME                   p546 
+#define PARAM_A                 6  
+#define PARAM_C                 1
+// Fixed parameters for isogeny tree computation
+#define MAX_INT_POINTS_ALICE    8
+#define MAX_INT_POINTS_BOB      8    
+#define MAX_Alice               136
+#define MAX_Bob                 172
+#define MSG_BYTES               24
+#define SECRETKEY_A_BYTES       ((OALICE_BITS + 7) / 8)             // = 35 bytes for OALICE_BITS = 273
+#define SECRETKEY_B_BYTES       ((OBOB_BITS - 1 + 7) / 8)           // = 34 bytes for OBOB_BITS = 273
+#define FP2_ENCODED_BYTES       (2*((NBITS_FIELD + 7) / 8))         // Bytes of an encoded GF(p546^2) element (2 * 69 = 138); fully parenthesized so it expands safely next to /, % or unary operators
+
+
+// SIDH's basic element definitions and point representations
+
+typedef digit_t felm_t[NWORDS_FIELD];                                 // Datatype for representing 546-bit field elements (576-bit max.)
+typedef digit_t dfelm_t[2*NWORDS_FIELD];                              // Datatype for representing double-precision 2x546-bit field elements (2x576-bit max.) 
+typedef felm_t  f2elm_t[2];                                           // Datatype for representing quadratic extension field elements GF(p546^2)
+        
+typedef struct { f2elm_t X; f2elm_t Z; } point_proj;                  // Point representation in projective XZ Montgomery coordinates.
+typedef point_proj point_proj_t[1]; 
+
+#ifdef COMPRESS
+    typedef struct { f2elm_t X; f2elm_t Y; f2elm_t Z; } point_full_proj;  // Point representation in full projective XYZ Montgomery coordinates 
+    typedef point_full_proj point_full_proj_t[1]; 
+
+    typedef struct { f2elm_t x; f2elm_t y; } point_affine;                // Point representation in affine coordinates.
+    typedef point_affine point_t[1]; 
+
+    typedef f2elm_t publickey_t[3];      
+#endif
+
+
+
+/**************** Function prototypes ****************/
+/************* Multiprecision functions **************/
+
+// 546-bit multiprecision addition, c = a+b
+void mp_add546(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_add546_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// 546-bit multiprecision subtraction, c = a-b+2p or c = a-b+4p
+extern void mp_sub546_p2(const digit_t* a, const digit_t* b, digit_t* c);
+extern void mp_sub546_p4(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_sub546_p2_asm(const digit_t* a, const digit_t* b, digit_t* c); 
+void mp_sub546_p4_asm(const digit_t* a, const digit_t* b, digit_t* c); 
+
+// 2x546-bit multiprecision subtraction followed by addition with p546*2^576, c = a-b+(p546*2^576) if a-b < 0, otherwise c=a-b 
+void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_subadd546x2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Double 2x546-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
+void mp_dblsub546x2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+/************ Field arithmetic functions *************/
+
+// Copy of a field element, c = a
+void fpcopy546(const digit_t* a, digit_t* c);
+
+// Zeroing a field element, a = 0
+void fpzero546(digit_t* a);
+
+// Non constant-time comparison of two field elements. If a = b return TRUE, otherwise, return FALSE
+bool fpequal546_non_constant_time(const digit_t* a, const digit_t* b); 
+
+// Modular addition, c = a+b mod p546
+extern void fpadd546(const digit_t* a, const digit_t* b, digit_t* c);
+extern void fpadd546_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Modular subtraction, c = a-b mod p546
+extern void fpsub546(const digit_t* a, const digit_t* b, digit_t* c);
+extern void fpsub546_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Modular negation, a = -a mod p546        
+extern void fpneg546(digit_t* a);  
+
+// Modular division by two, c = a/2 mod p546.
+void fpdiv2_546(const digit_t* a, digit_t* c);
+
+// Modular correction to reduce field element a in [0, 2*p546-1] to [0, p546-1].
+void fpcorrection546(digit_t* a);
+
+// 546-bit Montgomery reduction, c = a mod p
+void rdc546_asm(digit_t* ma, digit_t* mc);
+            
+// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p546, where R=2^576
+void fpmul546_mont(const digit_t* a, const digit_t* b, digit_t* c);
+void mul546_asm(const digit_t* a, const digit_t* b, digit_t* c);
+   
+// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p546, where R=2^576
+void fpsqr546_mont(const digit_t* ma, digit_t* mc);
+
+// Field inversion, a = a^-1 in GF(p546)
+void fpinv546_mont(digit_t* a);
+
+// Chain to compute (p546-3)/4 using Montgomery arithmetic
+void fpinv546_chain_mont(digit_t* a);
+
+/************ GF(p^2) arithmetic functions *************/
+    
+// Copy of a GF(p546^2) element, c = a
+void fp2copy546(const f2elm_t a, f2elm_t c);
+
+// Zeroing a GF(p546^2) element, a = 0
+void fp2zero546(f2elm_t a);
+
+// GF(p546^2) negation, a = -a in GF(p546^2)
+void fp2neg546(f2elm_t a);
+
+// GF(p546^2) addition, c = a+b in GF(p546^2)
+extern void fp2add546(const f2elm_t a, const f2elm_t b, f2elm_t c);           
+
+// GF(p546^2) subtraction, c = a-b in GF(p546^2)
+extern void fp2sub546(const f2elm_t a, const f2elm_t b, f2elm_t c); 
+
+// GF(p546^2) division by two, c = a/2  in GF(p546^2) 
+void fp2div2_546(const f2elm_t a, f2elm_t c);
+
+// Modular correction, a = a in GF(p546^2)
+void fp2correction546(f2elm_t a);
+            
+// GF(p546^2) squaring using Montgomery arithmetic, c = a^2 in GF(p546^2)
+void fp2sqr546_mont(const f2elm_t a, f2elm_t c);
+ 
+// GF(p546^2) multiplication using Montgomery arithmetic, c = a*b in GF(p546^2)
+void fp2mul546_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
+
+// GF(p546^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
+void fp2inv546_mont(f2elm_t a);
+
+
+#endif
diff --git a/SIKE_sw/src/P546/generic/fp_generic.c b/SIKE_sw/src/P546/generic/fp_generic.c
new file mode 100644
index 0000000..2cffafc
--- /dev/null
+++ b/SIKE_sw/src/P546/generic/fp_generic.c
@@ -0,0 +1,259 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: portable modular arithmetic for P546
+*********************************************************************************************/
+
+#include "../P546_internal.h"
+#include "../../internal.h"
+
+// Global constants
+extern const uint64_t p546[NWORDS64_FIELD];
+extern const uint64_t p546p1[NWORDS64_FIELD]; 
+extern const uint64_t p546x2[NWORDS64_FIELD];
+extern const uint64_t p546x4[NWORDS64_FIELD];
+
+
+__inline void mp_sub546_p2(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p (a, b and c are NWORDS_FIELD digits long).
+    unsigned int i, borrow = 0;
+    // Pass 1: c = a - b, digit by digit, chaining the borrow from one digit to the next.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    // Pass 2: c = c + p546x2, applied unconditionally. The borrow left by pass 1 is discarded and the final carry is never read.
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p546x2)[i], borrow, c[i]); 
+    }
+} 
+
+
+__inline void mp_sub546_p4(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p (a, b and c are NWORDS_FIELD digits long).
+    unsigned int i, borrow = 0;
+    // Pass 1: c = a - b, digit by digit, chaining the borrow from one digit to the next.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    // Pass 2: c = c + p546x4, applied unconditionally. The borrow left by pass 1 is discarded and the final carry is never read.
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p546x4)[i], borrow, c[i]); 
+    }
+}
+
+
+__inline void fpadd546(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular addition, c = a+b mod p546.
+  // Inputs: a, b in [0, 2*p546-1] 
+  // Output: c in [0, 2*p546-1] 
+    unsigned int i, carry = 0;
+    digit_t mask;
+    // Step 1: c = a + b.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], b[i], carry, c[i]); 
+    }
+    // Step 2: c = c - p546x2; the borrow out of the top digit records whether the difference went negative.
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(carry, c[i], ((digit_t*)p546x2)[i], carry, c[i]); 
+    }
+    mask = 0 - (digit_t)carry;         // All-ones when step 2 borrowed, zero otherwise (carry is expected to be 0 or 1)
+    // Step 3: add p546x2 back under the mask, i.e. undo step 2 without branching when it underflowed.
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, c[i], ((digit_t*)p546x2)[i] & mask, carry, c[i]); 
+    }
+} 
+
+
+__inline void fpsub546(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular subtraction, c = a-b mod p546.
+  // Inputs: a, b in [0, 2*p546-1] 
+  // Output: c in [0, 2*p546-1] 
+    unsigned int i, borrow = 0;
+    digit_t mask;
+    // Step 1: c = a - b; a final borrow means a < b.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    mask = 0 - (digit_t)borrow;        // All-ones when step 1 borrowed, zero otherwise (borrow is expected to be 0 or 1)
+    // Step 2: add p546x2 under the mask, so the correction is applied only when the difference was negative, without branching.
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p546x2)[i] & mask, borrow, c[i]); 
+    }
+}
+
+
+__inline void fpneg546(digit_t* a)
+{ // Modular negation, a = -a mod p546.
+  // Input/output: a in [0, 2*p546-1] 
+    unsigned int i, borrow = 0;
+    // Computed in place as a = p546x2 - a. NOTE(review): a = 0 therefore yields 2*p546 itself, just outside the range stated above.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, ((digit_t*)p546x2)[i], a[i], borrow, a[i]); 
+    }
+}
+
+
+void fpdiv2_546(const digit_t* a, digit_t* c)
+{ // Modular division by two, c = a/2 mod p546.
+  // Input : a in [0, 2*p546-1] 
+  // Output: c in [0, 2*p546-1] 
+    unsigned int i, carry = 0;
+    digit_t mask;
+    // mask is all-ones when the lowest bit of a is set and zero otherwise, so p546 is added only for odd a.
+    mask = 0 - (digit_t)(a[0] & 1);    // If a is odd compute a+p546
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], ((digit_t*)p546)[i] & mask, carry, c[i]); 
+    }
+    // c now holds a (even a) or a + p546 (odd a); mp_shiftr1 is defined elsewhere and presumably shifts c right by one bit to halve it.
+    mp_shiftr1(c, NWORDS_FIELD);
+} 
+
+
+void fpcorrection546(digit_t* a)
+{ // Modular correction to reduce field element a in [0, 2*p546-1] to [0, p546-1].
+    unsigned int i, borrow = 0;
+    digit_t mask;
+    // Step 1: a = a - p546 in place; a final borrow means a was already below p546.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], ((digit_t*)p546)[i], borrow, a[i]); 
+    }
+    mask = 0 - (digit_t)borrow;        // All-ones when step 1 borrowed, zero otherwise (borrow is expected to be 0 or 1)
+    // Step 2: add p546 back under the mask, restoring the original value without branching when no reduction was needed.
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, a[i], ((digit_t*)p546)[i] & mask, borrow, a[i]); 
+    }
+}
+
+
+void digit_x_digit(const digit_t a, const digit_t b, digit_t* c)
+{ // Digit multiplication, digit * digit -> 2-digit result (c[0] = low digit, c[1] = high digit), built from four half-digit products.
+    register digit_t al, ah, bl, bh, temp;
+    digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
+    digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4);    // Select the low / high half of a digit (sizeof(digit_t)*4 bits each)
+
+    al = a & mask_low;                        // Low part
+    ah = a >> (sizeof(digit_t) * 4);          // High part
+    bl = b & mask_low;
+    bh = b >> (sizeof(digit_t) * 4);
+    // Four partial products of half-digits; each one fits in a single digit.
+    albl = al*bl;
+    albh = al*bh;
+    ahbl = ah*bl;
+    ahbh = ah*bh;
+    c[0] = albl & mask_low;                   // C00
+    // Second quarter: high half of al*bl plus the low halves of both cross products.
+    res1 = albl >> (sizeof(digit_t) * 4);
+    res2 = ahbl & mask_low;
+    res3 = albh & mask_low;  
+    temp = res1 + res2 + res3;
+    carry = temp >> (sizeof(digit_t) * 4);    // Overflow of this quarter, carried into the next one
+    c[0] ^= temp << (sizeof(digit_t) * 4);    // C01   
+    // Third quarter: high halves of both cross products plus the low half of ah*bh, plus the carry from above.
+    res1 = ahbl >> (sizeof(digit_t) * 4);
+    res2 = albh >> (sizeof(digit_t) * 4);
+    res3 = ahbh & mask_low;
+    temp = res1 + res2 + res3 + carry;
+    c[1] = temp & mask_low;                   // C10 
+    carry = temp & mask_high;                 // Overflow of the third quarter, left in place in the high half
+    c[1] ^= (ahbh & mask_high) + carry;       // C11
+}
+
+
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. The result c is written as 2*nwords digits.
+    unsigned int i, j;
+    digit_t t = 0, u = 0, v = 0, UV[2];       // (t, u, v) is the three-digit column accumulator, v being the lowest digit
+    unsigned int carry = 0;
+    // Lower half: column i (0 <= i < nwords) accumulates a[j]*b[i-j] for j = 0..i.
+    for (i = 0; i < nwords; i++) {
+        for (j = 0; j <= i; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]);   // Product lands in UV; as used below, UV[0] is its low digit and UV[1] its high digit
+            ADDC(0, UV[0], v, carry, v); 
+            ADDC(carry, UV[1], u, carry, u); 
+            t += carry;
+        }
+        c[i] = v;                             // Emit the finished column, then shift the accumulator down by one digit
+        v = u; 
+        u = t;
+        t = 0;
+    }
+    // Upper half: column i (nwords <= i <= 2*nwords-2) accumulates a[j]*b[i-j] for j = i-nwords+1..nwords-1.
+    for (i = nwords; i < 2*nwords-1; i++) {
+        for (j = i-nwords+1; j < nwords; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]); 
+            ADDC(0, UV[0], v, carry, v); 
+            ADDC(carry, UV[1], u, carry, u); 
+            t += carry;
+        }
+        c[i] = v;
+        v = u; 
+        u = t;
+        t = 0;
+    }
+    c[2*nwords-1] = v;                        // Whatever remains in the accumulator is the top digit of the product
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p546.
+  // mc = ma*R^-1 mod p546x2, where R = 2^576.
+  // If ma < 2^576*p546, the output mc is in the range [0, 2*p546-1].
+  // ma is assumed to be in Montgomery representation.
+    unsigned int i, j, carry, count = p546_ZERO_WORDS;
+    digit_t UV[2], t = 0, u = 0, v = 0;       // (t, u, v) is the three-digit column accumulator, v being the lowest digit
+    // Clear the output first: mc[] is written column by column below and read back as a multiplicand in later columns.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        mc[i] = 0;
+    }
+    // First pass, columns 0..NWORDS_FIELD-1: add the products mc[j]*p546p1[i-j] and the input digit ma[i]; the low digit becomes mc[i].
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        for (j = 0; j < i; j++) {
+            if (j < (i-p546_ZERO_WORDS+1)) {  // Keeps only i-j >= p546_ZERO_WORDS. NOTE(review): the unsigned expression wraps for i < p546_ZERO_WORDS-1 and the test then passes; presumably harmless because those low p546p1 digits are zero - confirm
+                MUL(mc[j], ((digit_t*)p546p1)[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v); 
+                ADDC(carry, UV[1], u, carry, u); 
+                t += carry; 
+            }
+        }
+        ADDC(0, v, ma[i], carry, v); 
+        ADDC(carry, u, 0, carry, u); 
+        t += carry; 
+        mc[i] = v;
+        v = u;
+        u = t;
+        t = 0;
+    }    
+    // Second pass, columns NWORDS_FIELD..2*NWORDS_FIELD-2: same accumulation, but the low digit of column i is stored to mc[i-NWORDS_FIELD].
+    for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {
+        if (count > 0) {
+            count -= 1;                       // count steps down from p546_ZERO_WORDS-1 to 0, one per column
+        }
+        for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
+            if (j < (NWORDS_FIELD-count)) {   // Upper bound on j shrinks by count; while count > 0 this again keeps only i-j >= p546_ZERO_WORDS
+                MUL(mc[j], ((digit_t*)p546p1)[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v); 
+                ADDC(carry, UV[1], u, carry, u); 
+                t += carry;
+            }
+        }
+        ADDC(0, v, ma[i], carry, v); 
+        ADDC(carry, u, 0, carry, u); 
+        t += carry; 
+        mc[i-NWORDS_FIELD] = v;
+        v = u;
+        u = t;
+        t = 0;
+    }
+    ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v);   // Top digit: remaining accumulator plus the last input digit; the carry out is not used
+    mc[NWORDS_FIELD-1] = v;
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/P610/AMD64/fp_x64.c b/SIKE_sw/src/P610/AMD64/fp_x64.c
new file mode 100644
index 0000000..f53ff76
--- /dev/null
+++ b/SIKE_sw/src/P610/AMD64/fp_x64.c
@@ -0,0 +1,729 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: modular arithmetic optimized for x64 platforms for P610
+*********************************************************************************************/
+
+#include "../P610_internal.h"
+#include "../../internal.h"
+
+// Global constants
+extern const uint64_t p610[NWORDS_FIELD];
+extern const uint64_t p610p1[NWORDS_FIELD]; 
+extern const uint64_t p610x2[NWORDS_FIELD];    
+extern const uint64_t p610x4[NWORDS_FIELD];
+
+
+__inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p.    
+#if (OS_TARGET == OS_WIN)
+    unsigned int i, borrow = 0;
+    // Pass 1: c = a - b, digit by digit, chaining the borrow from one digit to the next.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    // Pass 2: c = c + p610x2, applied unconditionally. The borrow left by pass 1 is discarded and the final carry is never read.
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p610x2)[i], borrow, c[i]); 
+    }
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    // On Linux the whole operation is delegated to the external routine mp_sub610_p2_asm.
+    mp_sub610_p2_asm(a, b, c);    
+    // NOTE(review): for any other OS_TARGET neither branch is compiled and c is left unwritten - confirm the build configuration rules that out.
+#endif
+} 
+
+
+__inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p.    
+#if (OS_TARGET == OS_WIN)
+    unsigned int i, borrow = 0;
+    // Pass 1: c = a - b, digit by digit, chaining the borrow from one digit to the next.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    // Pass 2: c = c + p610x4, applied unconditionally. The borrow left by pass 1 is discarded and the final carry is never read.
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p610x4)[i], borrow, c[i]); 
+    }
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    // On Linux the whole operation is delegated to the external routine mp_sub610_p4_asm.
+    mp_sub610_p4_asm(a, b, c);    
+    // NOTE(review): for any other OS_TARGET neither branch is compiled and c is left unwritten - confirm the build configuration rules that out.
+#endif
+} 
+
+
+__inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular addition, c = a+b mod p610.
+  // Inputs: a, b in [0, 2*p610-1] 
+  // Output: c in [0, 2*p610-1] 
+    
+#if (OS_TARGET == OS_WIN)
+    unsigned int i, carry = 0;
+    digit_t mask;
+    // Step 1: c = a + b.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], b[i], carry, c[i]); 
+    }
+    // Step 2: c = c - p610x2; the borrow out of the top digit records whether the difference went negative.
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(carry, c[i], ((digit_t*)p610x2)[i], carry, c[i]); 
+    }
+    mask = 0 - (digit_t)carry;         // All-ones when step 2 borrowed, zero otherwise (carry is expected to be 0 or 1)
+    // Step 3: add p610x2 back under the mask, i.e. undo step 2 without branching when it underflowed.
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, c[i], ((digit_t*)p610x2)[i] & mask, carry, c[i]); 
+    } 
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    // On Linux the whole operation is delegated to the external routine fpadd610_asm.
+    fpadd610_asm(a, b, c);    
+    // NOTE(review): for any other OS_TARGET neither branch is compiled and c is left unwritten - confirm the build configuration rules that out.
+#endif
+} 
+
+
+__inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular subtraction, c = a-b mod p610.
+  // Inputs: a, b in [0, 2*p610-1] 
+  // Output: c in [0, 2*p610-1] 
+    
+#if (OS_TARGET == OS_WIN)
+    unsigned int i, borrow = 0;
+    digit_t mask;
+    // Step 1: c = a - b; a final borrow means a < b.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    mask = 0 - (digit_t)borrow;        // All-ones when step 1 borrowed, zero otherwise (borrow is expected to be 0 or 1)
+    // Step 2: add p610x2 under the mask, so the correction is applied only when the difference was negative, without branching.
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p610x2)[i] & mask, borrow, c[i]); 
+    }
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    // On Linux the whole operation is delegated to the external routine fpsub610_asm.
+    fpsub610_asm(a, b, c);    
+    // NOTE(review): for any other OS_TARGET neither branch is compiled and c is left unwritten - confirm the build configuration rules that out.
+#endif
+}
+
+
+__inline void fpneg610(digit_t* a)
+{ // Modular negation, a = -a mod p610.
+  // Input/output: a in [0, 2*p610-1] 
+    unsigned int i, borrow = 0;
+    // Computed in place as a = p610x2 - a. NOTE(review): a = 0 therefore yields 2*p610 itself, just outside the range stated above.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, ((digit_t*)p610x2)[i], a[i], borrow, a[i]); 
+    }
+}
+
+
+void fpdiv2_610(const digit_t* a, digit_t* c)
+{ // Modular division by two, c = a/2 mod p610.
+  // Input : a in [0, 2*p610-1] 
+  // Output: c in [0, 2*p610-1] 
+    unsigned int i, carry = 0;
+    digit_t mask;
+    // mask is all-ones when the lowest bit of a is set and zero otherwise, so p610 is added only for odd a.
+    mask = 0 - (digit_t)(a[0] & 1);    // If a is odd compute a+p610
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], ((digit_t*)p610)[i] & mask, carry, c[i]); 
+    }
+    // c now holds a (even a) or a + p610 (odd a); mp_shiftr1 is defined elsewhere and presumably shifts c right by one bit to halve it.
+    mp_shiftr1(c, NWORDS_FIELD);
+}  
+
+
+void fpcorrection610(digit_t* a)
+{ // Modular correction to reduce field element a in [0, 2*p610-1] to [0, p610-1].
+    unsigned int i, borrow = 0;
+    digit_t mask;
+    // Step 1: a = a - p610 in place; a final borrow means a was already below p610.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], ((digit_t*)p610)[i], borrow, a[i]); 
+    }
+    mask = 0 - (digit_t)borrow;        // All-ones when step 1 borrowed, zero otherwise (borrow is expected to be 0 or 1)
+    // Step 2: add p610 back under the mask, restoring the original value without branching when no reduction was needed.
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, a[i], ((digit_t*)p610)[i] & mask, borrow, a[i]); 
+    }
+}
+
+
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords.
+        
+    (void)nwords;
+
+#if (OS_TARGET == OS_WIN)
+    digit_t t = 0;
+    uint128_t uv = {0};
+    unsigned int carry = 0;
+        
+    MULADD128(a[0], b[0], uv, carry, uv);
+    t += carry;
+    c[0] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[0], uv, carry, uv);
+    t += carry;
+    c[1] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[0], uv, carry, uv);
+    t += carry;
+    c[2] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[0], uv, carry, uv);
+    t += carry;
+    c[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[0], uv, carry, uv);
+    t += carry;
+    c[4] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[0], uv, carry, uv);
+    t += carry;
+    c[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[0], uv, carry, uv);
+    t += carry;
+    c[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[0], uv, carry, uv);
+    t += carry;
+    c[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[0], uv, carry, uv);
+    t += carry;
+    c[8] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[0], uv, carry, uv);
+    t += carry;
+    c[9] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[9], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[9], uv, carry, uv);
+    t += carry;
+    c[10] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[9], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[9], uv, carry, uv);
+    t += carry;
+    c[11] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[9], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[9], uv, carry, uv);
+    t += carry;
+    c[12] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;    
+    
+    MULADD128(a[9], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[9], uv, carry, uv);
+    t += carry;
+    c[13] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[9], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[9], uv, carry, uv);
+    t += carry;
+    c[14] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[9], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[9], uv, carry, uv);
+    t += carry;
+    c[15] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[9], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[9], uv, carry, uv);
+    t += carry;
+    c[16] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[9], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[9], uv, carry, uv);
+    t += carry;
+    c[17] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    
+    MULADD128(a[9], b[9], uv, carry, uv);
+    c[18] = uv[0];
+    c[19] = uv[1];
+
+#elif (OS_TARGET == OS_LINUX)
+    
+    mul610_asm(a, b, c);
+
+#endif
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Montgomery reduction exploiting special form of the prime.
+  // mc = ma*R^-1 mod p610x2, where R = 2^640.
+  // If ma < 2^640*p610, the output mc is in the range [0, 2*p610-1].
+  // ma is assumed to be in Montgomery representation. Reads ma[0..19], writes mc[0..9]; the non-Windows branch forwards to rdc610_asm.
+        
+#if (OS_TARGET == OS_WIN)
+    unsigned int carry;
+    digit_t t = 0;                                   // third accumulator word: collects the carries out of uv
+    uint128_t uv = {0};                              // two-word column accumulator (uv[0] low, uv[1] high)
+    
+    mc[0] = ma[0];                                   // words 0-3 are copied unchanged: only p610p1[4..9] is used below,
+    mc[1] = ma[1];                                   // so no product term lands in columns 0-3
+    mc[2] = ma[2];                                   // (presumably words 0-3 of p610p1 are zero - confirm against its definition)
+    mc[3] = ma[3];
+    MUL128(mc[0], ((digit_t*)p610p1)[4], uv);        // column 4
+    ADDC(0, uv[0], ma[4], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    mc[4] = uv[0];                                   // kept in mc[4]: reused as a multiplier from column 8 on
+    uv[0] = uv[1];                                   // shift the (t, uv[1], uv[0]) accumulator down one word
+    uv[1] = 0;
+
+    MULADD128(mc[0], ((digit_t*)p610p1)[5], uv, carry, uv);    // column 5; NOTE(review): no "t += carry" after this accumulate - uv[1] is 0 on entry, so presumably no carry can occur; confirm against MULADD128
+    MULADD128(mc[1], ((digit_t*)p610p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[5], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p610p1)[6], uv, carry, uv);    // column 6
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p610p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p610p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[6], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p610p1)[7], uv, carry, uv);    // column 7
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p610p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p610p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p610p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[7], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p610p1)[8], uv, carry, uv);    // column 8
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p610p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p610p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p610p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p610p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[8], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[8] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p610p1)[9], uv, carry, uv);    // column 9 (last use of mc[0] as a multiplier)
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p610p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p610p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p610p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p610p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p610p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[9], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[9] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(mc[1], ((digit_t*)p610p1)[9], uv, carry, uv);    // column 10
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p610p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p610p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p610p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p610p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p610p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[10], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[0] = uv[0];                                   // from here on each column overwrites an mc word that is no longer read as a multiplier
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[2], ((digit_t*)p610p1)[9], uv, carry, uv);    // column 11
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p610p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p610p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p610p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p610p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p610p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[11], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[1] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[3], ((digit_t*)p610p1)[9], uv, carry, uv);    // column 12
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p610p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p610p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p610p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p610p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p610p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[12], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[2] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[4], ((digit_t*)p610p1)[9], uv, carry, uv);    // column 13
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p610p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p610p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p610p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p610p1)[5], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p610p1)[4], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[13], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[5], ((digit_t*)p610p1)[9], uv, carry, uv);    // column 14
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p610p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p610p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p610p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p610p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[14], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[4] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[6], ((digit_t*)p610p1)[9], uv, carry, uv);    // column 15
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p610p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p610p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p610p1)[6], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[15], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[7], ((digit_t*)p610p1)[9], uv, carry, uv);    // column 16
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p610p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p610p1)[7], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[16], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[8], ((digit_t*)p610p1)[9], uv, carry, uv);    // column 17
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p610p1)[8], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[17], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[9], ((digit_t*)p610p1)[9], uv, carry, uv);    // column 18, then the top word
+    t += carry;                                      // t is not read again after this point
+    ADDC(0, uv[0], ma[18], carry, mc[8]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    ADDC(0, uv[1], ma[19], carry, mc[9]);            // carry out of the top word is not used
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    
+    rdc610_asm(ma, mc);    
+
+#endif
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/P610/AMD64/fp_x64_asm.S b/SIKE_sw/src/P610/AMD64/fp_x64_asm.S
new file mode 100644
index 0000000..a799504
--- /dev/null
+++ b/SIKE_sw/src/P610/AMD64/fp_x64_asm.S
@@ -0,0 +1,1314 @@
+//*******************************************************************************************
+// SIDH: an efficient supersingular isogeny cryptography library 
+// Copyright (c) Microsoft Corporation
+//
+// Website: https://github.com/microsoft/PQCrypto-SIDH
+// Released under MIT license 
+//
+// Abstract: field arithmetic in x64 assembly for P610 on Linux 
+//*******************************************************************************************  
+
+.intel_syntax noprefix 
+
+// Format function and variable names for Mac OS X
+#if defined(__APPLE__)
+    #define fmt(f)    _##f
+#else
+    #define fmt(f)    f
+#endif
+
+// Registers that are used for parameter passing:
+#define reg_p1  rdi
+#define reg_p2  rsi
+#define reg_p3  rdx
+
+
+.text
+//***********************************************************************
+//  Field addition
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
+//*********************************************************************** 
+.global fmt(fpadd610_asm)
+fmt(fpadd610_asm):
+  push   r12                          // r12-r15 are used as working registers and restored before ret
+  push   r13
+  push   r14
+  push   r15
+  
+  mov    r8, [reg_p1]                 // load a: ten 64-bit words into r8-r15, rcx, rdi
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56] 
+  mov    rcx, [reg_p1+64]
+  mov    rdi, [reg_p1+72]             // overwrites reg_p1 (rdi); a is not read again
+  add    r8, [reg_p2]                 // a + b, carry propagated through all ten words
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  adc    r12, [reg_p2+32] 
+  adc    r13, [reg_p2+40] 
+  adc    r14, [reg_p2+48] 
+  adc    r15, [reg_p2+56]
+  adc    rcx, [reg_p2+64]
+  adc    rdi, [reg_p2+72]
+
+  mov    rax, [rip+fmt(p610x2)]       // (a + b) - p610x2
+  sub    r8, rax
+  mov    rax, [rip+fmt(p610x2)+8]
+  sbb    r9, rax
+  sbb    r10, rax                     // the word at +8 is reused for +16 and +24
+  sbb    r11, rax                     // (presumably these three words of p610x2 are identical - confirm against its definition)
+  mov    rax, [rip+fmt(p610x2)+32]
+  sbb    r12, rax
+  mov    rax, [rip+fmt(p610x2)+40]
+  sbb    r13, rax
+  mov    rax, [rip+fmt(p610x2)+48]
+  sbb    r14, rax
+  mov    rax, [rip+fmt(p610x2)+56]
+  sbb    r15, rax
+  mov    rax, [rip+fmt(p610x2)+64]
+  sbb    rcx, rax
+  mov    rax, [rip+fmt(p610x2)+72]
+  sbb    rdi, rax
+  mov    [reg_p3+64], rcx             // spill the top two words into c to free rcx and rdi
+  mov    [reg_p3+72], rdi
+  mov    rax, 0                       // mov (not xor) so the borrow in CF survives
+  sbb    rax, 0                       // rax = all-ones if the subtraction borrowed, else 0
+  
+  mov    rsi, [rip+fmt(p610x2)]       // reg_p2 (rsi) is free from here on
+  and    rsi, rax
+  mov    rdi, [rip+fmt(p610x2)+8]
+  and    rdi, rax
+  
+  add    r8, rsi                      // add back (p610x2 & mask), words 0-3
+  adc    r9, rdi 
+  adc    r10, rdi 
+  adc    r11, rdi
+  mov    [reg_p3], r8 
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11 
+  setc   cl                           // park the carry: the and instructions below modify the flags
+  
+  mov    rdi, [rip+fmt(p610x2)+32]    // masked words 4-9 of p610x2
+  and    rdi, rax
+  mov    rsi, [rip+fmt(p610x2)+40]
+  and    rsi, rax
+  mov    r8, [rip+fmt(p610x2)+48]
+  and    r8, rax
+  mov    r9, [rip+fmt(p610x2)+56]
+  and    r9, rax
+  mov    r10, [rip+fmt(p610x2)+64]
+  and    r10, rax
+  mov    r11, [rip+fmt(p610x2)+72]
+  and    r11, rax
+  
+  bt     rcx, 0                       // CF = parked carry (bit 0 of cl)
+  adc    r12, rdi
+  adc    r13, rsi  
+  adc    r14, r8
+  adc    r15, r9
+  mov    rsi, [reg_p3+64]             // reload the spilled top words (mov leaves CF intact)
+  mov    rdi, [reg_p3+72]
+  adc    rsi, r10  
+  adc    rdi, r11
+  mov    [reg_p3+32], r12  
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15  
+  mov    [reg_p3+64], rsi
+  mov    [reg_p3+72], rdi
+
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Field subtraction
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
+//*********************************************************************** 
+.global fmt(fpsub610_asm)
+fmt(fpsub610_asm):
+  push   r12                          // r12-r15 are used as working registers and restored before ret
+  push   r13
+  push   r14
+  push   r15
+  
+  mov    r8, [reg_p1]                 // load a: ten 64-bit words into r8-r15, rcx, rdi
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56] 
+  mov    rcx, [reg_p1+64] 
+  mov    rdi, [reg_p1+72]             // overwrites reg_p1 (rdi); a is not read again
+  sub    r8, [reg_p2]                 // a - b, borrow propagated through all ten words
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  sbb    r14, [reg_p2+48] 
+  sbb    r15, [reg_p2+56]
+  sbb    rcx, [reg_p2+64]
+  sbb    rdi, [reg_p2+72]
+  mov    [reg_p3+64], rcx             // spill the top two words into c to free rcx and rdi
+  mov    [reg_p3+72], rdi
+  mov    rax, 0                       // mov (not xor) so the borrow in CF survives
+  sbb    rax, 0                       // rax = all-ones if a - b borrowed, else 0
+    
+  mov    rsi, [rip+fmt(p610x2)]       // reg_p2 (rsi) is free from here on
+  and    rsi, rax
+  mov    rdi, [rip+fmt(p610x2)+8]
+  and    rdi, rax
+  
+  add    r8, rsi                      // add (p610x2 & mask), words 0-3
+  adc    r9, rdi 
+  adc    r10, rdi                     // the masked word at +8 is reused for +16 and +24
+  adc    r11, rdi                     // (presumably these three words of p610x2 are identical - confirm against its definition)
+  mov    [reg_p3], r8 
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11 
+  setc   cl                           // park the carry: the and instructions below modify the flags
+  
+  mov    rdi, [rip+fmt(p610x2)+32]    // masked words 4-9 of p610x2
+  and    rdi, rax
+  mov    rsi, [rip+fmt(p610x2)+40]
+  and    rsi, rax
+  mov    r8, [rip+fmt(p610x2)+48]
+  and    r8, rax
+  mov    r9, [rip+fmt(p610x2)+56]
+  and    r9, rax
+  mov    r10, [rip+fmt(p610x2)+64]
+  and    r10, rax
+  mov    r11, [rip+fmt(p610x2)+72]
+  and    r11, rax
+  
+  bt     rcx, 0                       // CF = parked carry (bit 0 of cl)
+  adc    r12, rdi
+  adc    r13, rsi  
+  adc    r14, r8
+  adc    r15, r9
+  mov    rsi, [reg_p3+64]             // reload the spilled top words (mov leaves CF intact)
+  mov    rdi, [reg_p3+72]
+  adc    rsi, r10  
+  adc    rdi, r11
+  mov    [reg_p3+32], r12  
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15  
+  mov    [reg_p3+64], rsi
+  mov    [reg_p3+72], rdi
+  
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret 
+
+
+///////////////////////////////////////////////////////////////// MACRO
+.macro SUB610_PX  P0
+  push   r12                          // macro body saves/restores r12-r15 itself but contains no ret
+  push   r13
+  push   r14
+  push   r15
+  
+  mov    r8, [reg_p1]                 // load a: ten 64-bit words into r8-r15, rax, rcx
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56]
+  mov    rax, [reg_p1+64]
+  mov    rcx, [reg_p1+72]
+  sub    r8, [reg_p2]                 // a - b; the final borrow is not inspected
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40]
+  sbb    r14, [reg_p2+48] 
+  sbb    r15, [reg_p2+56] 
+  sbb    rax, [reg_p2+64] 
+  sbb    rcx, [reg_p2+72] 
+
+  mov    rdi, [rip+\P0]               // unconditionally add the ten-word constant at \P0; rdi/rsi (reg_p1/reg_p2) become temporaries
+  mov    rsi, [rip+\P0+8]
+  add    r8, rdi  
+  adc    r9, rsi  
+  adc    r10, rsi                     // the word at +8 is reused for +16 and +24
+  adc    r11, rsi                     // (presumably these three words of the constant are identical - confirm for every \P0 used)
+  mov    rdi, [rip+\P0+32]
+  mov    rsi, [rip+\P0+40]
+  adc    r12, rdi   
+  adc    r13, rsi   
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12 
+  mov    [reg_p3+40], r13
+  mov    rdi, [rip+\P0+48]            // mov does not touch CF, so the carry chain continues below
+  mov    rsi, [rip+\P0+56]
+  adc    r14, rdi  
+  adc    r15, rsi  
+  mov    rdi, [rip+\P0+64]
+  mov    rsi, [rip+\P0+72]
+  adc    rax, rdi 
+  adc    rcx, rsi                     // carry out of the top word is not used
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15 
+  mov    [reg_p3+64], rax 
+  mov    [reg_p3+72], rcx
+  
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  .endm
+
+
+//***********************************************************************
+//  Multiprecision subtraction with correction with 2*p610
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p610
+//*********************************************************************** 
+.global fmt(mp_sub610_p2_asm)
+fmt(mp_sub610_p2_asm):
+
+  SUB610_PX  fmt(p610x2)
+  ret                                 // the SUB610_PX expansion has already restored r12-r15; it does not return by itself
+
+
+//***********************************************************************
+//  Multiprecision subtraction with correction with 4*p610
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p610
+//*********************************************************************** 
+.global fmt(mp_sub610_p4_asm)
+fmt(mp_sub610_p4_asm):
+
+  SUB610_PX  fmt(p610x4)
+  ret                                 // the SUB610_PX expansion has already restored r12-r15; it does not return by itself
+
+
+#ifdef _MULX_
+
+/////////////////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication, 320 x 320 -> 640 bits
+// Inputs:  memory pointers M0 and M1 (five 64-bit words each)
+// Outputs: memory pointer C (ten 64-bit words)
+// Temps:   regs T0:T7; rax and rdx are also clobbered
+///////////////////////////////////////////////////////////////////////////
+#ifdef _ADX_
+
+.macro MUL320_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7 
+    mov    rdx, \M0            // row 0: M0[0] x M1[0..4] (mulx takes its implicit operand from rdx)
+    mulx   \T0, \T1, \M1    
+    mulx   \T2, \T3, 8\M1
+    mov    \C, \T1             // C0_final 
+    xor    rax, rax            // rax = 0 and CF = OF = 0 before the carry chain
+    mulx   \T4, \T5, 16\M1 
+    adox   \T0, \T3               
+    adox   \T2, \T5     
+    mulx   \T1, \T3, 24\M1
+    adox   \T4, \T3         
+    mulx   \T5, \T6, 32\M1 
+    adox   \T1, \T6     
+    adox   \T5, rax            // fold the last OF carry into the top word
+    
+    mov    rdx, 8\M0           // row 1: M0[1] x M1[0..4]
+    mulx   \T6, \T7, \M1 
+    xor    rax, rax            // reset both flags: adcx chains through CF, adox through OF
+    adcx   \T0, \T7 
+    mov    8\C, \T0            // C1_final 
+    adcx   \T2, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T2, \T7 
+    adcx   \T4, \T6        
+    mulx   \T0, \T6, 16\M1 
+    adox   \T4, \T6  
+    adcx   \T0, \T1     
+    mulx   \T1, \T7, 24\M1   
+    adcx   \T1, \T5 
+    adox   \T0, \T7   
+    mulx   \T5, \T6, 32\M1 
+    adcx   \T5, rax         
+    adox   \T1, \T6  
+    adox   \T5, rax         
+    
+    mov    rdx, 16\M0          // row 2: M0[2] x M1[0..4]
+    mulx   \T6, \T7, \M1
+    xor    rax, rax 
+    adcx   \T2, \T7 
+    mov    16\C, \T2           // C2_final 
+    adcx   \T4, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T4, \T7 
+    adcx   \T0, \T6        
+    mulx   \T2, \T6, 16\M1
+    adox   \T0, \T6 
+    adcx   \T1, \T2     
+    mulx   \T2, \T7, 24\M1   
+    adcx   \T5, \T2          
+    adox   \T1, \T7   
+    mulx   \T2, \T6, 32\M1   
+    adcx   \T2, rax 
+    adox   \T5, \T6 
+    adox   \T2, rax           
+    
+    mov    rdx, 24\M0          // row 3: M0[3] x M1[0..4]
+    mulx   \T6, \T7, \M1
+    xor    rax, rax 
+    adcx   \T4, \T7 
+    mov    24\C, \T4           // C3_final 
+    adcx   \T0, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T0, \T7
+    adcx   \T1, \T6        
+    mulx   \T4, \T6, 16\M1
+    adox   \T1, \T6  
+    adcx   \T5, \T4     
+    mulx   \T4, \T7, 24\M1   
+    adcx   \T2, \T4        
+    adox   \T5, \T7   
+    mulx   \T4, \T6, 32\M1   
+    adcx   \T4, rax 
+    adox   \T2, \T6  
+    adox   \T4, rax         
+    
+    mov    rdx, 32\M0          // row 4: M0[4] x M1[0..4]
+    mulx   \T6, \T7, \M1 
+    xor    rax, rax
+    adcx   \T0, \T7 
+    mov    32\C, \T0           // C4_final 
+    adcx   \T1, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T1, \T7 
+    adcx   \T5, \T6        
+    mulx   \T0, \T6, 16\M1 
+    adox   \T5, \T6 
+    adcx   \T2, \T0     
+    mulx   \T0, \T7, 24\M1   
+    adcx   \T4, \T0 
+    adox   \T2, \T7  
+    mulx   \T0, \T6, 32\M1   
+    adcx   \T0, rax           
+    adox   \T4, \T6 
+    adox   \T0, rax 
+
+    mov    40\C, \T1           // C5..C9_final: upper five words of the product
+    mov    48\C, \T5 
+    mov    56\C, \T2 
+    mov    64\C, \T4
+    mov    72\C, \T0
+.endm
+
+#else
+
+.macro MUL320_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7
+    mov    rdx, \M0            // row 0: M0[0] x M1[0..4] (mulx takes its implicit operand from rdx and leaves the flags alone)
+    mulx   \T0, \T1, \M1    
+    mulx   \T2, \T3, 8\M1
+    mov    \C, \T1             // C0_final 
+    mulx   \T4, \T5, 16\M1 
+    add    \T0, \T3               
+    adc    \T2, \T5     
+    mulx   \T1, \T3, 24\M1
+    adc    \T3, \T4         
+    mulx   \T5, \T6, 32\M1 
+    adc    \T1, \T6     
+    adc    \T5, 0              // running sum after row 0: T0, T2, T3, T1, T5 (low to high)
+    
+    mov    rdx, 8\M0           // row 1: M0[1] x M1[0..4]
+    mulx   \T6, \T7, \M1 
+    add    \T0, \T7            // first pass: low half of the first product, then the high halves
+    mov    8\C, \T0            // C1_final 
+    adc    \T2, \T6     
+    mulx   \T6, \T7, 8\M1
+    adc    \T3, \T6        
+    mulx   \T0, \T4, 16\M1
+    adc    \T0, \T1     
+    mulx   \T1, \T6, 24\M1   
+    adc    \T5, \T1  
+    mulx   \T1, rax, 32\M1     
+    adc    \T1, 0 
+        
+    add    \T2, \T7            // second pass: remaining low halves (T7, T4, T6, rax)
+    adc    \T3, \T4  
+    adc    \T0, \T6  
+    adc    \T5, rax  
+    adc    \T1, 0         
+    
+    mov    rdx, 16\M0          // row 2: M0[2] x M1[0..4]
+    mulx   \T4, \T6, \M1 
+    add    \T2, \T6 
+    mov    16\C, \T2           // C2_final 
+    adc    \T3, \T4     
+    mulx   \T6, \T7, 8\M1
+    adc    \T0, \T6        
+    mulx   \T2, \T4, 16\M1 
+    adc    \T2, \T5     
+    mulx   \T5, \T6, 24\M1   
+    adc    \T1, \T5 
+    mulx   \T5, rax, 32\M1     
+    adc    \T5, 0 
+        
+    add    \T3, \T7            // second pass for row 2
+    adc    \T0, \T4  
+    adc    \T2, \T6  
+    adc    \T1, rax 
+    adc    \T5, 0          
+    
+    mov    rdx, 24\M0          // row 3: M0[3] x M1[0..4]
+    mulx   \T4, \T6, \M1 
+    add    \T3, \T6 
+    mov    24\C, \T3           // C3_final 
+    adc    \T0, \T4     
+    mulx   \T6, \T7, 8\M1
+    adc    \T2, \T6        
+    mulx   \T3, \T4, 16\M1 
+    adc    \T1, \T3     
+    mulx   \T3, \T6, 24\M1   
+    adc    \T3, \T5 
+    mulx   \T5, rax, 32\M1     
+    adc    \T5, 0
+        
+    add    \T0, \T7            // second pass for row 3
+    adc    \T2, \T4  
+    adc    \T1, \T6  
+    adc    \T3, rax 
+    adc    \T5, 0       
+    
+    mov    rdx, 32\M0          // row 4: M0[4] x M1[0..4]
+    mulx   \T4, \T6, \M1 
+    add    \T0, \T6 
+    mov    32\C, \T0           // C4_final 
+    adc    \T2, \T4     
+    mulx   \T6, \T7, 8\M1
+    adc    \T1, \T6        
+    mulx   \T0, \T4, 16\M1 
+    adc    \T3, \T0     
+    mulx   \T0, \T6, 24\M1   
+    adc    \T0, \T5 
+    mulx   \T5, rax, 32\M1     
+    adc    \T5, 0
+        
+    add    \T2, \T7            // second pass for row 4
+    adc    \T1, \T4  
+    adc    \T3, \T6 
+    adc    \T0, rax 
+    adc    \T5, 0 
+    mov    40\C, \T2           // C5..C9_final: upper five words of the product
+    mov    48\C, \T1 
+    mov    56\C, \T3 
+    mov    64\C, \T0
+    mov    72\C, \T5 
+.endm
+
+#endif
+
+
+//*****************************************************************************
+//  610-bit multiplication using Karatsuba (one level), schoolbook (two levels)
+//***************************************************************************** 
+.global fmt(mul610_asm)
+fmt(mul610_asm):    
+    push   r12
+    push   r13 
+    push   r14 
+    push   r15
+    mov    rcx, reg_p3         // keep the c pointer in rcx: rdx (reg_p3) is overwritten below and by MUL320_SCHOOL
+
+    // [rsp] <- AH + AL, rax <- mask
+    xor    rax, rax
+    mov    r8, [reg_p1]
+    mov    r9, [reg_p1+8]
+    mov    r10, [reg_p1+16]
+    mov    r11, [reg_p1+24] 
+    mov    r12, [reg_p1+32] 
+    push   rbx 
+    sub    rsp, 112            // scratch: [rsp..rsp+79] operand sums (later AH x BH), [rsp+80..rsp+111] masked-sum words
+    add    r8, [reg_p1+40]
+    adc    r9, [reg_p1+48]
+    adc    r10, [reg_p1+56]
+    adc    r11, [reg_p1+64]
+    adc    r12, [reg_p1+72]
+    sbb    rax, 0              // rax = all-ones if AH + AL carried out of 320 bits, else 0
+    mov    [rsp], r8
+    mov    [rsp+8], r9
+    mov    [rsp+16], r10
+    mov    [rsp+24], r11
+    mov    [rsp+32], r12
+
+    // [rsp+40] <- BH + BL, rdx <- mask
+    xor    rdx, rdx
+    mov    r8, [reg_p2]
+    mov    r9, [reg_p2+8]
+    mov    rbx, [reg_p2+16]
+    mov    r13, [reg_p2+24] 
+    mov    r14, [reg_p2+32]    
+    add    r8, [reg_p2+40]
+    adc    r9, [reg_p2+48]
+    adc    rbx, [reg_p2+56]
+    adc    r13, [reg_p2+64]
+    adc    r14, [reg_p2+72]
+    sbb    rdx, 0              // rdx = all-ones if BH + BL carried out of 320 bits, else 0
+    mov    [rsp+40], r8
+    mov    [rsp+48], r9
+    mov    [rsp+56], rbx
+    mov    [rsp+64], r13
+    mov    [rsp+72], r14     
+    
+    // [rcx] <- masked (BH + BL)
+    and    r8, rax             // (BH + BL) is kept only where the A-side sum carried
+    and    r9, rax
+    and    rbx, rax
+    and    r13, rax
+    and    r14, rax    
+    mov    [rcx], r8           // only the two low words go through c; the other three stay in rbx, r13, r14
+    mov    [rcx+8], r9
+
+    // r8-r12 <- masked (AH + AL)
+    mov    r8, [rsp]
+    mov    r9, [rsp+8]
+    and    r8, rdx             // (AH + AL) is kept only where the B-side sum carried
+    and    r9, rdx
+    and    r10, rdx            // r10-r12 still hold words 2-4 of AH + AL from above
+    and    r11, rdx
+    and    r12, rdx
+
+    // [rsp+80] <- masked (AH + AL) + masked (BH + BL)
+    mov    rax, [rcx]
+    mov    rdx, [rcx+8]
+    add    r8, rax
+    adc    r9, rdx
+    adc    r10, rbx
+    adc    r11, r13
+    adc    r12, r14            // top word stays in r12, which is not passed to MUL320_SCHOOL as a temp
+    mov    [rsp+80], r8
+    mov    [rsp+88], r9
+    mov    [rsp+96], r10
+    mov    [rsp+104], r11
+
+    // [rcx] <- AL x BL
+    MUL320_SCHOOL  [reg_p1], [reg_p2], [rcx], r8, r9, r10, r11, rbx, r13, r14, r15     // Result C0-C4 
+
+    // [rcx+80] <- (AH+AL) x (BH+BL), low part 
+    MUL320_SCHOOL  [rsp], [rsp+40], [rcx+80], r8, r9, r10, r11, rbx, r13, r14, r15
+
+    // [rsp] <- AH x BH 
+    MUL320_SCHOOL  [reg_p1+40], [reg_p2+40], [rsp], r8, r9, r10, r11, rbx, r13, r14, r15
+    
+    // r8-r12 <- (AH+AL) x (BH+BL), final step
+    mov    r8, [rsp+80]        // masked-sum correction is added to the upper half of the middle product
+    mov    r9, [rsp+88]
+    mov    r10, [rsp+96]
+    mov    r11, [rsp+104]
+    mov    rax, [rcx+120]
+    add    r8, rax
+    mov    rax, [rcx+128]
+    adc    r9, rax
+    mov    rax, [rcx+136]
+    adc    r10, rax
+    mov    rax, [rcx+144]
+    adc    r11, rax
+    mov    rax, [rcx+152]
+    adc    r12, rax
+    
+    // rdi,rdx,rbx,r13,r14,r8-r12 <- (AH+AL) x (BH+BL) - ALxBL
+    mov    rdi, [rcx+80]       // reg_p1 (rdi) is no longer needed as a pointer
+    sub    rdi, [rcx]
+    mov    rdx, [rcx+88]
+    sbb    rdx, [rcx+8]
+    mov    rbx, [rcx+96]
+    sbb    rbx, [rcx+16]
+    mov    r13, [rcx+104]
+    sbb    r13, [rcx+24]
+    mov    r14, [rcx+112]     
+    sbb    r14, [rcx+32]  
+    sbb    r8, [rcx+40]
+    sbb    r9, [rcx+48]
+    sbb    r10, [rcx+56]
+    sbb    r11, [rcx+64]
+    sbb    r12, [rcx+72]
+    
+    // rdi,rdx,rbx,r13,r14,r8-r12 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
+    sub    rdi, [rsp]
+    sbb    rdx, [rsp+8]
+    sbb    rbx, [rsp+16]
+    sbb    r13, [rsp+24]
+    sbb    r14, [rsp+32]  
+    sbb    r8, [rsp+40]
+    sbb    r9, [rsp+48]
+    sbb    r10, [rsp+56]
+    sbb    r11, [rsp+64]
+    sbb    r12, [rsp+72]
+    
+    mov    rax, [rcx+40]       // add the middle term at word offset 5 of the result
+    add    rax, rdi
+    mov    [rcx+40], rax    // Result C5-C9
+    mov    rax, [rcx+48]
+    adc    rax, rdx
+    mov    [rcx+48], rax 
+    mov    rax, [rcx+56]
+    adc    rax, rbx
+    mov    [rcx+56], rax 
+    mov    rax, [rcx+64]
+    adc    rax, r13
+    mov    [rcx+64], rax 
+    mov    rax, [rcx+72]
+    adc    rax, r14           
+    mov    [rcx+72], rax 
+    mov    rax, [rsp]
+    adc    r8, rax 
+    mov    [rcx+80], r8    // Result C10-C19
+    mov    rax, [rsp+8]
+    adc    r9, rax
+    mov    [rcx+88], r9 
+    mov    rax, [rsp+16]
+    adc    r10, rax
+    mov    [rcx+96], r10 
+    mov    rax, [rsp+24]
+    adc    r11, rax
+    mov    [rcx+104], r11 
+    mov    rax, [rsp+32]
+    adc    r12, rax
+    mov    [rcx+112], r12 
+    mov    r8, [rsp+40]        // upper half of AH x BH: only the carry is propagated into it
+    mov    r9, [rsp+48]
+    mov    r10, [rsp+56]
+    mov    r11, [rsp+64]
+    mov    r12, [rsp+72]
+    adc    r8, 0
+    adc    r9, 0
+    adc    r10, 0
+    adc    r11, 0
+    adc    r12, 0
+    add    rsp, 112            // release the scratch area; values to store are already in r8-r12
+    mov    [rcx+120], r8 
+    mov    [rcx+128], r9 
+    mov    [rcx+136], r10 
+    mov    [rcx+144], r11 
+    mov    [rcx+152], r12 
+      
+    pop    rbx
+    pop    r15
+    pop    r14
+    pop    r13
+    pop    r12
+    ret
+
+#else
+
+//***********************************************************************
+//  Integer multiplication
+//  Based on Karatsuba method
+//  Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
+//  NOTE: a=c or b=c are not allowed
+//***********************************************************************
+.global fmt(mul610_asm)
+fmt(mul610_asm):
+
+  ret                                 // placeholder body: no multiplication is performed when _MULX_ is not defined; the error directive below presumably stops such a build
+
+# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE"
+
+#endif
+
+
+#ifdef _MULX_
+
+///////////////////////////////////////////////////////////////// MACRO
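+// Schoolbook integer multiplication, 128 x 384 -> 512 bits
+// Inputs:  memory pointers M0 (two 64-bit words) and M1 (six 64-bit words)
+// Outputs: regs T0:T7 (T0 = least significant word)
+// Temps:   regs T8; rax and rdx are also clobbered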
+/////////////////////////////////////////////////////////////////
+
+#ifdef _ADX_
+.macro MUL128x384_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7, T8
+    mov    rdx, \M0            // row 0: M0[0] x M1[0..5] (mulx takes its implicit operand from rdx)
+    mulx   \T1, \T0, \M1       // T0 <- C0_final    
+    mulx   \T2, \T4, 8\M1
+    xor    rax, rax            // rax = 0 and CF = OF = 0 before the carry chains
+    mulx   \T3, \T5, 16\M1 
+    adox   \T1, \T4               
+    adox   \T2, \T5     
+    mulx   \T4, \T7, 24\M1
+    adox   \T3, \T7         
+    mulx   \T5, \T6, 32\M1 
+    adox   \T4, \T6         
+    mulx   \T7, \T8, 40\M1           
+    adox   \T5, \T8         
+    adox   \T7, rax            // fold the last OF carry into the top word
+    
+    mov    rdx, 8\M0           // row 1: M0[1] x M1[0..5]; no flag reset here - CF is untouched since the xor, OF presumably clear after the final adox
+    mulx   \T8, \T6, \M1 
+    adcx   \T1, \T6            // T1 <- C1_final 
+    adcx   \T2, \T8    
+    mulx   \T6, \T8, 8\M1
+    adox   \T2, \T8  
+    adcx   \T3, \T6        
+    mulx   \T6, \T8, 16\M1
+    adox   \T3, \T8
+    adcx   \T4, \T6     
+    mulx   \T6, \T8, 24\M1
+    adox   \T4, \T8     
+    adcx   \T5, \T6  
+    mulx   \T6, \T8, 32\M1 
+    adox   \T5, \T8 
+    adcx   \T6, \T7 
+    mulx   \T7, \T8, 40\M1
+    adcx   \T7, rax            // close the CF chain into the top word
+    adox   \T6, \T8          
+    adox   \T7, rax            // close the OF chain into the top word
+.endm
+
+#else
+
+.macro MUL128x384_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7, T8
+    mov    rdx, \M0            // rdx = M0[0] (implicit mulx operand)
+    mulx   \T1, \T0, \M1       // T0 <- C0_final 
+    mulx   \T2, \T3, 8\M1      // M0[0] x M1[1]
+    add    \T1, \T3               
+    adc    \T2, 0  
+
+    mov    rdx, 8\M0           // rdx = M0[1]
+    xor    \T5, \T5            // T5 collects the carry into the next column
+    mulx   \T3, \T4, \M1       // M0[1] x M1[0]
+    add    \T1, \T4               
+    adc    \T2, \T3  
+    adc    \T5, 0  
+      
+    xor    \T6, \T6
+    mulx   \T3, \T4, 8\M1      // M0[1] x M1[1]
+    add    \T2, \T4  
+    adc    \T3, \T5           
+    adc    \T6, 0 
+        
+    mov    rdx, \M0            // back to M0[0]
+    mulx   \T4, \T5, 16\M1     // M0[0] x M1[2]
+    add    \T2, \T5  
+    adc    \T3, \T4           
+    adc    \T6, 0  
+        
+    xor    \T7, \T7        
+    mulx   \T4, \T5, 24\M1     // M0[0] x M1[3]
+    add    \T3, \T5  
+    adc    \T4, \T6           
+    adc    \T7, 0  
+
+    mov    rdx, 8\M0           // rdx = M0[1]
+    mulx   \T5, \T6, 16\M1     // M0[1] x M1[2]
+    add    \T3, \T6               
+    adc    \T4, \T5  
+    adc    \T7, 0    
+        
+    xor    \T6, \T6        
+    mulx   \T5, \T8, 24\M1     // M0[1] x M1[3]
+    add    \T4, \T8  
+    adc    \T5, \T7           
+    adc    \T6, 0  
+        
+    mov    rdx, \M0            // back to M0[0]
+    mulx   \T7, \T8, 32\M1     // M0[0] x M1[4]
+    add    \T4, \T8  
+    adc    \T5, \T7           
+    adc    \T6, 0      
+        
+    xor    \T7, \T7        
+    mulx   \T8, rax, 40\M1     // M0[0] x M1[5]
+    add    \T5, rax  
+    adc    \T6, \T8          
+    adc    \T7, 0  
+        
+    mov    rdx, 8\M0           // rdx = M0[1]
+    mulx   \T8, rax, 32\M1     // M0[1] x M1[4]
+    add    \T5, rax  
+    adc    \T6, \T8         
+    adc    \T7, 0   
+        
+    mov    rdx, 8\M0           // NOTE(review): rdx was already loaded with 8\M0 just above; this reload looks redundant
+    mulx   \T8, rax, 40\M1     // M0[1] x M1[5]
+    add    \T6, rax  
+    adc    \T7, \T8            // T7 is the top word; no further carry is propagated
+.endm
+#endif
+
+  
+//**************************************************************************************
+//  Montgomery reduction
+//  Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015  
+//  Operation: c [reg_p2] = a [reg_p1]
+//  NOTE: a=c is not allowed. a [reg_p1] is used as scratch space and is overwritten
+//************************************************************************************** 
+.global fmt(rdc610_asm)
+fmt(rdc610_asm):
+    push   r12
+    push   r13 
+    push   r14 
+    push   r15  
+    // Reads a[0..19] (byte offsets 0-152), writes c[0..9] (offsets 0-72). Five rounds, two words of a per round.
+    // a[0-1] x p610p1_nz --> result: r8:r15 
+    MUL128x384_SCHOOL [reg_p1], [rip+fmt(p610p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx     
+    // Add the 8-word product into a[4..11]; rcx takes a[12] plus the carry
+    xor    rcx, rcx
+    add    r8, [reg_p1+32]  
+    adc    r9, [reg_p1+40]  
+    adc    r10, [reg_p1+48]   
+    adc    r11, [reg_p1+56]   
+    adc    r12, [reg_p1+64]   
+    adc    r13, [reg_p1+72]   
+    adc    r14, [reg_p1+80]  
+    adc    r15, [reg_p1+88]   
+    adc    rcx, [reg_p1+96]     // rcx <- a[12] + carry (rcx was zeroed above)
+    mov    [reg_p1+32], r8  
+    mov    [reg_p1+40], r9  
+    mov    [reg_p1+48], r10  
+    mov    [reg_p1+56], r11  
+    mov    [reg_p1+64], r12  
+    mov    [reg_p1+72], r13  
+    mov    [reg_p1+80], r14
+    mov    [reg_p1+88], r15  
+    mov    [reg_p1+96], rcx  
+    mov    r8, [reg_p1+104]  
+    mov    r9, [reg_p1+112]  
+    mov    r10, [reg_p1+120]
+    mov    r11, [reg_p1+128]
+    mov    r12, [reg_p1+136]
+    mov    r13, [reg_p1+144]
+    mov    r14, [reg_p1+152]
+    adc    r8, 0                // propagate the pending carry through a[13..19] (mov leaves CF untouched)
+    adc    r9, 0
+    adc    r10, 0
+    adc    r11, 0
+    adc    r12, 0
+    adc    r13, 0
+    adc    r14, 0
+    mov    [reg_p1+104], r8  
+    mov    [reg_p1+112], r9  
+    mov    [reg_p1+120], r10  
+    mov    [reg_p1+128], r11  
+    mov    [reg_p1+136], r12 
+    mov    [reg_p1+144], r13 
+    mov    [reg_p1+152], r14
+
+    // a[2-3] x p610p1_nz --> result: r8:r15
+    MUL128x384_SCHOOL [reg_p1+16], [rip+fmt(p610p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx 
+    // Add the product into a[6..13]; rcx takes a[14] plus the carry
+    xor    rcx, rcx
+    add    r8, [reg_p1+48]  
+    adc    r9, [reg_p1+56]  
+    adc    r10, [reg_p1+64]   
+    adc    r11, [reg_p1+72]  
+    adc    r12, [reg_p1+80]   
+    adc    r13, [reg_p1+88]   
+    adc    r14, [reg_p1+96]  
+    adc    r15, [reg_p1+104]
+    adc    rcx, [reg_p1+112]
+    mov    [reg_p1+48], r8  
+    mov    [reg_p1+56], r9  
+    mov    [reg_p1+64], r10  
+    mov    [reg_p1+72], r11   
+    mov    [reg_p1+80], r12  
+    mov    [reg_p1+88], r13  
+    mov    [reg_p1+96], r14
+    mov    [reg_p1+104], r15
+    mov    [reg_p1+112], rcx
+    mov    r8, [reg_p1+120]
+    mov    r9, [reg_p1+128]
+    mov    r10, [reg_p1+136] 
+    mov    r11, [reg_p1+144] 
+    mov    r12, [reg_p1+152] 
+    adc    r8, 0                // propagate the pending carry through a[15..19]
+    adc    r9, 0
+    adc    r10, 0  
+    adc    r11, 0 
+    adc    r12, 0  
+    mov    [reg_p1+120], r8  
+    mov    [reg_p1+128], r9  
+    mov    [reg_p1+136], r10 
+    mov    [reg_p1+144], r11 
+    mov    [reg_p1+152], r12 
+
+    // a[4-5] x p610p1_nz --> result: r8:r15
+    MUL128x384_SCHOOL [reg_p1+32], [rip+fmt(p610p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx 
+    // a[4-5] were updated by the first round. Add the product into a[8..15]; rcx takes a[16] plus the carry
+    xor    rcx, rcx
+    add    r8, [reg_p1+64]  
+    adc    r9, [reg_p1+72]  
+    adc    r10, [reg_p1+80]   
+    adc    r11, [reg_p1+88]  
+    adc    r12, [reg_p1+96]   
+    adc    r13, [reg_p1+104]   
+    adc    r14, [reg_p1+112]  
+    adc    r15, [reg_p1+120]
+    adc    rcx, [reg_p1+128]
+    mov    [reg_p1+64], r8  
+    mov    [reg_p1+72], r9  
+    mov    [reg_p1+80], r10  
+    mov    [reg_p1+88], r11   
+    mov    [reg_p1+96], r12  
+    mov    [reg_p1+104], r13  
+    mov    [reg_p1+112], r14
+    mov    [reg_p1+120], r15
+    mov    [reg_p1+128], rcx
+    mov    r8, [reg_p1+136]
+    mov    r9, [reg_p1+144]
+    mov    r10, [reg_p1+152] 
+    adc    r8, 0                // propagate the pending carry through a[17..19]
+    adc    r9, 0
+    adc    r10, 0 
+    mov    [reg_p1+136], r8  
+    mov    [reg_p1+144], r9  
+    mov    [reg_p1+152], r10 
+
+    // a[6-7] x p610p1_nz --> result: r8:r15
+    MUL128x384_SCHOOL [reg_p1+48], [rip+fmt(p610p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx 
+    // Add the product into a[10..17]; the two lowest sums are the first output words
+    xor    rcx, rcx
+    add    r8, [reg_p1+80]  
+    adc    r9, [reg_p1+88]  
+    adc    r10, [reg_p1+96]   
+    adc    r11, [reg_p1+104]  
+    adc    r12, [reg_p1+112]   
+    adc    r13, [reg_p1+120]   
+    adc    r14, [reg_p1+128]  
+    adc    r15, [reg_p1+136]
+    adc    rcx, [reg_p1+144]
+    mov    [reg_p2], r8         // C0_final
+    mov    [reg_p2+8], r9       // C1_final
+    mov    [reg_p1+96], r10  
+    mov    [reg_p1+104], r11   
+    mov    [reg_p1+112], r12  
+    mov    [reg_p1+120], r13  
+    mov    [reg_p1+128], r14
+    mov    [reg_p1+136], r15
+    mov    [reg_p1+144], rcx
+    mov    r8, [reg_p1+152] 
+    adc    r8, 0                // propagate the pending carry into a[19]
+    mov    [reg_p1+152], r8
+
+    // a[8-9] x p610p1_nz --> result: r8:r15
+    MUL128x384_SCHOOL [reg_p1+64], [rip+fmt(p610p1)+32], r8, r9, r10, r11, r12, r13, r14, r15, rcx
+    
+    // Final result C2:C9
+    add    r8, [reg_p1+96]  
+    adc    r9, [reg_p1+104]  
+    adc    r10, [reg_p1+112]   
+    adc    r11, [reg_p1+120]  
+    adc    r12, [reg_p1+128]   
+    adc    r13, [reg_p1+136]   
+    adc    r14, [reg_p1+144]   
+    adc    r15, [reg_p1+152]    // the carry out of this last adc is not used
+    mov    [reg_p2+16], r8
+    mov    [reg_p2+24], r9  
+    mov    [reg_p2+32], r10   
+    mov    [reg_p2+40], r11  
+    mov    [reg_p2+48], r12  
+    mov    [reg_p2+56], r13 
+    mov    [reg_p2+64], r14 
+    mov    [reg_p2+72], r15
+
+    pop    r15
+    pop    r14
+    pop    r13
+    pop    r12
+    ret
+
+  #else
+  
+//***********************************************************************
+//  Montgomery reduction
+//  Placeholder only: the non-MULX version is not implemented (body is a bare ret; the #error below is meant to stop the build)
+//  Operation: c [reg_p2] = a [reg_p1]
+//  NOTE: a=c is not allowed
+//*********************************************************************** 
+.global fmt(rdc610_asm)
+fmt(rdc610_asm):
+
+  ret
+
+# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE"
+
+  #endif
+
+
+//***********************************************************************
+//  610-bit multiprecision addition
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
+//*********************************************************************** 
+.global fmt(mp_add610_asm)
+fmt(mp_add610_asm):  
+  mov    r8, [reg_p1]           // load words 0-4 of a
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    rax, [reg_p1+32]
+  add    r8, [reg_p2]           // start of the carry chain
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  adc    rax, [reg_p2+32] 
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], rax
+  // Words 5-9: the mov instructions leave CF untouched, so the carry chain continues here
+  mov    r8, [reg_p1+40]
+  mov    r9, [reg_p1+48] 
+  mov    r10, [reg_p1+56]
+  mov    r11, [reg_p1+64] 
+  mov    rax, [reg_p1+72] 
+  adc    r8, [reg_p2+40] 
+  adc    r9, [reg_p2+48]
+  adc    r10, [reg_p2+56] 
+  adc    r11, [reg_p2+64]
+  adc    rax, [reg_p2+72]       // the carry out of the top word is not captured
+  mov    [reg_p3+40], r8
+  mov    [reg_p3+48], r9
+  mov    [reg_p3+56], r10
+  mov    [reg_p3+64], r11
+  mov    [reg_p3+72], rax
+  ret
+
+
+//***********************************************************************
+//  2x610-bit multiprecision subtraction/addition
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. If c < 0, add p610*2^640
+//*********************************************************************** 
+.global fmt(mp_subadd610x2_asm)
+fmt(mp_subadd610x2_asm):
+  push   r12
+  push   r13 
+  push   r14 
+  push   r15
+  push   rbx
+  xor    rax, rax               // rax becomes the borrow mask after the subtraction
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    rcx, [reg_p1+32]
+  sub    r8, [reg_p2]           // start of the 20-word borrow chain
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    rcx, [reg_p2+32] 
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], rcx
+  // Words 5-9 (mov leaves CF untouched, so the borrow chain continues)
+  mov    r8, [reg_p1+40]
+  mov    r9, [reg_p1+48]
+  mov    r10, [reg_p1+56] 
+  mov    r11, [reg_p1+64]
+  mov    rcx, [reg_p1+72] 
+  sbb    r8, [reg_p2+40] 
+  sbb    r9, [reg_p2+48] 
+  sbb    r10, [reg_p2+56]
+  sbb    r11, [reg_p2+64] 
+  sbb    rcx, [reg_p2+72]
+  mov    [reg_p3+40], r8
+  mov    [reg_p3+48], r9
+  mov    [reg_p3+56], r10
+  mov    [reg_p3+64], r11
+  mov    [reg_p3+72], rcx
+  // Words 10-14
+  mov    r8, [reg_p1+80]
+  mov    r9, [reg_p1+88] 
+  mov    r10, [reg_p1+96]
+  mov    r11, [reg_p1+104]
+  mov    rcx, [reg_p1+112]
+  sbb    r8, [reg_p2+80]
+  sbb    r9, [reg_p2+88]
+  sbb    r10, [reg_p2+96] 
+  sbb    r11, [reg_p2+104] 
+  sbb    rcx, [reg_p2+112]
+  mov    [reg_p3+80], r8 
+  mov    [reg_p3+88], r9
+  mov    [reg_p3+96], r10
+  mov    [reg_p3+104], r11
+  mov    [reg_p3+112], rcx
+  // Words 15-19: kept in r8-r11, rcx and only stored after the conditional addition below
+  mov    r8, [reg_p1+120]
+  mov    r9, [reg_p1+128]
+  mov    r10, [reg_p1+136]
+  mov    r11, [reg_p1+144]
+  mov    rcx, [reg_p1+152]
+  sbb    r8, [reg_p2+120] 
+  sbb    r9, [reg_p2+128] 
+  sbb    r10, [reg_p2+136] 
+  sbb    r11, [reg_p2+144] 
+  sbb    rcx, [reg_p2+152]
+  sbb    rax, 0                 // rax <- all ones if the subtraction borrowed, 0 otherwise
+  
+  // Add p610 anded with the mask in rax 
+  mov    r12, [rip+fmt(p610)]       // word 0 of p610; reused below for words 1-3 as well
+  mov    r13, [rip+fmt(p610)+32]
+  mov    r14, [rip+fmt(p610)+40]
+  mov    r15, [rip+fmt(p610)+48]
+  mov    rdi, [rip+fmt(p610)+56]    // rdi/rsi serve as scratch from here on; only reg_p3 is addressed below
+  mov    rsi, [rip+fmt(p610)+64]
+  mov    rbx, [rip+fmt(p610)+72]
+  and    r12, rax
+  and    r13, rax
+  and    r14, rax
+  and    r15, rax
+  and    rdi, rax
+  and    rsi, rax
+  and    rbx, rax
+  mov    rax, [reg_p3+80]       // the masked p610 is added to the upper half, words 10-19 of c
+  add    rax, r12
+  mov    [reg_p3+80], rax
+  mov    rax, [reg_p3+88]
+  adc    rax, r12
+  mov    [reg_p3+88], rax
+  mov    rax, [reg_p3+96]
+  adc    rax, r12
+  mov    [reg_p3+96], rax
+  adc    r12, [reg_p3+104]
+  adc    r13, [reg_p3+112]
+  mov    [reg_p3+104], r12
+  mov    [reg_p3+112], r13
+  adc    r8, r14
+  adc    r9, r15
+  adc    r10, rdi
+  adc    r11, rsi
+  adc    rcx, rbx
+  
+  mov    [reg_p3+120], r8
+  mov    [reg_p3+128], r9
+  mov    [reg_p3+136], r10
+  mov    [reg_p3+144], r11
+  mov    [reg_p3+152], rcx
+  pop    rbx
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Double 2x610-bit multiprecision subtraction
+//  Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
+//*********************************************************************** 
+.global fmt(mp_dblsub610x2_asm)
+fmt(mp_dblsub610x2_asm):
+  push   r12
+  push   r13
+  push   r14
+  push   r15
+  // Words 0-7 of c. Two interleaved borrow chains: the one for a is saved in al, the one for b in cl
+  mov    r8, [reg_p3]
+  mov    r9, [reg_p3+8]
+  mov    r10, [reg_p3+16]
+  mov    r11, [reg_p3+24]
+  mov    r12, [reg_p3+32]
+  mov    r13, [reg_p3+40]
+  mov    r14, [reg_p3+48]
+  mov    r15, [reg_p3+56]
+  sub    r8, [reg_p1]
+  sbb    r9, [reg_p1+8] 
+  sbb    r10, [reg_p1+16] 
+  sbb    r11, [reg_p1+24] 
+  sbb    r12, [reg_p1+32] 
+  sbb    r13, [reg_p1+40] 
+  sbb    r14, [reg_p1+48] 
+  sbb    r15, [reg_p1+56]
+  setc   al                     // save the borrow of the "- a" chain in bit 0 of rax
+  sub    r8, [reg_p2]
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  sbb    r14, [reg_p2+48] 
+  sbb    r15, [reg_p2+56]
+  setc   cl                     // save the borrow of the "- b" chain in bit 0 of rcx
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15
+  // Words 8-15
+  mov    r8, [reg_p3+64]
+  mov    r9, [reg_p3+72]
+  mov    r10, [reg_p3+80]
+  mov    r11, [reg_p3+88]
+  mov    r12, [reg_p3+96]
+  mov    r13, [reg_p3+104]
+  mov    r14, [reg_p3+112]
+  mov    r15, [reg_p3+120]
+  bt     rax, 0                 // CF <- saved borrow of the "- a" chain
+  sbb    r8, [reg_p1+64] 
+  sbb    r9, [reg_p1+72] 
+  sbb    r10, [reg_p1+80] 
+  sbb    r11, [reg_p1+88] 
+  sbb    r12, [reg_p1+96] 
+  sbb    r13, [reg_p1+104] 
+  sbb    r14, [reg_p1+112] 
+  sbb    r15, [reg_p1+120]
+  setc   al 
+  bt     rcx, 0                 // CF <- saved borrow of the "- b" chain
+  sbb    r8, [reg_p2+64] 
+  sbb    r9, [reg_p2+72] 
+  sbb    r10, [reg_p2+80] 
+  sbb    r11, [reg_p2+88] 
+  sbb    r12, [reg_p2+96] 
+  sbb    r13, [reg_p2+104] 
+  sbb    r14, [reg_p2+112] 
+  sbb    r15, [reg_p2+120]
+  setc   cl 
+  mov    [reg_p3+64], r8
+  mov    [reg_p3+72], r9
+  mov    [reg_p3+80], r10
+  mov    [reg_p3+88], r11
+  mov    [reg_p3+96], r12
+  mov    [reg_p3+104], r13
+  mov    [reg_p3+112], r14
+  mov    [reg_p3+120], r15
+  // Words 16-19; the final borrows are not captured
+  mov    r8, [reg_p3+128]
+  mov    r9, [reg_p3+136]
+  mov    r10, [reg_p3+144]
+  mov    r11, [reg_p3+152]
+  bt     rax, 0 
+  sbb    r8, [reg_p1+128] 
+  sbb    r9, [reg_p1+136] 
+  sbb    r10, [reg_p1+144] 
+  sbb    r11, [reg_p1+152]
+  bt     rcx, 0 
+  sbb    r8, [reg_p2+128] 
+  sbb    r9, [reg_p2+136] 
+  sbb    r10, [reg_p2+144] 
+  sbb    r11, [reg_p2+152]
+  mov    [reg_p3+128], r8
+  mov    [reg_p3+136], r9
+  mov    [reg_p3+144], r10
+  mov    [reg_p3+152], r11
+  
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
\ No newline at end of file
diff --git a/SIKE_sw/src/P610/P610.c b/SIKE_sw/src/P610/P610.c
new file mode 100644
index 0000000..cec3d9b
--- /dev/null
+++ b/SIKE_sw/src/P610/P610.c
@@ -0,0 +1,140 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: supersingular isogeny parameters and generation of functions for P610
+*********************************************************************************************/  
+
+#include "P610_api.h" 
+#include "P610_internal.h"
+#include "../internal.h"
+
+
+// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points:
+// --------------------------------------------------------------------------------------------------
+// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). 
+// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position.
+// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. 
+// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32.
+// For example, a 610-bit field element is represented with Ceil(610 / 64) = 10 64-bit digits or Ceil(610 / 32) = 20 32-bit digits.
+
+//
+// Curve isogeny system "SIDHp610". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p610^2), where A=6, B=1, C=1 and p610 = 2^305*3^192-1
+//
+         
+const uint64_t p610[NWORDS64_FIELD]              = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x6E01FFFFFFFFFFFF, 
+                                                     0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768 };
+const uint64_t p610x2[NWORDS64_FIELD]            = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDC03FFFFFFFFFFFF,
+                                                     0x62F09BD154B5605C, 0x35CF7E8A091FF357, 0x64AB65F421884A55, 0x03202184A3CFB119, 0x00000004F7ED4ED1 };
+const uint64_t p610x4[NWORDS64_FIELD]            = { 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xB807FFFFFFFFFFFF, 
+                                                     0xC5E137A2A96AC0B9, 0x6B9EFD14123FE6AE, 0xC956CBE8431094AA, 0x06404309479F6232, 0x00000009EFDA9DA2 };
+const uint64_t p610p1[NWORDS64_FIELD]            = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x6E02000000000000,
+                                                     0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768 };   
+const uint64_t p610x16p[2*NWORDS64_FIELD]        = { 0x0000000000000010, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x3FC0000000000000, 
+                                                     0xD0F642EAB4A9FA32, 0xA308175F6E00CA89, 0xB549A0BDE77B5AAC, 0xCDFDE7B5C304EE69, 0x7FDB7FF0812B12EF, 
+                                                     0xE09BA529B9FE1167, 0xD249C196DAB8CD7F, 0xD4E22754A3F20928, 0x97825638B19A7CCE, 0x05E04550FC4CCE0D, 
+                                                     0x8FB5DA1152CDE50C, 0xF9649BA3EA408644, 0x4473C93E6441063D, 0xBE190269D1337B7B, 0x0000000000000062 }; 
+// Order of Alice's subgroup
+const uint64_t Alice_order[NWORDS64_ORDER]       = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0002000000000000 }; 
+// Order of Bob's subgroup
+const uint64_t Bob_order[NWORDS64_ORDER]         = { 0x26F4552D58173701, 0xDFA28247FCD5D8BC, 0xD97D086212954D73, 0x086128F3EC46592A, 0x00013DFB53B440C8 };
+// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i} in GF(p610^2), expressed in Montgomery representation
+const uint64_t A_gen[6 * NWORDS64_FIELD]         = { 0x5019EC96A75AC57A, 0x8AEA0E717712C6F1, 0x03C067C819D29E5E, 0x59F454425FE307D9, 0x6D29215D9AD5E6D4,
+                                                     0xD8C5A27CDC9DD34A, 0x972DC274DAB435B3, 0x82A597C70A80E10F, 0x48175986EFED547F, 0x00000000671A3592,   // XPA0
+                                                     0xE4BA9CC3EEEC53F4, 0xBD34E4FEDB0132D3, 0x1B7125C87BEE960C, 0x25D615BF3CFAA355, 0xFC8EC20DC367D66A,
+                                                     0xB44F3FD1CC73289C, 0xD84BF51195C2E012, 0x38D7C756EB370F48, 0xBBC236249F94F72A, 0x000000013020CC63,   // XPA1
+                                                     0x1D7C945D3DBCC38C, 0x9A5F7C12CA8BA5B9, 0x1E8F87985B01CBE3, 0xD2CABF82F5BC5235, 0x3BDE474ECCA9FAA2,
+                                                     0xB98CD975DF9FB0A8, 0x444E4464B9C67790, 0xCB2E888565CE6AD9, 0xDB64FFE2A1C350E2, 0x00000001D7532756,   // XQA0
+                                                     0x1E8B3AA2382C9079, 0x28CB31E08A943C00, 0xE04D02266E8A63E1, 0x84A2D260214EF65F, 0xD5933DA25018E226,
+                                                     0xBC8BF038928C4BA9, 0x91E9D0CB7EAF58A9, 0x04A4627B75E008E1, 0x58CEF27583E50C2E, 0x00000002170DDF44,   // XQA1
+                                                     0x261DD0782CEC958D, 0xC25B3AE64BBC0311, 0x9F21B8A8981B15FE, 0xA3C0B52CD5FFC45B, 0x5D2E65A016702C6A,
+                                                     0x8C5586CA98722EDE, 0x61490A967A6B4B1A, 0xFA64E30231F719AF, 0x9CEAB8B6301BB2DF, 0x00000000CF5AEA7D,   // XRA0
+                                                     0xB980435A77B912C0, 0x2B4A97F70E0FC873, 0x415C7FA4DE96F43C, 0xE5EED95643E443FD, 0xCBE18DB57C51B354,
+                                                     0x51C96C3FFABD2D46, 0x5C14637B9A5765D6, 0x45D2369C4D0199A5, 0x25A1F9C5BBF1E683, 0x000000025AD7A11B }; // XRA1
+// Bob's generator values {XPB0 + XPB1*i, XQB0 + XQB1*i, XRB0 + XRB1*i} in GF(p610^2), with XPB1 = XQB1 = 0, expressed in Montgomery representation
+const uint64_t B_gen[6 * NWORDS64_FIELD]         = { 0xC6C8E180E41884BA, 0x2161D2F4FBC32B95, 0xCBF83091BDB34092, 0xD742CC0AD4CC7E38, 0x61A1FA7E1B14FBD7,
+                                                     0xF0E5FC70137597C4, 0x1F0C8F2585E20B1F, 0xC68E44A1C032A4C2, 0xE3C65FB8AF155A0D, 0x00000001409EE8D5,   // XPB0
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,   // XPB1
+                                                     0xF586DB4A16BE1880, 0x712F10D95E6C65A9, 0x9D5AAC3B83584B87, 0x4ECDAA98182C8261, 0xAD7D4C15588FD230,
+                                                     0x4197C54E96B7D926, 0xED15BB13E8C588ED, 0x3E299AEAD5AAD7C7, 0xF36B25F1BD579F79, 0x000000021CE65B5B,   // XQB0
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,   // XQB1
+                                                     0x7A87897A0C4C3FD7, 0x3C1879ECD4D33D76, 0x595C28A36FFBA1A0, 0xF53FF66A2A7FD0FB, 0xB39F5A91230E56FA,
+                                                     0x81F21610DA3EA8B5, 0xEBB3B9A627428A90, 0x8661123B35748010, 0xE196173B9C48781D, 0x00000002198166AC,   // XRB0
+                                                     0x5E3CC79B37006D6A, 0xE0358A9AB2EA7923, 0x3B725CB595180951, 0x0724637F1DD0C191, 0x7BB031B67DAB9D19,
+                                                     0x53CCB8BECEDD3435, 0xEE5DF7FFEBFA7A0A, 0x899EDB7D8B9694C4, 0x0CA38EB4AE5506B6, 0x00000001489DE1CD }; // XRB1
+// Montgomery constant Montgomery_R2 = (2^640)^2 mod p610
+const uint64_t Montgomery_R2[NWORDS64_FIELD]     = { 0xE75F5D201A197727, 0xE0B85963B627392E, 0x6BC1707818DE493D, 0xDC7F419940D1A0C5, 0x7358030979EDE54A,
+                                                     0x84F4BEBDEED75A5C, 0x7ECCA66E13427B47, 0xC5BB4E65280080B3, 0x7019950F516DA19A, 0x000000008E290FF3 };                                                    
+// Value one in Montgomery representation 
+const uint64_t Montgomery_one[NWORDS64_FIELD]    = { 0x00000000670CC8E6, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x9A34000000000000,
+                                                     0x4D99C2BD28717A3F, 0x0A4A1839A323D41C, 0xD2B62215D06AD1E2, 0x1369026E862CAF3D, 0x000000010894E964 };
+
+
+// Fixed parameters for isogeny tree computation
+const unsigned int strat_Alice[MAX_Alice-1] = { 
+67, 37, 21, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 
+2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 16, 9, 
+5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 
+1, 4, 2, 1, 1, 2, 1, 1, 33, 16, 8, 5, 2, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 2, 1, 
+1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, 2, 1, 1, 
+4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 };
+
+const unsigned int strat_Bob[MAX_Bob-1] = { 
+86, 48, 27, 15, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, 
+1, 3, 2, 1, 1, 1, 1, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 
+1, 1, 2, 1, 1, 1, 21, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 
+1, 1, 2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 38, 
+21, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 
+9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 17, 9, 5, 3, 2, 1, 1, 
+1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 
+1, 1 };
+
+// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions
+#define fpcopy                        fpcopy610
+#define fpzero                        fpzero610
+#define fpadd                         fpadd610
+#define fpsub                         fpsub610
+#define fpneg                         fpneg610
+#define fpdiv2                        fpdiv2_610
+#define fpcorrection                  fpcorrection610
+#define fpmul_mont                    fpmul610_mont
+#define fpsqr_mont                    fpsqr610_mont
+#define fpinv_mont                    fpinv610_mont
+#define fpinv_chain_mont              fpinv610_chain_mont
+#define fp2copy                       fp2copy610
+#define fp2zero                       fp2zero610
+#define fp2add                        fp2add610
+#define fp2sub                        fp2sub610
+#define mp_sub_p2                     mp_sub610_p2
+#define mp_sub_p4                     mp_sub610_p4
+#define sub_p4                        mp_sub_p4
+#define fp2neg                        fp2neg610
+#define fp2div2                       fp2div2_610
+#define fp2correction                 fp2correction610
+#define fp2mul_mont                   fp2mul610_mont
+#define fp2sqr_mont                   fp2sqr610_mont
+#define fp2inv_mont                   fp2inv610_mont
+#define fp2inv_mont_bingcd            fp2inv610_mont_bingcd
+#define fpequal_non_constant_time     fpequal610_non_constant_time
+#define mp_add_asm                    mp_add610_asm
+#define mp_subaddx2_asm               mp_subadd610x2_asm
+#define mp_dblsubx2_asm               mp_dblsub610x2_asm
+#define crypto_kem_keypair            crypto_kem_keypair_SIKEp610
+#define crypto_kem_enc                crypto_kem_enc_SIKEp610
+#define crypto_kem_dec                crypto_kem_dec_SIKEp610
+#define random_mod_order_A            random_mod_order_A_SIDHp610
+#define random_mod_order_B            random_mod_order_B_SIDHp610
+#define EphemeralKeyGeneration_A      EphemeralKeyGeneration_A_SIDHp610
+#define EphemeralKeyGeneration_B      EphemeralKeyGeneration_B_SIDHp610
+#define EphemeralSecretAgreement_A    EphemeralSecretAgreement_A_SIDHp610
+#define EphemeralSecretAgreement_B    EphemeralSecretAgreement_B_SIDHp610
+
+#include "../fpx.c"
+#include "../ec_isogeny.c"
+#include "../sidh.c"
+#include "../sike.c"
\ No newline at end of file
diff --git a/SIKE_sw/src/P610/P610_api.h b/SIKE_sw/src/P610/P610_api.h
new file mode 100644
index 0000000..40bec58
--- /dev/null
+++ b/SIKE_sw/src/P610/P610_api.h
@@ -0,0 +1,112 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: API header file for P610
+*********************************************************************************************/  
+
+#ifndef P610_API_H
+#define P610_API_H
+
+
+/*********************** Key encapsulation mechanism API ***********************/
+
+#define CRYPTO_SECRETKEYBYTES     524    // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes
+#define CRYPTO_PUBLICKEYBYTES     462
+#define CRYPTO_BYTES               24
+#define CRYPTO_CIPHERTEXTBYTES    486    // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes
+
+// Algorithm name
+#define CRYPTO_ALGNAME "SIKEp610"  
+
+// SIKE's key generation
+// It produces a private key sk and computes the public key pk.
+// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 524 bytes)
+//          public key pk (CRYPTO_PUBLICKEYBYTES = 462 bytes) 
+int crypto_kem_keypair_SIKEp610(unsigned char *pk, unsigned char *sk);
+
+// SIKE's encapsulation
+// Input:   public key pk         (CRYPTO_PUBLICKEYBYTES = 462 bytes)
+// Outputs: shared secret ss      (CRYPTO_BYTES = 24 bytes)
+//          ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 486 bytes) 
+int crypto_kem_enc_SIKEp610(unsigned char *ct, unsigned char *ss, const unsigned char *pk);
+
+// SIKE's decapsulation
+// Input:   secret key sk         (CRYPTO_SECRETKEYBYTES = 524 bytes)
+//          ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 486 bytes) 
+// Outputs: shared secret ss      (CRYPTO_BYTES = 24 bytes)
+int crypto_kem_dec_SIKEp610(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);
+
+
+// Encoding of keys for KEM-based isogeny system "SIKEp610" (wire format):
+// ----------------------------------------------------------------------
+// Elements over GF(p610) are encoded in 77 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). 
+// Elements (a+b*i) over GF(p610^2), where a and b are defined over GF(p610), are encoded as {a, b}, with a in the lowest memory portion.
+//
+// Private keys sk consist of the concatenation of a 24-byte random value, a value in the range [0, 2^Floor(Log(2,3^192))-1] and the public key pk. In the SIKE API, 
+// private keys are encoded in 524 octets in little endian format. 
+// Public keys pk consist of 3 elements in GF(p610^2). In the SIKE API, pk is encoded in 462 octets. 
+// Ciphertexts ct consist of the concatenation of a public key value and a 24-byte value. In the SIKE API, ct is encoded in 462 + 24 = 486 octets.  
+// Shared keys ss consist of a value of 24 octets.
+
+
+/*********************** Ephemeral key exchange API ***********************/
+
+#define SIDH_SECRETKEYBYTES_A    39
+#define SIDH_SECRETKEYBYTES_B    38
+#define SIDH_PUBLICKEYBYTES     462
+#define SIDH_BYTES              154 
+
+// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys.
+// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016.
+// Extended version available at: http://eprint.iacr.org/2016/859     
+
+// Generation of Alice's secret key 
+// Outputs random value in [0, 2^305 - 1] to be used as Alice's private key
+void random_mod_order_A_SIDHp610(unsigned char* random_digits);
+
+// Generation of Bob's secret key 
+// Outputs random value in [0, 2^Floor(Log(2,3^192)) - 1] to be used as Bob's private key
+void random_mod_order_B_SIDHp610(unsigned char* random_digits);
+
+// Alice's ephemeral public key generation
+// Input:  a private key PrivateKeyA in the range [0, 2^305 - 1], stored in 39 bytes. 
+// Output: the public key PublicKeyA consisting of 3 GF(p610^2) elements encoded in 462 bytes.
+int EphemeralKeyGeneration_A_SIDHp610(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA);
+
+// Bob's ephemeral public key generation
+// It computes the public key PublicKeyB from the caller-supplied private key PrivateKeyB.
+// Input:  a private key PrivateKeyB in the range [0, 2^Floor(Log(2,3^192)) - 1], stored in 38 bytes.  
+// Output: the public key PublicKeyB consisting of 3 GF(p610^2) elements encoded in 462 bytes.
+int EphemeralKeyGeneration_B_SIDHp610(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB);
+
+// Alice's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB
+// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^305 - 1], stored in 39 bytes. 
+//         Bob's PublicKeyB consists of 3 GF(p610^2) elements encoded in 462 bytes.
+// Output: a shared secret SharedSecretA that consists of one element in GF(p610^2) encoded in 154 bytes.
+int EphemeralSecretAgreement_A_SIDHp610(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA);
+
+// Bob's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA
+// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^192)) - 1], stored in 38 bytes.  
+//         Alice's PublicKeyA consists of 3 GF(p610^2) elements encoded in 462 bytes.
+// Output: a shared secret SharedSecretB that consists of one element in GF(p610^2) encoded in 154 bytes. 
+int EphemeralSecretAgreement_B_SIDHp610(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB);
+
+
+// Encoding of keys for KEX-based isogeny system "SIDHp610" (wire format):
+// ----------------------------------------------------------------------
+// Elements over GF(p610) are encoded in 77 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). 
+// Elements (a+b*i) over GF(p610^2), where a and b are defined over GF(p610), are encoded as {a, b}, with a in the lowest memory portion.
+//
+// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^305-1] and [0, 2^Floor(Log(2,3^192)) - 1], resp. In the SIDH API, 
+// Alice's and Bob's private keys are encoded in 39 and 38 octets, resp., in little endian format.
+// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p610^2). In the SIDH API, they are encoded in 462 octets. 
+// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p610^2). In the SIDH API, they are encoded in 154 octets.
+
+
+#endif
\ No newline at end of file
diff --git a/SIKE_sw/src/P610/P610_internal.h b/SIKE_sw/src/P610/P610_internal.h
new file mode 100644
index 0000000..bf99702
--- /dev/null
+++ b/SIKE_sw/src/P610/P610_internal.h
@@ -0,0 +1,174 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: internal header file for P610
+*********************************************************************************************/  
+
+#ifndef P610_INTERNAL_H
+#define P610_INTERNAL_H
+
+#include "../config.h"
+ 
+
+#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_ARM64)
+    #define NWORDS_FIELD    10              // Number of words of a 610-bit field element
+    #define p610_ZERO_WORDS 4               // Number of "0" digits in the least significant part of p610 + 1     
+#elif (TARGET == TARGET_x86)
+    #define NWORDS_FIELD    20 
+    #define p610_ZERO_WORDS 9
+#endif
+    
+
+// Basic constants
+
+#define NBITS_FIELD             610  
+#define MAXBITS_FIELD           640                
+#define MAXWORDS_FIELD          ((MAXBITS_FIELD+RADIX-1)/RADIX)     // Max. number of words to represent field elements
+#define NWORDS64_FIELD          ((NBITS_FIELD+63)/64)               // Number of 64-bit words of a 610-bit field element 
+#define NBITS_ORDER             320
+#define NWORDS_ORDER            ((NBITS_ORDER+RADIX-1)/RADIX)       // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp.
+#define NWORDS64_ORDER          ((NBITS_ORDER+63)/64)               // Number of 64-bit words of a 320-bit element 
+#define MAXBITS_ORDER           NBITS_ORDER
+#define ALICE                   0
+#define BOB                     1 
+#define OALICE_BITS             305  
+#define OBOB_BITS               305    
+#define OBOB_EXPON              192 
+#define MASK_ALICE              0x01
+#define MASK_BOB                0xFF  
+#define PRIME                   p610  
+#define PARAM_A                 6  
+#define PARAM_C                 1
+// Fixed parameters for isogeny tree computation
+#define MAX_INT_POINTS_ALICE    8      
+#define MAX_INT_POINTS_BOB      10 
+#define MAX_Alice               152
+#define MAX_Bob                 192
+#define MSG_BYTES               24
+#define SECRETKEY_A_BYTES       ((OALICE_BITS + 7) / 8)
+#define SECRETKEY_B_BYTES       ((OBOB_BITS - 1 + 7) / 8)
+#define FP2_ENCODED_BYTES       (2*((NBITS_FIELD + 7) / 8))         // Bytes of one encoded GF(p610^2) element; parenthesized so it expands safely inside larger expressions
+
+
+// SIDH's basic element definitions and point representations
+
+typedef digit_t felm_t[NWORDS_FIELD];                                 // Datatype for representing 610-bit field elements (640-bit max.)
+typedef digit_t dfelm_t[2*NWORDS_FIELD];                              // Datatype for representing double-precision 2x610-bit field elements (2x640-bit max.) 
+typedef felm_t  f2elm_t[2];                                           // Datatype for representing quadratic extension field elements GF(p610^2)
+        
+typedef struct { f2elm_t X; f2elm_t Z; } point_proj;                  // Point representation in projective XZ Montgomery coordinates.
+typedef point_proj point_proj_t[1]; 
+
+#ifdef COMPRESS
+    typedef struct { f2elm_t X; f2elm_t Y; f2elm_t Z; } point_full_proj;  // Point representation in full projective XYZ Montgomery coordinates 
+    typedef point_full_proj point_full_proj_t[1]; 
+
+    typedef struct { f2elm_t x; f2elm_t y; } point_affine;                // Point representation in affine coordinates.
+    typedef point_affine point_t[1]; 
+
+    typedef f2elm_t publickey_t[3];      
+#endif
+
+
+/**************** Function prototypes ****************/
+/************* Multiprecision functions **************/
+
+// 610-bit multiprecision addition, c = a+b
+void mp_add610(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_add610_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// 610-bit multiprecision subtraction, c = a-b+2p or c = a-b+4p
+extern void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c);
+extern void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_sub610_p2_asm(const digit_t* a, const digit_t* b, digit_t* c); 
+void mp_sub610_p4_asm(const digit_t* a, const digit_t* b, digit_t* c); 
+
+// 2x610-bit multiprecision subtraction followed by addition with p610*2^640, c = a-b+(p610*2^640) if a-b < 0, otherwise c=a-b 
+void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_subadd610x2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Double 2x610-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
+void mp_dblsub610x2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+/************ Field arithmetic functions *************/
+
+// Copy of a field element, c = a
+void fpcopy610(const digit_t* a, digit_t* c);
+
+// Zeroing a field element, a = 0
+void fpzero610(digit_t* a);
+
+// Non constant-time comparison of two field elements. If a = b return TRUE, otherwise, return FALSE
+bool fpequal610_non_constant_time(const digit_t* a, const digit_t* b); 
+
+// Modular addition, c = a+b mod p610
+extern void fpadd610(const digit_t* a, const digit_t* b, digit_t* c);
+extern void fpadd610_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Modular subtraction, c = a-b mod p610
+extern void fpsub610(const digit_t* a, const digit_t* b, digit_t* c);
+extern void fpsub610_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Modular negation, a = -a mod p610        
+extern void fpneg610(digit_t* a);  
+
+// Modular division by two, c = a/2 mod p610.
+void fpdiv2_610(const digit_t* a, digit_t* c);
+
+// Modular correction to reduce field element a in [0, 2*p610-1] to [0, p610-1].
+void fpcorrection610(digit_t* a);
+
+// 610-bit Montgomery reduction, c = a mod p
+void rdc610_asm(digit_t* ma, digit_t* mc);
+            
+// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p610, where R=2^640
+void fpmul610_mont(const digit_t* a, const digit_t* b, digit_t* c);
+void mul610_asm(const digit_t* a, const digit_t* b, digit_t* c);
+   
+// Field squaring using Montgomery arithmetic, mc = ma^2*R^-1 mod p610, where R=2^640
+void fpsqr610_mont(const digit_t* ma, digit_t* mc);
+
+// Field inversion, a = a^-1 in GF(p610)
+void fpinv610_mont(digit_t* a);
+
+// Exponentiation chain to compute a = a^((p610-3)/4) using Montgomery arithmetic
+void fpinv610_chain_mont(digit_t* a);
+
+/************ GF(p^2) arithmetic functions *************/
+    
+// Copy of a GF(p610^2) element, c = a
+void fp2copy610(const f2elm_t a, f2elm_t c);
+
+// Zeroing a GF(p610^2) element, a = 0
+void fp2zero610(f2elm_t a);
+
+// GF(p610^2) negation, a = -a in GF(p610^2)
+void fp2neg610(f2elm_t a);
+
+// GF(p610^2) addition, c = a+b in GF(p610^2)
+extern void fp2add610(const f2elm_t a, const f2elm_t b, f2elm_t c);           
+
+// GF(p610^2) subtraction, c = a-b in GF(p610^2)
+extern void fp2sub610(const f2elm_t a, const f2elm_t b, f2elm_t c); 
+
+// GF(p610^2) division by two, c = a/2  in GF(p610^2) 
+void fp2div2_610(const f2elm_t a, f2elm_t c);
+
+// Modular correction, a = a in GF(p610^2)
+void fp2correction610(f2elm_t a);
+            
+// GF(p610^2) squaring using Montgomery arithmetic, c = a^2 in GF(p610^2)
+void fp2sqr610_mont(const f2elm_t a, f2elm_t c);
+ 
+// GF(p610^2) multiplication using Montgomery arithmetic, c = a*b in GF(p610^2)
+void fp2mul610_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
+
+// GF(p610^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
+void fp2inv610_mont(f2elm_t a);
+
+
+#endif
diff --git a/SIKE_sw/src/P610/generic/fp_generic.c b/SIKE_sw/src/P610/generic/fp_generic.c
new file mode 100644
index 0000000..f6bb529
--- /dev/null
+++ b/SIKE_sw/src/P610/generic/fp_generic.c
@@ -0,0 +1,259 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: portable modular arithmetic for P610
+*********************************************************************************************/
+
+#include "../P610_internal.h"
+#include "../../internal.h"
+
+// Global constants
+extern const uint64_t p610[NWORDS64_FIELD];
+extern const uint64_t p610p1[NWORDS64_FIELD]; 
+extern const uint64_t p610x2[NWORDS64_FIELD];    
+extern const uint64_t p610x4[NWORDS64_FIELD];
+
+
+__inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p. 
+    unsigned int i, borrow = 0;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p610x2)[i], borrow, c[i]); 
+    }
+} 
+
+
+__inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p.
+    unsigned int i, borrow = 0;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p610x4)[i], borrow, c[i]); 
+    }
+} 
+
+
+__inline void fpadd610(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular addition, c = a+b mod p610.
+  // Inputs: a, b in [0, 2*p610-1] 
+  // Output: c in [0, 2*p610-1] 
+    unsigned int i, carry = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], b[i], carry, c[i]); 
+    }
+
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(carry, c[i], ((digit_t*)p610x2)[i], carry, c[i]); 
+    }
+    mask = 0 - (digit_t)carry;
+
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, c[i], ((digit_t*)p610x2)[i] & mask, carry, c[i]); 
+    }
+} 
+
+
+__inline void fpsub610(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular subtraction, c = a-b mod p610.
+  // Inputs: a, b in [0, 2*p610-1] 
+  // Output: c in [0, 2*p610-1] 
+    unsigned int i, borrow = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    mask = 0 - (digit_t)borrow;
+
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p610x2)[i] & mask, borrow, c[i]); 
+    }
+}
+
+
+__inline void fpneg610(digit_t* a)
+{ // Modular negation computed in place as a = p610x2 - a (p610x2 is used as 2*p610), i.e. a = -a mod p610.
+  // Input: a in [0, 2*p610-1]. Output: a in [1, 2*p610]; note that a = 0 yields 2*p610 rather than 0.
+    unsigned int i, borrow = 0;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, ((digit_t*)p610x2)[i], a[i], borrow, a[i]); 
+    }
+}
+
+
+void fpdiv2_610(const digit_t* a, digit_t* c)
+{ // Modular division by two, c = a/2 mod p610.
+  // Input : a in [0, 2*p610-1] 
+  // Output: c in [0, 2*p610-1] 
+    unsigned int i, carry = 0;
+    digit_t mask;
+        
+    mask = 0 - (digit_t)(a[0] & 1);    // If a is odd compute a+p610
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], ((digit_t*)p610)[i] & mask, carry, c[i]); 
+    }
+
+    mp_shiftr1(c, NWORDS_FIELD);
+} 
+
+
+void fpcorrection610(digit_t* a)
+{ // Modular correction to reduce field element a in [0, 2*p610-1] to [0, p610-1].
+    unsigned int i, borrow = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], ((digit_t*)p610)[i], borrow, a[i]); 
+    }
+    mask = 0 - (digit_t)borrow;
+
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, a[i], ((digit_t*)p610)[i] & mask, borrow, a[i]); 
+    }
+}
+
+
+void digit_x_digit(const digit_t a, const digit_t b, digit_t* c)
+{ // Digit multiplication, digit * digit -> 2-digit result    
+    register digit_t al, ah, bl, bh, temp;
+    digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
+    digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4);
+
+    al = a & mask_low;                        // Low part
+    ah = a >> (sizeof(digit_t) * 4);          // High part
+    bl = b & mask_low;
+    bh = b >> (sizeof(digit_t) * 4);
+
+    albl = al*bl;
+    albh = al*bh;
+    ahbl = ah*bl;
+    ahbh = ah*bh;
+    c[0] = albl & mask_low;                   // C00
+
+    res1 = albl >> (sizeof(digit_t) * 4);
+    res2 = ahbl & mask_low;
+    res3 = albh & mask_low;  
+    temp = res1 + res2 + res3;
+    carry = temp >> (sizeof(digit_t) * 4);
+    c[0] ^= temp << (sizeof(digit_t) * 4);    // C01   
+
+    res1 = ahbl >> (sizeof(digit_t) * 4);
+    res2 = albh >> (sizeof(digit_t) * 4);
+    res3 = ahbh & mask_low;
+    temp = res1 + res2 + res3 + carry;
+    c[1] = temp & mask_low;                   // C10 
+    carry = temp & mask_high; 
+    c[1] ^= (ahbh & mask_high) + carry;       // C11
+}
+
+
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords.   
+    unsigned int i, j;
+    digit_t t = 0, u = 0, v = 0, UV[2];
+    unsigned int carry = 0;
+    
+    for (i = 0; i < nwords; i++) {
+        for (j = 0; j <= i; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]); 
+            ADDC(0, UV[0], v, carry, v); 
+            ADDC(carry, UV[1], u, carry, u); 
+            t += carry;
+        }
+        c[i] = v;
+        v = u; 
+        u = t;
+        t = 0;
+    }
+
+    for (i = nwords; i < 2*nwords-1; i++) {
+        for (j = i-nwords+1; j < nwords; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]); 
+            ADDC(0, UV[0], v, carry, v); 
+            ADDC(carry, UV[1], u, carry, u); 
+            t += carry;
+        }
+        c[i] = v;
+        v = u; 
+        u = t;
+        t = 0;
+    }
+    c[2*nwords-1] = v; 
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p610.
+  // mc = ma*R^-1 mod p610x2, where R = 2^640 (NWORDS_FIELD words of RADIX bits).
+  // If ma < 2^640*p610, the output mc is in the range [0, 2*p610-1].
+  // ma is assumed to be in Montgomery representation.
+    unsigned int i, j, carry, count = p610_ZERO_WORDS;
+    digit_t UV[2], t = 0, u = 0, v = 0;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        mc[i] = 0;
+    }
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        for (j = 0; j < i; j++) {
+            if (j < (i-p610_ZERO_WORDS+1)) { // NOTE(review): unsigned; wraps for i < p610_ZERO_WORDS-1 so the test passes -- presumably harmless as those p610p1 words are zero
+                MUL(mc[j], ((digit_t*)p610p1)[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v); 
+                ADDC(carry, UV[1], u, carry, u); 
+                t += carry; 
+            }
+        }
+        ADDC(0, v, ma[i], carry, v); 
+        ADDC(carry, u, 0, carry, u); 
+        t += carry; 
+        mc[i] = v;
+        v = u;
+        u = t;
+        t = 0;
+    }    
+
+    for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {
+        if (count > 0) {
+            count -= 1;
+        }
+        for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
+            if (j < (NWORDS_FIELD-count)) { 
+                MUL(mc[j], ((digit_t*)p610p1)[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v); 
+                ADDC(carry, UV[1], u, carry, u); 
+                t += carry;
+            }
+        }
+        ADDC(0, v, ma[i], carry, v); 
+        ADDC(carry, u, 0, carry, u); 
+        t += carry; 
+        mc[i-NWORDS_FIELD] = v;
+        v = u;
+        u = t;
+        t = 0;
+    }
+    ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); 
+    mc[NWORDS_FIELD-1] = v;
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/P697/AMD64/fp_x64.c b/SIKE_sw/src/P697/AMD64/fp_x64.c
new file mode 100644
index 0000000..a0545a3
--- /dev/null
+++ b/SIKE_sw/src/P697/AMD64/fp_x64.c
@@ -0,0 +1,802 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: modular arithmetic optimized for x64 platforms for P697
+*********************************************************************************************/
+
+#include "../P697_internal.h"
+#include "../../internal.h"
+
+// Global constants
+extern const uint64_t p697[NWORDS_FIELD];
+extern const uint64_t p697p1[NWORDS_FIELD];
+extern const uint64_t p697x2[NWORDS_FIELD];
+extern const uint64_t p697x4[NWORDS_FIELD];
+
+
+__inline void mp_sub697_p2(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p.    
+#if (OS_TARGET == OS_WIN)
+    unsigned int i, borrow = 0;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p697x2)[i], borrow, c[i]); 
+    }
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    
+    mp_sub697_p2_asm(a, b, c);    
+
+#endif
+} 
+
+
+__inline void mp_sub697_p4(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p.    
+#if (OS_TARGET == OS_WIN)
+    unsigned int i, borrow = 0;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p697x4)[i], borrow, c[i]); 
+    }
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    
+    mp_sub697_p4_asm(a, b, c);    
+
+#endif
+}
+
+
+__inline void fpadd697(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular addition, c = a+b mod p697.
+  // Inputs: a, b in [0, 2*p697-1] 
+  // Output: c in [0, 2*p697-1] 
+    
+#if (OS_TARGET == OS_WIN)
+    unsigned int i, carry = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], b[i], carry, c[i]); 
+    }
+
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(carry, c[i], ((digit_t*)p697x2)[i], carry, c[i]); 
+    }
+    mask = 0 - (digit_t)carry;
+
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, c[i], ((digit_t*)p697x2)[i] & mask, carry, c[i]); 
+    } 
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    
+    fpadd697_asm(a, b, c);    
+
+#endif
+} 
+
+
+__inline void fpsub697(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular subtraction, c = a-b mod p697.
+  // Inputs: a, b in [0, 2*p697-1] 
+  // Output: c in [0, 2*p697-1] 
+    
+#if (OS_TARGET == OS_WIN)
+    unsigned int i, borrow = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    mask = 0 - (digit_t)borrow;
+
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], ((digit_t*)p697x2)[i] & mask, borrow, c[i]); 
+    }
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    
+    fpsub697_asm(a, b, c);    
+
+#endif
+}
+
+
+__inline void fpneg697(digit_t* a)
+{ // Modular negation computed in place as a = p697x2 - a (p697x2 is used as 2*p697), i.e. a = -a mod p697.
+  // Input: a in [0, 2*p697-1]. Output: a in [1, 2*p697]; note that a = 0 yields 2*p697 rather than 0.
+    unsigned int i, borrow = 0;
+    
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, ((digit_t*)p697x2)[i], a[i], borrow, a[i]); 
+    }
+}
+
+
+void fpdiv2_697(const digit_t* a, digit_t* c)
+{ // Modular division by two, c = a/2 mod p697.
+  // Input : a in [0, 2*p697-1] 
+  // Output: c in [0, 2*p697-1] 
+    unsigned int i, carry = 0;
+    digit_t mask;
+        
+    mask = 0 - (digit_t)(a[0] & 1);    // If a is odd compute a+p697
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], ((digit_t*)p697)[i] & mask, carry, c[i]); 
+    }
+
+    mp_shiftr1(c, NWORDS_FIELD);
+}  
+
+
+void fpcorrection697(digit_t* a)
+{ // Modular correction to reduce field element a in [0, 2*p697-1] to [0, p697-1].
+    unsigned int i, borrow = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], ((digit_t*)p697)[i], borrow, a[i]); 
+    }
+    mask = 0 - (digit_t)borrow;
+
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, a[i], ((digit_t*)p697)[i] & mask, borrow, a[i]); 
+    }
+}
+
+
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords.
+        
+    (void)nwords;
+
+#if (OS_TARGET == OS_WIN)
+    digit_t t = 0;
+    uint128_t uv = {0};
+    unsigned int carry = 0;
+        
+    MULADD128(a[0], b[0], uv, carry, uv);
+    t += carry;
+    c[0] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[0], uv, carry, uv);
+    t += carry;
+    c[1] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[0], uv, carry, uv);
+    t += carry;
+    c[2] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[0], uv, carry, uv);
+    t += carry;
+    c[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[0], uv, carry, uv);
+    t += carry;
+    c[4] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[0], uv, carry, uv);
+    t += carry;
+    c[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[0], uv, carry, uv);
+    t += carry;
+    c[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[0], uv, carry, uv);
+    t += carry;
+    c[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[0], uv, carry, uv);
+    t += carry;
+    c[8] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[0], uv, carry, uv);
+    t += carry;
+    c[9] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[10], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[10], b[0], uv, carry, uv);
+    t += carry;
+    c[10] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[10], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[10], uv, carry, uv);
+    t += carry;
+    c[11] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;    
+    
+    MULADD128(a[10], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[10], uv, carry, uv);
+    t += carry;
+    c[12] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[10], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[10], uv, carry, uv);
+    t += carry;
+    c[13] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[10], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[10], uv, carry, uv);
+    t += carry;
+    c[14] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[10], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[10], uv, carry, uv);
+    t += carry;
+    c[15] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[10], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[10], uv, carry, uv);
+    t += carry;
+    c[16] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[10], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[10], uv, carry, uv);
+    t += carry;
+    c[17] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[10], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[10], uv, carry, uv);
+    t += carry;
+    c[18] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[10], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[10], uv, carry, uv);
+    t += carry;
+    c[19] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    
+    MULADD128(a[10], b[10], uv, carry, uv);
+    c[20] = uv[0];
+    c[21] = uv[1];
+
+#elif (OS_TARGET == OS_LINUX)
+    
+    mul697_asm(a, b, c);
+
+#endif
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Montgomery reduction exploiting special form of the prime.
+  // mc = ma*R^-1 mod p697x2, where R = 2^704.
+  // If ma < 2^704*p697, the output mc is in the range [0, 2*p697-1].
+  // ma is assumed to be in Montgomery representation.
+        
+#if (OS_TARGET == OS_WIN)
+    unsigned int carry;
+    digit_t t = 0;
+    uint128_t uv = {0};
+    
+    mc[0] = ma[0];
+    mc[1] = ma[1];
+    mc[2] = ma[2];
+    mc[3] = ma[3];
+    mc[4] = ma[4];
+    MUL128(mc[0], ((digit_t*)p697p1)[5], uv);
+    ADDC(0, uv[0], ma[5], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    mc[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = 0;
+
+    MULADD128(mc[0], ((digit_t*)p697p1)[6], uv, carry, uv);
+    MULADD128(mc[1], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[6], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[7], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[8], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[8] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[9], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[9] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[0], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[10], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[10] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[1], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[11], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[0] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[2], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[12], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[1] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[3], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[13], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[2] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[4], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[14], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[5], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p697p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[15], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[4] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[6], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p697p1)[6], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[16], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[7], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p697p1)[7], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[17], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[8], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p697p1)[8], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[18], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[9], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p697p1)[9], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[19], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[8] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+
+    MULADD128(mc[10], ((digit_t*)p697p1)[10], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[20], carry, mc[9]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]);
+    ADDC(0, uv[1], ma[21], carry, mc[10]); 
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    
+    rdc697_asm(ma, mc);    
+
+#endif
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/P697/AMD64/fp_x64_asm.S b/SIKE_sw/src/P697/AMD64/fp_x64_asm.S
new file mode 100644
index 0000000..e00fab4
--- /dev/null
+++ b/SIKE_sw/src/P697/AMD64/fp_x64_asm.S
@@ -0,0 +1,1681 @@
+//*******************************************************************************************
+// SIDH: an efficient supersingular isogeny cryptography library 
+// Copyright (c) Microsoft Corporation
+//
+// Website: https://github.com/microsoft/PQCrypto-SIDH
+// Released under MIT license 
+//
+// Abstract: field arithmetic in x64 assembly for P697 on Linux 
+//*******************************************************************************************  
+
+.intel_syntax noprefix 
+
+// Format function and variable names for Mac OS X
+#if defined(__APPLE__)
+    #define fmt(f)    _##f
+#else
+    #define fmt(f)    f
+#endif
+
+// Registers that are used for parameter passing:
+#define reg_p1  rdi
+#define reg_p2  rsi
+#define reg_p3  rdx
+
+
+.text
+//***********************************************************************
+//  Field addition with correction: computes a + b - p697x2, then adds p697x2 back (masked) if that borrowed
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
+//*********************************************************************** 
+.global fmt(fpadd697_asm)
+fmt(fpadd697_asm):
+  push   r12                        // r12-r15 are saved here and restored before ret
+  push   r13
+  push   r14
+  push   r15
+  
+  mov    r8, [reg_p1]               // r8-r15, rcx, rax, rdi <- the 11 64-bit words of a
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56] 
+  mov    rcx, [reg_p1+64]
+  mov    rax, [reg_p1+72]
+  mov    rdi, [reg_p1+80]           // overwrites reg_p1 (rdi); a is not read again
+  add    r8, [reg_p2]               // a + b, one carry chain across all 11 words
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  adc    r12, [reg_p2+32] 
+  adc    r13, [reg_p2+40] 
+  adc    r14, [reg_p2+48] 
+  adc    r15, [reg_p2+56]
+  adc    rcx, [reg_p2+64]
+  adc    rax, [reg_p2+72]
+  adc    rdi, [reg_p2+80] 
+  mov    [reg_p3+72], rax           // spill word 9 of the sum so rax can carry the constants
+
+  mov    rax, [rip+fmt(p697x2)]     // subtract p697x2 word by word (one borrow chain; mov leaves CF intact)
+  sub    r8, rax
+  mov    rax, [rip+fmt(p697x2)+8]   // this single load is reused for words 1-4 (nothing is loaded from +16..+32)
+  sbb    r9, rax
+  sbb    r10, rax
+  sbb    r11, rax
+  sbb    r12, rax
+  mov    rax, [rip+fmt(p697x2)+40]
+  sbb    r13, rax
+  mov    rax, [rip+fmt(p697x2)+48]
+  sbb    r14, rax
+  mov    rax, [rip+fmt(p697x2)+56]
+  sbb    r15, rax
+  mov    rax, [rip+fmt(p697x2)+64]
+  sbb    rcx, rax
+  mov    rsi, [reg_p3+72]           // reload spilled word 9; reg_p2 (rsi) is overwritten, b is not read again
+  sbb    rsi, [rip+fmt(p697x2)+72]
+  mov    rax, [rip+fmt(p697x2)+80]
+  sbb    rdi, rax
+  mov    [reg_p3+64], rcx           // spill words 8 and 9 of the difference to free rcx and rsi
+  mov    [reg_p3+72], rsi
+  mov    rax, 0
+  sbb    rax, 0                     // rax <- 0 - CF: all ones if the subtraction borrowed, otherwise 0
+  
+  mov    rcx, [rip+fmt(p697x2)]     // p697x2 words ANDed with the mask: a nonzero addend only after a borrow
+  and    rcx, rax
+  mov    rsi, [rip+fmt(p697x2)+8]
+  and    rsi, rax
+  
+  add    r8, rcx  
+  adc    r9, rsi 
+  adc    r10, rsi 
+  adc    r11, rsi
+  adc    r12, rsi
+  mov    [reg_p3], r8 
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11 
+  mov    [reg_p3+32], r12  
+  setc   cl                         // save the carry out of word 4 in bit 0 of rcx; the ands below clobber CF
+  
+  mov    rsi, [rip+fmt(p697x2)+40]
+  and    rsi, rax
+  mov    r8, [rip+fmt(p697x2)+48]
+  and    r8, rax
+  mov    r9, [rip+fmt(p697x2)+56]
+  and    r9, rax
+  mov    r10, [rip+fmt(p697x2)+64]
+  and    r10, rax
+  mov    r11, [rip+fmt(p697x2)+72]
+  and    r11, rax
+  mov    r12, [rip+fmt(p697x2)+80]
+  and    r12, rax
+  
+  bt     rcx, 0                     // CF <- bit 0 of rcx, i.e. the carry saved by setc
+  adc    r13, rsi  
+  adc    r14, r8
+  adc    r15, r9
+  mov    rax, [reg_p3+64]           // reload spilled words 8 and 9
+  mov    rcx, [reg_p3+72]
+  adc    r10, rax 
+  adc    r11, rcx
+  adc    r12, rdi
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15  
+  mov    [reg_p3+64], r10
+  mov    [reg_p3+72], r11
+  mov    [reg_p3+80], r12
+
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Field subtraction with correction: computes a - b, then adds p697x2 (masked) if that borrowed
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
+//*********************************************************************** 
+.global fmt(fpsub697_asm)
+fmt(fpsub697_asm):
+  push   r12                        // r12-r15 are saved here and restored before ret
+  push   r13
+  push   r14
+  push   r15
+  
+  mov    r8, [reg_p1]               // r8-r15, rcx, rax, rdi <- the 11 64-bit words of a
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56] 
+  mov    rcx, [reg_p1+64] 
+  mov    rax, [reg_p1+72]
+  mov    rdi, [reg_p1+80]           // overwrites reg_p1 (rdi); a is not read again
+  sub    r8, [reg_p2]               // a - b, one borrow chain across all 11 words
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  sbb    r14, [reg_p2+48] 
+  sbb    r15, [reg_p2+56]
+  sbb    rcx, [reg_p2+64]
+  sbb    rax, [reg_p2+72]
+  sbb    rdi, [reg_p2+80]
+  mov    [reg_p3+64], rcx           // spill words 8 and 9 of the difference to free rcx and rax
+  mov    [reg_p3+72], rax
+  mov    rax, 0                     // mov leaves CF (the final borrow) intact
+  sbb    rax, 0                     // rax <- 0 - CF: all ones if a - b borrowed, otherwise 0
+    
+  mov    rcx, [rip+fmt(p697x2)]     // p697x2 words ANDed with the mask: a nonzero addend only after a borrow
+  and    rcx, rax
+  mov    rsi, [rip+fmt(p697x2)+8]   // this single load is reused for words 1-4 (nothing is loaded from +16..+32)
+  and    rsi, rax
+  
+  add    r8, rcx  
+  adc    r9, rsi 
+  adc    r10, rsi 
+  adc    r11, rsi
+  adc    r12, rsi
+  mov    [reg_p3], r8 
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11 
+  mov    [reg_p3+32], r12  
+  setc   cl                         // save the carry out of word 4 in bit 0 of rcx; the ands below clobber CF
+  
+  mov    rsi, [rip+fmt(p697x2)+40]
+  and    rsi, rax
+  mov    r8, [rip+fmt(p697x2)+48]
+  and    r8, rax
+  mov    r9, [rip+fmt(p697x2)+56]
+  and    r9, rax
+  mov    r10, [rip+fmt(p697x2)+64]
+  and    r10, rax
+  mov    r11, [rip+fmt(p697x2)+72]
+  and    r11, rax
+  mov    r12, [rip+fmt(p697x2)+80]
+  and    r12, rax
+  
+  bt     rcx, 0                     // CF <- bit 0 of rcx, i.e. the carry saved by setc
+  adc    r13, rsi  
+  adc    r14, r8
+  adc    r15, r9
+  mov    rax, [reg_p3+64]           // reload spilled words 8 and 9
+  mov    rcx, [reg_p3+72]
+  adc    r10, rax  
+  adc    r11, rcx 
+  adc    r12, rdi
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15 
+  mov    [reg_p3+64], r10
+  mov    [reg_p3+72], r11
+  mov    [reg_p3+80], r12
+  
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret 
+
+
+///////////////////////////////////////////////////////////////// MACRO
+.macro SUB697_PX  P0               // [reg_p3] <- [reg_p1] - [reg_p2] + P0 over 11 words; P0 is always added (no mask)
+  push   r12                        // the macro saves and restores r12-r15 itself; the caller supplies ret
+  push   r13
+  push   r14
+  push   r15
+  
+  mov    r8, [reg_p1]               // r8-r15, rax, rcx, rdi <- the 11 64-bit words of a
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56]
+  mov    rax, [reg_p1+64]
+  mov    rcx, [reg_p1+72]
+  mov    rdi, [reg_p1+80]           // overwrites reg_p1 (rdi); a is not read again
+  sub    r8, [reg_p2]               // a - b, one borrow chain; the final borrow is not used afterwards
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40]
+  sbb    r14, [reg_p2+48] 
+  sbb    r15, [reg_p2+56] 
+  sbb    rax, [reg_p2+64] 
+  sbb    rcx, [reg_p2+72] 
+  sbb    rdi, [reg_p2+80] 
+
+  mov    rsi, [rip+\P0]             // reg_p2 (rsi) is overwritten; b is not read again
+  add    r8, rsi  
+  mov    rsi, [rip+\P0+8]           // this single load is reused for words 1-4 (nothing is loaded from +16..+32)
+  adc    r9, rsi  
+  adc    r10, rsi 
+  adc    r11, rsi 
+  adc    r12, rsi  
+  mov    [reg_p3], r8               // the mov instructions in between do not disturb the carry chain
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12 
+  mov    r8, [rip+\P0+40]
+  mov    r9, [rip+\P0+48]
+  mov    r10, [rip+\P0+56] 
+  adc    r13, r8   
+  adc    r14, r9 
+  adc    r15, r10  
+  mov    r8, [rip+\P0+64]
+  mov    r9, [rip+\P0+72]
+  mov    r10, [rip+\P0+80]
+  adc    r8, rax                    // words 8-10 accumulate into the registers holding the P0 words
+  adc    r9, rcx  
+  adc    r10, rdi 
+  mov    [reg_p3+40], r13 
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15 
+  mov    [reg_p3+64], r8
+  mov    [reg_p3+72], r9
+  mov    [reg_p3+80], r10
+  
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  .endm
+
+
+//***********************************************************************
+//  Multiprecision subtraction with correction with 2*p697 (p697x2 is added unconditionally)
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p697
+//*********************************************************************** 
+.global fmt(mp_sub697_p2_asm)
+fmt(mp_sub697_p2_asm):
+
+  SUB697_PX  fmt(p697x2)            // the macro expands to the whole body, including register save/restore
+  ret
+
+
+//***********************************************************************
+//  Multiprecision subtraction with correction with 4*p697 (p697x4 is added unconditionally)
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p697
+//*********************************************************************** 
+.global fmt(mp_sub697_p4_asm)
+fmt(mp_sub697_p4_asm):
+
+  SUB697_PX  fmt(p697x4)            // the macro expands to the whole body, including register save/restore
+  ret
+
+
+#ifdef _MULX_
+
+/////////////////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication
+// Inputs:  memory pointers M0 and M1
+// Outputs: memory pointer C
+// Temps:   stack space for two 64-bit values (case w/o _ADX_), regs T0:T7
+///////////////////////////////////////////////////////////////////////////
+#ifdef _ADX_
+
+.macro MUL384_SCHOOL M0, M1, C, S, T0, T1, T2, T3, T4, T5, T6, T7   // C[0..11] <- M0[0..5] x M1[0..5]; S is not referenced in this _ADX_ variant
+    mov    rdx, \M0            // row 0: rdx is the implicit multiplicand of mulx
+    mulx   \T0, \T1, \M1    
+    mulx   \T2, \T3, 8\M1
+    mov    \C, \T1             // C0_final 
+    xor    rax, rax            // rax <- 0; also clears CF and OF before the adox chain
+    mulx   \T4, \T5, 16\M1 
+    adox   \T0, \T3               
+    adox   \T2, \T5     
+    mulx   \T1, \T3, 24\M1
+    adox   \T4, \T3         
+    mulx   \T5, \T6, 32\M1 
+    adox   \T1, \T6        
+    mulx   \T3, \T7, 40\M1    
+    adox   \T5, \T7       
+    adox   \T3, rax            // fold the last carry into the top word of the row
+    
+    mov    rdx, 8\M0           // row 1: M0[1] x M1[0..5]
+    mulx   \T6, \T7, \M1 
+    xor    rax, rax            // restart both chains: adcx uses CF, adox uses OF, mulx touches neither
+    adcx   \T0, \T7 
+    mov    8\C, \T0            // C1_final 
+    adcx   \T2, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T2, \T7 
+    adcx   \T4, \T6        
+    mulx   \T0, \T6, 16\M1 
+    adox   \T4, \T6  
+    adcx   \T0, \T1     
+    mulx   \T1, \T7, 24\M1   
+    adcx   \T1, \T5  
+    mulx   \T5, \T6, 32\M1     
+    adcx   \T3, \T5   
+    mulx   \T5, rdx, 40\M1     // low half lands in rdx: this is the last product of the row
+    adcx   \T5, rax 
+        
+    adox   \T0, \T7            // finish the OF chain with the low halves still pending
+    adox   \T1, \T6  
+    adox   \T3, rdx  
+    adox   \T5, rax         
+    
+    mov    rdx, 16\M0          // row 2: M0[2] x M1[0..5]
+    mulx   \T6, \T7, \M1
+    xor    rax, rax 
+    adcx   \T2, \T7 
+    mov    16\C, \T2           // C2_final 
+    adcx   \T4, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T4, \T7 
+    adcx   \T0, \T6        
+    mulx   \T2, \T6, 16\M1
+    adox   \T0, \T6 
+    adcx   \T1, \T2     
+    mulx   \T2, \T7, 24\M1   
+    adcx   \T3, \T2  
+    mulx   \T2, \T6, 32\M1     
+    adcx   \T5, \T2   
+    mulx   \T2, rdx, 40\M1     
+    adcx   \T2, rax 
+         
+    adox   \T1, \T7  
+    adox   \T3, \T6  
+    adox   \T5, rdx 
+    adox   \T2, rax           
+    
+    mov    rdx, 24\M0          // row 3: M0[3] x M1[0..5]
+    mulx   \T6, \T7, \M1
+    xor    rax, rax 
+    adcx   \T4, \T7 
+    mov    24\C, \T4           // C3_final 
+    adcx   \T0, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T0, \T7
+    adcx   \T1, \T6        
+    mulx   \T4, \T6, 16\M1
+    adox   \T1, \T6  
+    adcx   \T3, \T4     
+    mulx   \T4, \T7, 24\M1   
+    adcx   \T5, \T4  
+    mulx   \T4, \T6, 32\M1     
+    adcx   \T2, \T4   
+    mulx   \T4, rdx, 40\M1     
+    adcx   \T4, rax
+        
+    adox   \T3, \T7  
+    adox   \T5, \T6  
+    adox   \T2, rdx  
+    adox   \T4, rax         
+    
+    mov    rdx, 32\M0          // row 4: M0[4] x M1[0..5]
+    mulx   \T6, \T7, \M1 
+    xor    rax, rax
+    adcx   \T0, \T7 
+    mov    32\C, \T0           // C4_final 
+    adcx   \T1, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T1, \T7 
+    adcx   \T3, \T6        
+    mulx   \T0, \T6, 16\M1 
+    adox   \T3, \T6 
+    adcx   \T5, \T0     
+    mulx   \T0, \T7, 24\M1   
+    adcx   \T2, \T0  
+    mulx   \T0, \T6, 32\M1     
+    adcx   \T4, \T0   
+    mulx   \T0, rdx, 40\M1     
+    adcx   \T0, rax 
+         
+    adox   \T5, \T7  
+    adox   \T2, \T6  
+    adox   \T4, rdx  
+    adox   \T0, rax           
+    
+    mov    rdx, 40\M0          // row 5: M0[5] x M1[0..5]
+    mulx   \T6, \T7, \M1 
+    xor    rax, rax
+    adcx   \T1, \T7 
+    mov    40\C, \T1           // C5_final 
+    adcx   \T3, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T3, \T7 
+    adcx   \T5, \T6        
+    mulx   \T1, \T6, 16\M1
+    adox   \T5, \T6 
+    adcx   \T2, \T1     
+    mulx   \T1, \T7, 24\M1   
+    adcx   \T4, \T1  
+    mulx   \T1, \T6, 32\M1     
+    adcx   \T0, \T1   
+    mulx   \T1, rdx, 40\M1     
+    adcx   \T1, rax 
+         
+    adox   \T2, \T7 
+    adox   \T4, \T6 
+    adox   \T0, rdx 
+    adox   \T1, rax 
+    mov    48\C, \T3           // write out the six remaining high words, C6..C11
+    mov    56\C, \T5 
+    mov    64\C, \T2 
+    mov    72\C, \T4
+    mov    80\C, \T0 
+    mov    88\C, \T1 
+.endm
+
+.macro MUL320_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7   // C[0..9] <- M0[0..4] x M1[0..4]; _ADX_ variant, no scratch argument
+    mov    rdx, \M0            // row 0: rdx is the implicit multiplicand of mulx
+    mulx   \T0, \T1, \M1    
+    mulx   \T2, \T3, 8\M1
+    mov    \C, \T1             // C0_final 
+    xor    rax, rax            // rax <- 0; also clears CF and OF before the adox chain
+    mulx   \T4, \T5, 16\M1 
+    adox   \T0, \T3               
+    adox   \T2, \T5     
+    mulx   \T1, \T3, 24\M1
+    adox   \T4, \T3         
+    mulx   \T5, \T6, 32\M1 
+    adox   \T1, \T6     
+    adox   \T5, rax            // fold the last carry into the top word of the row
+    
+    mov    rdx, 8\M0           // row 1: M0[1] x M1[0..4]
+    mulx   \T6, \T7, \M1 
+    xor    rax, rax            // restart both chains: adcx uses CF, adox uses OF, mulx touches neither
+    adcx   \T0, \T7 
+    mov    8\C, \T0            // C1_final 
+    adcx   \T2, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T2, \T7 
+    adcx   \T4, \T6        
+    mulx   \T0, \T6, 16\M1 
+    adox   \T4, \T6  
+    adcx   \T0, \T1     
+    mulx   \T1, \T7, 24\M1   
+    adcx   \T1, \T5 
+    adox   \T0, \T7   
+    mulx   \T5, \T6, 32\M1 
+    adcx   \T5, rax         
+    adox   \T1, \T6  
+    adox   \T5, rax         
+    
+    mov    rdx, 16\M0          // row 2: M0[2] x M1[0..4]
+    mulx   \T6, \T7, \M1
+    xor    rax, rax 
+    adcx   \T2, \T7 
+    mov    16\C, \T2           // C2_final 
+    adcx   \T4, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T4, \T7 
+    adcx   \T0, \T6        
+    mulx   \T2, \T6, 16\M1
+    adox   \T0, \T6 
+    adcx   \T1, \T2     
+    mulx   \T2, \T7, 24\M1   
+    adcx   \T5, \T2          
+    adox   \T1, \T7   
+    mulx   \T2, \T6, 32\M1   
+    adcx   \T2, rax 
+    adox   \T5, \T6 
+    adox   \T2, rax           
+    
+    mov    rdx, 24\M0          // row 3: M0[3] x M1[0..4]
+    mulx   \T6, \T7, \M1
+    xor    rax, rax 
+    adcx   \T4, \T7 
+    mov    24\C, \T4           // C3_final 
+    adcx   \T0, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T0, \T7
+    adcx   \T1, \T6        
+    mulx   \T4, \T6, 16\M1
+    adox   \T1, \T6  
+    adcx   \T5, \T4     
+    mulx   \T4, \T7, 24\M1   
+    adcx   \T2, \T4        
+    adox   \T5, \T7   
+    mulx   \T4, \T6, 32\M1   
+    adcx   \T4, rax 
+    adox   \T2, \T6  
+    adox   \T4, rax         
+    
+    mov    rdx, 32\M0          // row 4: M0[4] x M1[0..4]
+    mulx   \T6, \T7, \M1 
+    xor    rax, rax
+    adcx   \T0, \T7 
+    mov    32\C, \T0           // C4_final 
+    adcx   \T1, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T1, \T7 
+    adcx   \T5, \T6        
+    mulx   \T0, \T6, 16\M1 
+    adox   \T5, \T6 
+    adcx   \T2, \T0     
+    mulx   \T0, \T7, 24\M1   
+    adcx   \T4, \T0 
+    adox   \T2, \T7  
+    mulx   \T0, \T6, 32\M1   
+    adcx   \T0, rax           
+    adox   \T4, \T6 
+    adox   \T0, rax 
+
+    mov    40\C, \T1           // write out the five remaining high words, C5..C9
+    mov    48\C, \T5 
+    mov    56\C, \T2 
+    mov    64\C, \T4
+    mov    72\C, \T0
+.endm
+
+#else
+
+.macro MUL384_SCHOOL M0, M1, C, S, T0, T1, T2, T3, T4, T5, T6, T7   // C[0..11] <- M0[0..5] x M1[0..5]; S is a two-word scratch area (S and 8+S)
+    mov    rdx, \M0            // row 0: rdx is the implicit multiplicand of mulx
+    mulx   \T0, \T1, \M1    
+    mulx   \T2, \T3, 8\M1
+    mov    \C, \T1             // C0_final 
+    xor    rax, rax            // rax <- 0, used below as the zero addend of adc
+    mulx   \T4, \T5, 16\M1 
+    add    \T0, \T3               
+    adc    \T2, \T5     
+    mulx   \T1, \T3, 24\M1
+    adc    \T4, \T3         
+    mulx   \T5, \T6, 32\M1 
+    adc    \T1, \T6        
+    mulx   \T3, \T7, 40\M1    
+    adc    \T5, \T7       
+    adc    \T3, rax        
+    
+    mov    rdx, 8\M0           // row 1: M0[1] x M1[0..5]
+    mulx   \T6, \T7, \M1 
+    add    \T0, \T7            // first pass: one CF chain for the high halves (mulx and mov leave flags alone)
+    mov    8\C, \T0            // C1_final 
+    adc    \T2, \T6     
+    mulx   \T6, \T7, 8\M1
+    mov    \S, \T7             // store T7
+    adc    \T4, \T6        
+    mulx   \T0, \T6, 16\M1   
+    mov    8\S, \T6            // store T6 
+    adc    \T0, \T1     
+    mulx   \T1, \T7, 24\M1   
+    adc    \T1, \T5  
+    mulx   \T5, \T6, 32\M1     
+    adc    \T3, \T5   
+    mulx   \T5, rdx, 40\M1     // low half lands in rdx: this is the last product of the row
+    adc    \T5, rax 
+        
+    xor    rax, rax
+    add    \T2, \S             // second pass: add the pending low halves (two from scratch, the rest from registers)
+    adc    \T4, 8\S  
+    adc    \T0, \T7  
+    adc    \T1, \T6  
+    adc    \T3, rdx  
+    adc    \T5, rax         
+    
+    mov    rdx, 16\M0          // row 2: M0[2] x M1[0..5]
+    mulx   \T6, \T7, \M1 
+    add    \T2, \T7 
+    mov    16\C, \T2           // C2_final 
+    adc    \T4, \T6     
+    mulx   \T6, \T7, 8\M1
+    mov    \S, \T7             // store T7
+    adc    \T0, \T6        
+    mulx   \T2, \T6, 16\M1   
+    mov    8\S, \T6            // store T6 
+    adc    \T1, \T2     
+    mulx   \T2, \T7, 24\M1   
+    adc    \T3, \T2  
+    mulx   \T2, \T6, 32\M1     
+    adc    \T5, \T2   
+    mulx   \T2, rdx, 40\M1     
+    adc    \T2, rax 
+        
+    xor    rax, rax
+    add    \T4, \S 
+    adc    \T0, 8\S  
+    adc    \T1, \T7  
+    adc    \T3, \T6  
+    adc    \T5, rdx 
+    adc    \T2, rax           
+    
+    mov    rdx, 24\M0          // row 3: M0[3] x M1[0..5]
+    mulx   \T6, \T7, \M1 
+    add    \T4, \T7 
+    mov    24\C, \T4           // C3_final 
+    adc    \T0, \T6     
+    mulx   \T6, \T7, 8\M1
+    mov    \S, \T7             // store T7
+    adc    \T1, \T6        
+    mulx   \T4, \T6, 16\M1   
+    mov    8\S, \T6            // store T6 
+    adc    \T3, \T4     
+    mulx   \T4, \T7, 24\M1   
+    adc    \T5, \T4  
+    mulx   \T4, \T6, 32\M1     
+    adc    \T2, \T4   
+    mulx   \T4, rdx, 40\M1     
+    adc    \T4, rax
+        
+    xor    rax, rax
+    add    \T0, \S 
+    adc    \T1, 8\S  
+    adc    \T3, \T7  
+    adc    \T5, \T6  
+    adc    \T2, rdx  
+    adc    \T4, rax         
+    
+    mov    rdx, 32\M0          // row 4: M0[4] x M1[0..5]
+    mulx   \T6, \T7, \M1 
+    add    \T0, \T7 
+    mov    32\C, \T0           // C4_final 
+    adc    \T1, \T6     
+    mulx   \T6, \T7, 8\M1
+    mov    \S, \T7             // store T7
+    adc    \T3, \T6        
+    mulx   \T0, \T6, 16\M1   
+    mov    8\S, \T6            // store T6 
+    adc    \T5, \T0     
+    mulx   \T0, \T7, 24\M1   
+    adc    \T2, \T0  
+    mulx   \T0, \T6, 32\M1     
+    adc    \T4, \T0   
+    mulx   \T0, rdx, 40\M1     
+    adc    \T0, rax 
+        
+    xor    rax, rax
+    add    \T1, \S 
+    adc    \T3, 8\S  
+    adc    \T5, \T7  
+    adc    \T2, \T6  
+    adc    \T4, rdx  
+    adc    \T0, rax           
+    
+    mov    rdx, 40\M0          // row 5: M0[5] x M1[0..5]
+    mulx   \T6, \T7, \M1 
+    add    \T1, \T7 
+    mov    40\C, \T1           // C5_final 
+    adc    \T3, \T6     
+    mulx   \T6, \T7, 8\M1
+    mov    \S, \T7             // store T7
+    adc    \T5, \T6        
+    mulx   \T1, \T6, 16\M1   
+    mov    8\S, \T6            // store T6 
+    adc    \T2, \T1     
+    mulx   \T1, \T7, 24\M1   
+    adc    \T4, \T1  
+    mulx   \T1, \T6, 32\M1     
+    adc    \T0, \T1   
+    mulx   \T1, rdx, 40\M1     
+    adc    \T1, rax            // rax is still 0 from the xor that closed row 4
+        
+    add    \T3, \S 
+    adc    \T5, 8\S  
+    adc    \T2, \T7 
+    adc    \T4, \T6 
+    adc    \T0, rdx 
+    adc    \T1, 0 
+    mov    48\C, \T3           // write out the six remaining high words, C6..C11
+    mov    56\C, \T5 
+    mov    64\C, \T2 
+    mov    72\C, \T4
+    mov    80\C, \T0 
+    mov    88\C, \T1 
+.endm
+
+.macro MUL320_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7   // C[0..9] <- M0[0..4] x M1[0..4]; uses rax as an extra temporary, no memory scratch
+    mov    rdx, \M0            // row 0: rdx is the implicit multiplicand of mulx
+    mulx   \T0, \T1, \M1    
+    mulx   \T2, \T3, 8\M1
+    mov    \C, \T1             // C0_final 
+    mulx   \T4, \T5, 16\M1 
+    add    \T0, \T3               
+    adc    \T2, \T5     
+    mulx   \T1, \T3, 24\M1
+    adc    \T3, \T4            // here the running word accumulates into T3 (the fresh low half), not T4
+    mulx   \T5, \T6, 32\M1 
+    adc    \T1, \T6     
+    adc    \T5, 0        
+    
+    mov    rdx, 8\M0           // row 1: M0[1] x M1[0..4]
+    mulx   \T6, \T7, \M1 
+    add    \T0, \T7            // first pass: one CF chain for the high halves (mulx and mov leave flags alone)
+    mov    8\C, \T0            // C1_final 
+    adc    \T2, \T6     
+    mulx   \T6, \T7, 8\M1
+    adc    \T3, \T6        
+    mulx   \T0, \T4, 16\M1
+    adc    \T0, \T1     
+    mulx   \T1, \T6, 24\M1   
+    adc    \T5, \T1  
+    mulx   \T1, rax, 32\M1     // rax receives the low half of the last product of the row
+    adc    \T1, 0 
+        
+    add    \T2, \T7            // second pass: add the pending low halves held in T7, T4, T6, rax
+    adc    \T3, \T4  
+    adc    \T0, \T6  
+    adc    \T5, rax  
+    adc    \T1, 0         
+    
+    mov    rdx, 16\M0          // row 2: M0[2] x M1[0..4]
+    mulx   \T4, \T6, \M1 
+    add    \T2, \T6 
+    mov    16\C, \T2           // C2_final 
+    adc    \T3, \T4     
+    mulx   \T6, \T7, 8\M1
+    adc    \T0, \T6        
+    mulx   \T2, \T4, 16\M1 
+    adc    \T2, \T5     
+    mulx   \T5, \T6, 24\M1   
+    adc    \T1, \T5 
+    mulx   \T5, rax, 32\M1     
+    adc    \T5, 0 
+        
+    add    \T3, \T7
+    adc    \T0, \T4  
+    adc    \T2, \T6  
+    adc    \T1, rax 
+    adc    \T5, 0          
+    
+    mov    rdx, 24\M0          // row 3: M0[3] x M1[0..4]
+    mulx   \T4, \T6, \M1 
+    add    \T3, \T6 
+    mov    24\C, \T3           // C3_final 
+    adc    \T0, \T4     
+    mulx   \T6, \T7, 8\M1
+    adc    \T2, \T6        
+    mulx   \T3, \T4, 16\M1 
+    adc    \T1, \T3     
+    mulx   \T3, \T6, 24\M1   
+    adc    \T3, \T5 
+    mulx   \T5, rax, 32\M1     
+    adc    \T5, 0
+        
+    add    \T0, \T7
+    adc    \T2, \T4  
+    adc    \T1, \T6  
+    adc    \T3, rax 
+    adc    \T5, 0       
+    
+    mov    rdx, 32\M0          // row 4: M0[4] x M1[0..4]
+    mulx   \T4, \T6, \M1 
+    add    \T0, \T6 
+    mov    32\C, \T0           // C4_final 
+    adc    \T2, \T4     
+    mulx   \T6, \T7, 8\M1
+    adc    \T1, \T6        
+    mulx   \T0, \T4, 16\M1 
+    adc    \T3, \T0     
+    mulx   \T0, \T6, 24\M1   
+    adc    \T0, \T5 
+    mulx   \T5, rax, 32\M1     
+    adc    \T5, 0
+        
+    add    \T2, \T7 
+    adc    \T1, \T4  
+    adc    \T3, \T6 
+    adc    \T0, rax 
+    adc    \T5, 0 
+    mov    40\C, \T2           // write out the five remaining high words, C5..C9
+    mov    48\C, \T1 
+    mov    56\C, \T3 
+    mov    64\C, \T0
+    mov    72\C, \T5 
+.endm
+
+#endif
+
+
+//*****************************************************************************
+//  697-bit multiplication using Karatsuba (one level), schoolbook (two levels)
+//***************************************************************************** 
+.global fmt(mul697_asm)
+fmt(mul697_asm):    
+    push   r12
+    push   r13 
+    push   r14 
+    push   r15
+    mov    rcx, reg_p3         // keep the output pointer in rcx: rdx (reg_p3) is needed as the mulx multiplicand
+
+    // [rsp] <- AH + AL, rax <- mask
+    xor    rax, rax
+    mov    r8, [reg_p1]        // AL = words 0-5 of a
+    mov    r9, [reg_p1+8]
+    mov    r10, [reg_p1+16]
+    mov    r11, [reg_p1+24] 
+    mov    r12, [reg_p1+32] 
+    mov    r13, [reg_p1+40] 
+    push   rbx 
+    push   rbp
+    sub    rsp, 224            // scratch: [rsp] AH+AL, [rsp+48] BH+BL, [rsp+96] cross-term words, [rsp+128] product or macro scratch
+    add    r8, [reg_p1+48]     // AH = words 6-10 of a (five words only)
+    adc    r9, [reg_p1+56]
+    adc    r10, [reg_p1+64]
+    adc    r11, [reg_p1+72]
+    adc    r12, [reg_p1+80]
+    adc    r13, 0              // there is no sixth word of AH; only the carry is added
+    sbb    rax, 0              // rax <- 0 - CF: all ones if AH + AL carried out of six words
+    mov    [rsp], r8
+    mov    [rsp+8], r9
+    mov    [rsp+16], r10
+    mov    [rsp+24], r11
+    mov    [rsp+32], r12
+    mov    [rsp+40], r13
+
+    // [rsp+48] <- BH + BL, rdx <- mask
+    xor    rdx, rdx
+    mov    r8, [reg_p2]
+    mov    r9, [reg_p2+8]
+    mov    rbx, [reg_p2+16]
+    mov    rbp, [reg_p2+24] 
+    mov    r14, [reg_p2+32]     
+    mov    r15, [reg_p2+40]     
+    add    r8, [reg_p2+48]
+    adc    r9, [reg_p2+56]
+    adc    rbx, [reg_p2+64]
+    adc    rbp, [reg_p2+72]
+    adc    r14, [reg_p2+80]
+    adc    r15, 0
+    sbb    rdx, 0              // rdx <- 0 - CF: all ones if BH + BL carried out of six words
+    mov    [rsp+48], r8
+    mov    [rsp+56], r9
+    mov    [rsp+64], rbx
+    mov    [rsp+72], rbp
+    mov    [rsp+80], r14     
+    mov    [rsp+88], r15     
+    
+    // [rcx], [rcx+8], rbx, rbp, r14, r15 <- (BH + BL) masked with the carry mask of AH + AL (rax)
+    and    r8, rax
+    and    r9, rax
+    and    rbx, rax
+    and    rbp, rax
+    and    r14, rax     
+    and    r15, rax     
+    mov    [rcx], r8           // only the two low words are parked in the output buffer; r8 and r9 are reused next
+    mov    [rcx+8], r9
+
+    // r8-r13 <- masked (AH + AL)
+    mov    r8, [rsp]           // reload the two low words; r10-r13 still hold words 2-5 of AH + AL
+    mov    r9, [rsp+8]
+    and    r8, rdx
+    and    r9, rdx
+    and    r10, rdx
+    and    r11, rdx
+    and    r12, rdx
+    and    r13, rdx
+
+    // [rsp+96], r12, r13 <- masked (AH + AL) + masked (BH + BL)
+    mov    rax, [rcx]
+    mov    rdx, [rcx+8]
+    add    r8, rax
+    adc    r9, rdx
+    adc    r10, rbx
+    adc    r11, rbp
+    adc    r12, r14         
+    adc    r13, r15         
+    mov    [rsp+96], r8        // low four words are spilled; r12 and r13 are not among the macro temporaries below
+    mov    [rsp+104], r9
+    mov    [rsp+112], r10
+    mov    [rsp+120], r11
+
+    // [rcx] <- AL x BL
+    MUL384_SCHOOL  [reg_p1], [reg_p2], [rcx], [rsp+128], r8, r9, r10, r11, rbx, rbp, r14, r15     // Result C0-C5 
+
+    // [rsp+128] <- (AH+AL) x (BH+BL), low part ([rcx+96] is passed as the macro scratch argument S)
+    MUL384_SCHOOL  [rsp], [rsp+48], [rsp+128], [rcx+96], r8, r9, r10, r11, rbx, rbp, r14, r15
+
+    // [rsp] <- AH x BH 
+    MUL320_SCHOOL  [reg_p1+48], [reg_p2+48], [rsp], r8, r9, r10, r11, rbx, rbp, r14, r15
+    
+    // r8-r13 <- (AH+AL) x (BH+BL), final step
+    mov    r8, [rsp+96]
+    mov    r9, [rsp+104]
+    mov    r10, [rsp+112]
+    mov    r11, [rsp+120]
+    mov    rax, [rsp+176]      // [rsp+176..223] are words 6-11 of the product stored at [rsp+128]
+    add    r8, rax
+    mov    rax, [rsp+184]
+    adc    r9, rax
+    mov    rax, [rsp+192]
+    adc    r10, rax
+    mov    rax, [rsp+200]
+    adc    r11, rax
+    mov    rax, [rsp+208]
+    adc    r12, rax
+    mov    rax, [rsp+216]
+    adc    r13, rax
+    
+    // rdi,rdx,rbx,rbp,r14,r15,r8-r13 <- (AH+AL) x (BH+BL) - ALxBL
+    mov    rdi, [rsp+128]      // reg_p1 (rdi) is overwritten; a is not read again
+    sub    rdi, [rcx]
+    mov    rdx, [rsp+136]
+    sbb    rdx, [rcx+8]
+    mov    rbx, [rsp+144]
+    sbb    rbx, [rcx+16]
+    mov    rbp, [rsp+152]
+    sbb    rbp, [rcx+24]
+    mov    r14, [rsp+160]     
+    sbb    r14, [rcx+32]   
+    mov    r15, [rsp+168]     
+    sbb    r15, [rcx+40]     
+    sbb    r8, [rcx+48]
+    sbb    r9, [rcx+56]
+    sbb    r10, [rcx+64]
+    sbb    r11, [rcx+72]
+    sbb    r12, [rcx+80]
+    sbb    r13, [rcx+88]
+    
+    // rdi,rdx,rbx,rbp,r14,r15,r8-r13 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
+    sub    rdi, [rsp]
+    sbb    rdx, [rsp+8]
+    sbb    rbx, [rsp+16]
+    sbb    rbp, [rsp+24]
+    sbb    r14, [rsp+32]     
+    sbb    r15, [rsp+40]   
+    sbb    r8, [rsp+48]
+    sbb    r9, [rsp+56]
+    sbb    r10, [rsp+64]
+    sbb    r11, [rsp+72]
+    sbb    r12, 0              // AH x BH has only ten words; the last two steps just propagate the borrow
+    sbb    r13, 0
+    
+    mov    rax, [rcx+48]
+    add    rax, rdi
+    mov    [rcx+48], rax    // Result C6-C11
+    mov    rax, [rcx+56]
+    adc    rax, rdx
+    mov    [rcx+56], rax 
+    mov    rax, [rcx+64]
+    adc    rax, rbx
+    mov    [rcx+64], rax 
+    mov    rax, [rcx+72]
+    adc    rax, rbp
+    mov    [rcx+72], rax 
+    mov    rax, [rcx+80]
+    adc    rax, r14           
+    mov    [rcx+80], rax 
+    mov    rax, [rcx+88]
+    adc    rax, r15             
+    mov    [rcx+88], rax
+    mov    rax, [rsp]
+    adc    r8, rax 
+    mov    [rcx+96], r8    // Result C12-C17
+    mov    rax, [rsp+8]
+    adc    r9, rax
+    mov    [rcx+104], r9 
+    mov    rax, [rsp+16]
+    adc    r10, rax
+    mov    [rcx+112], r10 
+    mov    rax, [rsp+24]
+    adc    r11, rax
+    mov    [rcx+120], r11 
+    mov    rax, [rsp+32]
+    adc    r12, rax
+    mov    [rcx+128], r12 
+    mov    rax, [rsp+40]
+    adc    r13, rax
+    mov    [rcx+136], r13
+    mov    r8, [rsp+48]        // top four words of AH x BH; only the running carry is added to them
+    mov    r9, [rsp+56]
+    mov    r10, [rsp+64]
+    mov    r11, [rsp+72]
+    adc    r8, 0
+    adc    r9, 0
+    adc    r10, 0
+    adc    r11, 0
+    add    rsp, 224            // release the scratch area; r8-r11 already hold the final top words
+    mov    [rcx+144], r8       // Result C18-C21
+    mov    [rcx+152], r9 
+    mov    [rcx+160], r10 
+    mov    [rcx+168], r11
+     
+    pop    rbp  
+    pop    rbx
+    pop    r15
+    pop    r14
+    pop    r13
+    pop    r12
+    ret
+
+#else
+
+//***********************************************************************
+//  Integer multiplication (placeholder: no implementation in this branch)
+//  The stub below only returns; the error directive after it is expected to stop the build
+//  Intended operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
+//  NOTE: a=c or b=c are not allowed
+//***********************************************************************
+.global fmt(mul697_asm)
+fmt(mul697_asm):
+
+  ret
+
+# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE"
+
+#endif
+
+#ifdef _MULX_
+
+///////////////////////////////////////////////////////////////// MACRO
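+// Schoolbook integer multiplication: MUL128x384_SCHOOL (2 x 6 words) and MUL64x384_SCHOOL (1 x 6 words)
+// Inputs:  memory operands M0 and M1; MUL128x384_SCHOOL also adds register TT at word 6 of its result
+// Outputs: regs T0:T7 (MUL128x384_SCHOOL) or T0:T6 (MUL64x384_SCHOOL)
+// Temps:   rdx, reg T8 (MUL128x384_SCHOOL) or T7 (MUL64x384_SCHOOL); most variants also clobber rax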
+/////////////////////////////////////////////////////////////////
+
+#ifdef _ADX_
+.macro MUL128x384_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7, T8, TT
+    mov    rdx, \M0            // rdx <- M0[0], the implicit multiplicand of mulx
+    mulx   \T1, \T0, \M1       // T0 <- C0_final    
+    mulx   \T2, \T4, 8\M1      // T2:T4 <- M0[0] x M1[1]
+    xor    rax, rax            // rax <- 0; also clears CF and OF before the two carry chains start
+    mulx   \T3, \T5, 16\M1     // T3:T5 <- M0[0] x M1[2]
+    adox   \T1, \T4            // first row: each low half is added to the previous high half (OF chain)
+    adox   \T2, \T5     
+    mulx   \T4, \T7, 24\M1     // T4:T7 <- M0[0] x M1[3]
+    adox   \T3, \T7         
+    mulx   \T5, \T6, 32\M1     // T5:T6 <- M0[0] x M1[4]
+    adox   \T4, \T6         
+    mulx   \T7, \T8, 40\M1     // T7:T8 <- M0[0] x M1[5]
+    adox   \T5, \T8         
+    adox   \T7, \TT            // T7 temporarily holds word 6; the carry-in word TT is added there
+    // Second row (M0[1]): high halves go through the CF chain (adcx), low halves through the OF chain (adox)
+    mov    rdx, 8\M0 
+    mulx   \T8, \T6, \M1       // T8:T6 <- M0[1] x M1[0]
+    adcx   \T1, \T6            // T1 <- C1_final 
+    adcx   \T2, \T8    
+    mulx   \T6, \T8, 8\M1      // T6:T8 <- M0[1] x M1[1]
+    adox   \T2, \T8  
+    adcx   \T3, \T6        
+    mulx   \T6, \T8, 16\M1     // T6:T8 <- M0[1] x M1[2]
+    adox   \T3, \T8
+    adcx   \T4, \T6     
+    mulx   \T6, \T8, 24\M1     // T6:T8 <- M0[1] x M1[3]
+    adox   \T4, \T8     
+    adcx   \T5, \T6  
+    mulx   \T6, \T8, 32\M1     // T6:T8 <- M0[1] x M1[4]
+    adox   \T5, \T8 
+    adcx   \T6, \T7            // fold word 6 of the first row (including TT) into T6
+    mulx   \T7, \T8, 40\M1     // T7:T8 <- M0[1] x M1[5]
+    adcx   \T7, rax            // rax = 0: absorbs the last CF carry
+    adox   \T6, \T8          
+    adox   \T7, rax            // rax = 0: absorbs the last OF carry
+.endm
+
+.macro MUL64x384_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7
+    mov    rdx, \M0            // rdx <- single 64-bit multiplicand, implicit operand of mulx
+    mulx   \T1, \T0, \M1       // T0 <- C0_final    
+    mulx   \T2, \T4, 8\M1      // T2:T4 <- M0 x M1[1]
+    xor    rax, rax            // rax <- 0; also clears OF before the adox chain
+    mulx   \T3, \T5, 16\M1     // T3:T5 <- M0 x M1[2]
+    adox   \T1, \T4            // each low half is added to the previous high half
+    adox   \T2, \T5     
+    mulx   \T4, \T7, 24\M1     // T4:T7 <- M0 x M1[3]
+    adox   \T3, \T7         
+    mulx   \T5, \T6, 32\M1     // T5:T6 <- M0 x M1[4]
+    adox   \T4, \T6          
+    mulx   \T6, \T7, 40\M1     // T6:T7 <- M0 x M1[5]; T7 is only a temporary
+    adox   \T5, \T7                    
+    adox   \T6, rax            // rax = 0: absorbs the last carry into the top word T6
+.endm
+
+#else
+
+.macro MUL128x384_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7, T8, TT
+    mov    rdx, \M0            // rdx <- M0[0], the implicit multiplicand of mulx
+    mulx   \T1, \T0, \M1       // T0 <- C0_final 
+    mulx   \T2, \T3, 8\M1      // T2:T3 <- M0[0] x M1[1]
+    add    \T1, \T3               
+    adc    \T2, 0  
+
+    mov    rdx, 8\M0   
+    xor    \T5, \T5            // T5 collects the carries into word 3
+    mulx   \T3, \T4, \M1       // T3:T4 <- M0[1] x M1[0]
+    add    \T1, \T4            // T1 <- C1_final
+    adc    \T2, \T3  
+    adc    \T5, 0  
+      
+    xor    \T6, \T6            // T6 collects the carries into word 4
+    mulx   \T3, \T4, 8\M1      // T3:T4 <- M0[1] x M1[1]
+    add    \T2, \T4  
+    adc    \T3, \T5           
+    adc    \T6, 0 
+        
+    mov    rdx, \M0         
+    mulx   \T4, \T5, 16\M1     // T4:T5 <- M0[0] x M1[2]
+    add    \T2, \T5            // T2 <- word 2 of the result
+    adc    \T3, \T4           
+    adc    \T6, 0  
+        
+    xor    \T7, \T7            // T7 collects the carries into word 5
+    mulx   \T4, \T5, 24\M1     // T4:T5 <- M0[0] x M1[3]
+    add    \T3, \T5  
+    adc    \T4, \T6           
+    adc    \T7, 0  
+
+    mov    rdx, 8\M0 
+    mulx   \T5, \T6, 16\M1     // T5:T6 <- M0[1] x M1[2]
+    add    \T3, \T6            // T3 <- word 3 of the result
+    adc    \T4, \T5  
+    adc    \T7, 0    
+        
+    xor    \T6, \T6            // T6 collects the carries into word 6
+    mulx   \T5, \T8, 24\M1     // T5:T8 <- M0[1] x M1[3]
+    add    \T4, \T8  
+    adc    \T5, \T7           
+    adc    \T6, 0  
+        
+    mov    rdx, \M0        
+    mulx   \T7, \T8, 32\M1     // T7:T8 <- M0[0] x M1[4]
+    add    \T4, \T8            // T4 <- word 4 of the result
+    adc    \T5, \T7           
+    adc    \T6, 0      
+        
+    xor    \T7, \T7            // T7 collects the carries into word 7
+    mulx   \T8, rax, 40\M1     // T8:rax <- M0[0] x M1[5]
+    add    \T5, rax  
+    adc    \T6, \T8          
+    adc    \T7, 0  
+        
+    mov    rdx, 8\M0        
+    mulx   \T8, rax, 32\M1     // T8:rax <- M0[1] x M1[4]
+    add    \T5, rax            // T5 <- word 5 of the result
+    adc    \T6, \T8         
+    adc    \T7, 0   
+        
+    mov    rdx, 8\M0           // NOTE(review): rdx was loaded with the same operand just above; this reload looks redundant
+    mulx   \T8, rax, 40\M1     // T8:rax <- M0[1] x M1[5]
+    add    \T6, rax  
+    adc    \T7, \T8 
+
+    add    \T6, \TT            // add the carry-in word TT at word 6
+    adc    \T7, 0 
+.endm
+
+.macro MUL64x384_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6, T7
+    mov    rdx, \M0            // rdx <- single 64-bit multiplicand, implicit operand of mulx
+    mulx   \T1, \T0, \M1       // T0 <- C0_final 
+    mulx   \T2, \T3, 8\M1      // T2:T3 <- M0 x M1[1]
+    add    \T1, \T3               
+    adc    \T2, 0  
+  
+    mulx   \T3, \T4, 16\M1     // T3:T4 <- M0 x M1[2]
+    add    \T2, \T4  
+    adc    \T3, 0  
+      
+    mulx   \T4, \T5, 24\M1     // T4:T5 <- M0 x M1[3]
+    add    \T3, \T5          
+    adc    \T4, 0 
+                
+    mulx   \T5, \T6, 32\M1     // T5:T6 <- M0 x M1[4]
+    add    \T4, \T6           
+    adc    \T5, 0 
+                
+    mulx   \T6, \T7, 40\M1     // T6:T7 <- M0 x M1[5]; T7 is only a temporary
+    add    \T5, \T7           
+    adc    \T6, 0              // T6 <- top (seventh) word of the result
+.endm  
+#endif
+
+  
+//**************************************************************************************
+//  Montgomery reduction
+//  Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015  
+//  Operation: c [reg_p2] = a [reg_p1]
+//  NOTE: a=c is not allowed; words of a at byte offsets 40-160 are overwritten with partial sums
+//************************************************************************************** 
+.global fmt(rdc697_asm)
+fmt(rdc697_asm):
+    push   r12
+    push   r13 
+    push   r14 
+    push   r15 
+    push   rbx 	
+    xor    rcx, rcx            // rcx is the TT carry-in of the multiplication macro; zero for the first round
+
+    // a[0-1] x p697p1_nz --> result: r8:r15 
+    MUL128x384_SCHOOL [reg_p1], [rip+fmt(p697p1)+40], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rcx     
+	
+    xor    rcx, rcx
+    add    r8, [reg_p1+40]     // accumulate the product into a, starting at word 5
+    adc    r9, [reg_p1+48]  
+    adc    r10, [reg_p1+56]   
+    adc    r11, [reg_p1+64]   
+    adc    r12, [reg_p1+72]   
+    adc    r13, [reg_p1+80]   
+    adc    r14, [reg_p1+88]   
+    adc    r15, [reg_p1+96]
+	adc    rcx, 0             // rcx <- carry out, handed to the next round as TT
+    mov    [reg_p1+40], r8  
+    mov    [reg_p1+48], r9  
+    mov    [reg_p1+56], r10  
+    mov    [reg_p1+64], r11  
+    mov    [reg_p1+72], r12  
+    mov    [reg_p1+80], r13  
+    mov    [reg_p1+88], r14
+    mov    [reg_p1+96], r15 
+
+    // a[2-3] x p697p1_nz --> result: r8:r15
+    MUL128x384_SCHOOL [reg_p1+16], [rip+fmt(p697p1)+40], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rcx 
+
+    xor    rcx, rcx
+    add    r8, [reg_p1+56]     // accumulate starting at word 7
+    adc    r9, [reg_p1+64]  
+    adc    r10, [reg_p1+72]   
+    adc    r11, [reg_p1+80]  
+    adc    r12, [reg_p1+88]   
+    adc    r13, [reg_p1+96]   
+    adc    r14, [reg_p1+104]    
+    adc    r15, [reg_p1+112]
+    adc    rcx, 0              // rcx <- carry out, handed to the next round as TT
+    mov    [reg_p1+56], r8  
+    mov    [reg_p1+64], r9  
+    mov    [reg_p1+72], r10  
+    mov    [reg_p1+80], r11  
+    mov    [reg_p1+88], r12  
+    mov    [reg_p1+96], r13
+    mov    [reg_p1+104], r14 
+    mov    [reg_p1+112], r15 
+
+    // a[4-5] x p697p1_nz --> result: r8:r15
+    MUL128x384_SCHOOL [reg_p1+32], [rip+fmt(p697p1)+40], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rcx 
+
+    xor    rcx, rcx
+    add    r8, [reg_p1+72]     // accumulate starting at word 9
+    adc    r9, [reg_p1+80]  
+    adc    r10, [reg_p1+88]   
+    adc    r11, [reg_p1+96]  
+    adc    r12, [reg_p1+104]   
+    adc    r13, [reg_p1+112]   
+    adc    r14, [reg_p1+120]    
+    adc    r15, [reg_p1+128] 
+    adc    rcx, 0              // rcx <- carry out, handed to the next round as TT
+    mov    [reg_p1+72], r8  
+    mov    [reg_p1+80], r9  
+    mov    [reg_p1+88], r10  
+    mov    [reg_p1+96], r11
+    mov    [reg_p1+104], r12 
+    mov    [reg_p1+112], r13 
+    mov    [reg_p1+120], r14 
+    mov    [reg_p1+128], r15 
+
+    // a[6-7] x p697p1_nz --> result: r8:r15
+    MUL128x384_SCHOOL [reg_p1+48], [rip+fmt(p697p1)+40], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rcx 
+
+    xor    rcx, rcx 
+    add    r8, [reg_p1+88]     // accumulate starting at word 11, the first word of the output
+    adc    r9, [reg_p1+96]   
+    adc    r10, [reg_p1+104]  
+    adc    r11, [reg_p1+112]   
+    adc    r12, [reg_p1+120]   
+    adc    r13, [reg_p1+128]
+    adc    r14, [reg_p1+136]
+    adc    r15, [reg_p1+144]
+    adc    rcx, 0              // rcx <- carry out, handed to the next round as TT
+    mov    [reg_p2], r8        // C0_final
+    mov    [reg_p2+8], r9      // C1_final
+    mov    [reg_p1+104], r10  
+    mov    [reg_p1+112], r11
+    mov    [reg_p1+120], r12 
+    mov    [reg_p1+128], r13 
+    mov    [reg_p1+136], r14 
+    mov    [reg_p1+144], r15 
+
+    // a[8-9] x p697p1_nz --> result: r8:r15
+    MUL128x384_SCHOOL [reg_p1+64], [rip+fmt(p697p1)+40], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rcx 
+
+    xor    rcx, rcx 
+    add    r8, [reg_p1+104]    // accumulate starting at word 13
+    adc    r9, [reg_p1+112]   
+    adc    r10, [reg_p1+120]   
+    adc    r11, [reg_p1+128]
+    adc    r12, [reg_p1+136]
+    adc    r13, [reg_p1+144]
+    adc    r14, [reg_p1+152]
+    adc    r15, [reg_p1+160]
+    adc    rcx, [reg_p1+168]    // rcx <- top input word (word 21) plus the carry out
+    mov    [reg_p2+16], r8      // C2_final
+    mov    [reg_p2+24], r9      // C3_final
+    mov    [reg_p1+120], r10  
+    mov    [reg_p1+128], r11   
+    mov    [reg_p1+136], r12  
+    mov    [reg_p1+144], r13  
+    mov    [reg_p1+152], r14  
+    mov    [reg_p1+160], r15
+
+    // a[10] x p697p1_nz --> result: r8:r14 (r15 is only a temporary of the macro)
+    MUL64x384_SCHOOL [reg_p1+80], [rip+fmt(p697p1)+40], r8, r9, r10, r11, r12, r13, r14, r15
+    
+    // Final result C4:C10
+    add    r8, [reg_p1+120]  
+    adc    r9, [reg_p1+128]  
+    adc    r10, [reg_p1+136]   
+    adc    r11, [reg_p1+144]  
+    adc    r12, [reg_p1+152]  
+    adc    r13, [reg_p1+160]   
+    adc    r14, rcx            // rcx holds word 21 of the running sum computed above
+    mov    [reg_p2+32], r8
+    mov    [reg_p2+40], r9  
+    mov    [reg_p2+48], r10   
+    mov    [reg_p2+56], r11  
+    mov    [reg_p2+64], r12  
+    mov    [reg_p2+72], r13  
+    mov    [reg_p2+80], r14
+	
+    pop    rbx
+    pop    r15
+    pop    r14
+    pop    r13
+    pop    r12
+    ret
+
+  #else
+  
+//***********************************************************************
+//  Montgomery reduction (placeholder: no implementation in this branch)
+//  The stub below only returns; the error directive after it is expected to stop the build
+//  Intended operation: c [reg_p2] = a [reg_p1]
+//  NOTE: a=c is not allowed
+//*********************************************************************** 
+.global fmt(rdc697_asm)
+fmt(rdc697_asm):
+
+  ret
+
+# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE"
+
+  #endif
+
+//***********************************************************************
+//  Multiprecision addition of two 697-bit operands (11 64-bit words each)
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
+//*********************************************************************** 
+.global fmt(mp_add697_asm)
+fmt(mp_add697_asm):  
+  // One load/add/store per word; mov leaves the flags untouched, so CF carries across words
+  mov    r8, [reg_p1]
+  add    r8, [reg_p2]
+  mov    [reg_p3], r8
+  mov    r9, [reg_p1+8]
+  adc    r9, [reg_p2+8]
+  mov    [reg_p3+8], r9
+  mov    r8, [reg_p1+16]
+  adc    r8, [reg_p2+16]
+  mov    [reg_p3+16], r8
+  mov    r9, [reg_p1+24]
+  adc    r9, [reg_p2+24]
+  mov    [reg_p3+24], r9
+  mov    r8, [reg_p1+32]
+  adc    r8, [reg_p2+32]
+  mov    [reg_p3+32], r8
+  mov    r9, [reg_p1+40]
+  adc    r9, [reg_p2+40]
+  mov    [reg_p3+40], r9
+  mov    r8, [reg_p1+48]
+  adc    r8, [reg_p2+48]
+  mov    [reg_p3+48], r8
+  mov    r9, [reg_p1+56]
+  adc    r9, [reg_p2+56]
+  mov    [reg_p3+56], r9
+  mov    r8, [reg_p1+64]
+  adc    r8, [reg_p2+64]
+  mov    [reg_p3+64], r8
+  mov    r9, [reg_p1+72]
+  adc    r9, [reg_p2+72]
+  mov    [reg_p3+72], r9
+  mov    r8, [reg_p1+80]
+  adc    r8, [reg_p2+80]
+  mov    [reg_p3+80], r8
+  ret
+
+
+//***********************************************************************
+//  2x697-bit multiprecision subtraction/addition
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. If c < 0, add p697*2^704
+//*********************************************************************** 
+.global fmt(mp_subadd697x2_asm)
+fmt(mp_subadd697x2_asm):
+  push   r12
+  push   r13 
+  push   r14 
+  push   r15
+  push   rbx
+  xor    rax, rax                   // rax becomes the borrow mask after the subtraction
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    rcx, [reg_p1+32]
+  sub    r8, [reg_p2] 
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    rcx, [reg_p2+32] 
+  mov    [reg_p3], r8               // mov does not change flags, so the borrow chain continues below
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], rcx
+
+  mov    r8, [reg_p1+40]
+  mov    r9, [reg_p1+48]
+  mov    r10, [reg_p1+56] 
+  mov    r11, [reg_p1+64]
+  mov    rcx, [reg_p1+72] 
+  sbb    r8, [reg_p2+40] 
+  sbb    r9, [reg_p2+48] 
+  sbb    r10, [reg_p2+56]
+  sbb    r11, [reg_p2+64] 
+  sbb    rcx, [reg_p2+72]
+  mov    [reg_p3+40], r8
+  mov    [reg_p3+48], r9
+  mov    [reg_p3+56], r10
+  mov    [reg_p3+64], r11
+  mov    [reg_p3+72], rcx
+  
+  mov    r8, [reg_p1+80]
+  mov    r9, [reg_p1+88] 
+  mov    r10, [reg_p1+96]
+  mov    r11, [reg_p1+104]
+  mov    rcx, [reg_p1+112]
+  sbb    r8, [reg_p2+80]
+  sbb    r9, [reg_p2+88]
+  sbb    r10, [reg_p2+96] 
+  sbb    r11, [reg_p2+104] 
+  sbb    rcx, [reg_p2+112]
+  mov    [reg_p3+80], r8 
+  mov    [reg_p3+88], r9
+  mov    [reg_p3+96], r10
+  mov    [reg_p3+104], r11
+  mov    [reg_p3+112], rcx
+  
+  mov    r8, [reg_p1+120]
+  mov    r9, [reg_p1+128]
+  mov    r10, [reg_p1+136]
+  mov    r11, [reg_p1+144]
+  mov    rcx, [reg_p1+152]
+  mov    r14, [reg_p1+160]
+  mov    r15, [reg_p1+168]
+  sbb    r8, [reg_p2+120] 
+  sbb    r9, [reg_p2+128] 
+  sbb    r10, [reg_p2+136] 
+  sbb    r11, [reg_p2+144] 
+  sbb    rcx, [reg_p2+152] 
+  sbb    r14, [reg_p2+160] 
+  sbb    r15, [reg_p2+168]
+  mov    [reg_p3+160], r14          // parked in c so that r14/r15 can hold words of p697 below
+  mov    [reg_p3+168], r15
+  sbb    rax, 0                     // rax <- 0 - borrow: all ones if the subtraction borrowed, else zero
+  
+  // Add p697 anded with the mask in rax 
+  mov    r12, [rip+fmt(p697)]       // word 0 of p697; r12 is also used for words 1-4 below
+  mov    r13, [rip+fmt(p697)+40]    // words 5-10 of p697
+  mov    r14, [rip+fmt(p697)+48]
+  mov    r15, [rip+fmt(p697)+56]
+  mov    rdi, [rip+fmt(p697)+64]    // NOTE(review): rdi/rsi presumably alias reg_p1/reg_p2, which are not read after this point; confirm
+  mov    rsi, [rip+fmt(p697)+72]
+  mov    rbx, [rip+fmt(p697)+80]
+  and    r12, rax
+  and    r13, rax
+  and    r14, rax
+  and    r15, rax
+  and    rdi, rax
+  and    rsi, rax
+  and    rbx, rax
+  mov    rax, [reg_p3+88]           // the masked p697 is added to the upper half of c, from byte offset 88 (2^704)
+  add    rax, r12
+  mov    [reg_p3+88], rax
+  mov    rax, [reg_p3+96]
+  adc    rax, r12
+  mov    [reg_p3+96], rax
+  mov    rax, [reg_p3+104]
+  adc    rax, r12
+  mov    [reg_p3+104], rax
+  mov    rax, [reg_p3+112]
+  adc    rax, r12
+  mov    [reg_p3+112], rax
+  adc    r8, r12                    // r8-r11 and rcx still hold difference words at offsets 120-152
+  adc    r9, r13
+  mov    [reg_p3+120], r8
+  mov    [reg_p3+128], r9
+  adc    r10, r14
+  adc    r11, r15
+  mov    r8, [reg_p3+160]           // reload the two top difference words parked above
+  mov    r9, [reg_p3+168]
+  adc    rcx, rdi
+  adc    r8, rsi
+  adc    r9, rbx
+  
+  mov    [reg_p3+136], r10
+  mov    [reg_p3+144], r11
+  mov    [reg_p3+152], rcx
+  mov    [reg_p3+160], r8
+  mov    [reg_p3+168], r9
+  pop    rbx
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Double 2x697-bit multiprecision subtraction
+//  Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
+//*********************************************************************** 
+.global fmt(mp_dblsub697x2_asm)
+fmt(mp_dblsub697x2_asm):
+  push   r12
+  push   r13
+  push   r14
+  push   r15
+  
+  mov    r8, [reg_p3]               // words 0-7 of c
+  mov    r9, [reg_p3+8]
+  mov    r10, [reg_p3+16]
+  mov    r11, [reg_p3+24]
+  mov    r12, [reg_p3+32]
+  mov    r13, [reg_p3+40]
+  mov    r14, [reg_p3+48]
+  mov    r15, [reg_p3+56]
+  sub    r8, [reg_p1]
+  sbb    r9, [reg_p1+8] 
+  sbb    r10, [reg_p1+16] 
+  sbb    r11, [reg_p1+24] 
+  sbb    r12, [reg_p1+32] 
+  sbb    r13, [reg_p1+40] 
+  sbb    r14, [reg_p1+48] 
+  sbb    r15, [reg_p1+56]
+  setc   al                         // al <- borrow of the "- a" chain, kept for the next group of words
+  sub    r8, [reg_p2]
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  sbb    r14, [reg_p2+48] 
+  sbb    r15, [reg_p2+56]
+  setc   cl                         // cl <- borrow of the "- b" chain, kept for the next group of words
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15
+    
+  mov    r8, [reg_p3+64]            // words 8-15 of c
+  mov    r9, [reg_p3+72]
+  mov    r10, [reg_p3+80]
+  mov    r11, [reg_p3+88]
+  mov    r12, [reg_p3+96]
+  mov    r13, [reg_p3+104]
+  mov    r14, [reg_p3+112]
+  mov    r15, [reg_p3+120]
+  bt     rax, 0                     // CF <- bit 0 of rax, i.e. the saved borrow of the "- a" chain
+  sbb    r8, [reg_p1+64] 
+  sbb    r9, [reg_p1+72] 
+  sbb    r10, [reg_p1+80] 
+  sbb    r11, [reg_p1+88] 
+  sbb    r12, [reg_p1+96] 
+  sbb    r13, [reg_p1+104] 
+  sbb    r14, [reg_p1+112] 
+  sbb    r15, [reg_p1+120]
+  setc   al 
+  bt     rcx, 0                     // CF <- bit 0 of rcx, i.e. the saved borrow of the "- b" chain
+  sbb    r8, [reg_p2+64] 
+  sbb    r9, [reg_p2+72] 
+  sbb    r10, [reg_p2+80] 
+  sbb    r11, [reg_p2+88] 
+  sbb    r12, [reg_p2+96] 
+  sbb    r13, [reg_p2+104] 
+  sbb    r14, [reg_p2+112] 
+  sbb    r15, [reg_p2+120]
+  setc   cl 
+  mov    [reg_p3+64], r8
+  mov    [reg_p3+72], r9
+  mov    [reg_p3+80], r10
+  mov    [reg_p3+88], r11
+  mov    [reg_p3+96], r12
+  mov    [reg_p3+104], r13
+  mov    [reg_p3+112], r14
+  mov    [reg_p3+120], r15
+  
+  mov    r8, [reg_p3+128]           // words 16-21 of c
+  mov    r9, [reg_p3+136]
+  mov    r10, [reg_p3+144]
+  mov    r11, [reg_p3+152]
+  mov    r12, [reg_p3+160]
+  mov    r13, [reg_p3+168]
+  bt     rax, 0                     // restore the borrow of the "- a" chain
+  sbb    r8, [reg_p1+128] 
+  sbb    r9, [reg_p1+136] 
+  sbb    r10, [reg_p1+144] 
+  sbb    r11, [reg_p1+152] 
+  sbb    r12, [reg_p1+160] 
+  sbb    r13, [reg_p1+168] 
+  bt     rcx, 0                     // restore the borrow of the "- b" chain; the final borrows are not kept
+  sbb    r8, [reg_p2+128] 
+  sbb    r9, [reg_p2+136] 
+  sbb    r10, [reg_p2+144] 
+  sbb    r11, [reg_p2+152] 
+  sbb    r12, [reg_p2+160] 
+  sbb    r13, [reg_p2+168]
+  mov    [reg_p3+128], r8
+  mov    [reg_p3+136], r9
+  mov    [reg_p3+144], r10
+  mov    [reg_p3+152], r11
+  mov    [reg_p3+160], r12
+  mov    [reg_p3+168], r13
+  
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
\ No newline at end of file
diff --git a/SIKE_sw/src/P697/P697.c b/SIKE_sw/src/P697/P697.c
new file mode 100644
index 0000000..aef7321
--- /dev/null
+++ b/SIKE_sw/src/P697/P697.c
@@ -0,0 +1,139 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: supersingular isogeny parameters and generation of functions for P697
+*********************************************************************************************/  
+
+#include "P697_api.h" 
+#include "P697_internal.h"
+#include "../internal.h"
+
+
+// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points:
+// --------------------------------------------------------------------------------------------------
+// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). 
+// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position.
+// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. 
+// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32.
+// For example, a 697-bit field element is represented with Ceil(697 / 64) = 11 64-bit digits or Ceil(697 / 32) = 22 32-bit digits.
+
+//
+// Curve isogeny system "SIDHp697". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p697^2), where A=6, B=1, C=1 and p697 = 2^356*3^215-1
+//
+         
+const uint64_t p697[NWORDS64_FIELD]              = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x604054AFFFFFFFFF,
+                                                     0xDF4970CF7313736F, 0x719AEC973BF54225, 0x40E474DA88B90FFE, 0x9A0E279D6CEB3C8E, 0x01B39F97671708CF };   // the prime p697
+const uint64_t p697p1[NWORDS64_FIELD]            = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x604054B000000000,
+                                                     0xDF4970CF7313736F, 0x719AEC973BF54225, 0x40E474DA88B90FFE, 0x9A0E279D6CEB3C8E, 0x01B39F97671708CF };   // p697 + 1 (low five words are zero)
+const uint64_t p697x2[NWORDS64_FIELD]            = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xC080A95FFFFFFFFF,
+                                                     0xBE92E19EE626E6DE, 0xE335D92E77EA844B, 0x81C8E9B511721FFC, 0x341C4F3AD9D6791C, 0x03673F2ECE2E119F };   // 2*p697
+const uint64_t p697x4[NWORDS64_FIELD]            = { 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x810152BFFFFFFFFF,
+                                                     0x7D25C33DCC4DCDBD, 0xC66BB25CEFD50897, 0x0391D36A22E43FF9, 0x68389E75B3ACF239, 0x06CE7E5D9C5C233E };   // 4*p697
+// Order of Alice's subgroup: 2^356 (bit 36 of word 5)
+const uint64_t Alice_order[NWORDS64_ORDER]       = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000001000000000 }; 
+// Order of Bob's subgroup: 3^215, i.e. (p697 + 1) / 2^356
+const uint64_t Bob_order[NWORDS64_ORDER]         = { 0xF7313736F604054B, 0x73BF54225DF4970C, 0xA88B90FFE719AEC9, 0xD6CEB3C8E40E474D, 0x7671708CF9A0E279, 0x00000000001B39F9 };
+// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i} in GF(p697^2), expressed in Montgomery representation
+const uint64_t A_gen[6*NWORDS64_FIELD]           = { 0xAED913E7D94626F9, 0x6F163E13CE243B16, 0x63211BC832B204DD, 0x35C03D027DA18195, 0x4AC8AE5E92D9A2E0, 0x7901C981FA69E5F6,
+                                                     0xDC074593C4951783, 0xE039A85DA8C4CCCB, 0x238709FB5A391A27, 0x81C303327E8FDA3A, 0x000F36173BC9782E,   // XPA0
+                                                     0x266F82DA9F627219, 0xC25C277AD1F10869, 0x947D3148A5C130AB, 0xFC5142FE8F622A88, 0xB5F69FFF2BA5CDB9, 0xA5B6DC9C5B5A65E9,
+                                                     0xD1B526E7169AC83E, 0x0DAD5BA3BDB5F30D, 0xAF70A90042BC2A5E, 0xE55389C1D5AC115F, 0x012EFF54E3702B19,   // XPA1
+                                                     0x4C987E2710131A53, 0xC85EBC0B6964FC4E, 0x01064AF42ED201FE, 0x6C7F56903B372893, 0x70D22E68DEE9FB6E, 0x41DBA2F20C3FF934,
+                                                     0x741E3BC447063D35, 0x830A5DA2BB4C3381, 0x1896BD7E957480D5, 0x5FF6ABE18016BD72, 0x015B3A13274C3A5E,   // XQA0
+                                                     0xAB9DA605058DB5BD, 0x676326751136B419, 0xA012ED1457E7A8FB, 0x4D2C99E2BCBDBCBF, 0x847DAAAB8AF49694, 0x57E4A8EBEE16077A,
+                                                     0x253098F5145E024F, 0x2834FA2027602D7E, 0x67370BF01ECA39F5, 0xFD1988310BD8B371, 0x006E1C1994AAE711,   // XQA1
+                                                     0x388557F6D513BA2E, 0x985FC6241AF2D870, 0xAB4A1A0CB162217E, 0xEFE329C716283B0C, 0x1B8A160873A72DF3, 0xE788A8E93CE9A2BF,
+                                                     0x9208D779576BE635, 0x9F01542376C9CF14, 0xB4C147E4C823B27B, 0x14EBA3D4E36220A2, 0x00B5E9F1B8C6EB1F,   // XRA0
+                                                     0x56DA90C58CF6CF46, 0x81618C6931E0A49F, 0xE85EDF7AAA8E245E, 0x3EFAADBA6C218FE5, 0x070BC4D671757F0A, 0x33E57D453747A238,
+                                                     0xA1DE9DC8B2194C11, 0xD5C01615A266F9F3, 0x1FD965E5FB51C6F5, 0x86EA60BF172F4F54, 0x1568A2478263BE4 };  // XRA1
+// Bob's generator values {XPB0 + XPB1*i, XQB0 + XQB1*i, XRB0 + XRB1*i} in GF(p697^2), where XPB1 = XQB1 = 0, expressed in Montgomery representation
+const uint64_t B_gen[6*NWORDS64_FIELD]           = { 0x17004B45D6CD5264, 0x2120CCAD6F2560B1, 0x2037B4FC92D82662, 0x64A1CA7B3198E4F9, 0xA049034AC1A0019A, 0xA78FDEEA1525EFC7,
+                                                     0x1235E926EB190D51, 0x20808D93DDDEB13D, 0x4EE5F74BFA19F9E7, 0xB6325316EE6D75DD, 0x016E69166BA0015E,   // XPB0
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,   // XPB1
+                                                     0x6BE584AC7B4EB4F9, 0xF80F2AD8BBBEED51, 0x3681798875177782, 0x50D3F6C3774A2F09, 0xFF3C23A377640B8D, 0x6033D3DF5745A962,
+                                                     0x2FF24E14C9699274, 0x83DA36836A97EB83, 0x25C8EF44B73BD1CD, 0x712062DF86ADEF09, 0x004CF039055BDB65,   // XQB0
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,   // XQB1
+                                                     0x94D7DFC81C1E72E5, 0xE43215CB25F12508, 0x05C2FA2D3F4AE2F9, 0xBB0752FE5CE1746B, 0xA780994FB878A14B, 0x15F6979E08C55016,
+                                                     0xE520E266C3B11912, 0xA857D0496B40DA30, 0xEBACFFF0FDFA0DD2, 0x4C84A4D2485B1E15, 0x00A4F1A9A018A254,   // XRB0
+                                                     0x49940C6C65957574, 0xC475B85CD816F0A5, 0x52F4C5971D1E4573, 0xE695F0CD74372CBD, 0x53BC43AA1AFA579E, 0xE02CD95D4A267AE6,
+                                                     0x7B96626EBA6A4ECB, 0xF5E38B098E29F8D0, 0xEAED32068F11ACB9, 0xAFF1F42532675E47, 0x0078655255FA5626 }; // XRB1                                                     
+// Montgomery constant Montgomery_R2 = (2^704)^2 mod p697, i.e. R^2 mod p697 for R = 2^704
+const uint64_t Montgomery_R2[NWORDS64_FIELD]     = { 0x90E8717898EB005C, 0x1DF9EB2CE3B0E597, 0x70EDDE1C2495B71C, 0x441E14E451B09CBC, 0x362ACF49015E62FF, 0x139D92FB72D960C4,
+                                                     0x7840FBE341B9CCE6, 0xFC3D2E62C11AEF2F, 0xE8053C8FF2621C9B, 0x7D2E06601F8D8373, 0x01634C22A8B7316F };                                                    
+// Value one in Montgomery representation, i.e. 2^704 mod p697
+const uint64_t Montgomery_one[NWORDS64_FIELD]    = { 0x0000000000000096, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x9A4E60E000000000,
+                                                     0x2AF7E672929A5CBD, 0x6F395F62DE4B3DCF, 0xFA2387F3E390A0E9, 0xBBB4C9C22E2A84A5, 0x00C07D499880D65B };
+
+
+// Fixed parameters for isogeny tree computation
+const unsigned int strat_Alice[MAX_Alice-1] = { 
+72, 48, 27, 15, 8, 4, 2, 1, 1, 2, 1, 1,
+4, 2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 12, 7, 4, 2, 1,
+1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 21, 12, 7, 4, 2,
+1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 9, 5, 3, 2, 1,
+1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 33, 17, 9, 5, 4, 2, 1, 1, 2, 1, 1,
+2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1,
+1, 16, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1,
+4, 2, 1, 1, 2, 1, 1 };
+
+const unsigned int strat_Bob[MAX_Bob-1] = { 
+109, 58, 27, 12, 5, 2, 1, 1, 3, 1, 2, 1,
+7, 3, 1, 2, 1, 4, 2, 1, 2, 1, 1, 15, 7, 3, 1, 2, 1, 4, 2, 1, 2, 1, 1, 8, 4, 2,
+1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 31, 15, 7, 3, 1, 2, 1, 4, 2, 1, 2, 1, 1, 8, 4,
+2, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1,
+1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 58, 27, 12, 7, 3, 1, 2, 1, 4, 2,
+1, 2, 1, 1, 7, 3, 1, 2, 1, 4, 2, 1, 2, 1, 1, 15, 7, 3, 1, 2, 1, 4, 2, 1, 2, 1,
+1, 8, 4, 2, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 31, 15, 7, 3, 1, 2, 1, 4, 2, 1, 2,
+1, 1, 8, 4, 2, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 2, 1, 1, 4, 2,
+1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 };
+           
+// Map the generic GF(p), GF(p^2), curve, isogeny and kex names to their P697-specific symbols before including the shared sources below
+#define fpcopy                        fpcopy697
+#define fpzero                        fpzero697
+#define fpadd                         fpadd697
+#define fpsub                         fpsub697
+#define fpneg                         fpneg697
+#define fpdiv2                        fpdiv2_697
+#define fpcorrection                  fpcorrection697
+#define fpmul_mont                    fpmul697_mont
+#define fpsqr_mont                    fpsqr697_mont
+#define fpinv_mont                    fpinv697_mont
+#define fpinv_chain_mont              fpinv697_chain_mont
+#define fp2copy                       fp2copy697
+#define fp2zero                       fp2zero697
+#define fp2add                        fp2add697
+#define fp2sub                        fp2sub697
+#define mp_sub_p2                     mp_sub697_p2
+#define mp_sub_p4                     mp_sub697_p4
+#define sub_p4                        mp_sub_p4   // alias; the preprocessor rescans it to mp_sub697_p4
+#define fp2neg                        fp2neg697
+#define fp2div2                       fp2div2_697
+#define fp2correction                 fp2correction697
+#define fp2mul_mont                   fp2mul697_mont
+#define fp2sqr_mont                   fp2sqr697_mont
+#define fp2inv_mont                   fp2inv697_mont
+#define fp2inv_mont_ct                fp2inv697_mont_ct
+#define fp2inv_mont_bingcd            fp2inv697_mont_bingcd
+#define mp_add_asm                    mp_add697_asm
+#define mp_subaddx2_asm               mp_subadd697x2_asm
+#define mp_dblsubx2_asm               mp_dblsub697x2_asm
+#define crypto_kem_keypair            crypto_kem_keypair_SIKEp697
+#define crypto_kem_enc                crypto_kem_enc_SIKEp697
+#define crypto_kem_dec                crypto_kem_dec_SIKEp697
+#define random_mod_order_A            random_mod_order_A_SIDHp697
+#define random_mod_order_B            random_mod_order_B_SIDHp697
+#define EphemeralKeyGeneration_A      EphemeralKeyGeneration_A_SIDHp697
+#define EphemeralKeyGeneration_B      EphemeralKeyGeneration_B_SIDHp697
+#define EphemeralSecretAgreement_A    EphemeralSecretAgreement_A_SIDHp697
+#define EphemeralSecretAgreement_B    EphemeralSecretAgreement_B_SIDHp697
+// Shared sources, textually included so they are compiled with the P697 names defined above
+#include "../fpx.c"
+#include "../ec_isogeny.c"
+#include "../sidh.c"    
+#include "../sike.c"
\ No newline at end of file
diff --git a/SIKE_sw/src/P697/P697_api.h b/SIKE_sw/src/P697/P697_api.h
new file mode 100644
index 0000000..25c3782
--- /dev/null
+++ b/SIKE_sw/src/P697/P697_api.h
@@ -0,0 +1,112 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: API header file for P697
+*********************************************************************************************/  
+
+#ifndef P697_API_H
+#define P697_API_H
+    
+
+/*********************** Key encapsulation mechanism API ***********************/
+
+#define CRYPTO_SECRETKEYBYTES     603    // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes
+#define CRYPTO_PUBLICKEYBYTES     528
+#define CRYPTO_BYTES               32
+#define CRYPTO_CIPHERTEXTBYTES    560    // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes  
+
+// Algorithm name
+#define CRYPTO_ALGNAME "SIKEp697"  
+
+// SIKE's key generation
+// It produces a private key sk and computes the public key pk.
+// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 603 bytes)
+//          public key pk (CRYPTO_PUBLICKEYBYTES = 528 bytes) 
+int crypto_kem_keypair_SIKEp697(unsigned char *pk, unsigned char *sk);
+
+// SIKE's encapsulation
+// Input:   public key pk         (CRYPTO_PUBLICKEYBYTES = 528 bytes)
+// Outputs: shared secret ss      (CRYPTO_BYTES = 32 bytes)
+//          ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 560 bytes)
+int crypto_kem_enc_SIKEp697(unsigned char *ct, unsigned char *ss, const unsigned char *pk);
+
+// SIKE's decapsulation
+// Input:   secret key sk         (CRYPTO_SECRETKEYBYTES = 603 bytes)
+//          ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 560 bytes) 
+// Outputs: shared secret ss      (CRYPTO_BYTES = 32 bytes)
+int crypto_kem_dec_SIKEp697(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);
+
+
+// Encoding of keys for KEM-based isogeny system "SIKEp697" (wire format):
+// ----------------------------------------------------------------------
+// Elements over GF(p697) are encoded in 88 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). 
+// Elements (a+b*i) over GF(p697^2), where a and b are defined over GF(p697), are encoded as {a, b}, with a in the lowest memory portion.
+//
+// Private keys sk consist of the concatenation of a 32-byte random value, a value in the range [0, 2^Floor(Log(2,3^215))-1] and the public key pk. In the SIKE API, 
+// private keys are encoded in 603 octets in little endian format. 
+// Public keys pk consist of 3 elements in GF(p697^2). In the SIKE API, pk is encoded in 528 octets. 
+// Ciphertexts ct consist of the concatenation of a public key value and a 32-byte value. In the SIKE API, ct is encoded in 528 + 32 = 560 octets.  
+// Shared keys ss consist of a value of 32 octets.
+
+
+/*********************** Ephemeral key exchange API ***********************/
+
+#define SIDH_SECRETKEYBYTES_A    45
+#define SIDH_SECRETKEYBYTES_B    43
+#define SIDH_PUBLICKEYBYTES     528
+#define SIDH_BYTES              176
+
+// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys.
+// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016.
+// Extended version available at: http://eprint.iacr.org/2016/859  
+
+// Generation of Alice's secret key 
+// Outputs random value in [0, 2^356 - 1] to be used as Alice's private key
+void random_mod_order_A_SIDHp697(unsigned char* random_digits);
+
+// Generation of Bob's secret key 
+// Outputs random value in [0, 2^Floor(Log(2,3^215)) - 1] to be used as Bob's private key
+void random_mod_order_B_SIDHp697(unsigned char* random_digits);
+
+// Alice's ephemeral public key generation
+// Input:  a private key PrivateKeyA in the range [0, 2^356 - 1], stored in 45 bytes. 
+// Output: the public key PublicKeyA consisting of 3 GF(p697^2) elements encoded in 528 bytes.
+int EphemeralKeyGeneration_A_SIDHp697(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA);
+
+// Bob's ephemeral public key generation
+// It computes the public key PublicKeyB from the private key PrivateKeyB supplied by the caller.
+// Input:  a private key PrivateKeyB in the range [0, 2^Floor(Log(2,3^215)) - 1], stored in 43 bytes. 
+// Output: the public key PublicKeyB consisting of 3 GF(p697^2) elements encoded in 528 bytes.
+int EphemeralKeyGeneration_B_SIDHp697(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB);
+
+// Alice's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB
+// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^356 - 1], stored in 45 bytes. 
+//         Bob's PublicKeyB consists of 3 GF(p697^2) elements encoded in 528 bytes.
+// Output: a shared secret SharedSecretA that consists of one element in GF(p697^2) encoded in 176 bytes.
+int EphemeralSecretAgreement_A_SIDHp697(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA);
+
+// Bob's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA
+// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^215)) - 1], stored in 43 bytes. 
+//         Alice's PublicKeyA consists of 3 GF(p697^2) elements encoded in 528 bytes.
+// Output: a shared secret SharedSecretB that consists of one element in GF(p697^2) encoded in 176 bytes.
+int EphemeralSecretAgreement_B_SIDHp697(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB);
+
+
+// Encoding of keys for KEX-based isogeny system "SIDHp697" (wire format):
+// ----------------------------------------------------------------------
+// Elements over GF(p697) are encoded in 88 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). 
+// Elements (a+b*i) over GF(p697^2), where a and b are defined over GF(p697), are encoded as {a, b}, with a in the lowest memory portion.
+//
+// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^356-1] and [0, 2^Floor(Log(2,3^215)) - 1], resp. In the SIDH API, 
+// Alice's and Bob's private keys are encoded in 45 and 43 octets, resp., in little endian format. 
+// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p697^2). In the SIDH API, they are encoded in 528 octets. 
+// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p697^2). In the SIDH API, they are encoded in 176 octets.
+
+
+#endif
\ No newline at end of file
diff --git a/SIKE_sw/src/P697/P697_internal.h b/SIKE_sw/src/P697/P697_internal.h
new file mode 100644
index 0000000..f01c560
--- /dev/null
+++ b/SIKE_sw/src/P697/P697_internal.h
@@ -0,0 +1,175 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: internal header file for P697
+*********************************************************************************************/  
+
+#ifndef P697_INTERNAL_H
+#define P697_INTERNAL_H
+
+#include "../config.h"
+ 
+
+#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_ARM64)
+    #define NWORDS_FIELD    11              // Number of words of a 697-bit field element
+    #define p697_ZERO_WORDS 5               // Number of "0" digits in the least significant part of p697 + 1    
+#elif (TARGET == TARGET_x86)
+    #define NWORDS_FIELD    22
+    #define p697_ZERO_WORDS 11
+#endif
+    
+
+// Basic constants
+
+#define NBITS_FIELD             697  
+#define MAXBITS_FIELD           704                
+#define MAXWORDS_FIELD          ((MAXBITS_FIELD+RADIX-1)/RADIX)     // Max. number of words to represent field elements
+#define NWORDS64_FIELD          ((NBITS_FIELD+63)/64)               // Number of 64-bit words of a 697-bit field element 
+#define NBITS_ORDER             384
+#define NWORDS_ORDER            ((NBITS_ORDER+RADIX-1)/RADIX)       // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp.
+#define NWORDS64_ORDER          ((NBITS_ORDER+63)/64)               // Number of 64-bit words of a 384-bit (NBITS_ORDER) element 
+#define MAXBITS_ORDER           NBITS_ORDER
+#define ALICE                   0
+#define BOB                     1 
+#define OALICE_BITS             356  
+#define OBOB_BITS               341     
+#define OBOB_EXPON              215    
+#define MASK_ALICE              0x0F 
+#define MASK_BOB                0x0F 
+#define PRIME                   p697 
+#define PARAM_A                 6  
+#define PARAM_C                 1
+// Fixed parameters for isogeny tree computation
+#define MAX_INT_POINTS_ALICE    8
+#define MAX_INT_POINTS_BOB      9
+#define MAX_Alice               178
+#define MAX_Bob                 215
+#define MSG_BYTES               32
+#define SECRETKEY_A_BYTES       ((OALICE_BITS + 7) / 8)             // Bytes needed to hold an OALICE_BITS-bit value
+#define SECRETKEY_B_BYTES       ((OBOB_BITS - 1 + 7) / 8)           // Bytes needed to hold an (OBOB_BITS-1)-bit value
+#define FP2_ENCODED_BYTES       (2*((NBITS_FIELD + 7) / 8))         // Two field elements of (NBITS_FIELD+7)/8 bytes each; fully parenthesized so it expands correctly next to /, % or unary operators
+
+
+// SIDH's basic element definitions and point representations
+
+typedef digit_t felm_t[NWORDS_FIELD];                                 // Datatype for representing 697-bit field elements (704-bit max.)
+typedef digit_t dfelm_t[2*NWORDS_FIELD];                              // Datatype for representing double-precision 2x697-bit field elements (2x704-bit max.) 
+typedef felm_t  f2elm_t[2];                                           // Datatype for representing quadratic extension field elements GF(p697^2)
+        
+typedef struct { f2elm_t X; f2elm_t Z; } point_proj;                  // Point representation in projective XZ Montgomery coordinates.
+typedef point_proj point_proj_t[1]; 
+
+#ifdef COMPRESS
+    typedef struct { f2elm_t X; f2elm_t Y; f2elm_t Z; } point_full_proj;  // Point representation in full projective XYZ Montgomery coordinates 
+    typedef point_full_proj point_full_proj_t[1]; 
+
+    typedef struct { f2elm_t x; f2elm_t y; } point_affine;                // Point representation in affine coordinates.
+    typedef point_affine point_t[1]; 
+
+    typedef f2elm_t publickey_t[3];      
+#endif
+
+
+
+/**************** Function prototypes ****************/
+/************* Multiprecision functions **************/
+
+// 697-bit multiprecision addition, c = a+b
+void mp_add697(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_add697_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// 697-bit multiprecision subtraction, c = a-b+2p or c = a-b+4p
+extern void mp_sub697_p2(const digit_t* a, const digit_t* b, digit_t* c);
+extern void mp_sub697_p4(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_sub697_p2_asm(const digit_t* a, const digit_t* b, digit_t* c); 
+void mp_sub697_p4_asm(const digit_t* a, const digit_t* b, digit_t* c); 
+
+// 2x697-bit multiprecision subtraction followed by addition with p697*2^704, c = a-b+(p697*2^704) if a-b < 0, otherwise c=a-b 
+void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_subadd697x2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Double 2x697-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
+void mp_dblsub697x2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+/************ Field arithmetic functions *************/
+
+// Copy of a field element, c = a
+void fpcopy697(const digit_t* a, digit_t* c);
+
+// Zeroing a field element, a = 0
+void fpzero697(digit_t* a);
+
+// Non constant-time comparison of two field elements. If a = b return TRUE, otherwise, return FALSE
+bool fpequal697_non_constant_time(const digit_t* a, const digit_t* b); 
+
+// Modular addition, c = a+b mod p697
+extern void fpadd697(const digit_t* a, const digit_t* b, digit_t* c);
+extern void fpadd697_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Modular subtraction, c = a-b mod p697
+extern void fpsub697(const digit_t* a, const digit_t* b, digit_t* c);
+extern void fpsub697_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Modular negation, a = -a mod p697        
+extern void fpneg697(digit_t* a);  
+
+// Modular division by two, c = a/2 mod p697.
+void fpdiv2_697(const digit_t* a, digit_t* c);
+
+// Modular correction to reduce field element a in [0, 2*p697-1] to [0, p697-1].
+void fpcorrection697(digit_t* a);
+
+// 697-bit Montgomery reduction, c = a mod p
+void rdc697_asm(digit_t* ma, digit_t* mc);
+            
+// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p697, where R=2^704
+void fpmul697_mont(const digit_t* a, const digit_t* b, digit_t* c);
+void mul697_asm(const digit_t* a, const digit_t* b, digit_t* c);
+   
+// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p697, where R=2^704
+void fpsqr697_mont(const digit_t* ma, digit_t* mc);
+
+// Field inversion, a = a^-1 in GF(p697)
+void fpinv697_mont(digit_t* a);
+
+// Exponentiation chain to compute a^((p697-3)/4) using Montgomery arithmetic
+void fpinv697_chain_mont(digit_t* a);
+
+/************ GF(p^2) arithmetic functions *************/
+    
+// Copy of a GF(p697^2) element, c = a
+void fp2copy697(const f2elm_t a, f2elm_t c);
+
+// Zeroing a GF(p697^2) element, a = 0
+void fp2zero697(f2elm_t a);
+
+// GF(p697^2) negation, a = -a in GF(p697^2)
+void fp2neg697(f2elm_t a);
+
+// GF(p697^2) addition, c = a+b in GF(p697^2)
+extern void fp2add697(const f2elm_t a, const f2elm_t b, f2elm_t c);           
+
+// GF(p697^2) subtraction, c = a-b in GF(p697^2)
+extern void fp2sub697(const f2elm_t a, const f2elm_t b, f2elm_t c); 
+
+// GF(p697^2) division by two, c = a/2  in GF(p697^2) 
+void fp2div2_697(const f2elm_t a, f2elm_t c);
+
+// Modular correction, a = a in GF(p697^2)
+void fp2correction697(f2elm_t a);
+            
+// GF(p697^2) squaring using Montgomery arithmetic, c = a^2 in GF(p697^2)
+void fp2sqr697_mont(const f2elm_t a, f2elm_t c);
+ 
+// GF(p697^2) multiplication using Montgomery arithmetic, c = a*b in GF(p697^2)
+void fp2mul697_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
+
+// GF(p697^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
+void fp2inv697_mont(f2elm_t a);
+
+
+#endif
diff --git a/SIKE_sw/src/P697/generic/fp_generic.c b/SIKE_sw/src/P697/generic/fp_generic.c
new file mode 100644
index 0000000..421dfa8
--- /dev/null
+++ b/SIKE_sw/src/P697/generic/fp_generic.c
@@ -0,0 +1,259 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: portable modular arithmetic for P697
+*********************************************************************************************/
+
+#include "../P697_internal.h"
+#include "../../internal.h"
+
+// Global constants
+extern const uint64_t p697[NWORDS64_FIELD];
+extern const uint64_t p697p1[NWORDS64_FIELD]; 
+extern const uint64_t p697x2[NWORDS64_FIELD];
+extern const uint64_t p697x4[NWORDS64_FIELD];
+
+
+__inline void mp_sub697_p2(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Computes c = a - b + 2*p697 digit by digit; the borrow/carry leaving the top digit is not used.
+    const digit_t* twice_p = (const digit_t*)p697x2;    // 2*p697 viewed as an array of digits
+    unsigned int idx, cb = 0;                           // cb is the running borrow, then the running carry
+    // Pass 1: c = a - b
+    for (idx = 0; idx < NWORDS_FIELD; idx++) {
+        SUBC(cb, a[idx], b[idx], cb, c[idx]);
+    }
+    // Pass 2: c = c + 2*p697, starting again from a clear carry
+    for (cb = 0, idx = 0; idx < NWORDS_FIELD; idx++) {
+        ADDC(cb, c[idx], twice_p[idx], cb, c[idx]);
+    }
+}
+
+
+__inline void mp_sub697_p4(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Computes c = a - b + 4*p697 digit by digit; the borrow/carry leaving the top digit is not used.
+    const digit_t* four_p = (const digit_t*)p697x4;     // 4*p697 viewed as an array of digits
+    unsigned int idx, cb = 0;                           // cb is the running borrow, then the running carry
+    // Pass 1: c = a - b
+    for (idx = 0; idx < NWORDS_FIELD; idx++) {
+        SUBC(cb, a[idx], b[idx], cb, c[idx]);
+    }
+    // Pass 2: c = c + 4*p697, starting again from a clear carry
+    for (cb = 0, idx = 0; idx < NWORDS_FIELD; idx++) {
+        ADDC(cb, c[idx], four_p[idx], cb, c[idx]);
+    }
+}
+
+
+__inline void fpadd697(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Addition in GF(p697) with operands kept below 2*p697: c = a+b mod p697.
+  // Inputs: a, b in [0, 2*p697-1] 
+  // Output: c in [0, 2*p697-1] 
+    const digit_t* twice_p = (const digit_t*)p697x2;    // 2*p697 viewed as an array of digits
+    unsigned int idx, cb = 0;                           // running carry/borrow bit
+    digit_t select;
+
+    // c = a + b
+    for (idx = 0; idx < NWORDS_FIELD; idx++) {
+        ADDC(cb, a[idx], b[idx], cb, c[idx]);
+    }
+    // c = c - 2*p697, keeping the borrow that leaves the top digit
+    for (cb = 0, idx = 0; idx < NWORDS_FIELD; idx++) {
+        SUBC(cb, c[idx], twice_p[idx], cb, c[idx]);
+    }
+    select = 0 - (digit_t)cb;                           // all ones if cb == 1, zero if cb == 0
+    // Mask-selected fix-up (no branch): add 2*p697 back only when the subtraction borrowed
+    for (cb = 0, idx = 0; idx < NWORDS_FIELD; idx++) {
+        ADDC(cb, c[idx], twice_p[idx] & select, cb, c[idx]);
+    }
+}
+
+
+__inline void fpsub697(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Subtraction in GF(p697) with operands kept below 2*p697: c = a-b mod p697.
+  // Inputs: a, b in [0, 2*p697-1] 
+  // Output: c in [0, 2*p697-1] 
+    const digit_t* twice_p = (const digit_t*)p697x2;    // 2*p697 viewed as an array of digits
+    unsigned int idx, cb = 0;                           // running borrow, then running carry
+    digit_t select;
+
+    // c = a - b, keeping the borrow that leaves the top digit
+    for (idx = 0; idx < NWORDS_FIELD; idx++) {
+        SUBC(cb, a[idx], b[idx], cb, c[idx]);
+    }
+    select = 0 - (digit_t)cb;                           // all ones if cb == 1, zero if cb == 0
+    for (cb = 0, idx = 0; idx < NWORDS_FIELD; idx++) {  // add 2*p697 only when the subtraction borrowed
+        ADDC(cb, c[idx], twice_p[idx] & select, cb, c[idx]);
+    }
+}
+
+
+__inline void fpneg697(digit_t* a)
+{ // Negation in GF(p697), done in place as a = 2*p697 - a.
+  // Input/output: a in [0, 2*p697-1] 
+    const digit_t* twice_p = (const digit_t*)p697x2;    // 2*p697 viewed as an array of digits
+    unsigned int idx, bw = 0;
+    for (idx = 0; idx < NWORDS_FIELD; idx++) {
+        SUBC(bw, twice_p[idx], a[idx], bw, a[idx]);
+    }
+}
+
+
+void fpdiv2_697(const digit_t* a, digit_t* c)
+{ // Halving in GF(p697): c = a/2 mod p697.
+  // Input : a in [0, 2*p697-1] 
+  // Output: c in [0, 2*p697-1] 
+    const digit_t* modulus = (const digit_t*)p697;            // p697 viewed as an array of digits
+    const digit_t odd_mask = 0 - (digit_t)(a[0] & 1);         // all ones when a is odd, zero when a is even
+    unsigned int idx, cy = 0;
+
+    // c = a + (p697 if a is odd, else 0), selected with the mask instead of a branch
+    for (idx = 0; idx < NWORDS_FIELD; idx++) {
+        ADDC(cy, a[idx], modulus[idx] & odd_mask, cy, c[idx]);
+    }
+    mp_shiftr1(c, NWORDS_FIELD);                              // shift helper defined elsewhere; completes the division by two
+}
+
+
+void fpcorrection697(digit_t* a)
+{ // Final reduction: brings a field element a from [0, 2*p697-1] down to [0, p697-1], in place.
+    const digit_t* modulus = (const digit_t*)p697;      // p697 viewed as an array of digits
+    unsigned int idx, cb = 0;                           // running borrow, then running carry
+    digit_t select;
+
+    // a = a - p697, keeping the borrow that leaves the top digit
+    for (idx = 0; idx < NWORDS_FIELD; idx++) {
+        SUBC(cb, a[idx], modulus[idx], cb, a[idx]);
+    }
+    select = 0 - (digit_t)cb;                           // all ones if cb == 1, zero if cb == 0
+    for (cb = 0, idx = 0; idx < NWORDS_FIELD; idx++) {  // add p697 back only when the subtraction borrowed
+        ADDC(cb, a[idx], modulus[idx] & select, cb, a[idx]);
+    }
+}
+
+
+void digit_x_digit(const digit_t a, const digit_t b, digit_t* c)
+{ // Digit multiplication, digit * digit -> 2-digit result: c[0] receives the low digit and c[1] the high digit.
+    register digit_t al, ah, bl, bh, temp;
+    digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
+    digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4);
+    // Split both operands into halves of sizeof(digit_t)*4 bits so that each partial product fits in one digit.
+    al = a & mask_low;                        // Low part
+    ah = a >> (sizeof(digit_t) * 4);          // High part
+    bl = b & mask_low;
+    bh = b >> (sizeof(digit_t) * 4);
+    // The four half-by-half partial products.
+    albl = al*bl;
+    albh = al*bh;
+    ahbl = ah*bl;
+    ahbh = ah*bh;
+    c[0] = albl & mask_low;                   // C00
+    // Second quarter of the result: high half of al*bl plus the low halves of both cross products.
+    res1 = albl >> (sizeof(digit_t) * 4);
+    res2 = ahbl & mask_low;
+    res3 = albh & mask_low;  
+    temp = res1 + res2 + res3;
+    carry = temp >> (sizeof(digit_t) * 4);    // Overflow of this column, added into the next one
+    c[0] ^= temp << (sizeof(digit_t) * 4);    // C01   
+    // Upper digit: high halves of the cross products, ah*bh, and the carry from the column below.
+    res1 = ahbl >> (sizeof(digit_t) * 4);
+    res2 = albh >> (sizeof(digit_t) * 4);
+    res3 = ahbh & mask_low;
+    temp = res1 + res2 + res3 + carry;
+    c[1] = temp & mask_low;                   // C10 
+    carry = temp & mask_high; 
+    c[1] ^= (ahbh & mask_high) + carry;       // C11
+}
+
+
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. Writes c[0] through c[2*nwords-1].
+    unsigned int i, j;
+    digit_t t = 0, u = 0, v = 0, UV[2];             // (t,u,v) is the column accumulator; v is the digit written out next
+    unsigned int carry = 0;
+    // Columns 0..nwords-1: column i accumulates a[j]*b[i-j] for j = 0..i.
+    for (i = 0; i < nwords; i++) {
+        for (j = 0; j <= i; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]);         // MUL is a macro defined elsewhere; its result is read back from UV[0] and UV[1]
+            ADDC(0, UV[0], v, carry, v); 
+            ADDC(carry, UV[1], u, carry, u); 
+            t += carry;
+        }
+        c[i] = v;                                   // store the finished column, then shift the accumulator down one digit
+        v = u; 
+        u = t;
+        t = 0;
+    }
+    // Columns nwords..2*nwords-2: column i accumulates a[j]*b[i-j] for j = i-nwords+1..nwords-1.
+    for (i = nwords; i < 2*nwords-1; i++) {
+        for (j = i-nwords+1; j < nwords; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]); 
+            ADDC(0, UV[0], v, carry, v); 
+            ADDC(carry, UV[1], u, carry, u); 
+            t += carry;
+        }
+        c[i] = v;
+        v = u; 
+        u = t;
+        t = 0;
+    }
+    c[2*nwords-1] = v;                              // top digit: what is left in the accumulator
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p697.
+  // mc = ma*R^-1 mod p697x2, where R = 2^704.
+  // If ma < 2^704*p697, the output mc is in the range [0, 2*p697-1].
+  // ma is assumed to be in Montgomery representation.
+    unsigned int i, j, carry, count = p697_ZERO_WORDS;
+    digit_t UV[2], t = 0, u = 0, v = 0;
+    // ma is read as 2*NWORDS_FIELD digits; mc receives NWORDS_FIELD digits and is cleared first.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        mc[i] = 0;
+    }
+    // NOTE(review): i is unsigned, so for i < p697_ZERO_WORDS-1 the bound (i-p697_ZERO_WORDS+1) wraps around and the test passes; the products then use the low words of p697p1, which the header describes as zero - confirm.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        for (j = 0; j < i; j++) {
+            if (j < (i-p697_ZERO_WORDS+1)) { 
+                MUL(mc[j], ((digit_t*)p697p1)[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v); 
+                ADDC(carry, UV[1], u, carry, u); 
+                t += carry; 
+            }
+        }
+        ADDC(0, v, ma[i], carry, v); 
+        ADDC(carry, u, 0, carry, u); 
+        t += carry; 
+        mc[i] = v;
+        v = u;
+        u = t;
+        t = 0;
+    }    
+    // Columns NWORDS_FIELD..2*NWORDS_FIELD-2: each finished column overwrites mc[i-NWORDS_FIELD]; count steps down from p697_ZERO_WORDS to 0, raising the upper bound on j.
+    for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {
+        if (count > 0) {
+            count -= 1;
+        }
+        for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
+            if (j < (NWORDS_FIELD-count)) { 
+                MUL(mc[j], ((digit_t*)p697p1)[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v); 
+                ADDC(carry, UV[1], u, carry, u); 
+                t += carry;
+            }
+        }
+        ADDC(0, v, ma[i], carry, v); 
+        ADDC(carry, u, 0, carry, u); 
+        t += carry; 
+        mc[i-NWORDS_FIELD] = v;
+        v = u;
+        u = t;
+        t = 0;
+    }
+    ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v);     // fold in the top input digit; the carry produced here is not used
+    mc[NWORDS_FIELD-1] = v;
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/P751/AMD64/fp_x64.c b/SIKE_sw/src/P751/AMD64/fp_x64.c
new file mode 100644
index 0000000..9dd2078
--- /dev/null
+++ b/SIKE_sw/src/P751/AMD64/fp_x64.c
@@ -0,0 +1,910 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: modular arithmetic optimized for x64 platforms for P751
+*********************************************************************************************/
+
+#include "../P751_internal.h"
+#include "../../internal.h"
+
+// Global constants
+extern const uint64_t p751[NWORDS_FIELD];
+extern const uint64_t p751p1[NWORDS_FIELD]; 
+extern const uint64_t p751x2[NWORDS_FIELD];
+extern const uint64_t p751x4[NWORDS_FIELD];
+
+
+__inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Computes c = a - b + 2*p751. The implementation is chosen at compile time from OS_TARGET.
+#if (OS_TARGET == OS_WIN)
+    // OS_WIN builds: digit-by-digit C code; the borrow/carry leaving the top digit is not used.
+    const digit_t* twice_p = (const digit_t*)p751x2;    // 2*p751 viewed as an array of digits
+    unsigned int idx, cb = 0;                           // running borrow, then running carry
+
+    for (idx = 0; idx < NWORDS_FIELD; idx++) {          // c = a - b
+        SUBC(cb, a[idx], b[idx], cb, c[idx]);
+    }
+    for (cb = 0, idx = 0; idx < NWORDS_FIELD; idx++) {  // c = c + 2*p751, from a clear carry
+        ADDC(cb, c[idx], twice_p[idx], cb, c[idx]);
+    }
+
+#elif (OS_TARGET == OS_LINUX)
+    // OS_LINUX builds: the whole operation is delegated to the _asm routine defined elsewhere.
+    mp_sub751_p2_asm(a, b, c);
+
+#endif
+}
+
+
+__inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Computes c = a - b + 4*p751. The implementation is chosen at compile time from OS_TARGET.
+#if (OS_TARGET == OS_WIN)
+    // OS_WIN builds: digit-by-digit C code; the borrow/carry leaving the top digit is not used.
+    const digit_t* four_p = (const digit_t*)p751x4;     // 4*p751 viewed as an array of digits
+    unsigned int idx, cb = 0;                           // running borrow, then running carry
+
+    for (idx = 0; idx < NWORDS_FIELD; idx++) {          // c = a - b
+        SUBC(cb, a[idx], b[idx], cb, c[idx]);
+    }
+    for (cb = 0, idx = 0; idx < NWORDS_FIELD; idx++) {  // c = c + 4*p751, from a clear carry
+        ADDC(cb, c[idx], four_p[idx], cb, c[idx]);
+    }
+
+#elif (OS_TARGET == OS_LINUX)
+    // OS_LINUX builds: the whole operation is delegated to the _asm routine defined elsewhere.
+    mp_sub751_p4_asm(a, b, c);
+
+#endif
+}
+
+
+__inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Addition in GF(p751) with operands kept below 2*p751: c = a+b mod p751.
+  // Inputs: a, b in [0, 2*p751-1] 
+  // Output: c in [0, 2*p751-1] 
+
+#if (OS_TARGET == OS_WIN)
+    const digit_t* twice_p = (const digit_t*)p751x2;    // 2*p751 viewed as an array of digits
+    unsigned int idx, cb = 0;                           // running carry/borrow bit
+    digit_t select;
+
+    // c = a + b
+    for (idx = 0; idx < NWORDS_FIELD; idx++) {
+        ADDC(cb, a[idx], b[idx], cb, c[idx]);
+    }
+    // c = c - 2*p751, keeping the borrow that leaves the top digit
+    for (cb = 0, idx = 0; idx < NWORDS_FIELD; idx++) {
+        SUBC(cb, c[idx], twice_p[idx], cb, c[idx]);
+    }
+    select = 0 - (digit_t)cb;                           // all ones if cb == 1, zero if cb == 0
+    // Mask-selected fix-up (no branch): add 2*p751 back only when the subtraction borrowed
+    for (cb = 0, idx = 0; idx < NWORDS_FIELD; idx++) {
+        ADDC(cb, c[idx], twice_p[idx] & select, cb, c[idx]);
+    }
+
+#elif (OS_TARGET == OS_LINUX)
+    // OS_LINUX builds: the whole operation is delegated to the _asm routine defined elsewhere.
+    fpadd751_asm(a, b, c);
+
+#endif
+}
+
+
+__inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Subtraction in GF(p751) with operands kept below 2*p751: c = a-b mod p751.
+  // Inputs: a, b in [0, 2*p751-1] 
+  // Output: c in [0, 2*p751-1] 
+
+#if (OS_TARGET == OS_WIN)
+    const digit_t* twice_p = (const digit_t*)p751x2;    // 2*p751 viewed as an array of digits
+    unsigned int idx, cb = 0;                           // running borrow, then running carry
+    digit_t select;
+
+    // c = a - b, keeping the borrow that leaves the top digit
+    for (idx = 0; idx < NWORDS_FIELD; idx++) {
+        SUBC(cb, a[idx], b[idx], cb, c[idx]);
+    }
+    select = 0 - (digit_t)cb;                           // all ones if cb == 1, zero if cb == 0
+    for (cb = 0, idx = 0; idx < NWORDS_FIELD; idx++) {  // add 2*p751 only when the subtraction borrowed
+        ADDC(cb, c[idx], twice_p[idx] & select, cb, c[idx]);
+    }
+
+#elif (OS_TARGET == OS_LINUX)
+    // OS_LINUX builds: the whole operation is delegated to the _asm routine defined elsewhere.
+    fpsub751_asm(a, b, c);
+
+#endif
+}
+
+
+__inline void fpneg751(digit_t* a)
+{ // Negation in GF(p751), done in place as a = 2*p751 - a.
+  // Input/output: a in [0, 2*p751-1] 
+    const digit_t* twice_p = (const digit_t*)p751x2;    // 2*p751 viewed as an array of digits
+    unsigned int idx, bw = 0;
+    for (idx = 0; idx < NWORDS_FIELD; idx++) {
+        SUBC(bw, twice_p[idx], a[idx], bw, a[idx]);
+    }
+}
+
+
+void fpdiv2_751(const digit_t* a, digit_t* c)
+{ // Halving in GF(p751): c = a/2 mod p751.
+  // Input : a in [0, 2*p751-1] 
+  // Output: c in [0, 2*p751-1] 
+    const digit_t* modulus = (const digit_t*)p751;            // p751 viewed as an array of digits
+    const digit_t odd_mask = 0 - (digit_t)(a[0] & 1);         // all ones when a is odd, zero when a is even
+    unsigned int idx, cy = 0;
+
+    // c = a + (p751 if a is odd, else 0), selected with the mask instead of a branch
+    for (idx = 0; idx < NWORDS_FIELD; idx++) {
+        ADDC(cy, a[idx], modulus[idx] & odd_mask, cy, c[idx]);
+    }
+    mp_shiftr1(c, NWORDS_FIELD);                              // shift helper defined elsewhere; completes the division by two
+}
+
+
+void fpcorrection751(digit_t* a)
+{ // Final reduction: brings a field element a from [0, 2*p751-1] down to [0, p751-1], in place.
+    const digit_t* modulus = (const digit_t*)p751;      // p751 viewed as an array of digits
+    unsigned int idx, cb = 0;                           // running borrow, then running carry
+    digit_t select;
+
+    // a = a - p751, keeping the borrow that leaves the top digit
+    for (idx = 0; idx < NWORDS_FIELD; idx++) {
+        SUBC(cb, a[idx], modulus[idx], cb, a[idx]);
+    }
+    select = 0 - (digit_t)cb;                           // all ones if cb == 1, zero if cb == 0
+    for (cb = 0, idx = 0; idx < NWORDS_FIELD; idx++) {  // add p751 back only when the subtraction borrowed
+        ADDC(cb, a[idx], modulus[idx] & select, cb, a[idx]);
+    }
+}
+
+
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords.
+        
+    (void)nwords;
+
+#if (OS_TARGET == OS_WIN)
+    digit_t t = 0;
+    uint128_t uv = {0};
+    unsigned int carry = 0;
+        
+    MULADD128(a[0], b[0], uv, carry, uv);
+    t += carry;
+    c[0] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[0], uv, carry, uv);
+    t += carry;
+    c[1] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[0], uv, carry, uv);
+    t += carry;
+    c[2] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[0], uv, carry, uv);
+    t += carry;
+    c[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[0], uv, carry, uv);
+    t += carry;
+    c[4] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[0], uv, carry, uv);
+    t += carry;
+    c[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[0], uv, carry, uv);
+    t += carry;
+    c[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[0], uv, carry, uv);
+    t += carry;
+    c[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[0], uv, carry, uv);
+    t += carry;
+    c[8] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[0], uv, carry, uv);
+    t += carry;
+    c[9] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[10], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[10], b[0], uv, carry, uv);
+    t += carry;
+    c[10] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[0], b[11], uv, carry, uv);
+    t += carry;
+    MULADD128(a[10], b[1], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[1], b[10], uv, carry, uv);
+    t += carry;
+    MULADD128(a[11], b[0], uv, carry, uv);
+    t += carry;
+    c[11] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;    
+    
+    MULADD128(a[1], b[11], uv, carry, uv);
+    t += carry;
+    MULADD128(a[10], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[10], uv, carry, uv);
+    t += carry;
+    MULADD128(a[11], b[1], uv, carry, uv);
+    t += carry;
+    c[12] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[11], b[2], uv, carry, uv);
+    t += carry;
+    MULADD128(a[10], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[10], uv, carry, uv);
+    t += carry;
+    MULADD128(a[2], b[11], uv, carry, uv);
+    t += carry;
+    c[13] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[11], b[3], uv, carry, uv);
+    t += carry;
+    MULADD128(a[10], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[10], uv, carry, uv);
+    t += carry;
+    MULADD128(a[3], b[11], uv, carry, uv);
+    t += carry;
+    c[14] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[11], b[4], uv, carry, uv);
+    t += carry;
+    MULADD128(a[10], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[10], uv, carry, uv);
+    t += carry;
+    MULADD128(a[4], b[11], uv, carry, uv);
+    t += carry;
+    c[15] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[11], b[5], uv, carry, uv);
+    t += carry;
+    MULADD128(a[10], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[10], uv, carry, uv);
+    t += carry;
+    MULADD128(a[5], b[11], uv, carry, uv);
+    t += carry;
+    c[16] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[11], b[6], uv, carry, uv);
+    t += carry;
+    MULADD128(a[10], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[10], uv, carry, uv);
+    t += carry;
+    MULADD128(a[6], b[11], uv, carry, uv);
+    t += carry;
+    c[17] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[11], b[7], uv, carry, uv);
+    t += carry;
+    MULADD128(a[10], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[10], uv, carry, uv);
+    t += carry;
+    MULADD128(a[7], b[11], uv, carry, uv);
+    t += carry;
+    c[18] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[11], b[8], uv, carry, uv);
+    t += carry;
+    MULADD128(a[10], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[10], uv, carry, uv);
+    t += carry;
+    MULADD128(a[8], b[11], uv, carry, uv);
+    t += carry;
+    c[19] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[11], b[9], uv, carry, uv);
+    t += carry;
+    MULADD128(a[10], b[10], uv, carry, uv);
+    t += carry;
+    MULADD128(a[9], b[11], uv, carry, uv);
+    t += carry;
+    c[20] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    
+    MULADD128(a[11], b[10], uv, carry, uv);
+    t += carry;
+    MULADD128(a[10], b[11], uv, carry, uv);
+    t += carry;
+    c[21] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    
+    MULADD128(a[11], b[11], uv, carry, uv);
+    c[22] = uv[0];
+    c[23] = uv[1];
+
+#elif (OS_TARGET == OS_LINUX)
+    
+    mul751_asm(a, b, c);
+
+#endif
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Montgomery reduction exploiting special form of the prime.
+  // mc = ma*R^-1 mod p751x2, where R = 2^768.
+  // If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
+  // ma is assumed to be in Montgomery representation.
+  // Reads ma[0..23] and writes mc[0..11]. OS_WIN builds run the unrolled C below; OS_LINUX builds call rdc751_asm.
+#if (OS_TARGET == OS_WIN)
+    unsigned int carry;
+    digit_t t = 0;                 // third (top) digit of the column accumulator (t, uv[1], uv[0])
+    uint128_t uv = {0};            // low two digits of the column accumulator
+    // NOTE(review): MUL128, MULADD128 and ADDC are defined elsewhere; the comments below assume multiply, multiply-accumulate with carry out, and add-with-carry - confirm.
+    mc[0] = ma[0];                 // digits 0..4 are copied unchanged; only p751p1[5..11] are used as multiplicands below
+    mc[1] = ma[1];
+    mc[2] = ma[2];
+    mc[3] = ma[3];
+    mc[4] = ma[4];
+    MUL128(mc[0], ((digit_t*)p751p1)[5], uv);      // column 5: mc[0]*p751p1[5] plus ma[5]; the low digit is stored to mc[5]
+    ADDC(0, uv[0], ma[5], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    mc[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = 0;
+    // Column 6: add mc[i]*p751p1[j] for i + j = 6 and ma[6]; the low digit becomes mc[6] and the accumulator shifts down one digit.
+    MULADD128(mc[0], ((digit_t*)p751p1)[6], uv, carry, uv);      // NOTE(review): this carry is not added to t; presumably no overflow is possible because uv[1] was just cleared - confirm
+    MULADD128(mc[1], ((digit_t*)p751p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[6], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 7: same pattern for i + j = 7.
+    MULADD128(mc[0], ((digit_t*)p751p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p751p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p751p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[7], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 8: same pattern for i + j = 8.
+    MULADD128(mc[0], ((digit_t*)p751p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p751p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p751p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p751p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[8], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[8] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 9: same pattern for i + j = 9.
+    MULADD128(mc[0], ((digit_t*)p751p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p751p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p751p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p751p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p751p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[9], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[9] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 10: first column that multiplies a digit (mc[5]) produced earlier in this function rather than copied from ma.
+    MULADD128(mc[0], ((digit_t*)p751p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p751p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p751p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p751p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p751p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p751p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[10], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[10] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 11: last column that still uses mc[0].
+    MULADD128(mc[0], ((digit_t*)p751p1)[11], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[1], ((digit_t*)p751p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p751p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p751p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p751p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p751p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p751p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[11], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[11] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 12: the p751p1 index stops at 11, so the sum starts at mc[1]; the low digit overwrites mc[0], which is not read again.
+    MULADD128(mc[1], ((digit_t*)p751p1)[11], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[2], ((digit_t*)p751p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p751p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p751p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p751p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p751p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p751p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[12], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[0] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 13: the low digit overwrites mc[1], which is not read again.
+    MULADD128(mc[2], ((digit_t*)p751p1)[11], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[3], ((digit_t*)p751p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p751p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p751p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p751p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p751p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p751p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[13], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[1] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 14: the low digit overwrites mc[2], which is not read again.
+    MULADD128(mc[3], ((digit_t*)p751p1)[11], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[4], ((digit_t*)p751p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p751p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p751p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p751p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p751p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p751p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[14], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[2] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 15: the low digit overwrites mc[3], which is not read again.
+    MULADD128(mc[4], ((digit_t*)p751p1)[11], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[5], ((digit_t*)p751p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p751p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p751p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p751p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p751p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p751p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[15], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[3] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 16: the low digit overwrites mc[4], which is not read again.
+    MULADD128(mc[5], ((digit_t*)p751p1)[11], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[6], ((digit_t*)p751p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p751p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p751p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p751p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p751p1)[6], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[11], ((digit_t*)p751p1)[5], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[16], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[4] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 17: from here on each column has one product fewer; the low digit overwrites mc[5], which is not read again.
+    MULADD128(mc[6], ((digit_t*)p751p1)[11], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[7], ((digit_t*)p751p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p751p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p751p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p751p1)[7], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[11], ((digit_t*)p751p1)[6], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[17], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[5] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 18: the low digit overwrites mc[6], which is not read again.
+    MULADD128(mc[7], ((digit_t*)p751p1)[11], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[8], ((digit_t*)p751p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p751p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p751p1)[8], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[11], ((digit_t*)p751p1)[7], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[18], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[6] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 19: the low digit overwrites mc[7], which is not read again.
+    MULADD128(mc[8], ((digit_t*)p751p1)[11], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[9], ((digit_t*)p751p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p751p1)[9], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[11], ((digit_t*)p751p1)[8], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[19], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[7] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 20: the low digit overwrites mc[8], which is not read again.
+    MULADD128(mc[9], ((digit_t*)p751p1)[11], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[10], ((digit_t*)p751p1)[10], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[11], ((digit_t*)p751p1)[9], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[20], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[8] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Column 21: the low digit overwrites mc[9], which is not read again.
+    MULADD128(mc[10], ((digit_t*)p751p1)[11], uv, carry, uv);
+    t += carry;
+    MULADD128(mc[11], ((digit_t*)p751p1)[10], uv, carry, uv);
+    t += carry;
+    ADDC(0, uv[0], ma[21], carry, uv[0]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    t += carry; 
+    mc[9] = uv[0];
+    uv[0] = uv[1];
+    uv[1] = t;
+    t = 0;
+    // Columns 22 and 23: one remaining product; the two accumulator digits plus ma[22], ma[23] are written straight to mc[10], mc[11].
+    MULADD128(mc[11], ((digit_t*)p751p1)[11], uv, carry, uv);
+    t += carry;                                // t is not read after this update
+    ADDC(0, uv[0], ma[22], carry, mc[10]); 
+    ADDC(carry, uv[1], 0, carry, uv[1]); 
+    ADDC(0, uv[1], ma[23], carry, mc[11]);     // the carry out of this last addition is not used
+    
+#elif (OS_TARGET == OS_LINUX)                 
+    
+    rdc751_asm(ma, mc);    
+
+#endif
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/P751/AMD64/fp_x64_asm.S b/SIKE_sw/src/P751/AMD64/fp_x64_asm.S
new file mode 100644
index 0000000..cca6992
--- /dev/null
+++ b/SIKE_sw/src/P751/AMD64/fp_x64_asm.S
@@ -0,0 +1,3147 @@
+//*******************************************************************************************
+// SIDH: an efficient supersingular isogeny cryptography library 
+// Copyright (c) Microsoft Corporation
+//
+// Website: https://github.com/microsoft/PQCrypto-SIDH
+// Released under MIT license 
+//
+// Abstract: field arithmetic in x64 assembly for P751 on Linux 
+//*******************************************************************************************  
+
+.intel_syntax noprefix 
+
+// Format function and variable names for Mac OS X: fmt(f) expands to _f when __APPLE__ is defined and to f otherwise
+#if defined(__APPLE__)
+    #define fmt(f)    _##f
+#else
+    #define fmt(f)    f
+#endif
+
+// Registers that are used for parameter passing (reg_p1, reg_p2, reg_p3 hold the first, second and third pointer argument of the routines below):
+#define reg_p1  rdi
+#define reg_p2  rsi
+#define reg_p3  rdx
+
+
+.text
+//***********************************************************************
+//  Field addition: 12-limb add, subtract p751x2, then add p751x2 back under a mask if that subtraction borrowed
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
+//*********************************************************************** 
+.global fmt(fpadd751_asm)
+fmt(fpadd751_asm):
+  push   r12                       // r12-r15 are saved here and restored before ret
+  push   r13
+  push   r14
+  push   r15
+  // Step 1: a + b. Limbs 0..8 stay in r8-r15 and rcx; limbs 9..11 go through rax and are stored to c as they are produced.
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56] 
+  mov    rcx, [reg_p1+64]
+  add    r8, [reg_p2] 
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  adc    r12, [reg_p2+32] 
+  adc    r13, [reg_p2+40] 
+  adc    r14, [reg_p2+48] 
+  adc    r15, [reg_p2+56]
+  adc    rcx, [reg_p2+64] 
+  mov    rax, [reg_p1+72]          // mov does not change CF, so the adc chain continues across these loads and stores
+  adc    rax, [reg_p2+72] 
+  mov    [reg_p3+72], rax
+  mov    rax, [reg_p1+80]
+  adc    rax, [reg_p2+80] 
+  mov    [reg_p3+80], rax
+  mov    rax, [reg_p1+88]
+  adc    rax, [reg_p2+88]          // the carry out of this top limb is not used (the next flag-setting instruction is the sub below)
+  mov    [reg_p3+88], rax
+  // Step 2: subtract p751x2. The limb loaded from p751x2+8 is reused for limbs 1..4; offsets +16, +24, +32 are never loaded.
+  mov    rax, [rip+fmt(p751x2)]
+  sub    r8, rax
+  mov    rax, [rip+fmt(p751x2)+8]
+  sbb    r9, rax
+  sbb    r10, rax
+  sbb    r11, rax
+  sbb    r12, rax
+  mov    rax, [rip+fmt(p751x2)+40]
+  sbb    r13, rax
+  mov    rax, [rip+fmt(p751x2)+48]
+  sbb    r14, rax
+  mov    rax, [rip+fmt(p751x2)+56]
+  sbb    r15, rax
+  mov    rax, [rip+fmt(p751x2)+64]
+  sbb    rcx, rax
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15
+  mov    [reg_p3+64], rcx
+  mov    r8, [reg_p3+72]           // limbs 9..11 of the sum are reloaded from c to continue the borrow chain
+  mov    r9, [reg_p3+80]
+  mov    r10, [reg_p3+88]
+  mov    rax, [rip+fmt(p751x2)+72]
+  sbb    r8, rax
+  mov    rax, [rip+fmt(p751x2)+80]
+  sbb    r9, rax
+  mov    rax, [rip+fmt(p751x2)+88]
+  sbb    r10, rax
+  mov    [reg_p3+72], r8
+  mov    [reg_p3+80], r9
+  mov    [reg_p3+88], r10
+  mov    rax, 0                    // mov (unlike xor) leaves the final borrow in CF
+  sbb    rax, 0                    // rax = 0 - CF: all ones if the subtraction borrowed, zero otherwise
+  // Step 3: AND each distinct p751x2 limb with the mask in rax. rsi (reg_p2) is reused as scratch; b is not read after this point.
+  mov    rsi, [rip+fmt(p751x2)]
+  and    rsi, rax
+  mov    r8, [rip+fmt(p751x2)+8]
+  and    r8, rax
+  mov    r9, [rip+fmt(p751x2)+40]
+  and    r9, rax
+  mov    r10, [rip+fmt(p751x2)+48]
+  and    r10, rax
+  mov    r11, [rip+fmt(p751x2)+56]
+  and    r11, rax
+  mov    r12, [rip+fmt(p751x2)+64]
+  and    r12, rax
+  mov    r13, [rip+fmt(p751x2)+72]
+  and    r13, rax
+  mov    r14, [rip+fmt(p751x2)+80]
+  and    r14, rax
+  mov    r15, [rip+fmt(p751x2)+88]
+  and    r15, rax
+  // Step 4: add the masked limbs to c (adds zero when there was no borrow); r8 again serves limbs 1..4.
+  add    rsi, [reg_p3]  
+  mov    [reg_p3], rsi
+  mov    rax, [reg_p3+8]
+  adc    rax, r8 
+  mov    [reg_p3+8], rax  
+  mov    rax, [reg_p3+16]
+  adc    rax, r8 
+  mov    [reg_p3+16], rax
+  mov    rax, [reg_p3+24]  
+  adc    rax, r8 
+  mov    [reg_p3+24], rax 
+  mov    rax, [reg_p3+32]  
+  adc    rax, r8 
+  mov    [reg_p3+32], rax 
+  adc    r9, [reg_p3+40]
+  adc    r10, [reg_p3+48]
+  adc    r11, [reg_p3+56]
+  adc    r12, [reg_p3+64]
+  adc    r13, [reg_p3+72] 
+  adc    r14, [reg_p3+80]
+  adc    r15, [reg_p3+88]
+  mov    [reg_p3+40], r9 
+  mov    [reg_p3+48], r10 
+  mov    [reg_p3+56], r11 
+  mov    [reg_p3+64], r12  
+  mov    [reg_p3+72], r13 
+  mov    [reg_p3+80], r14 
+  mov    [reg_p3+88], r15 
+  // Restore the registers pushed on entry, in reverse order.
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Field subtraction: 12-limb subtract, then add p751x2 under a mask if the subtraction borrowed
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
+//*********************************************************************** 
+.global fmt(fpsub751_asm)
+fmt(fpsub751_asm):
+  push   r12                       // r12-r15 are saved here and restored before ret
+  push   r13
+  push   r14
+  push   r15
+  // Step 1: a - b. Limbs 0..8 are computed in r8-r15 and rcx, limbs 9..11 through rax; every limb is stored to c.
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56] 
+  mov    rcx, [reg_p1+64]
+  sub    r8, [reg_p2] 
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  sbb    r14, [reg_p2+48] 
+  sbb    r15, [reg_p2+56]
+  sbb    rcx, [reg_p2+64] 
+  mov    [reg_p3], r8              // mov does not change CF, so the borrow chain continues after these stores
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15
+  mov    [reg_p3+64], rcx
+  mov    rax, [reg_p1+72]
+  sbb    rax, [reg_p2+72] 
+  mov    [reg_p3+72], rax
+  mov    rax, [reg_p1+80]
+  sbb    rax, [reg_p2+80] 
+  mov    [reg_p3+80], rax
+  mov    rax, [reg_p1+88]
+  sbb    rax, [reg_p2+88] 
+  mov    [reg_p3+88], rax
+  mov    rax, 0                    // mov (unlike xor) leaves the final borrow in CF
+  sbb    rax, 0                    // rax = 0 - CF: all ones if a - b borrowed, zero otherwise
+  // Step 2: AND each distinct p751x2 limb with the mask in rax. rsi (reg_p2) is reused as scratch; b is not read after this point.
+  mov    rsi, [rip+fmt(p751x2)]
+  and    rsi, rax
+  mov    r8, [rip+fmt(p751x2)+8]
+  and    r8, rax
+  mov    r9, [rip+fmt(p751x2)+40]
+  and    r9, rax
+  mov    r10, [rip+fmt(p751x2)+48]
+  and    r10, rax
+  mov    r11, [rip+fmt(p751x2)+56]
+  and    r11, rax
+  mov    r12, [rip+fmt(p751x2)+64]
+  and    r12, rax
+  mov    r13, [rip+fmt(p751x2)+72]
+  and    r13, rax
+  mov    r14, [rip+fmt(p751x2)+80]
+  and    r14, rax
+  mov    r15, [rip+fmt(p751x2)+88]
+  and    r15, rax
+  // Step 3: add the masked limbs to c (adds zero when there was no borrow); r8 (masked p751x2+8) serves limbs 1..4.
+  mov    rax, [reg_p3]
+  add    rax, rsi  
+  mov    [reg_p3], rax
+  mov    rax, [reg_p3+8]
+  adc    rax, r8 
+  mov    [reg_p3+8], rax  
+  mov    rax, [reg_p3+16]
+  adc    rax, r8 
+  mov    [reg_p3+16], rax  
+  mov    rax, [reg_p3+24]  
+  adc    rax, r8 
+  mov    [reg_p3+24], rax 
+  mov    rax, [reg_p3+32]  
+  adc    rax, r8 
+  mov    [reg_p3+32], rax 
+  adc    r9, [reg_p3+40] 
+  adc    r10, [reg_p3+48] 
+  adc    r11, [reg_p3+56]
+  adc    r12, [reg_p3+64] 
+  adc    r13, [reg_p3+72]
+  adc    r14, [reg_p3+80]
+  adc    r15, [reg_p3+88]
+  mov    [reg_p3+40], r9 
+  mov    [reg_p3+48], r10
+  mov    [reg_p3+56], r11 
+  mov    [reg_p3+64], r12
+  mov    [reg_p3+72], r13 
+  mov    [reg_p3+80], r14  
+  mov    [reg_p3+88], r15 
+  // Restore the registers pushed on entry, in reverse order.
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret 
+
+
+///////////////////////////////////////////////////////////////// MACRO: c [reg_p3] = a [reg_p1] - b [reg_p2] + P0, done as two 6-limb halves (P0 names a 12-limb constant)
+.macro SUB751_PX  P0 
+  push   r12
+  push   r13
+  push   r14
+  push   r15
+  // Low half: limbs 0..5 of a - b.
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  sub    r8, [reg_p2] 
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40]
+  setc   al                        // save the borrow out of limb 5 (only the low byte of rax is written)
+  // Add limbs 0..5 of P0; the limb loaded from P0+8 is reused for limbs 1..4.
+  mov    r14, [rip+\P0]
+  mov    r15, [rip+\P0+8]
+  add    r8, r14  
+  adc    r9, r15  
+  adc    r10, r15 
+  adc    r11, r15 
+  adc    r12, r15   
+  mov    r14, [rip+\P0+40]
+  adc    r13, r14   
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9 
+  mov    [reg_p3+16], r10 
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12 
+  mov    [reg_p3+40], r13
+  setc   cl                        // save the carry out of the low-half addition
+  // High half: bt copies bit 0 of rax (the saved borrow) into CF so the sbb chain resumes at limb 6.
+  bt     rax, 0 
+  mov    r8, [reg_p1+48]
+  mov    r9, [reg_p1+56]
+  mov    r10, [reg_p1+64]
+  mov    r11, [reg_p1+72]
+  mov    r12, [reg_p1+80]
+  mov    r13, [reg_p1+88]
+  sbb    r8, [reg_p2+48] 
+  sbb    r9, [reg_p2+56] 
+  sbb    r10, [reg_p2+64] 
+  sbb    r11, [reg_p2+72] 
+  sbb    r12, [reg_p2+80] 
+  sbb    r13, [reg_p2+88] 
+  // bt copies bit 0 of rcx (the saved carry) into CF, replacing the borrow out of limb 11; limbs 6..11 of P0 are then added.
+  bt     rcx, 0
+  mov    r14, [rip+\P0+48]
+  mov    r15, [rip+\P0+56]
+  adc    r8, r14  
+  adc    r9, r15  
+  mov    r14, [rip+\P0+64]
+  mov    r15, [rip+\P0+72]
+  adc    r10, r14 
+  adc    r11, r15    
+  mov    r14, [rip+\P0+80]
+  mov    r15, [rip+\P0+88]
+  adc    r12, r14 
+  adc    r13, r15  
+  mov    [reg_p3+48], r8
+  mov    [reg_p3+56], r9 
+  mov    [reg_p3+64], r10 
+  mov    [reg_p3+72], r11
+  mov    [reg_p3+80], r12 
+  mov    [reg_p3+88], r13
+  // Restore the registers pushed at the top of the macro; the expansion does not include ret.
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  .endm
+
+
+//***********************************************************************
+//  Multiprecision subtraction with correction with 2*p751
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 2*p751
+//*********************************************************************** 
+.global fmt(mp_sub751_p2_asm)
+fmt(mp_sub751_p2_asm):
+  // The body is the SUB751_PX expansion with the constant p751x2; the macro pushes and pops r12-r15 itself.
+  SUB751_PX  fmt(p751x2)
+  ret 
+
+
+//***********************************************************************
+//  Multiprecision subtraction with correction with 4*p751
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] + 4*p751
+//*********************************************************************** 
+.global fmt(mp_sub751_p4_asm)
+fmt(mp_sub751_p4_asm):
+  // The body is the SUB751_PX expansion with the constant p751x4; the macro pushes and pops r12-r15 itself.
+  SUB751_PX  fmt(p751x4)
+  ret 
+
+
+#ifdef _MULX_
+
+/////////////////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication
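+// Inputs:  memory pointers M0 and M1 (six 64-bit limbs each, offsets 0..40)
+// Outputs: memory pointer C (twelve 64-bit limbs, offsets 0..88)
+// Temps:   stack space S for two 64-bit values (case w/o _ADX_), regs T0:T7; rax and rdx are also overwritten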
+///////////////////////////////////////////////////////////////////////////
+#ifdef _ADX_
+
+.macro MUL384_SCHOOL M0, M1, C, S, T0, T1, T2, T3, T4, T5, T6, T7 
+    mov    rdx, \M0                // row 0 (limb 0 of M0): rdx is the implicit mulx multiplicand; parameter S is not referenced in this variant
+    mulx   \T0, \T1, \M1    
+    mulx   \T2, \T3, 8\M1
+    mov    \C, \T1             // C0_final 
+    xor    rax, rax                // rax = 0; xor also clears CF and OF before the carry chain starts
+    mulx   \T4, \T5, 16\M1 
+    adox   \T0, \T3               
+    adox   \T2, \T5     
+    mulx   \T1, \T3, 24\M1
+    adox   \T4, \T3         
+    mulx   \T5, \T6, 32\M1 
+    adox   \T1, \T6        
+    mulx   \T3, \T7, 40\M1    
+    adox   \T5, \T7       
+    adox   \T3, rax        
+    // Rows 1..5 share one pattern: rdx = next limb of M0, xor clears CF and OF, then two independent carry chains (adcx on CF, adox on OF) fold the six mulx products into the running columns.
+    mov    rdx, 8\M0 
+    mulx   \T6, \T7, \M1 
+    xor    rax, rax
+    adcx   \T0, \T7 
+    mov    8\C, \T0            // C1_final 
+    adcx   \T2, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T2, \T7 
+    adcx   \T4, \T6        
+    mulx   \T0, \T6, 16\M1 
+    adox   \T4, \T6  
+    adcx   \T0, \T1     
+    mulx   \T1, \T7, 24\M1   
+    adcx   \T1, \T5  
+    mulx   \T5, \T6, 32\M1     
+    adcx   \T3, \T5   
+    mulx   \T5, rdx, 40\M1         // rdx is overwritten with the last low half; it is reloaded at the start of the next row
+    adcx   \T5, rax 
+    // Finish the OF chain with the low halves still held in T7, T6 and rdx, then add the final OF into the new top column.
+    adox   \T0, \T7  
+    adox   \T1, \T6  
+    adox   \T3, rdx  
+    adox   \T5, rax         
+    // Row 2 (limb 2 of M0, offset 16): same pattern with the column registers rotated.
+    mov    rdx, 16\M0 
+    mulx   \T6, \T7, \M1
+    xor    rax, rax 
+    adcx   \T2, \T7 
+    mov    16\C, \T2           // C2_final 
+    adcx   \T4, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T4, \T7 
+    adcx   \T0, \T6        
+    mulx   \T2, \T6, 16\M1
+    adox   \T0, \T6 
+    adcx   \T1, \T2     
+    mulx   \T2, \T7, 24\M1   
+    adcx   \T3, \T2  
+    mulx   \T2, \T6, 32\M1     
+    adcx   \T5, \T2   
+    mulx   \T2, rdx, 40\M1     
+    adcx   \T2, rax 
+    // Finish the OF chain for row 2.
+    adox   \T1, \T7  
+    adox   \T3, \T6  
+    adox   \T5, rdx 
+    adox   \T2, rax           
+    // Row 3 (limb 3 of M0, offset 24).
+    mov    rdx, 24\M0 
+    mulx   \T6, \T7, \M1
+    xor    rax, rax 
+    adcx   \T4, \T7 
+    mov    24\C, \T4           // C3_final 
+    adcx   \T0, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T0, \T7
+    adcx   \T1, \T6        
+    mulx   \T4, \T6, 16\M1
+    adox   \T1, \T6  
+    adcx   \T3, \T4     
+    mulx   \T4, \T7, 24\M1   
+    adcx   \T5, \T4  
+    mulx   \T4, \T6, 32\M1     
+    adcx   \T2, \T4   
+    mulx   \T4, rdx, 40\M1     
+    adcx   \T4, rax
+    // Finish the OF chain for row 3.
+    adox   \T3, \T7  
+    adox   \T5, \T6  
+    adox   \T2, rdx  
+    adox   \T4, rax         
+    // Row 4 (limb 4 of M0, offset 32).
+    mov    rdx, 32\M0 
+    mulx   \T6, \T7, \M1 
+    xor    rax, rax
+    adcx   \T0, \T7 
+    mov    32\C, \T0           // C4_final 
+    adcx   \T1, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T1, \T7 
+    adcx   \T3, \T6        
+    mulx   \T0, \T6, 16\M1 
+    adox   \T3, \T6 
+    adcx   \T5, \T0     
+    mulx   \T0, \T7, 24\M1   
+    adcx   \T2, \T0  
+    mulx   \T0, \T6, 32\M1     
+    adcx   \T4, \T0   
+    mulx   \T0, rdx, 40\M1     
+    adcx   \T0, rax 
+    // Finish the OF chain for row 4.
+    adox   \T5, \T7  
+    adox   \T2, \T6  
+    adox   \T4, rdx  
+    adox   \T0, rax           
+    // Row 5 (limb 5 of M0, offset 40): last row.
+    mov    rdx, 40\M0 
+    mulx   \T6, \T7, \M1 
+    xor    rax, rax
+    adcx   \T1, \T7 
+    mov    40\C, \T1           // C5_final 
+    adcx   \T3, \T6     
+    mulx   \T6, \T7, 8\M1
+    adox   \T3, \T7 
+    adcx   \T5, \T6        
+    mulx   \T1, \T6, 16\M1
+    adox   \T5, \T6 
+    adcx   \T2, \T1     
+    mulx   \T1, \T7, 24\M1   
+    adcx   \T4, \T1  
+    mulx   \T1, \T6, 32\M1     
+    adcx   \T0, \T1   
+    mulx   \T1, rdx, 40\M1     
+    adcx   \T1, rax 
+    // Finish the OF chain for row 5, then store the six remaining columns (limbs 6..11 of C).
+    adox   \T2, \T7 
+    adox   \T4, \T6 
+    adox   \T0, rdx 
+    adox   \T1, rax 
+    mov    48\C, \T3 
+    mov    56\C, \T5 
+    mov    64\C, \T2 
+    mov    72\C, \T4
+    mov    80\C, \T0 
+    mov    88\C, \T1 
+.endm
+
+#else
+
+.macro MUL384_SCHOOL M0, M1, C, S, T0, T1, T2, T3, T4, T5, T6, T7 
+    mov    rdx, \M0                // row 0 (limb 0 of M0): rdx is the implicit mulx multiplicand
+    mulx   \T0, \T1, \M1    
+    mulx   \T2, \T3, 8\M1
+    mov    \C, \T1             // C0_final 
+    xor    rax, rax                // rax = 0; it stays zero through row 1 and supplies the zero addend for adc
+    mulx   \T4, \T5, 16\M1 
+    add    \T0, \T3               
+    adc    \T2, \T5     
+    mulx   \T1, \T3, 24\M1
+    adc    \T4, \T3         
+    mulx   \T5, \T6, 32\M1 
+    adc    \T1, \T6        
+    mulx   \T3, \T7, 40\M1    
+    adc    \T5, \T7       
+    adc    \T3, rax        
+    // Rows 1..5 share one pattern. Only CF is used here, so each row makes two adc passes; two low halves are parked in the stack slots S and S+8 between the passes.
+    mov    rdx, 8\M0 
+    mulx   \T6, \T7, \M1 
+    add    \T0, \T7 
+    mov    8\C, \T0            // C1_final 
+    adc    \T2, \T6     
+    mulx   \T6, \T7, 8\M1
+    mov    \S, \T7             // store T7
+    adc    \T4, \T6        
+    mulx   \T0, \T6, 16\M1   
+    mov    8\S, \T6            // store T6 
+    adc    \T0, \T1     
+    mulx   \T1, \T7, 24\M1   
+    adc    \T1, \T5  
+    mulx   \T5, \T6, 32\M1     
+    adc    \T3, \T5   
+    mulx   \T5, rdx, 40\M1         // rdx is overwritten with the last low half; it is reloaded at the start of the next row
+    adc    \T5, rax 
+    // Second pass: xor re-zeroes rax and clears CF, then the parked low halves (S, S+8) and those still in T7, T6 and rdx are added.
+    xor    rax, rax
+    add    \T2, \S 
+    adc    \T4, 8\S  
+    adc    \T0, \T7  
+    adc    \T1, \T6  
+    adc    \T3, rdx  
+    adc    \T5, rax         
+    // Row 2 (limb 2 of M0, offset 16): same pattern with the column registers rotated.
+    mov    rdx, 16\M0 
+    mulx   \T6, \T7, \M1 
+    add    \T2, \T7 
+    mov    16\C, \T2           // C2_final 
+    adc    \T4, \T6     
+    mulx   \T6, \T7, 8\M1
+    mov    \S, \T7             // store T7
+    adc    \T0, \T6        
+    mulx   \T2, \T6, 16\M1   
+    mov    8\S, \T6            // store T6 
+    adc    \T1, \T2     
+    mulx   \T2, \T7, 24\M1   
+    adc    \T3, \T2  
+    mulx   \T2, \T6, 32\M1     
+    adc    \T5, \T2   
+    mulx   \T2, rdx, 40\M1     
+    adc    \T2, rax 
+    // Second pass for row 2.
+    xor    rax, rax
+    add    \T4, \S 
+    adc    \T0, 8\S  
+    adc    \T1, \T7  
+    adc    \T3, \T6  
+    adc    \T5, rdx 
+    adc    \T2, rax           
+    // Row 3 (limb 3 of M0, offset 24).
+    mov    rdx, 24\M0 
+    mulx   \T6, \T7, \M1 
+    add    \T4, \T7 
+    mov    24\C, \T4           // C3_final 
+    adc    \T0, \T6     
+    mulx   \T6, \T7, 8\M1
+    mov    \S, \T7             // store T7
+    adc    \T1, \T6        
+    mulx   \T4, \T6, 16\M1   
+    mov    8\S, \T6            // store T6 
+    adc    \T3, \T4     
+    mulx   \T4, \T7, 24\M1   
+    adc    \T5, \T4  
+    mulx   \T4, \T6, 32\M1     
+    adc    \T2, \T4   
+    mulx   \T4, rdx, 40\M1     
+    adc    \T4, rax
+    // Second pass for row 3.
+    xor    rax, rax
+    add    \T0, \S 
+    adc    \T1, 8\S  
+    adc    \T3, \T7  
+    adc    \T5, \T6  
+    adc    \T2, rdx  
+    adc    \T4, rax         
+    // Row 4 (limb 4 of M0, offset 32).
+    mov    rdx, 32\M0 
+    mulx   \T6, \T7, \M1 
+    add    \T0, \T7 
+    mov    32\C, \T0           // C4_final 
+    adc    \T1, \T6     
+    mulx   \T6, \T7, 8\M1
+    mov    \S, \T7             // store T7
+    adc    \T3, \T6        
+    mulx   \T0, \T6, 16\M1   
+    mov    8\S, \T6            // store T6 
+    adc    \T5, \T0     
+    mulx   \T0, \T7, 24\M1   
+    adc    \T2, \T0  
+    mulx   \T0, \T6, 32\M1     
+    adc    \T4, \T0   
+    mulx   \T0, rdx, 40\M1     
+    adc    \T0, rax 
+    // Second pass for row 4.
+    xor    rax, rax
+    add    \T1, \S 
+    adc    \T3, 8\S  
+    adc    \T5, \T7  
+    adc    \T2, \T6  
+    adc    \T4, rdx  
+    adc    \T0, rax           
+    // Row 5 (limb 5 of M0, offset 40): last row.
+    mov    rdx, 40\M0 
+    mulx   \T6, \T7, \M1 
+    add    \T1, \T7 
+    mov    40\C, \T1           // C5_final 
+    adc    \T3, \T6     
+    mulx   \T6, \T7, 8\M1
+    mov    \S, \T7             // store T7
+    adc    \T5, \T6        
+    mulx   \T1, \T6, 16\M1   
+    mov    8\S, \T6            // store T6 
+    adc    \T2, \T1     
+    mulx   \T1, \T7, 24\M1   
+    adc    \T4, \T1  
+    mulx   \T1, \T6, 32\M1     
+    adc    \T0, \T1   
+    mulx   \T1, rdx, 40\M1     
+    adc    \T1, rax 
+    // Second pass for row 5 (no xor: add starts a fresh carry and the final adc uses an immediate 0), then limbs 6..11 of C are stored.
+    add    \T3, \S 
+    adc    \T5, 8\S  
+    adc    \T2, \T7 
+    adc    \T4, \T6 
+    adc    \T0, rdx 
+    adc    \T1, 0 
+    mov    48\C, \T3 
+    mov    56\C, \T5 
+    mov    64\C, \T2 
+    mov    72\C, \T4
+    mov    80\C, \T0 
+    mov    88\C, \T1 
+.endm
+
+#endif
+
+
+//*****************************************************************************
+//  751-bit multiplication using Karatsuba (one level), schoolbook (two levels)
+//***************************************************************************** 
+.global fmt(mul751_asm)
+fmt(mul751_asm):               // c [reg_p3] = a [reg_p1] * b [reg_p2]; c is written before a and b are fully read, so c must not alias a or b
+    push   r12
+    push   r13 
+    push   r14 
+    push   r15
+    mov    rcx, reg_p3          // rcx <- pointer to the 24-word result c
+
+    // [rsp] <- AH + AL, rax <- mask
+    xor    rax, rax
+    mov    r8, [reg_p1]
+    mov    r9, [reg_p1+8]
+    mov    r10, [reg_p1+16]
+    mov    r11, [reg_p1+24] 
+    mov    r12, [reg_p1+32] 
+    mov    r13, [reg_p1+40] 
+    push   rbx 
+    push   rbp
+    sub    rsp, 152             // frame: [rsp..+47] AH+AL, [rsp+48..+95] BH+BL, [rsp+96..+127] low 4 words of the masked sum, [rsp+128] scratch handed to MUL384_SCHOOL
+    add    r8, [reg_p1+48]
+    adc    r9, [reg_p1+56]
+    adc    r10, [reg_p1+64]
+    adc    r11, [reg_p1+72]
+    adc    r12, [reg_p1+80]
+    adc    r13, [reg_p1+88]
+    sbb    rax, 0               // rax <- 0 - carry: all ones if AH + AL carried out of 384 bits, zero otherwise
+    mov    [rsp], r8
+    mov    [rsp+8], r9
+    mov    [rsp+16], r10
+    mov    [rsp+24], r11
+    mov    [rsp+32], r12
+    mov    [rsp+40], r13
+
+    // [rsp+48] <- BH + BL, rdx <- mask
+    xor    rdx, rdx
+    mov    r8, [reg_p2]
+    mov    r9, [reg_p2+8]
+    mov    rbx, [reg_p2+16]
+    mov    rbp, [reg_p2+24] 
+    mov    r14, [reg_p2+32]     
+    mov    r15, [reg_p2+40]     
+    add    r8, [reg_p2+48]
+    adc    r9, [reg_p2+56]
+    adc    rbx, [reg_p2+64]
+    adc    rbp, [reg_p2+72]
+    adc    r14, [reg_p2+80]
+    adc    r15, [reg_p2+88]
+    sbb    rdx, 0               // rdx <- 0 - carry: all ones if BH + BL carried out of 384 bits, zero otherwise
+    mov    [rsp+48], r8
+    mov    [rsp+56], r9
+    mov    [rsp+64], rbx
+    mov    [rsp+72], rbp
+    mov    [rsp+80], r14     
+    mov    [rsp+88], r15     
+    
+    // r8,r9,rbx,rbp,r14,r15 <- (BH + BL) masked with rax (carry of AH + AL); low two words parked in [rcx], [rcx+8]
+    and    r8, rax
+    and    r9, rax
+    and    rbx, rax
+    and    rbp, rax
+    and    r14, rax     
+    and    r15, rax     
+    mov    [rcx], r8
+    mov    [rcx+8], r9
+
+    // r8-r13 <- (AH + AL) masked with rdx (carry of BH + BL); r10-r13 still hold words 2-5 of AH + AL
+    mov    r8, [rsp]
+    mov    r9, [rsp+8]
+    and    r8, rdx
+    and    r9, rdx
+    and    r10, rdx
+    and    r11, rdx
+    and    r12, rdx
+    and    r13, rdx
+
+    // [rsp+96..+120],r12,r13 <- masked (AH + AL) + masked (BH + BL); the top two words stay in r12,r13
+    mov    rax, [rcx]
+    mov    rdx, [rcx+8]
+    add    r8, rax
+    adc    r9, rdx
+    adc    r10, rbx
+    adc    r11, rbp
+    adc    r12, r14         
+    adc    r13, r15         
+    mov    [rsp+96], r8
+    mov    [rsp+104], r9
+    mov    [rsp+112], r10
+    mov    [rsp+120], r11
+
+    // [rcx] <- AL x BL
+    MUL384_SCHOOL  [reg_p1], [reg_p2], [rcx], [rsp+128], r8, r9, r10, r11, rbx, rbp, r14, r15     // Result C0-C5 
+
+    // [rcx+96] <- (AH+AL) x (BH+BL), low part 
+    MUL384_SCHOOL  [rsp], [rsp+48], [rcx+96], [rsp+128], r8, r9, r10, r11, rbx, rbp, r14, r15
+
+    // [rsp] <- AH x BH 
+    MUL384_SCHOOL  [reg_p1+48], [reg_p2+48], [rsp], [rsp+128], r8, r9, r10, r11, rbx, rbp, r14, r15
+    
+    // r8-r13 <- (AH+AL) x (BH+BL), final step: masked sum + upper six words [rcx+144..+184] of the product
+    mov    r8, [rsp+96]
+    mov    r9, [rsp+104]
+    mov    r10, [rsp+112]
+    mov    r11, [rsp+120]
+    mov    rax, [rcx+144]
+    add    r8, rax
+    mov    rax, [rcx+152]
+    adc    r9, rax
+    mov    rax, [rcx+160]
+    adc    r10, rax
+    mov    rax, [rcx+168]
+    adc    r11, rax
+    mov    rax, [rcx+176]
+    adc    r12, rax
+    mov    rax, [rcx+184]
+    adc    r13, rax
+    
+    // rdi,rdx,rbx,rbp,r14,r15,r8-r13 <- (AH+AL) x (BH+BL) - ALxBL
+    mov    rdi, [rcx+96]        // rdi is reused as a data register from here on
+    sub    rdi, [rcx]
+    mov    rdx, [rcx+104]
+    sbb    rdx, [rcx+8]
+    mov    rbx, [rcx+112]
+    sbb    rbx, [rcx+16]
+    mov    rbp, [rcx+120]
+    sbb    rbp, [rcx+24]
+    mov    r14, [rcx+128]     
+    sbb    r14, [rcx+32]   
+    mov    r15, [rcx+136]     
+    sbb    r15, [rcx+40]     
+    sbb    r8, [rcx+48]
+    sbb    r9, [rcx+56]
+    sbb    r10, [rcx+64]
+    sbb    r11, [rcx+72]
+    sbb    r12, [rcx+80]
+    sbb    r13, [rcx+88]
+    
+    // rdi,rdx,rbx,rbp,r14,r15,r8-r13 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
+    sub    rdi, [rsp]
+    sbb    rdx, [rsp+8]
+    sbb    rbx, [rsp+16]
+    sbb    rbp, [rsp+24]
+    sbb    r14, [rsp+32]     
+    sbb    r15, [rsp+40]   
+    sbb    r8, [rsp+48]
+    sbb    r9, [rsp+56]
+    sbb    r10, [rsp+64]
+    sbb    r11, [rsp+72]
+    sbb    r12, [rsp+80]
+    sbb    r13, [rsp+88]
+    
+    mov    rax, [rcx+48]        // add the middle term into the result starting at word 6; one carry chain runs through to word 23
+    add    rax, rdi
+    mov    [rcx+48], rax    // Result C6-C11
+    mov    rax, [rcx+56]
+    adc    rax, rdx
+    mov    [rcx+56], rax 
+    mov    rax, [rcx+64]
+    adc    rax, rbx
+    mov    [rcx+64], rax 
+    mov    rax, [rcx+72]
+    adc    rax, rbp
+    mov    [rcx+72], rax 
+    mov    rax, [rcx+80]
+    adc    rax, r14           
+    mov    [rcx+80], rax 
+    mov    rax, [rcx+88]
+    adc    rax, r15             
+    mov    [rcx+88], rax
+    mov    rax, [rsp]
+    adc    r8, rax 
+    mov    [rcx+96], r8    // Result C12-C17: upper middle term + low half of AHxBH
+    mov    rax, [rsp+8]
+    adc    r9, rax
+    mov    [rcx+104], r9 
+    mov    rax, [rsp+16]
+    adc    r10, rax
+    mov    [rcx+112], r10 
+    mov    rax, [rsp+24]
+    adc    r11, rax
+    mov    [rcx+120], r11 
+    mov    rax, [rsp+32]
+    adc    r12, rax
+    mov    [rcx+128], r12 
+    mov    rax, [rsp+40]
+    adc    r13, rax
+    mov    [rcx+136], r13
+    mov    r8, [rsp+48]         // r8-r13 <- high half of AHxBH; mov leaves the pending carry intact
+    mov    r9, [rsp+56]
+    mov    r10, [rsp+64]
+    mov    r11, [rsp+72]
+    mov    r12, [rsp+80]
+    mov    r13, [rsp+88]
+    adc    r8, 0
+    adc    r9, 0
+    adc    r10, 0
+    adc    r11, 0
+    adc    r12, 0
+    adc    r13, 0
+    add    rsp, 152             // release the frame; flags are no longer needed after the adc chain above
+    mov    [rcx+144], r8        // Result C18-C23
+    mov    [rcx+152], r9 
+    mov    [rcx+160], r10 
+    mov    [rcx+168], r11 
+    mov    [rcx+176], r12 
+    mov    [rcx+184], r13 
+     
+    pop    rbp  
+    pop    rbx
+    pop    r15
+    pop    r14
+    pop    r13
+    pop    r12
+    ret
+
+#else
+
+//***********************************************************************
+//  Integer multiplication
+//  Based on Karatsuba method
+//  Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
+//  NOTE: a=c or b=c are not allowed
+//***********************************************************************
+.global fmt(mul751_asm)
+fmt(mul751_asm):
+  push   r12
+  push   r13
+  push   r14
+  mov    rcx, reg_p3
+  
+  // rcx[0-5] <- AH+AL
+  xor    rax, rax
+  mov    r8, [reg_p1+48]
+  mov    r9, [reg_p1+56]
+  mov    r10, [reg_p1+64]
+  mov    r11, [reg_p1+72]
+  mov    r12, [reg_p1+80]
+  mov    r13, [reg_p1+88]
+  add    r8, [reg_p1] 
+  adc    r9, [reg_p1+8] 
+  adc    r10, [reg_p1+16] 
+  adc    r11, [reg_p1+24] 
+  adc    r12, [reg_p1+32] 
+  adc    r13, [reg_p1+40] 
+  push   r15               // push and the stores below leave the carry of AH+AL intact for the sbb
+  mov    [rcx], r8
+  mov    [rcx+8], r9
+  mov    [rcx+16], r10
+  mov    [rcx+24], r11
+  mov    [rcx+32], r12
+  mov    [rcx+40], r13
+  sbb    rax, 0            // rax <- 0 - carry: all ones if AH+AL carried out of 384 bits, zero otherwise
+  sub    rsp, 96           // Allocating space in stack
+       
+  // rcx[6-11] <- BH+BL
+  xor    rdx, rdx
+  mov    r8, [reg_p2+48]
+  mov    r9, [reg_p2+56]
+  mov    r10, [reg_p2+64]
+  mov    r11, [reg_p2+72]
+  mov    r12, [reg_p2+80]
+  mov    r13, [reg_p2+88]
+  add    r8, [reg_p2] 
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  adc    r12, [reg_p2+32] 
+  adc    r13, [reg_p2+40] 
+  mov    [rcx+48], r8
+  mov    [rcx+56], r9
+  mov    [rcx+64], r10
+  mov    [rcx+72], r11
+  mov    [rcx+80], r12
+  mov    [rcx+88], r13
+  sbb    rdx, 0            // rdx <- 0 - carry: all ones if BH+BL carried out of 384 bits, zero otherwise
+  mov    [rsp+80], rax     // [rsp+80] <- carry mask of AH+AL
+  mov    [rsp+88], rdx     // [rsp+88] <- carry mask of BH+BL
+  
+  // (rsp[0-8],r10,r8,r9) <- (AH+AL)*(BH+BL); r8-r10 enter holding words 0-2 of BH+BL
+  mov    r11, [rcx]
+  mov    rax, r8 
+  mul    r11
+  mov    [rsp], rax        // c0
+  mov    r14, rdx
+  
+  xor    r15, r15
+  mov    rax, r9
+  mul    r11
+  xor    r9, r9
+  add    r14, rax
+  adc    r9, rdx
+  
+  mov    r12, [rcx+8] 
+  mov    rax, r8 
+  mul    r12
+  add    r14, rax
+  mov    [rsp+8], r14      // c1 
+  adc    r9, rdx
+  adc    r15, 0
+  
+  xor    r8, r8
+  mov    rax, r10 
+  mul    r11
+  add    r9, rax
+  mov    r13, [rcx+48] 
+  adc    r15, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+16] 
+  mul    r13
+  add    r9, rax
+  adc    r15, rdx 
+  mov    rax, [rcx+56] 
+  adc    r8, 0
+  
+  mul    r12
+  add    r9, rax
+  mov    [rsp+16], r9      // c2 
+  adc    r15, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [rcx+72] 
+  mul    r11
+  add    r15, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [rcx+24] 
+  mul    r13
+  add    r15, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, r10 
+  mul    r12
+  add    r15, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    r14, [rcx+16] 
+  mov    rax, [rcx+56] 
+  mul    r14
+  add    r15, rax
+  mov    [rsp+24], r15     // c3 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [rcx+80] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [rcx+64] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    r15, [rcx+48] 
+  mov    rax, [rcx+32] 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [rcx+72] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    r13, [rcx+24] 
+  mov    rax, [rcx+56] 
+  mul    r13
+  add    r8, rax
+  mov    [rsp+32], r8      // c4 
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [rcx+88] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+64] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+72] 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+40] 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+80] 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    r15, [rcx+32] 
+  mov    rax, [rcx+56] 
+  mul    r15
+  add    r9, rax
+  mov    [rsp+40], r9      // c5 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [rcx+64] 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [rcx+88] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [rcx+80] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    r11, [rcx+40] 
+  mov    rax, [rcx+56] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [rcx+72] 
+  mul    r13
+  add    r10, rax
+  mov    [rsp+48], r10     // c6 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [rcx+88] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [rcx+64] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [rcx+80]
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [rcx+72] 
+  mul    r15
+  add    r8, rax
+  mov    [rsp+56], r8      // c7 
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [rcx+72] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+80] 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+88] 
+  mul    r13
+  add    r9, rax
+  mov    [rsp+64], r9      // c8 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [rcx+88]
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+
+  mov    rax, [rcx+80] 
+  mul    r11
+  add    r10, rax          // c9 
+  adc    r8, rdx
+  adc    r9, 0
+
+  mov    rax, [rcx+88] 
+  mul    r11
+  add    r8, rax           // c10 
+  adc    r9, rdx           // c11 
+  
+  mov    rax, [rsp+88]     // rax <- carry mask of BH+BL; rdx,r12,r14,r13,r15,r11 become words 0-5 of AH+AL under that mask
+  mov    rdx, [rcx]
+  and    r12, rax
+  and    r14, rax
+  and    rdx, rax
+  and    r13, rax
+  and    r15, rax
+  and    r11, rax
+  mov    rax, [rsp+48]     // add the masked AH+AL to words c6-c11 of the product
+  add    rdx, rax
+  mov    rax, [rsp+56]
+  adc    r12, rax
+  mov    rax, [rsp+64]
+  adc    r14, rax
+  adc    r13, r10
+  adc    r15, r8
+  adc    r11, r9
+  mov    rax, [rsp+80]     // rax <- carry mask of AH+AL, read before [rsp+80] is overwritten below
+  mov    [rsp+48], rdx
+  mov    [rsp+56], r12
+  mov    [rsp+64], r14
+  mov    [rsp+72], r13
+  mov    [rsp+80], r15
+  mov    [rsp+88], r11
+  
+  mov    r8, [rcx+48]      // r8-r13 <- BH+BL, masked below with the AH+AL carry mask
+  mov    r9, [rcx+56]
+  mov    r10, [rcx+64]
+  mov    r11, [rcx+72]
+  mov    r12, [rcx+80]
+  mov    r13, [rcx+88]
+  and    r8, rax
+  and    r9, rax
+  and    r10, rax
+  and    r11, rax
+  and    r12, rax
+  and    r13, rax
+  mov    rax, [rsp+48]
+  add    r8, rax
+  mov    rax, [rsp+56]
+  adc    r9, rax
+  mov    rax, [rsp+64]
+  adc    r10, rax
+  mov    rax, [rsp+72]
+  adc    r11, rax
+  mov    rax, [rsp+80]
+  adc    r12, rax
+  mov    rax, [rsp+88]
+  adc    r13, rax
+  mov    [rsp+48], r8      // words 6, 7, 9 stored here; words 8, 10, 11 (r10, r12, r13) are stored inside the AL*BL code below
+  mov    [rsp+56], r9
+  mov    [rsp+72], r11
+  
+  // rcx[0-11] <- AL*BL
+  mov    r11, [reg_p1]
+  mov    rax, [reg_p2] 
+  mul    r11
+  xor    r9, r9
+  mov    [rcx], rax        // c0
+  mov    [rsp+64], r10     // deferred store: word 8 of (AH+AL)*(BH+BL), done before r10 is reused
+  mov    r8, rdx
+
+  mov    rax, [reg_p2+8]
+  mul    r11
+  xor    r10, r10
+  add    r8, rax
+  mov    [rsp+80], r12     // deferred store: word 10 of (AH+AL)*(BH+BL), done before r12 is reloaded
+  adc    r9, rdx
+
+  mov    r12, [reg_p1+8] 
+  mov    rax, [reg_p2] 
+  mul    r12
+  add    r8, rax
+  mov    [rcx+8], r8       // c1 
+  adc    r9, rdx
+  mov    [rsp+88], r13     // deferred store: word 11 of (AH+AL)*(BH+BL); mov does not disturb the carry chain
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+16] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    r13, [reg_p2] 
+  mov    rax, [reg_p1+16] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+8] 
+  mul    r12
+  add    r9, rax
+  mov    [rcx+16], r9      // c2 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [reg_p2+24] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p1+24] 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+16] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    r14, [reg_p1+16] 
+  mov    rax, [reg_p2+8] 
+  mul    r14
+  add    r10, rax
+  mov    [rcx+24], r10     // c3 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [reg_p2+32] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+16] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p1+32] 
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+24] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    r13, [reg_p1+24] 
+  mov    rax, [reg_p2+8] 
+  mul    r13
+  add    r8, rax
+  mov    [rcx+32], r8      // c4 
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+40] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+16] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+24] 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    r11, [reg_p1+40] 
+  mov    rax, [reg_p2] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+32] 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    r15, [reg_p1+32] 
+  mov    rax, [reg_p2+8] 
+  mul    r15
+  add    r9, rax
+  mov    [rcx+40], r9      // c5 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [reg_p2+16] 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+40] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+32] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+8] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+24] 
+  mul    r13
+  add    r10, rax
+  mov    [rcx+48], r10     // c6 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [reg_p2+40] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+16] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+32]
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+24] 
+  mul    r15
+  add    r8, rax
+  mov    [rcx+56], r8      // c7 
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+24] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+32] 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+40] 
+  mul    r13
+  add    r9, rax
+  mov    [rcx+64], r9     // c8 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [reg_p2+40]
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+
+  mov    rax, [reg_p2+32] 
+  mul    r11
+  add    r10, rax
+  mov    [rcx+72], r10     // c9 
+  adc    r8, rdx
+  adc    r9, 0
+
+  mov    rax, [reg_p2+40] 
+  mul    r11
+  add    r8, rax
+  mov    [rcx+80], r8      // c10 
+  adc    r9, rdx   
+  mov    [rcx+88], r9      // c11 
+
+  // rcx[12-23] <- AH*BH
+  mov    r11, [reg_p1+48]
+  mov    rax, [reg_p2+48] 
+  mul    r11
+  xor    r9, r9
+  mov    [rcx+96], rax       // c0
+  mov    r8, rdx
+
+  mov    rax, [reg_p2+56]
+  mul    r11
+  xor    r10, r10
+  add    r8, rax
+  adc    r9, rdx
+
+  mov    r12, [reg_p1+56] 
+  mov    rax, [reg_p2+48] 
+  mul    r12
+  add    r8, rax
+  mov    [rcx+104], r8      // c1 
+  adc    r9, rdx
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+64] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    r13, [reg_p2+48] 
+  mov    rax, [reg_p1+64] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+56] 
+  mul    r12
+  add    r9, rax
+  mov    [rcx+112], r9     // c2 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [reg_p2+72] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p1+72] 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+64] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    r14, [reg_p1+64] 
+  mov    rax, [reg_p2+56] 
+  mul    r14
+  add    r10, rax
+  mov    [rcx+120], r10    // c3 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [reg_p2+80] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+64] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    r15, [reg_p1+80] 
+  mov    rax, r13 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+72] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    r13, [reg_p1+72] 
+  mov    rax, [reg_p2+56] 
+  mul    r13
+  add    r8, rax
+  mov    [rcx+128], r8     // c4 
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+88] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+64] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+72] 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    r11, [reg_p1+88] 
+  mov    rax, [reg_p2+48] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+80] 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+56] 
+  mul    r15
+  add    r9, rax
+  mov    [rcx+136], r9     // c5 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [reg_p2+64] 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+88] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+80] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+56] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+72] 
+  mul    r13
+  add    r10, rax
+  mov    [rcx+144], r10    // c6 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [reg_p2+88] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+64] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+80]
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+72] 
+  mul    r15
+  add    r8, rax
+  mov    [rcx+152], r8     // c7 
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+72] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+80] 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+88] 
+  mul    r13
+  add    r9, rax
+  mov    [rcx+160], r9     // c8 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+88]  // from here no third accumulator word is kept, unlike AL*BL - NOTE(review): presumably safe because the top words of a and b are short for 751-bit inputs; confirm
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+
+  mov    rax, [reg_p2+80] 
+  mul    r11
+  add    r10, rax
+  mov    [rcx+168], r10     // c9 
+  adc    r8, rdx
+
+  mov    rax, [reg_p2+88] 
+  mul    r11
+  add    r8, rax
+  mov    [rcx+176], r8      // c10 
+  adc    rdx, 0   
+  mov    [rcx+184], rdx     // c11  
+      
+  // [r8-r15,rax,rdx,rdi,[rsp]] <- (AH+AL)*(BH+BL) - AL*BL 
+  mov    r8,  [rsp]
+  sub    r8,  [rcx] 
+  mov    r9,  [rsp+8]
+  sbb    r9,  [rcx+8]
+  mov    r10, [rsp+16]
+  sbb    r10, [rcx+16]
+  mov    r11, [rsp+24]
+  sbb    r11, [rcx+24] 
+  mov    r12, [rsp+32]
+  sbb    r12, [rcx+32]
+  mov    r13, [rsp+40]
+  sbb    r13, [rcx+40] 
+  mov    r14, [rsp+48]
+  sbb    r14, [rcx+48] 
+  mov    r15, [rsp+56]
+  sbb    r15, [rcx+56] 
+  mov    rax, [rsp+64]
+  sbb    rax, [rcx+64]
+  mov    rdx, [rsp+72]
+  sbb    rdx, [rcx+72] 
+  mov    rdi, [rsp+80]     // rdi and rsi are reused as data registers from here on
+  sbb    rdi, [rcx+80] 
+  mov    rsi, [rsp+88]
+  sbb    rsi, [rcx+88] 
+  mov    [rsp], rsi        // spill word 11 so rsi can serve as the load temporary below
+      
+  // [r8-r15,rax,rdx,rdi,[rsp]] <- (AH+AL)*(BH+BL) - AL*BL - AH*BH
+  mov    rsi, [rcx+96]
+  sub    r8,  rsi 
+  mov    rsi, [rcx+104]
+  sbb    r9,  rsi
+  mov    rsi, [rcx+112]
+  sbb    r10, rsi
+  mov    rsi, [rcx+120]
+  sbb    r11, rsi 
+  mov    rsi, [rcx+128]
+  sbb    r12, rsi
+  mov    rsi, [rcx+136]
+  sbb    r13, rsi
+  mov    rsi, [rcx+144]
+  sbb    r14, rsi 
+  mov    rsi, [rcx+152]
+  sbb    r15, rsi 
+  mov    rsi, [rcx+160]
+  sbb    rax, rsi
+  mov    rsi, [rcx+168]
+  sbb    rdx, rsi
+  mov    rsi, [rcx+176] 
+  sbb    rdi, rsi
+  mov    rsi, [rsp]        // word 11 comes back from the spill slot into rsi
+  sbb    rsi, [rcx+184]
+      
+  // Final result: add the middle term at word 6 and propagate the carry through word 23
+  add    r8,  [rcx+48] 
+  mov    [rcx+48], r8
+  adc    r9,  [rcx+56]
+  mov    [rcx+56], r9
+  adc    r10, [rcx+64]
+  mov    [rcx+64], r10
+  adc    r11, [rcx+72]
+  mov    [rcx+72], r11
+  adc    r12, [rcx+80]
+  mov    [rcx+80], r12
+  adc    r13, [rcx+88]
+  mov    [rcx+88], r13
+  adc    r14, [rcx+96] 
+  mov    [rcx+96], r14
+  adc    r15, [rcx+104] 
+  mov    [rcx+104], r15
+  adc    rax, [rcx+112]
+  mov    [rcx+112], rax
+  adc    rdx, [rcx+120]
+  mov    [rcx+120], rdx
+  adc    rdi, [rcx+128]
+  mov    [rcx+128], rdi
+  adc    rsi, [rcx+136]
+  mov    [rcx+136], rsi  
+  mov    rax, [rcx+144]
+  adc    rax, 0
+  mov    [rcx+144], rax
+  mov    rax, [rcx+152]
+  adc    rax, 0
+  mov    [rcx+152], rax
+  mov    rax, [rcx+160]
+  adc    rax, 0
+  mov    [rcx+160], rax
+  mov    rax, [rcx+168]
+  adc    rax, 0
+  mov    [rcx+168], rax
+  mov    rax, [rcx+176]
+  adc    rax, 0
+  mov    [rcx+176], rax
+  mov    rax, [rcx+184]
+  adc    rax, 0
+  mov    [rcx+184], rax
+    
+  add    rsp, 96           // Restoring space in stack
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+#endif
+
+
+#ifdef _MULX_
+
+///////////////////////////////////////////////////////////////// MACRO
+// Schoolbook integer multiplication (4 x 7 words, i.e. 256 x 448 bits)
+// Inputs:  memory pointers M0 and M1
+// Outputs: memory locations C, C+8, C+16 (words 0-2), and regs T7, T0, T1, T3, T6, T5, T2, T4 (words 3-10)
+// Temps:   regs T8:T10, rax, rdx (the non-ADX variant also uses memory locations C+32 and C+40)
+/////////////////////////////////////////////////////////////////
+#ifdef _ADX_
+
+.macro MUL256x448_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10 
+    mov    rdx, \M0            // row 0: word 0 of M0 times the seven words of M1
+    mulx   \T0, \T1, \M1    
+    mulx   \T2, \T3, 8\M1
+    mov    \C, \T1             // C0_final 
+    xor    rax, rax            // rax <- 0; also clears CF and OF before the adox chain
+    mulx   \T4, \T5, 16\M1 
+    adox   \T0, \T3               
+    adox   \T2, \T5     
+    mulx   \T1, \T3, 24\M1
+    adox   \T4, \T3         
+    mulx   \T5, \T6, 32\M1 
+    adox   \T1, \T6        
+    mulx   \T3, \T7, 40\M1    
+    adox   \T5, \T7          
+    mulx   \T6, \T8, 48\M1    
+    adox   \T3, \T8          
+    adox   \T6, rax            // running sum, words 1-7: T0, T2, T4, T1, T5, T3, T6
+    
+    mov    rdx, 8\M0           // row 1: word 1 of M0; the CF chain (adcx) and OF chain (adox) run independently
+    mulx   \T8, \T7, \M1 
+    xor    rax, rax 
+    adcx   \T0, \T7 
+    mov    8\C, \T0            // C1_final 
+    adcx   \T2, \T8     
+    mulx   \T7, \T8, 8\M1
+    adox   \T2, \T8 
+    adcx   \T4, \T7        
+    mulx   \T0, \T8, 16\M1  
+    adox   \T4, \T8  
+    adcx   \T0, \T1     
+    mulx   \T1, \T7, 24\M1   
+    adcx   \T1, \T5  
+    mulx   \T5, \T8, 32\M1     
+    adcx   \T3, \T5   
+    mulx   \T5, \T9, 40\M1    
+    adcx   \T6, \T5   
+    mulx   \T5, rdx, 48\M1     // rdx (the multiplier) is overwritten with the last low product word
+    adcx   \T5, rax 
+        
+    adox   \T0, \T7  
+    adox   \T1, \T8  
+    adox   \T3, \T9  
+    adox   \T6, rdx    
+    adox   \T5, rax            // running sum, words 2-8: T2, T4, T0, T1, T3, T6, T5
+    
+    mov    rdx, 16\M0          // row 2: word 2 of M0
+    mulx   \T8, \T7, \M1 
+    xor    rax, rax 
+    adcx   \T2, \T7 
+    mov    16\C, \T2           // C2_final 
+    adcx   \T4, \T8     
+    mulx   \T8, \T7, 8\M1
+    adox   \T4, \T7 
+    adcx   \T0, \T8        
+    mulx   \T2, \T8, 16\M1 
+    adox   \T0, \T8 
+    adcx   \T1, \T2     
+    mulx   \T2, \T7, 24\M1   
+    adcx   \T3, \T2  
+    mulx   \T2, \T8, 32\M1     
+    adcx   \T6, \T2  
+    mulx   \T2, \T9, 40\M1    
+    adcx   \T5, \T2   
+    mulx   \T2, rdx, 48\M1     
+    adcx   \T2, rax 
+         
+    adox   \T1, \T7  
+    adox   \T3, \T8   
+    adox   \T6, \T9  
+    adox   \T5, rdx 
+    adox   \T2, rax            // running sum, words 3-9: T4, T0, T1, T3, T6, T5, T2
+    
+    mov    rdx, 24\M0          // row 3: word 3 of M0
+    mulx   \T8, \T7, \M1
+    xor    rax, rax
+    adcx   \T7, \T4            // T7 <- word 3 of the product; it stays in a register and is not stored to C
+    adcx   \T0, \T8                 
+    mulx   \T8, \T10, 8\M1
+    adox   \T0, \T10 
+    adcx   \T1, \T8        
+    mulx   \T4, \T8, 16\M1
+    adox   \T1, \T8  
+    adcx   \T3, \T4     
+    mulx   \T4, \T10, 24\M1   
+    adcx   \T6, \T4  
+    mulx   \T4, \T8, 32\M1     
+    adcx   \T5, \T4  
+    mulx   \T4, \T9, 40\M1    
+    adcx   \T2, \T4   
+    mulx   \T4, rdx, 48\M1     
+    adcx   \T4, rax 
+        
+    adox   \T3, \T10  
+    adox   \T6, \T8   
+    adox   \T5, \T9  
+    adox   \T2, rdx 
+    adox   \T4, rax            // final words 3-10: T7, T0, T1, T3, T6, T5, T2, T4
+.endm
+
+#else
+
+.macro MUL256x448_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10 
+    mov    rdx, \M0            // row 0: word 0 of M0 times the seven words of M1
+    mulx   \T0, \T1, \M1    
+    mulx   \T2, \T3, 8\M1
+    mov    \C, \T1             // C0_final 
+    xor    rax, rax            // rax <- 0; it stays zero through row 1 and is re-zeroed before each later use
+    mulx   \T4, \T5, 16\M1 
+    add    \T0, \T3               
+    adc    \T2, \T5     
+    mulx   \T1, \T3, 24\M1
+    adc    \T4, \T3         
+    mulx   \T5, \T6, 32\M1 
+    adc    \T1, \T6        
+    mulx   \T3, \T7, 40\M1    
+    adc    \T5, \T7          
+    mulx   \T6, \T8, 48\M1    
+    adc    \T3, \T8          
+    adc    \T6, rax            // running sum, words 1-7: T0, T2, T4, T1, T5, T3, T6
+    
+    mov    rdx, 8\M0           // row 1: word 1 of M0; high product words are added first, low words in a second pass
+    mulx   \T8, \T7, \M1 
+    add    \T0, \T7 
+    mov    8\C, \T0            // C1_final 
+    adc    \T2, \T8     
+    mulx   \T7, \T8, 8\M1
+    mov    32\C, \T8           // store: 32\C is scratch memory for a low product word
+    adc    \T4, \T7        
+    mulx   \T0, \T8, 16\M1   
+    mov    40\C, \T8           // store: 40\C is scratch memory for a low product word
+    adc    \T0, \T1     
+    mulx   \T1, \T7, 24\M1   
+    adc    \T1, \T5  
+    mulx   \T5, \T8, 32\M1     
+    adc    \T3, \T5   
+    mulx   \T5, \T9, 40\M1    
+    adc    \T6, \T5   
+    mulx   \T5, rdx, 48\M1     // rdx (the multiplier) is overwritten with the last low product word
+    adc    \T5, rax 
+        
+    xor    rax, rax            // second pass: add the saved low product words with a fresh carry chain
+    add    \T2, 32\C 
+    adc    \T4, 40\C  
+    adc    \T0, \T7  
+    adc    \T1, \T8  
+    adc    \T3, \T9  
+    adc    \T6, rdx    
+    adc    \T5, rax            // running sum, words 2-8: T2, T4, T0, T1, T3, T6, T5
+    
+    mov    rdx, 16\M0          // row 2: word 2 of M0
+    mulx   \T8, \T7, \M1
+    add    \T2, \T7 
+    mov    16\C, \T2           // C2_final 
+    adc    \T4, \T8     
+    mulx   \T8, \T7, 8\M1
+    mov    32\C, \T7           // store
+    adc    \T0, \T8        
+    mulx   \T2, \T8, 16\M1   
+    mov    40\C, \T8           // store 
+    adc    \T1, \T2     
+    mulx   \T2, \T7, 24\M1   
+    adc    \T3, \T2  
+    mulx   \T2, \T8, 32\M1     
+    adc    \T6, \T2  
+    mulx   \T2, \T9, 40\M1    
+    adc    \T5, \T2   
+    mulx   \T2, rdx, 48\M1     
+    adc    \T2, rax 
+        
+    xor    rax, rax
+    add    \T4, 32\C 
+    adc    \T0, 40\C  
+    adc    \T1, \T7  
+    adc    \T3, \T8   
+    adc    \T6, \T9  
+    adc    \T5, rdx 
+    adc    \T2, rax            // running sum, words 3-9: T4, T0, T1, T3, T6, T5, T2
+    
+    mov    rdx, 24\M0          // row 3: word 3 of M0
+    mulx   \T8, \T7, \M1
+    add    \T7, \T4            // T7 <- word 3 of the product; it stays in a register and is not stored to C
+    adc    \T0, \T8                 
+    mulx   \T8, \T10, 8\M1
+    mov    32\C, \T10          // store
+    adc    \T1, \T8        
+    mulx   \T4, \T8, 16\M1   
+    mov    40\C, \T8           // store 
+    adc    \T3, \T4     
+    mulx   \T4, \T10, 24\M1   
+    adc    \T6, \T4  
+    mulx   \T4, \T8, 32\M1     
+    adc    \T5, \T4  
+    mulx   \T4, \T9, 40\M1    
+    adc    \T2, \T4   
+    mulx   \T4, rdx, 48\M1     
+    adc    \T4, rax 
+        
+    xor    rax, rax
+    add    \T0, 32\C 
+    adc    \T1, 40\C  
+    adc    \T3, \T10  
+    adc    \T6, \T8   
+    adc    \T5, \T9  
+    adc    \T2, rdx 
+    adc    \T4, rax            // final words 3-10: T7, T0, T1, T3, T6, T5, T2, T4
+.endm
+
+#endif
+
+  
+//**************************************************************************************
+//  Montgomery reduction
+//  Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015  
+//  Operation: c [reg_p2] = a [reg_p1]
+//  NOTE: a=c is not allowed; the input a [reg_p1] is used as working storage and is overwritten
+//************************************************************************************** 
+.global fmt(rdc751_asm)
+fmt(rdc751_asm):
+    push   rbx
+    push   rbp
+    push   r12
+    push   r13 
+    push   r14 
+    push   r15  
+
+    // a[0-3] x p751p1_nz --> result: [reg_p2+48], [reg_p2+56], [reg_p2+64], and rbp, r8:r14 
+    MUL256x448_SCHOOL [reg_p1], [rip+fmt(p751p1)+40], [reg_p2+48], r8, r9, r13, r10, r14, r12, r11, rbp, rbx, rcx, r15     
+
+    xor    r15, r15
+    mov    rax, [reg_p2+48]
+    mov    rdx, [reg_p2+56]
+    mov    rbx, [reg_p2+64]
+    add    rax, [reg_p1+40]  
+    adc    rdx, [reg_p1+48]  
+    adc    rbx, [reg_p1+56]
+    mov    [reg_p1+40], rax
+    mov    [reg_p1+48], rdx 
+    mov    [reg_p1+56], rbx  
+    adc    rbp, [reg_p1+64]
+    adc    r8, [reg_p1+72]  
+    adc    r9, [reg_p1+80]  
+    adc    r10, [reg_p1+88]   
+    adc    r11, [reg_p1+96]   
+    adc    r12, [reg_p1+104]   
+    adc    r13, [reg_p1+112]   
+    adc    r14, [reg_p1+120]  
+    adc    r15, [reg_p1+128]
+    mov    [reg_p1+64], rbp   
+    mov    [reg_p1+72], r8  
+    mov    [reg_p1+80], r9  
+    mov    [reg_p1+88], r10  
+    mov    [reg_p1+96], r11  
+    mov    [reg_p1+104], r12  
+    mov    [reg_p1+112], r13  
+    mov    [reg_p1+120], r14
+    mov    [reg_p1+128], r15   
+    mov    r8, [reg_p1+136]  
+    mov    r9, [reg_p1+144]  
+    mov    r10, [reg_p1+152]
+    mov    r11, [reg_p1+160]
+    mov    r12, [reg_p1+168]
+    mov    r13, [reg_p1+176]
+    mov    r14, [reg_p1+184] 
+    adc    r8, 0
+    adc    r9, 0
+    adc    r10, 0
+    adc    r11, 0
+    adc    r12, 0
+    adc    r13, 0
+    adc    r14, 0  
+    mov    [reg_p1+136], r8  
+    mov    [reg_p1+144], r9  
+    mov    [reg_p1+152], r10  
+    mov    [reg_p1+160], r11  
+    mov    [reg_p1+168], r12  
+    mov    [reg_p1+176], r13  
+    mov    [reg_p1+184], r14
+
+    // a[4-7] x p751p1_nz --> result: [reg_p2+48], [reg_p2+56], [reg_p2+64], and rbp, r8:r14 
+    MUL256x448_SCHOOL [reg_p1+32], [rip+fmt(p751p1)+40], [reg_p2+48], r8, r9, r13, r10, r14, r12, r11, rbp, rbx, rcx, r15 
+
+    xor    r15, r15
+    mov    rax, [reg_p2+48]
+    mov    rdx, [reg_p2+56]
+    mov    rbx, [reg_p2+64]
+    add    rax, [reg_p1+72]  
+    adc    rdx, [reg_p1+80]  
+    adc    rbx, [reg_p1+88]
+    mov    [reg_p1+72], rax
+    mov    [reg_p1+80], rdx 
+    mov    [reg_p1+88], rbx
+    adc    rbp, [reg_p1+96]
+    adc    r8, [reg_p1+104]  
+    adc    r9, [reg_p1+112]  
+    adc    r10, [reg_p1+120]   
+    adc    r11, [reg_p1+128]  
+    adc    r12, [reg_p1+136]   
+    adc    r13, [reg_p1+144]   
+    adc    r14, [reg_p1+152]  
+    adc    r15, [reg_p1+160]
+    mov    [reg_p2], rbp       // Final result c0    
+    mov    [reg_p1+104], r8  
+    mov    [reg_p1+112], r9  
+    mov    [reg_p1+120], r10  
+    mov    [reg_p1+128], r11   
+    mov    [reg_p1+136], r12  
+    mov    [reg_p1+144], r13  
+    mov    [reg_p1+152], r14
+    mov    [reg_p1+160], r15
+    mov    r12, [reg_p1+168]
+    mov    r13, [reg_p1+176]
+    mov    r14, [reg_p1+184] 
+    adc    r12, 0
+    adc    r13, 0
+    adc    r14, 0   
+    mov    [reg_p1+168], r12  
+    mov    [reg_p1+176], r13  
+    mov    [reg_p1+184], r14 
+
+    // a[8-11] x p751p1_nz --> result: [reg_p2+48], [reg_p2+56], [reg_p2+64], and rbp, r8:r14 
+    MUL256x448_SCHOOL [reg_p1+64], [rip+fmt(p751p1)+40], [reg_p2+48], r8, r9, r13, r10, r14, r12, r11, rbp, rbx, rcx, r15 
+
+    // Final result c1:c11
+    mov    rax, [reg_p2+48]
+    mov    rdx, [reg_p2+56]
+    mov    rbx, [reg_p2+64] 
+    add    rax, [reg_p1+104] 
+    adc    rdx, [reg_p1+112] 
+    adc    rbx, [reg_p1+120]
+    mov    [reg_p2+8], rax
+    mov    [reg_p2+16], rdx
+    mov    [reg_p2+24], rbx
+    adc    rbp, [reg_p1+128] 
+    adc    r8, [reg_p1+136]  
+    adc    r9, [reg_p1+144] 
+    adc    r10, [reg_p1+152]
+    adc    r11, [reg_p1+160]
+    adc    r12, [reg_p1+168]
+    adc    r13, [reg_p1+176]
+    adc    r14, [reg_p1+184]
+    mov    [reg_p2+32], rbp
+    mov    [reg_p2+40], r8
+    mov    [reg_p2+48], r9
+    mov    [reg_p2+56], r10
+    mov    [reg_p2+64], r11
+    mov    [reg_p2+72], r12
+    mov    [reg_p2+80], r13
+    mov    [reg_p2+88], r14 
+
+    pop    r15
+    pop    r14
+    pop    r13
+    pop    r12
+    pop    rbp
+    pop    rbx
+   ret
+
+  #else
+  
+//***********************************************************************
+//  Montgomery reduction
+//  Based on comba method (one result column at a time, accumulated in the rotating triple r8, r9, r10)
+//  Operation: c [reg_p2] = a [reg_p1]; a is read at byte offsets 0..184, c is written at 0..88
+//  NOTE: a=c is not allowed (c+40..c+88 first hold intermediate words that are re-read as multipliers)
+//*********************************************************************** 
+.global fmt(rdc751_asm)
+fmt(rdc751_asm):
+  push   r12                       // r12-r15 are used as multiplier registers; popped in reverse order before ret
+  push   r13 
+  push   r14 
+  push   r15 
+
+  mov    r11, [reg_p1]             // multiplier: a word 0
+  mov    rax, [rip+fmt(p751p1)+40] 
+  mul    r11                       // rdx:rax = rax * r11
+  xor    r8, r8
+  add    rax, [reg_p1+40]
+  mov    [reg_p2+40], rax    // z5 (intermediate: re-read as a multiplier below, replaced by the final z5 later)
+  adc    r8, rdx                   // r8 = high half + carry, carried into the next column
+  
+  xor    r9, r9
+  mov    rax, [rip+fmt(p751p1)+48] 
+  mul    r11
+  xor    r10, r10
+  add    r8, rax
+  adc    r9, rdx
+
+  mov    r12, [reg_p1+8]           // multiplier: a word 1
+  mov    rax, [rip+fmt(p751p1)+40] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+48]           // add the input word of this column
+  mov    [reg_p2+48], r8    // z6 (intermediate)
+  adc    r9, 0
+  adc    r10, 0
+  
+  xor    r8, r8                    // r8 was just stored; it becomes the top word of the next accumulator (r9 low, r10 mid)
+  mov    rax, [rip+fmt(p751p1)+56] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p751p1)+48] 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    r13, [reg_p1+16]          // multiplier: a word 2
+  mov    rax, [rip+fmt(p751p1)+40] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  add    r9, [reg_p1+56]
+  mov    [reg_p2+56], r9    // z7 (intermediate)
+  adc    r10, 0
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [rip+fmt(p751p1)+64] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+56] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+48] 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    r14, [reg_p1+24]          // multiplier: a word 3
+  mov    rax, [rip+fmt(p751p1)+40] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  add    r10, [reg_p1+64]
+  mov    [reg_p2+64], r10   // z8 (intermediate)
+  adc    r8, 0
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [rip+fmt(p751p1)+72] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+64] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+56] 
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+48] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    r15, [reg_p1+32]          // multiplier: a word 4
+  mov    rax, [rip+fmt(p751p1)+40] 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+72]
+  mov    [reg_p2+72], r8    // z9 (intermediate)
+  adc    r9, 0
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [rip+fmt(p751p1)+80] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p751p1)+72] 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p751p1)+64] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p751p1)+56] 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p751p1)+48] 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rcx, [reg_p2+40]          // multiplier: intermediate z5 stored above
+  mov    rax, [rip+fmt(p751p1)+40] 
+  mul    rcx
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  add    r9, [reg_p1+80]
+  mov    [reg_p2+80], r9    // z10 (intermediate)
+  adc    r10, 0
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [rip+fmt(p751p1)+88] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+80] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+72] 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+64] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+56] 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+48] 
+  mul    rcx
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    r11, [reg_p2+48]          // multiplier: intermediate z6 (r11's previous value is not used again)
+  mov    rax, [rip+fmt(p751p1)+40] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  add    r10, [reg_p1+88]
+  mov    [reg_p2+88], r10    // z11 (intermediate)
+  adc    r8, 0
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [rip+fmt(p751p1)+88] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+80] 
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+72] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+64] 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+56] 
+  mul    rcx
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+48] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    r12, [reg_p2+56]          // multiplier: intermediate z7
+  mov    rax, [rip+fmt(p751p1)+40] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+96]
+  mov    [reg_p2], r8        // z0 (first word of the final result)
+  adc    r9, 0
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [rip+fmt(p751p1)+88] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+
+  mov    rax, [rip+fmt(p751p1)+80] 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+
+  mov    rax, [rip+fmt(p751p1)+72]
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+
+  mov    rax, [rip+fmt(p751p1)+64]
+  mul    rcx
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+
+  mov    rax, [rip+fmt(p751p1)+56]
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+
+  mov    rax, [rip+fmt(p751p1)+48]
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    r13, [reg_p2+64]          // multiplier: intermediate z8
+  mov    rax, [rip+fmt(p751p1)+40]
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  add    r9, [reg_p1+104]
+  mov    [reg_p2+8], r9      // z1
+  adc    r10, 0
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [rip+fmt(p751p1)+88] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+80] 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+72] 
+  mul    rcx
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+64] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+56] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+48] 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    r14, [reg_p2+72]          // multiplier: intermediate z9
+  mov    rax, [rip+fmt(p751p1)+40] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  add    r10, [reg_p1+112]
+  mov    [reg_p2+16], r10    // z2
+  adc    r8, 0
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [rip+fmt(p751p1)+88] 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+80] 
+  mul    rcx
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+72] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+64] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+56] 
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+48] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    r15, [reg_p2+80]          // multiplier: intermediate z10
+  mov    rax, [rip+fmt(p751p1)+40] 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+120]
+  mov    [reg_p2+24], r8     // z3
+  adc    r9, 0
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [rip+fmt(p751p1)+88] 
+  mul    rcx
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p751p1)+80] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p751p1)+72] 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p751p1)+64] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p751p1)+56] 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p751p1)+48] 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rcx, [reg_p2+88]          // multiplier: intermediate z11 (last multiplier loaded)
+  mov    rax, [rip+fmt(p751p1)+40] 
+  mul    rcx
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  add    r9, [reg_p1+128]
+  mov    [reg_p2+32], r9     // z4
+  adc    r10, 0
+  adc    r8, 0
+  
+  xor    r9, r9                    // from here on each column has one product fewer than the previous one
+  mov    rax, [rip+fmt(p751p1)+88] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+80] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+72] 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+64] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+56] 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+48] 
+  mul    rcx
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  add    r10, [reg_p1+136]
+  mov    [reg_p2+40], r10    // z5 (final value; replaces the intermediate stored at the top)
+  adc    r8, 0
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [rip+fmt(p751p1)+88] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+80] 
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+72] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+64] 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+56] 
+  mul    rcx
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+144]
+  mov    [reg_p2+48], r8     // z6
+  adc    r9, 0
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [rip+fmt(p751p1)+88] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p751p1)+80] 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p751p1)+72] 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rax, [rip+fmt(p751p1)+64] 
+  mul    rcx
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  add    r9, [reg_p1+152]
+  mov    [reg_p2+56], r9     // z7
+  adc    r10, 0
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [rip+fmt(p751p1)+88] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+80] 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    rax, [rip+fmt(p751p1)+72] 
+  mul    rcx
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  add    r10, [reg_p1+160]
+  mov    [reg_p2+64], r10    // z8
+  adc    r8, 0
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [rip+fmt(p751p1)+88] 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+
+  mov    rax, [rip+fmt(p751p1)+80] 
+  mul    rcx
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+168]    // z9
+  mov    [reg_p2+72], r8     // z9
+  adc    r9, 0
+  adc    r10, 0
+  
+  mov    rax, [rip+fmt(p751p1)+88] 
+  mul    rcx
+  add    r9, rax
+  adc    r10, rdx
+  add    r9, [reg_p1+176]    // z10
+  mov    [reg_p2+80], r9     // z10
+  adc    r10, 0  
+  add    r10, [reg_p1+184]   // z11 (the carry out of this addition is not stored)
+  mov    [reg_p2+88], r10    // z11
+
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+  #endif
+
+
+//***********************************************************************
+//  Multiprecision addition of two 12-word (751-bit) operands
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]; the carry out of the top word is dropped
+//*********************************************************************** 
+.global fmt(mp_add751_asm)
+fmt(mp_add751_asm):  
+  mov    r8, [reg_p1]              // words 0-5: fetch each word of a right before b is added to it
+  add    r8, [reg_p2] 
+  mov    r9, [reg_p1+8]            // mov leaves CF untouched, so the add/adc chain stays unbroken
+  adc    r9, [reg_p2+8] 
+  mov    r10, [reg_p1+16]
+  adc    r10, [reg_p2+16] 
+  mov    r11, [reg_p1+24]
+  adc    r11, [reg_p2+24] 
+  mov    rax, [reg_p1+32]
+  adc    rax, [reg_p2+32] 
+  mov    rcx, [reg_p1+40]
+  adc    rcx, [reg_p2+40] 
+  mov    [reg_p3], r8              // store the six sums only after all six inputs were read
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], rax
+  mov    [reg_p3+40], rcx
+
+  mov    r8, [reg_p1+48]           // words 6-11: continue with the carry left by word 5
+  adc    r8, [reg_p2+48] 
+  mov    r9, [reg_p1+56] 
+  adc    r9, [reg_p2+56]
+  mov    r10, [reg_p1+64]
+  adc    r10, [reg_p2+64] 
+  mov    r11, [reg_p1+72] 
+  adc    r11, [reg_p2+72]
+  mov    rax, [reg_p1+80]  
+  adc    rax, [reg_p2+80]
+  mov    rcx, [reg_p1+88] 
+  adc    rcx, [reg_p2+88]
+  mov    [reg_p3+48], r8
+  mov    [reg_p3+56], r9
+  mov    [reg_p3+64], r10
+  mov    [reg_p3+72], r11
+  mov    [reg_p3+80], rax
+  mov    [reg_p3+88], rcx
+  ret
+
+
+//***********************************************************************
+//  2x751-bit multiprecision subtraction/addition
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. If c < 0, add p751*2^768
+//*********************************************************************** 
+.global fmt(mp_subadd751x2_asm)
+fmt(mp_subadd751x2_asm):
+  push   r12
+  push   r13 
+  push   r14 
+  push   r15
+  push   rbx
+  xor    rax, rax                  // rax = 0; the final "sbb rax, 0" turns it into the borrow mask
+  mov    r8, [reg_p1]              // words 0-4 of a - b
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    rcx, [reg_p1+32]
+  sub    r8, [reg_p2] 
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    rcx, [reg_p2+32] 
+  mov    [reg_p3], r8              // mov does not modify flags, so the borrow survives the stores and loads
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], rcx
+
+  mov    r8, [reg_p1+40]           // words 5-9
+  mov    r9, [reg_p1+48]
+  mov    r10, [reg_p1+56] 
+  mov    r11, [reg_p1+64]
+  mov    rcx, [reg_p1+72] 
+  sbb    r8, [reg_p2+40] 
+  sbb    r9, [reg_p2+48] 
+  sbb    r10, [reg_p2+56]
+  sbb    r11, [reg_p2+64] 
+  sbb    rcx, [reg_p2+72]
+  mov    [reg_p3+40], r8
+  mov    [reg_p3+48], r9
+  mov    [reg_p3+56], r10
+  mov    [reg_p3+64], r11
+  mov    [reg_p3+72], rcx
+  
+  mov    r8, [reg_p1+80]           // words 10-14
+  mov    r9, [reg_p1+88] 
+  mov    r10, [reg_p1+96]
+  mov    r11, [reg_p1+104]
+  mov    rcx, [reg_p1+112]
+  sbb    r8, [reg_p2+80]
+  sbb    r9, [reg_p2+88]
+  sbb    r10, [reg_p2+96] 
+  sbb    r11, [reg_p2+104] 
+  sbb    rcx, [reg_p2+112]
+  mov    [reg_p3+80], r8 
+  mov    [reg_p3+88], r9
+  mov    [reg_p3+96], r10
+  mov    [reg_p3+104], r11
+  mov    [reg_p3+112], rcx
+  
+  mov    r8, [reg_p1+120]          // words 15-19
+  mov    r9, [reg_p1+128]
+  mov    r10, [reg_p1+136]
+  mov    r11, [reg_p1+144]
+  mov    rcx, [reg_p1+152]
+  sbb    r8, [reg_p2+120] 
+  sbb    r9, [reg_p2+128] 
+  sbb    r10, [reg_p2+136] 
+  sbb    r11, [reg_p2+144] 
+  sbb    rcx, [reg_p2+152]
+  mov    [reg_p3+120], r8
+  mov    [reg_p3+128], r9
+  mov    [reg_p3+136], r10
+  mov    [reg_p3+144], r11
+  mov    [reg_p3+152], rcx  
+   
+  mov    r8, [reg_p1+160]          // words 20-23 stay in r8-r11 until the correction below
+  mov    r9, [reg_p1+168] 
+  mov    r10, [reg_p1+176]  
+  mov    r11, [reg_p1+184]
+  sbb    r8, [reg_p2+160]
+  sbb    r9, [reg_p2+168]
+  sbb    r10, [reg_p2+176]
+  sbb    r11, [reg_p2+184]
+  sbb    rax, 0                    // rax = 0 - borrow: all ones if a < b, otherwise zero
+  
+  // Add p751 anded with the mask in rax 
+  mov    r12, [rip+fmt(p751)]      // word 0; r12 is added to five consecutive result words (p751 words 0-4 are equal)
+  mov    r13, [rip+fmt(p751)+40]
+  mov    r14, [rip+fmt(p751)+48]
+  mov    r15, [rip+fmt(p751)+56]
+  mov    rdi, [rip+fmt(p751)+64]   // NOTE(review): rdi/rsi are overwritten; if reg_p1/reg_p2 map to them this is safe, a and b are not read again
+  mov    rsi, [rip+fmt(p751)+72]
+  mov    rbx, [rip+fmt(p751)+80]
+  mov    rcx, [rip+fmt(p751)+88]
+  and    r12, rax
+  and    r13, rax
+  and    r14, rax
+  and    r15, rax
+  and    rdi, rax
+  and    rsi, rax
+  and    rbx, rax
+  and    rcx, rax
+  mov    rax, [reg_p3+96]          // the mask is no longer needed; rax becomes a scratch register
+  add    rax, r12                  // the addend starts at byte offset 96, i.e. 12 words = 768 bits up
+  mov    [reg_p3+96], rax
+  mov    rax, [reg_p3+104]
+  adc    rax, r12
+  mov    [reg_p3+104], rax
+  mov    rax, [reg_p3+112]
+  adc    rax, r12
+  mov    [reg_p3+112], rax
+  mov    rax, [reg_p3+120]
+  adc    rax, r12
+  mov    [reg_p3+120], rax
+  adc    r12, [reg_p3+128]
+  adc    r13, [reg_p3+136]
+  mov    [reg_p3+128], r12
+  mov    [reg_p3+136], r13
+  mov    r12, [reg_p3+144]
+  mov    r13, [reg_p3+152]
+  adc    r12, r14
+  adc    r13, r15
+  adc    r8, rdi
+  adc    r9, rsi
+  adc    r10, rbx
+  adc    r11, rcx                  // the carry out of the top word is discarded
+  
+  mov    [reg_p3+144], r12
+  mov    [reg_p3+152], r13
+  mov    [reg_p3+160], r8
+  mov    [reg_p3+168], r9
+  mov    [reg_p3+176], r10
+  mov    [reg_p3+184], r11
+  // Restore the saved registers in reverse push order
+  pop    rbx
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Two chained 2x751-bit (24-word) multiprecision subtractions
+//  Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]; the final borrows are dropped
+//*********************************************************************** 
+.global fmt(mp_dblsub751x2_asm)
+fmt(mp_dblsub751x2_asm):
+  push   r12
+  push   r13
+  push   r14
+  push   r15
+  
+  mov    r8, [reg_p3]              // words 0-7: load each word of c right before a is subtracted from it
+  sub    r8, [reg_p1]
+  mov    r9, [reg_p3+8]            // mov leaves CF untouched, so the borrow chain stays unbroken
+  sbb    r9, [reg_p1+8] 
+  mov    r10, [reg_p3+16]
+  sbb    r10, [reg_p1+16] 
+  mov    r11, [reg_p3+24]
+  sbb    r11, [reg_p1+24] 
+  mov    r12, [reg_p3+32]
+  sbb    r12, [reg_p1+32] 
+  mov    r13, [reg_p3+40]
+  sbb    r13, [reg_p1+40] 
+  mov    r14, [reg_p3+48]
+  sbb    r14, [reg_p1+48] 
+  mov    r15, [reg_p3+56]
+  sbb    r15, [reg_p1+56]
+  setc   al                        // al = borrow of the a-chain, carried into the next chunk
+  sub    r8, [reg_p2]
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  sbb    r14, [reg_p2+48] 
+  sbb    r15, [reg_p2+56]
+  setc   cl                        // cl = borrow of the b-chain
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15
+    
+  bt     rax, 0                    // words 8-15: CF = saved a-chain borrow (bit 0 of al)
+  mov    r8, [reg_p3+64]
+  sbb    r8, [reg_p1+64] 
+  mov    r9, [reg_p3+72]
+  sbb    r9, [reg_p1+72] 
+  mov    r10, [reg_p3+80]
+  sbb    r10, [reg_p1+80] 
+  mov    r11, [reg_p3+88]
+  sbb    r11, [reg_p1+88] 
+  mov    r12, [reg_p3+96]
+  sbb    r12, [reg_p1+96] 
+  mov    r13, [reg_p3+104]
+  sbb    r13, [reg_p1+104] 
+  mov    r14, [reg_p3+112]
+  sbb    r14, [reg_p1+112] 
+  mov    r15, [reg_p3+120]
+  sbb    r15, [reg_p1+120]
+  setc   al 
+  bt     rcx, 0                    // CF = saved b-chain borrow (bit 0 of cl)
+  sbb    r8, [reg_p2+64] 
+  sbb    r9, [reg_p2+72] 
+  sbb    r10, [reg_p2+80] 
+  sbb    r11, [reg_p2+88] 
+  sbb    r12, [reg_p2+96] 
+  sbb    r13, [reg_p2+104] 
+  sbb    r14, [reg_p2+112] 
+  sbb    r15, [reg_p2+120]
+  setc   cl 
+  mov    [reg_p3+64], r8
+  mov    [reg_p3+72], r9
+  mov    [reg_p3+80], r10
+  mov    [reg_p3+88], r11
+  mov    [reg_p3+96], r12
+  mov    [reg_p3+104], r13
+  mov    [reg_p3+112], r14
+  mov    [reg_p3+120], r15
+  
+  bt     rax, 0                    // words 16-23: last chunk, its borrows are not recorded
+  mov    r8, [reg_p3+128]
+  sbb    r8, [reg_p1+128] 
+  mov    r9, [reg_p3+136]
+  sbb    r9, [reg_p1+136] 
+  mov    r10, [reg_p3+144]
+  sbb    r10, [reg_p1+144] 
+  mov    r11, [reg_p3+152]
+  sbb    r11, [reg_p1+152] 
+  mov    r12, [reg_p3+160]
+  sbb    r12, [reg_p1+160] 
+  mov    r13, [reg_p3+168]
+  sbb    r13, [reg_p1+168] 
+  mov    r14, [reg_p3+176]
+  sbb    r14, [reg_p1+176] 
+  mov    r15, [reg_p3+184]
+  sbb    r15, [reg_p1+184]
+  bt     rcx, 0 
+  sbb    r8, [reg_p2+128] 
+  sbb    r9, [reg_p2+136] 
+  sbb    r10, [reg_p2+144] 
+  sbb    r11, [reg_p2+152] 
+  sbb    r12, [reg_p2+160] 
+  sbb    r13, [reg_p2+168] 
+  sbb    r14, [reg_p2+176] 
+  sbb    r15, [reg_p2+184]
+  mov    [reg_p3+128], r8
+  mov    [reg_p3+136], r9
+  mov    [reg_p3+144], r10
+  mov    [reg_p3+152], r11
+  mov    [reg_p3+160], r12
+  mov    [reg_p3+168], r13
+  mov    [reg_p3+176], r14
+  mov    [reg_p3+184], r15
+  
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
\ No newline at end of file
diff --git a/SIKE_sw/src/P751/P751.c b/SIKE_sw/src/P751/P751.c
new file mode 100644
index 0000000..7ece10c
--- /dev/null
+++ b/SIKE_sw/src/P751/P751.c
@@ -0,0 +1,142 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: supersingular isogeny parameters and generation of functions for P751
+*********************************************************************************************/  
+
+#include "P751_api.h" 
+#include "P751_internal.h"
+#include "../internal.h"
+
+
+// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points:
+// --------------------------------------------------------------------------------------------------
+// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). 
+// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position.
+// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. 
+// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32.
+// For example, a 751-bit field element is represented with Ceil(751 / 64) = 12 64-bit digits or Ceil(751 / 32) = 24 32-bit digits.
+
+//
+// Curve isogeny system "SIDHp751". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p751^2), where A=6, B=1, C=1 and p751 = 2^372*3^239-1
+//
+         
+const uint64_t p751[NWORDS64_FIELD]              = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF,
+                                                     0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C };
+const uint64_t p751p1[NWORDS64_FIELD]            = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xEEB0000000000000,
+                                                     0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C };   
+const uint64_t p751x2[NWORDS64_FIELD]            = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDD5FFFFFFFFFFFFF, 
+                                                     0xC7D92D0A93F0F151, 0xB52B363427EF98ED, 0x109D30CFADD7D0ED, 0x0AC56A08B964AE90, 0x1C25213F2F75B8CD, 0x0000DFCBAA83EE38 };
+const uint64_t p751x4[NWORDS64_FIELD]            = { 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xBABFFFFFFFFFFFFF, 
+                                                     0x8FB25A1527E1E2A3, 0x6A566C684FDF31DB, 0x213A619F5BAFA1DB, 0x158AD41172C95D20, 0x384A427E5EEB719A, 0x0001BF975507DC70 }; 
+const uint64_t p751x16p[2*NWORDS64_FIELD]        = { 0x0000000000000010, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x2A00000000000000, 
+                                                     0x826D2F56C0F0EAE2, 0xAD4C9CBD81067123, 0xF62CF3052282F124, 0x53A95F7469B516FE, 0x3DADEC0D08A4732F, 0x58AD934557C11C7E, 
+                                                     0x7F731B89B2DA43F2, 0x51AE9F5F5F6AFF3B, 0xD74319A6C9BCA375, 0x5BAB790796CF84D4, 0xA421554FE2E49CA8, 0x20AD617C8DF437CF, 
+                                                     0x3AB06E7A12F5FF7B, 0x70A25E037E40347E, 0x51F1D323FB4C1151, 0xAE0D99AA4835FED9, 0xDF5429960D2536B6, 0x000000030E91D466 };
+// Order of Alice's subgroup
+const uint64_t Alice_order[NWORDS64_ORDER]       = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0010000000000000 }; 
+// Order of Bob's subgroup
+const uint64_t Bob_order[NWORDS64_ORDER]         = { 0xC968549F878A8EEB, 0x59B1A13F7CC76E3E, 0xE9867D6EBE876DA9, 0x2B5045CB25748084, 0x2909F97BADC66856, 0x06FE5D541F71C0E1 };
+// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i} in GF(p751^2), expressed in Montgomery representation
+const uint64_t A_gen[6 * NWORDS64_FIELD]         = { 0x884F46B74000BAA8, 0xBA52630F939DEC20, 0xC16FB97BA714A04D, 0x082536745B1AB3DB, 0x1117157F446F9E82, 0xD2F27D621A018490,
+                                                     0x6B24AB523D544BCD, 0x9307D6AA2EA85C94, 0xE1A096729528F20F, 0x896446F868F3255C, 0x2401D996B1BFF8A5, 0x00000EF8786A5C0A,   // XPA0
+                                                     0xAEB78B3B96F59394, 0xAB26681E29C90B74, 0xE520AC30FDC4ACF1, 0x870AAAE3A4B8111B, 0xF875BDB738D64EFF, 0x50109A7ECD7ED6BC,
+                                                     0x4CC64848FF0C56FB, 0xE617CB6C519102C9, 0x9C74B3835921E609, 0xC91DDAE4A35A7146, 0x7FC82A155C1B9129, 0x0000214FA6B980B3,   // XPA1
+                                                     0x0F93CC38680A8CA9, 0x762E733822E7FED7, 0xE549F005AC0ADB67, 0x94A71FDD2C43A4ED, 0xD48645C2B04721C5, 0x432DA1FE4D4CA4DC,
+                                                     0xBC99655FAA7A80E8, 0xB2C6D502BCFD4823, 0xEE92F40CA2EC8BDB, 0x7B074132EFB6D16C, 0x3340B46FA38A7633, 0x0000215749657F6C,   // XQA0
+                                                     0xECFF375BF3079F4C, 0xFBFE74B043E80EF3, 0x17376CBE3C5C7AD1, 0xC06327A7E29CDBF2, 0x2111649C438BF3D4, 0xC1F9298261BA2E97,
+                                                     0x1F9FECE869CFD1C2, 0x01A39B4FC9346D62, 0x147CD1D3E82A3C9F, 0xDE84E9D249E533EE, 0x1C48A5ADFB7C578D, 0x000061ACA0B82E1D,   // XQA1
+                                                     0x1600C525D41059F1, 0xA596899A0A1D83F7, 0x6BFDEED6D2B23F35, 0x5C7E707270C23910, 0x276CA1A4E8369411, 0xB193651A602925A0,
+                                                     0x243D239F1CA1F04A, 0x543DC6DA457860AD, 0xCDA590F325181DE9, 0xD3AB7ACFDA80B395, 0x6C97468580FDDF7B, 0x0000352A3E5C4C77,   // XRA0
+                                                     0x9B794F9FD1CC3EE8, 0xDB32E40A9B2FD23E, 0x26192A2542E42B67, 0xA18E94FCA045BCE7, 0x96DC1BC38E7CDA2D, 0x9A1D91B752487DE2,
+                                                     0xCC63763987436DA3, 0x1316717AACCC551D, 0xC4C368A4632AFE72, 0x4B6EA85C9CCD5710, 0x7A12CAD582C7BC9A, 0x00001C7E240149BF }; // XRA1
+// Bob's generator values {XPB0, XQB0, XRB0 + XRB1*i} in GF(p751^2), expressed in Montgomery representation
+const uint64_t B_gen[6 * NWORDS64_FIELD]         = { 0x85691AAF4015F88C, 0x7478C5B8C36E9631, 0x7EF2A185DE4DD6E2, 0x943BBEE46BEB9DC7, 0x1A3EC62798792D22, 0x791BC4B084B31D69,
+                                                     0x03DBE6522CEA17C4, 0x04749AA65D665D83, 0x3D52B5C45EF450F3, 0x0B4219848E36947D, 0xA4CF7070466BDE27, 0x0000334B1FA6D193,   // XPB0
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,   // XPB1
+                                                     0x8E7CB3FA53211340, 0xD67CE54F7A05EEE0, 0xFDDC2C8BCE46FC38, 0x08587FAE3110DF1E, 0xD6B8246FA22B058B, 0x4DAC3ACC905A5DBD,
+                                                     0x51D0BF2FADCED3E8, 0xE5A2406DF6484425, 0x907F177584F671B8, 0x4738A2FFCCED051C, 0x2B0067B4177E4853, 0x00002806AC948D3D,   // XQB0
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+                                                     0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,   // XQB1
+                                                     0xB56457016D1D6D1C, 0x03DECCB38F39C491, 0xDFB910AC8A559452, 0xA9D0F17D1FF24883, 0x8562BBAF515C248C, 0x249B2A6DDB1CB67D,
+                                                     0x3131AF96FB46835C, 0xE10258398480C3E1, 0xEAB5E2B872D4FAB1, 0xB71E63875FAEB1DF, 0xF8384D4F13757CF6, 0x0000361EC9B09912,   // XRB0
+                                                     0x58C967899ED16EF4, 0x81998376DC622A4B, 0x3D1C1DCFE0B12681, 0x9347DEBB953E1730, 0x9ABB344D3A82C2D7, 0xE4881BD2820552B2,
+                                                     0x0037247923D90266, 0x2E3156EDB157E5A5, 0xF86A46A7506823F7, 0x8FE5523A7B7F1CFC, 0xFA3CFFA38372F67B, 0x0000692DCE85FFBD }; // XRB1
+// Montgomery constant Montgomery_R2 = (2^768)^2 mod p751
+const uint64_t Montgomery_R2[NWORDS64_FIELD]     = { 0x233046449DAD4058, 0xDB010161A696452A, 0x5E36941472E3FD8E, 0xF40BFE2082A2E706, 0x4932CCA8904F8751 ,0x1F735F1F1EE7FC81, 
+                                                     0xA24F4D80C1048E18, 0xB56C383CCDB607C5, 0x441DD47B735F9C90, 0x5673ED2C6A6AC82A, 0x06C905261132294B, 0x000041AD830F1F35 };                                                    
+// Value one in Montgomery representation 
+const uint64_t Montgomery_one[NWORDS64_FIELD]    = { 0x00000000000249ad, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8310000000000000,
+                                                     0x5527b1e4375c6c66, 0x697797bf3f4f24d0, 0xc89db7b2ac5c4e2e, 0x4ca4b439d2076956, 0x10f7926c7512c7e9, 0x00002d5b24bce5e2 };
+
+
+// Fixed parameters for isogeny tree computation
+const unsigned int strat_Alice[MAX_Alice-1] = { 
+80, 48, 27, 15, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, 
+1, 3, 2, 1, 1, 1, 1, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 
+1, 1, 2, 1, 1, 1, 21, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 
+1, 1, 1, 2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 
+33, 20, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 2, 1, 
+1, 1, 8, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 
+1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 };
+
+const unsigned int strat_Bob[MAX_Bob-1] = { 
+112, 63, 32, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 
+1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 
+1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 31, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 
+1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 15, 8, 4, 2, 1, 1, 2, 1, 1, 4, 
+2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 49, 31, 16, 8, 4, 2, 
+1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 
+15, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 
+1, 1, 1, 21, 12, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 5, 3, 2, 1, 1, 1, 1, 
+2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1 };
+
+// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions
+#define fpcopy                        fpcopy751
+#define fpzero                        fpzero751
+#define fpadd                         fpadd751
+#define fpsub                         fpsub751
+#define fpneg                         fpneg751
+#define fpdiv2                        fpdiv2_751
+#define fpcorrection                  fpcorrection751
+#define fpmul_mont                    fpmul751_mont
+#define fpsqr_mont                    fpsqr751_mont
+#define fpinv_mont                    fpinv751_mont
+#define fpinv_chain_mont              fpinv751_chain_mont
+#define fp2copy                       fp2copy751
+#define fp2zero                       fp2zero751
+#define fp2add                        fp2add751
+#define fp2sub                        fp2sub751
+#define mp_sub_p2                     mp_sub751_p2
+#define mp_sub_p4                     mp_sub751_p4
+#define sub_p4                        mp_sub_p4
+#define fp2neg                        fp2neg751
+#define fp2div2                       fp2div2_751
+#define fp2correction                 fp2correction751
+#define fp2mul_mont                   fp2mul751_mont
+#define fp2sqr_mont                   fp2sqr751_mont
+#define fp2inv_mont                   fp2inv751_mont
+#define fp2inv_mont_ct                fp2inv751_mont_ct
+#define fp2inv_mont_bingcd            fp2inv751_mont_bingcd
+#define mp_add_asm                    mp_add751_asm
+#define mp_subaddx2_asm               mp_subadd751x2_asm
+#define mp_dblsubx2_asm               mp_dblsub751x2_asm
+#define crypto_kem_keypair            crypto_kem_keypair_SIKEp751
+#define crypto_kem_enc                crypto_kem_enc_SIKEp751
+#define crypto_kem_dec                crypto_kem_dec_SIKEp751
+#define random_mod_order_A            random_mod_order_A_SIDHp751
+#define random_mod_order_B            random_mod_order_B_SIDHp751
+#define EphemeralKeyGeneration_A      EphemeralKeyGeneration_A_SIDHp751
+#define EphemeralKeyGeneration_B      EphemeralKeyGeneration_B_SIDHp751
+#define EphemeralSecretAgreement_A    EphemeralSecretAgreement_A_SIDHp751
+#define EphemeralSecretAgreement_B    EphemeralSecretAgreement_B_SIDHp751
+
+#include "../fpx.c"
+#include "../ec_isogeny.c"
+#include "../sidh.c"
+#include "../sike.c"
\ No newline at end of file
diff --git a/SIKE_sw/src/P751/P751_api.h b/SIKE_sw/src/P751/P751_api.h
new file mode 100644
index 0000000..94db7e8
--- /dev/null
+++ b/SIKE_sw/src/P751/P751_api.h
@@ -0,0 +1,112 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: API header file for P751
+*********************************************************************************************/  
+
+#ifndef P751_API_H
+#define P751_API_H
+    
+
+/*********************** Key encapsulation mechanism API ***********************/
+
+#define CRYPTO_SECRETKEYBYTES     644    // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes
+#define CRYPTO_PUBLICKEYBYTES     564
+#define CRYPTO_BYTES               32
+#define CRYPTO_CIPHERTEXTBYTES    596    // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes
+
+// Algorithm name
+#define CRYPTO_ALGNAME "SIKEp751"  
+
+// SIKE's key generation
+// It produces a private key sk and computes the public key pk.
+// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 644 bytes)
+//          public key pk (CRYPTO_PUBLICKEYBYTES = 564 bytes) 
+int crypto_kem_keypair_SIKEp751(unsigned char *pk, unsigned char *sk);
+
+// SIKE's encapsulation
+// Input:   public key pk         (CRYPTO_PUBLICKEYBYTES = 564 bytes)
+// Outputs: shared secret ss      (CRYPTO_BYTES = 32 bytes)
+//          ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 596 bytes) 
+int crypto_kem_enc_SIKEp751(unsigned char *ct, unsigned char *ss, const unsigned char *pk);
+
+// SIKE's decapsulation
+// Input:   secret key sk         (CRYPTO_SECRETKEYBYTES = 644 bytes)
+//          ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = 596 bytes) 
+// Outputs: shared secret ss      (CRYPTO_BYTES = 32 bytes)
+int crypto_kem_dec_SIKEp751(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);
+
+
+// Encoding of keys for KEM-based isogeny system "SIKEp751" (wire format):
+// ----------------------------------------------------------------------
+// Elements over GF(p751) are encoded in 94 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). 
+// Elements (a+b*i) over GF(p751^2), where a and b are defined over GF(p751), are encoded as {a, b}, with a in the lowest memory portion.
+//
+// Private keys sk consist of the concatenation of a 32-byte random value, a value in the range [0, 2^Floor(Log(2,3^239))-1] and the public key pk. In the SIKE API, 
+// private keys are encoded in 644 octets in little endian format. 
+// Public keys pk consist of 3 elements in GF(p751^2). In the SIKE API, pk is encoded in 564 octets. 
+// Ciphertexts ct consist of the concatenation of a public key value and a 32-byte value. In the SIKE API, ct is encoded in 564 + 32 = 596 octets.  
+// Shared keys ss consist of a value of 32 octets.
+
+
+/*********************** Ephemeral key exchange API ***********************/
+
+#define SIDH_SECRETKEYBYTES_A    47
+#define SIDH_SECRETKEYBYTES_B    48
+#define SIDH_PUBLICKEYBYTES     564
+#define SIDH_BYTES              188 
+
+// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys.
+// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016.
+// Extended version available at: http://eprint.iacr.org/2016/859     
+
+// Generation of Alice's secret key 
+// Outputs random value in [0, 2^372 - 1] to be used as Alice's private key
+void random_mod_order_A_SIDHp751(unsigned char* random_digits);
+
+// Generation of Bob's secret key 
+// Outputs random value in [0, 2^Floor(Log(2,3^239)) - 1] to be used as Bob's private key
+void random_mod_order_B_SIDHp751(unsigned char* random_digits);
+
+// Alice's ephemeral public key generation
+// Input:  a private key PrivateKeyA in the range [0, 2^372 - 1], stored in 47 bytes. 
+// Output: the public key PublicKeyA consisting of 3 GF(p751^2) elements encoded in 564 bytes.
+int EphemeralKeyGeneration_A_SIDHp751(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA);
+
+// Bob's ephemeral public key generation
+// It computes the public key PublicKeyB from the given private key PrivateKeyB.
+// Input:  a private key PrivateKeyB in the range [0, 2^Floor(Log(2,3^239)) - 1], stored in 48 bytes.  
+// Output: the public key PublicKeyB consisting of 3 GF(p751^2) elements encoded in 564 bytes.
+int EphemeralKeyGeneration_B_SIDHp751(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB);
+
+// Alice's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB
+// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^372 - 1], stored in 47 bytes. 
+//         Bob's PublicKeyB consists of 3 GF(p751^2) elements encoded in 564 bytes.
+// Output: a shared secret SharedSecretA that consists of one element in GF(p751^2) encoded in 188 bytes.
+int EphemeralSecretAgreement_A_SIDHp751(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA);
+
+// Bob's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA
+// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^239)) - 1], stored in 48 bytes.  
+//         Alice's PublicKeyA consists of 3 GF(p751^2) elements encoded in 564 bytes.
+// Output: a shared secret SharedSecretB that consists of one element in GF(p751^2) encoded in 188 bytes. 
+int EphemeralSecretAgreement_B_SIDHp751(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB);
+
+
+// Encoding of keys for KEX-based isogeny system "SIDHp751" (wire format):
+// ----------------------------------------------------------------------
+// Elements over GF(p751) are encoded in 94 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). 
+// Elements (a+b*i) over GF(p751^2), where a and b are defined over GF(p751), are encoded as {a, b}, with a in the lowest memory portion.
+//
+// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^372-1] and [0, 2^Floor(Log(2,3^239)) - 1], resp. In the SIDH API, 
+// Alice's and Bob's private keys are encoded in 47 and 48 octets, resp., in little endian format. 
+// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p751^2). In the SIDH API, they are encoded in 564 octets. 
+// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p751^2). In the SIDH API, they are encoded in 188 octets.
+
+
+#endif
\ No newline at end of file
diff --git a/SIKE_sw/src/P751/P751_internal.h b/SIKE_sw/src/P751/P751_internal.h
new file mode 100644
index 0000000..a4c7ebd
--- /dev/null
+++ b/SIKE_sw/src/P751/P751_internal.h
@@ -0,0 +1,175 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: internal header file for P751
+*********************************************************************************************/  
+
+#ifndef P751_INTERNAL_H
+#define P751_INTERNAL_H
+
+#include "../config.h"
+ 
+
+#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_ARM64)
+    #define NWORDS_FIELD    12              // Number of words of a 751-bit field element
+    #define p751_ZERO_WORDS 5               // Number of "0" digits in the least significant part of p751 + 1     
+#elif (TARGET == TARGET_x86)
+    #define NWORDS_FIELD    24              // Number of words of a 751-bit field element (presumably 32-bit words on this target; confirm against config.h)
+    #define p751_ZERO_WORDS 11              // Number of "0" digits in the least significant part of p751 + 1, counted in this target's words
+#endif 
+    
+
+// Basic constants
+
+#define NBITS_FIELD             751  
+#define MAXBITS_FIELD           768                
+#define MAXWORDS_FIELD          ((MAXBITS_FIELD+RADIX-1)/RADIX)     // Max. number of words to represent field elements
+#define NWORDS64_FIELD          ((NBITS_FIELD+63)/64)               // Number of 64-bit words of a 751-bit field element 
+#define NBITS_ORDER             384
+#define NWORDS_ORDER            ((NBITS_ORDER+RADIX-1)/RADIX)       // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp.
+#define NWORDS64_ORDER          ((NBITS_ORDER+63)/64)               // Number of 64-bit words of a 384-bit element 
+#define MAXBITS_ORDER           NBITS_ORDER
+#define ALICE                   0
+#define BOB                     1 
+#define OALICE_BITS             372  
+#define OBOB_BITS               379    
+#define OBOB_EXPON              239 
+#define MASK_ALICE              0x0F
+#define MASK_BOB                0x03  
+#define PRIME                   p751  
+#define PARAM_A                 6  
+#define PARAM_C                 1
+// Fixed parameters for isogeny tree computation
+#define MAX_INT_POINTS_ALICE    8      
+#define MAX_INT_POINTS_BOB      10 
+#define MAX_Alice               186
+#define MAX_Bob                 239
+#define MSG_BYTES               32
+#define SECRETKEY_A_BYTES       ((OALICE_BITS + 7) / 8)
+#define SECRETKEY_B_BYTES       ((OBOB_BITS - 1 + 7) / 8)
+#define FP2_ENCODED_BYTES       (2*((NBITS_FIELD + 7) / 8))         // Bytes used to encode one GF(p751^2) element (two field elements); fully parenthesized so it expands safely inside larger expressions
+
+
+// SIDH's basic element definitions and point representations
+
+typedef digit_t felm_t[NWORDS_FIELD];                                 // Datatype for representing 751-bit field elements (768-bit max.)
+typedef digit_t dfelm_t[2*NWORDS_FIELD];                              // Datatype for representing double-precision 2x751-bit field elements (2x768-bit max.) 
+typedef felm_t  f2elm_t[2];                                           // Datatype for representing quadratic extension field elements GF(p751^2)
+        
+typedef struct { f2elm_t X; f2elm_t Z; } point_proj;                  // Point representation in projective XZ Montgomery coordinates.
+typedef point_proj point_proj_t[1]; 
+
+#ifdef COMPRESS
+    typedef struct { f2elm_t X; f2elm_t Y; f2elm_t Z; } point_full_proj;  // Point representation in full projective XYZ Montgomery coordinates 
+    typedef point_full_proj point_full_proj_t[1];
+
+    typedef struct { f2elm_t x; f2elm_t y; } point_affine;                // Point representation in affine coordinates.
+    typedef point_affine point_t[1];
+
+    typedef f2elm_t publickey_t[3];
+#endif 
+
+
+
+/**************** Function prototypes ****************/
+/************* Multiprecision functions **************/
+
+// 751-bit multiprecision addition, c = a+b
+void mp_add751(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_add751_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// 751-bit multiprecision subtraction, c = a-b+2p or c = a-b+4p
+extern void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c);
+extern void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_sub751_p2_asm(const digit_t* a, const digit_t* b, digit_t* c); 
+void mp_sub751_p4_asm(const digit_t* a, const digit_t* b, digit_t* c); 
+
+// 2x751-bit multiprecision subtraction followed by addition with p751*2^768, c = a-b+(p751*2^768) if a-b < 0, otherwise c=a-b 
+void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+void mp_subadd751x2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Double 2x751-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
+void mp_dblsub751x2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+/************ Field arithmetic functions *************/
+
+// Copy of a field element, c = a
+void fpcopy751(const digit_t* a, digit_t* c);
+
+// Zeroing a field element, a = 0
+void fpzero751(digit_t* a);
+
+// Non constant-time comparison of two field elements. If a = b return TRUE, otherwise, return FALSE
+bool fpequal751_non_constant_time(const digit_t* a, const digit_t* b); 
+
+// Modular addition, c = a+b mod p751
+extern void fpadd751(const digit_t* a, const digit_t* b, digit_t* c);
+extern void fpadd751_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Modular subtraction, c = a-b mod p751
+extern void fpsub751(const digit_t* a, const digit_t* b, digit_t* c);
+extern void fpsub751_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Modular negation, a = -a mod p751        
+extern void fpneg751(digit_t* a);  
+
+// Modular division by two, c = a/2 mod p751.
+void fpdiv2_751(const digit_t* a, digit_t* c);
+
+// Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1].
+void fpcorrection751(digit_t* a);
+
+// 751-bit Montgomery reduction, c = a mod p
+void rdc751_asm(digit_t* ma, digit_t* mc);
+            
+// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p751, where R=2^768
+void fpmul751_mont(const digit_t* a, const digit_t* b, digit_t* c);
+void mul751_asm(const digit_t* a, const digit_t* b, digit_t* c);
+   
+// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p751, where R=2^768
+void fpsqr751_mont(const digit_t* ma, digit_t* mc);
+
+// Field inversion, a = a^-1 in GF(p751)
+void fpinv751_mont(digit_t* a);
+
+// Chain to compute a^((p751-3)/4) using Montgomery arithmetic
+void fpinv751_chain_mont(digit_t* a);
+
+/************ GF(p^2) arithmetic functions *************/
+    
+// Copy of a GF(p751^2) element, c = a
+void fp2copy751(const f2elm_t a, f2elm_t c);
+
+// Zeroing a GF(p751^2) element, a = 0
+void fp2zero751(f2elm_t a);
+
+// GF(p751^2) negation, a = -a in GF(p751^2)
+void fp2neg751(f2elm_t a);
+
+// GF(p751^2) addition, c = a+b in GF(p751^2)
+extern void fp2add751(const f2elm_t a, const f2elm_t b, f2elm_t c);           
+
+// GF(p751^2) subtraction, c = a-b in GF(p751^2)
+extern void fp2sub751(const f2elm_t a, const f2elm_t b, f2elm_t c); 
+
+// GF(p751^2) division by two, c = a/2  in GF(p751^2) 
+void fp2div2_751(const f2elm_t a, f2elm_t c);
+
+// Modular correction, a = a in GF(p751^2)
+void fp2correction751(f2elm_t a);
+            
+// GF(p751^2) squaring using Montgomery arithmetic, c = a^2 in GF(p751^2)
+void fp2sqr751_mont(const f2elm_t a, f2elm_t c);
+ 
+// GF(p751^2) multiplication using Montgomery arithmetic, c = a*b in GF(p751^2)
+void fp2mul751_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
+
+// GF(p751^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
+void fp2inv751_mont(f2elm_t a);
+
+
+#endif
diff --git a/SIKE_sw/src/P751/generic/fp_generic.c b/SIKE_sw/src/P751/generic/fp_generic.c
new file mode 100644
index 0000000..9b73939
--- /dev/null
+++ b/SIKE_sw/src/P751/generic/fp_generic.c
@@ -0,0 +1,259 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: portable modular arithmetic for P751
+*********************************************************************************************/
+
+#include "../P751_internal.h"
+#include "../../internal.h"
+
+// Global constants
+extern const uint64_t p751[NWORDS64_FIELD];
+extern const uint64_t p751p1[NWORDS64_FIELD]; 
+extern const uint64_t p751x2[NWORDS64_FIELD]; 
+extern const uint64_t p751x4[NWORDS64_FIELD];
+
+
+__inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with unconditional correction by 2*p751, c = a-b+2*p751, over NWORDS_FIELD digits.
+    unsigned int i, borrow = 0;                     // 'borrow' is reused as the carry flag of the second pass
+
+    for (i = 0; i < NWORDS_FIELD; i++) {            // First pass: c = a-b, borrow rippling from digit to digit
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+
+    borrow = 0;                                     // Borrow out of the subtraction is dropped; restart with carry-in = 0
+    for (i = 0; i < NWORDS_FIELD; i++) {            // Second pass: c = c + p751x2; the final carry out is not used
+        ADDC(borrow, c[i], ((digit_t*)p751x2)[i], borrow, c[i]); 
+    }
+} 
+
+
+__inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction with unconditional correction by 4*p751, c = a-b+4*p751, over NWORDS_FIELD digits.
+    unsigned int i, borrow = 0;                     // 'borrow' is reused as the carry flag of the second pass
+
+    for (i = 0; i < NWORDS_FIELD; i++) {            // First pass: c = a-b, borrow rippling from digit to digit
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+
+    borrow = 0;                                     // Borrow out of the subtraction is dropped; restart with carry-in = 0
+    for (i = 0; i < NWORDS_FIELD; i++) {            // Second pass: c = c + p751x4; the final carry out is not used
+        ADDC(borrow, c[i], ((digit_t*)p751x4)[i], borrow, c[i]); 
+    }
+}   
+
+
+__inline void fpadd751(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular addition, c = a+b mod p751, computed as (a+b) - 2*p751 followed by a masked add-back instead of a branch.
+  // Inputs: a, b in [0, 2*p751-1] 
+  // Output: c in [0, 2*p751-1] 
+    unsigned int i, carry = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {            // c = a+b (the carry out of the top digit is not used)
+        ADDC(carry, a[i], b[i], carry, c[i]); 
+    }
+
+    carry = 0;                                      // 'carry' is reused as the borrow of the subtraction below
+    for (i = 0; i < NWORDS_FIELD; i++) {            // c = c - 2*p751
+        SUBC(carry, c[i], ((digit_t*)p751x2)[i], carry, c[i]); 
+    }
+    mask = 0 - (digit_t)carry;                      // All ones if the subtraction borrowed, zero otherwise
+
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {            // c = c + (2*p751 & mask): undoes the subtraction only when it borrowed
+        ADDC(carry, c[i], ((digit_t*)p751x2)[i] & mask, carry, c[i]); 
+    }
+} 
+
+
+__inline void fpsub751(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Modular subtraction, c = a-b mod p751, computed as a-b followed by a masked addition of 2*p751 instead of a branch.
+  // Inputs: a, b in [0, 2*p751-1] 
+  // Output: c in [0, 2*p751-1] 
+    unsigned int i, borrow = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {            // c = a-b
+        SUBC(borrow, a[i], b[i], borrow, c[i]); 
+    }
+    mask = 0 - (digit_t)borrow;                     // All ones if a < b (final borrow set), zero otherwise
+
+    borrow = 0;                                     // 'borrow' is reused as the carry of the addition below
+    for (i = 0; i < NWORDS_FIELD; i++) {            // c = c + (2*p751 & mask): corrects the result only when a-b borrowed
+        ADDC(borrow, c[i], ((digit_t*)p751x2)[i] & mask, borrow, c[i]); 
+    }
+}
+
+
+__inline void fpneg751(digit_t* a)
+{ // Modular negation, a = -a mod p751, computed in place as a = 2*p751 - a.
+  // Input/output: a in [0, 2*p751-1] 
+    unsigned int i, borrow = 0;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {            // a[i] is read as the subtrahend and overwritten with the difference digit
+        SUBC(borrow, ((digit_t*)p751x2)[i], a[i], borrow, a[i]); 
+    }
+}
+
+
+void fpdiv2_751(const digit_t* a, digit_t* c)
+{ // Modular division by two, c = a/2 mod p751: p751 is added first when a is odd, then the sum is halved.
+  // Input : a in [0, 2*p751-1] 
+  // Output: c in [0, 2*p751-1] 
+    unsigned int i, carry = 0;
+    digit_t mask;
+        
+    mask = 0 - (digit_t)(a[0] & 1);    // If a is odd compute a+p751 (mask = all ones), otherwise add zero (mask = 0)
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], ((digit_t*)p751)[i] & mask, carry, c[i]); 
+    }
+
+    mp_shiftr1(c, NWORDS_FIELD);       // mp_shiftr1 is defined elsewhere; presumably a 1-bit right shift of c -- TODO confirm
+} 
+
+
+void fpcorrection751(digit_t* a)
+{ // Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1], in place and without branching.
+    unsigned int i, borrow = 0;
+    digit_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {            // a = a - p751
+        SUBC(borrow, a[i], ((digit_t*)p751)[i], borrow, a[i]); 
+    }
+    mask = 0 - (digit_t)borrow;                     // All ones if a was below p751 (subtraction borrowed), zero otherwise
+
+    borrow = 0;                                     // 'borrow' is reused as the carry of the addition below
+    for (i = 0; i < NWORDS_FIELD; i++) {            // a = a + (p751 & mask): restores a when the subtraction borrowed
+        ADDC(borrow, a[i], ((digit_t*)p751)[i] & mask, borrow, a[i]); 
+    }
+}
+
+
+void digit_x_digit(const digit_t a, const digit_t b, digit_t* c)
+{ // Digit multiplication, digit * digit -> 2-digit result (c[0] = low digit, c[1] = high digit), built from four half-digit partial products.
+    register digit_t al, ah, bl, bh, temp;
+    digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
+    digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4);  // Select the low / high half of a digit
+
+    al = a & mask_low;                        // Low part
+    ah = a >> (sizeof(digit_t) * 4);          // High part
+    bl = b & mask_low;
+    bh = b >> (sizeof(digit_t) * 4);
+
+    albl = al*bl;                             // Each partial product of two half-digits fits in one digit
+    albh = al*bh;
+    ahbl = ah*bl;
+    ahbh = ah*bh;
+    c[0] = albl & mask_low;                   // C00: lowest half-digit of the result
+
+    res1 = albl >> (sizeof(digit_t) * 4);     // Second half-digit column: high half of al*bl ...
+    res2 = ahbl & mask_low;                   // ... plus low half of ah*bl ...
+    res3 = albh & mask_low;                   // ... plus low half of al*bh
+    temp = res1 + res2 + res3;
+    carry = temp >> (sizeof(digit_t) * 4);    // Column overflow, carried into the third column
+    c[0] ^= temp << (sizeof(digit_t) * 4);    // C01: low half of the column sum goes into the (still zero) upper half of c[0]
+
+    res1 = ahbl >> (sizeof(digit_t) * 4);     // Third half-digit column: high halves of the cross products ...
+    res2 = albh >> (sizeof(digit_t) * 4);
+    res3 = ahbh & mask_low;                   // ... plus low half of ah*bh
+    temp = res1 + res2 + res3 + carry;
+    c[1] = temp & mask_low;                   // C10: third half-digit of the result
+    carry = temp & mask_high;                 // Column overflow, kept in place in the high half
+    c[1] ^= (ahbh & mask_high) + carry;       // C11: high half of ah*bh plus carry fills the (still zero) upper half of c[1]
+}
+
+
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. Writes c[0..2*nwords-1]; assumes nwords >= 1 since c[2*nwords-1] is stored unconditionally.
+    unsigned int i, j;
+    digit_t t = 0, u = 0, v = 0, UV[2];            // (t,u,v): 3-digit column accumulator, v lowest; UV: 2-digit product, UV[0] low
+    unsigned int carry = 0;
+    
+    for (i = 0; i < nwords; i++) {                 // Lower half: result columns 0 .. nwords-1
+        for (j = 0; j <= i; j++) {                 // Accumulate every product a[j]*b[i-j] belonging to column i
+            MUL(a[j], b[i-j], UV+1, UV[0]); 
+            ADDC(0, UV[0], v, carry, v); 
+            ADDC(carry, UV[1], u, carry, u); 
+            t += carry;
+        }
+        c[i] = v;                                  // Emit the finished column digit ...
+        v = u;                                     // ... and shift the accumulator down by one digit
+        u = t;
+        t = 0;
+    }
+
+    for (i = nwords; i < 2*nwords-1; i++) {        // Upper half: result columns nwords .. 2*nwords-2
+        for (j = i-nwords+1; j < nwords; j++) {    // Only index pairs with both j and i-j below nwords
+            MUL(a[j], b[i-j], UV+1, UV[0]); 
+            ADDC(0, UV[0], v, carry, v); 
+            ADDC(carry, UV[1], u, carry, u); 
+            t += carry;
+        }
+        c[i] = v;
+        v = u; 
+        u = t;
+        t = 0;
+    }
+    c[2*nwords-1] = v;                             // Top digit is what remains in the accumulator
+}
+
+
+void rdc_mont(digit_t* ma, digit_t* mc)
+{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p751.
+  // mc = ma*R^-1 mod p751x2, where R = 2^768.
+  // If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
+  // ma is assumed to be in Montgomery representation. ma[0..2*NWORDS_FIELD-1] is only read; mc[0..NWORDS_FIELD-1] is written.
+    unsigned int i, j, carry, count = p751_ZERO_WORDS;
+    digit_t UV[2], t = 0, u = 0, v = 0;             // (t,u,v): 3-digit column accumulator, v lowest; UV: 2-digit product, UV[0] low
+
+    for (i = 0; i < NWORDS_FIELD; i++) {            // mc is both read and written below, so clear it first
+        mc[i] = 0;
+    }
+
+    for (i = 0; i < NWORDS_FIELD; i++) {            // Columns 0 .. NWORDS_FIELD-1
+        for (j = 0; j < i; j++) {
+            if (j < (i-p751_ZERO_WORDS+1)) {        // NOTE(review): i is unsigned; for small i this bound may wrap if the macro is an int constant -- TODO confirm this is harmless
+                MUL(mc[j], ((digit_t*)p751p1)[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v); 
+                ADDC(carry, UV[1], u, carry, u); 
+                t += carry; 
+            }
+        }
+        ADDC(0, v, ma[i], carry, v);                // Add input digit i into the column sum
+        ADDC(carry, u, 0, carry, u); 
+        t += carry; 
+        mc[i] = v;                                  // Column digit is stored in mc[i] and reused as a multiplier in later columns
+        v = u;
+        u = t;
+        t = 0;
+    }    
+
+    for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {   // Columns NWORDS_FIELD .. 2*NWORDS_FIELD-2
+        if (count > 0) {                            // count steps down from p751_ZERO_WORDS to 0, one per column
+            count -= 1;
+        }
+        for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
+            if (j < (NWORDS_FIELD-count)) {         // Skips the top 'count' values of j
+                MUL(mc[j], ((digit_t*)p751p1)[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v); 
+                ADDC(carry, UV[1], u, carry, u); 
+                t += carry;
+            }
+        }
+        ADDC(0, v, ma[i], carry, v); 
+        ADDC(carry, u, 0, carry, u); 
+        t += carry; 
+        mc[i-NWORDS_FIELD] = v;                     // Result digit; overwrites an mc entry whose index is below the inner loop's start
+        v = u;
+        u = t;
+        t = 0;
+    }
+    ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v);     // Last column: only the top input digit remains to be added; carry out is not used
+    mc[NWORDS_FIELD-1] = v;
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/config.h b/SIKE_sw/src/config.h
new file mode 100644
index 0000000..4f8d368
--- /dev/null
+++ b/SIKE_sw/src/config.h
@@ -0,0 +1,271 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: configuration file and platform-dependent macros
+*********************************************************************************************/
+
+#ifndef CONFIG_H
+#define CONFIG_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+// Definition of operating system: OS_TARGET is selected from __WINDOWS__ / __LINUX__ (presumably supplied by the build -- TODO confirm)
+
+#define OS_WIN   1
+#define OS_LINUX 2
+
+#if defined(__WINDOWS__)  // Microsoft Windows OS
+#define OS_TARGET OS_WIN
+#elif defined(__LINUX__)  // Linux OS
+#define OS_TARGET OS_LINUX
+#else
+#error-- "Unsupported OS"
+#endif
+
+#if (OS_TARGET == OS_LINUX)
+#define ALIGN_FOOTER(N) __attribute__((aligned(N)))  // Alignment attribute in GCC-style syntax
+#else
+#define ALIGN_FOOTER(N)  // Expands to nothing for other OS targets
+#endif
+
+// Definition of compiler: COMPILER is set to the first matching branch below
+
+#define COMPILER_VC    1
+#define COMPILER_GCC   2
+#define COMPILER_CLANG 3
+
+#if defined(_MSC_VER) // Microsoft Visual C compiler
+#define COMPILER COMPILER_VC
+#elif defined(__GNUC__) // GNU GCC compiler
+#define COMPILER COMPILER_GCC
+#elif defined(__clang__) // Clang compiler. NOTE(review): Clang normally defines __GNUC__ too, so this branch looks unreachable -- confirm intended order
+#define COMPILER COMPILER_CLANG
+#else
+#error-- "Unsupported COMPILER"
+#endif
+
+// Definition of the targeted architecture and basic data types: selects TARGET, the digit width RADIX, LOG2RADIX and digit_t
+
+#define TARGET_AMD64 1
+#define TARGET_ARM64 2
+#define TARGET_x86   3
+
+#if defined(_AMD64_)
+#define TARGET TARGET_AMD64
+#define RADIX      64
+#define LOG2RADIX   6
+typedef uint64_t digit_t;  // Unsigned 64-bit digit
+#elif defined(_ARM64_)
+#define TARGET TARGET_ARM64
+#define RADIX      64
+#define LOG2RADIX   6
+typedef uint64_t digit_t;  // Unsigned 64-bit digit
+#elif defined(_X86_)
+#define TARGET TARGET_x86
+#define RADIX      32
+#define LOG2RADIX   5
+typedef uint32_t digit_t;  // Unsigned 32-bit digit
+#else
+#error-- "Unsupported ARCHITECTURE"
+#endif
+
+#define RADIX64 64  // Fixed at 64 regardless of the selected digit width
+
+// Selection of generic, portable implementation (_GENERIC_) or the platform-specific one (_FAST_)
+
+#if defined(_GENERIC_)
+#define GENERIC_IMPLEMENTATION
+#elif defined(_FAST_)
+#define FAST_IMPLEMENTATION
+#endif
+
+// Extended datatype support: uint128_t is a native 128-bit integer only where UINT128_SUPPORT is defined; otherwise it is an array of two uint64_t
+
+#if defined(GENERIC_IMPLEMENTATION)
+typedef uint64_t uint128_t[2];
+#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_LINUX) && (COMPILER == COMPILER_GCC || COMPILER == COMPILER_CLANG)
+#define UINT128_SUPPORT
+typedef unsigned uint128_t __attribute__((mode(TI)));
+#elif (TARGET == TARGET_ARM64 && OS_TARGET == OS_LINUX) && (COMPILER == COMPILER_GCC || COMPILER == COMPILER_CLANG)
+#define UINT128_SUPPORT
+typedef unsigned uint128_t __attribute__((mode(TI)));
+#elif (TARGET == TARGET_AMD64) && (OS_TARGET == OS_WIN && COMPILER == COMPILER_VC)
+#define SCALAR_INTRIN_SUPPORT
+typedef uint64_t uint128_t[2];
+#else
+#error-- "Unsupported configuration"
+#endif
+
+// Macro definitions: size conversions, each rounding up to a whole number of units
+
+#define NBITS_TO_NBYTES(nbits) (((nbits) + 7) / 8)                                             // Conversion macro from number of bits to number of bytes
+#define NBITS_TO_NWORDS(nbits) (((nbits) + (sizeof(digit_t) * 8) - 1) / (sizeof(digit_t) * 8)) // Conversion macro from number of bits to number of computer words
+#define NBYTES_TO_NWORDS(nbytes) (((nbytes) + sizeof(digit_t) - 1) / sizeof(digit_t))          // Conversion macro from number of bytes to number of computer words
+
+
+/********************** Constant-time unsigned comparisons ***********************/
+
+// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise
+
+static __inline unsigned int is_digit_nonzero_ct(digit_t x)
+{ // Is x != 0? Branch-free: for any nonzero x, either x or its negation (0 - x) has the top bit set.
+    return (unsigned int)((x | (0 - x)) >> (RADIX - 1));  // Top bit of (x | -x) moved down to bit 0
+}
+
+static __inline unsigned int is_digit_zero_ct(digit_t x)
+{ // Is x = 0? Logical complement of is_digit_nonzero_ct().
+    return (unsigned int)(1 ^ is_digit_nonzero_ct(x));  // Flips the 0/1 result
+}
+
+static __inline unsigned int is_digit_lessthan_ct(digit_t x, digit_t y)
+{ // Is x < y? Unsigned comparison without branching.
+    return (unsigned int)((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX - 1));  // Top bit of this expression is the borrow out of x - y
+}
+
+/********************** Macros for platform-dependent operations **********************/
+
+#if defined(GENERIC_IMPLEMENTATION)
+
+// Digit multiplication: both result digits are written through &(lo), so (lo) must be element 0 of a 2-digit array; (hi) is unused here
+#define MUL(multiplier, multiplicand, hi, lo) \
+    digit_x_digit((multiplier), (multiplicand), &(lo));
+
+// Digit addition with carry: sumOut = addend1 + addend2 + carryIn, carryOut = 1 if either partial addition wrapped
+#define ADDC(carryIn, addend1, addend2, carryOut, sumOut)                                                           \
+    {                                                                                                               \
+        digit_t tempReg = (addend1) + (digit_t)(carryIn);                                                           \
+        (sumOut) = (addend2) + tempReg;                                                                             \
+        (carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); \
+    }
+
+// Digit subtraction with borrow: borrowOut is computed into a temporary first, so borrowIn and borrowOut may be the same variable
+#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut)                                                      \
+    {                                                                                                                      \
+        digit_t tempReg = (minuend) - (subtrahend);                                                                        \
+        unsigned int borrowReg = (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn)&is_digit_zero_ct(tempReg))); \
+        (differenceOut) = tempReg - (digit_t)(borrowIn);                                                                   \
+        (borrowOut) = borrowReg;                                                                                           \
+    }
+
+// Shift right with flexible datatype: low digit shifted right, vacated top bits filled from highIn (DigitSize = digit width in bits)
+#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \
+    (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (DigitSize - (shift)));
+
+// Shift left with flexible datatype: high digit shifted left, vacated low bits filled from lowIn (DigitSize = digit width in bits)
+#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \
+    (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (DigitSize - (shift)));
+
+// 64x64-bit multiplication. NOTE(review): the word count passed to mp_mul is NWORDS_FIELD / 2 -- confirm it matches the intended operand size
+#define MUL128(multiplier, multiplicand, product) \
+    mp_mul((digit_t *)&(multiplier), (digit_t *)&(multiplicand), (digit_t *)&(product), NWORDS_FIELD / 2);
+
+// 128-bit addition, inputs < 2^127. NOTE(review): the word count passed to mp_add is NWORDS_FIELD -- confirm against the 128-bit operands
+#define ADD128(addend1, addend2, addition) \
+    mp_add((digit_t *)(addend1), (digit_t *)(addend2), (digit_t *)(addition), NWORDS_FIELD);
+
+// 128-bit addition with output carry. NOTE(review): the word count passed to mp_add is NWORDS_FIELD -- confirm against the 128-bit operands
+#define ADC128(addend1, addend2, carry, addition) \
+    (carry) = mp_add((digit_t *)(addend1), (digit_t *)(addend2), (digit_t *)(addition), NWORDS_FIELD);
+
+#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_WIN)
+
+// Digit multiplication via _umul128: the low digit is assigned to (lo), the high digit is stored through the pointer (hi)
+#define MUL(multiplier, multiplicand, hi, lo) \
+    (lo) = _umul128((multiplier), (multiplicand), (hi));
+
+// Digit addition with carry via _addcarry_u64
+#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \
+    (carryOut) = _addcarry_u64((carryIn), (addend1), (addend2), &(sumOut));
+
+// Digit subtraction with borrow via _subborrow_u64
+#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \
+    (borrowOut) = _subborrow_u64((borrowIn), (minuend), (subtrahend), &(differenceOut));
+
+// Digit shift right via __shiftright128; the DigitSize argument is ignored in this variant
+#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \
+    (shiftOut) = __shiftright128((lowIn), (highIn), (shift));
+
+// Digit shift left via __shiftleft128; the DigitSize argument is ignored in this variant
+#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \
+    (shiftOut) = __shiftleft128((lowIn), (highIn), (shift));
+
+// 64x64-bit multiplication: product[0] receives the low 64 bits, product[1] the high 64 bits
+#define MUL128(multiplier, multiplicand, product) \
+    (product)[0] = _umul128((multiplier), (multiplicand), &(product)[1]);
+
+// 128-bit addition, inputs < 2^127 (the carry out of the high word is discarded)
+#define ADD128(addend1, addend2, addition)                                                  \
+    {                                                                                       \
+        unsigned char carry = _addcarry_u64(0, (addend1)[0], (addend2)[0], &(addition)[0]); \
+        _addcarry_u64(carry, (addend1)[1], (addend2)[1], &(addition)[1]);                   \
+    }
+
+// 128-bit addition with output carry. NOTE: expands to two unbraced statements, so it is not safe as the body of an unbraced if/else
+#define ADC128(addend1, addend2, carry, addition)                           \
+    (carry) = _addcarry_u64(0, (addend1)[0], (addend2)[0], &(addition)[0]); \
+    (carry) = _addcarry_u64((carry), (addend1)[1], (addend2)[1], &(addition)[1]);
+
+// 128-bit subtraction, subtrahend < 2^127 (the borrow out of the high word is discarded)
+#define SUB128(minuend, subtrahend, difference)                                                    \
+    {                                                                                              \
+        unsigned char borrow = _subborrow_u64(0, (minuend)[0], (subtrahend)[0], &(difference)[0]); \
+        _subborrow_u64(borrow, (minuend)[1], (subtrahend)[1], &(difference)[1]);                   \
+    }
+
+// 128-bit right shift, max. shift value is 64. NOTE: expands to two unbraced statements
+#define SHIFTR128(Input, shift, shiftOut)                             \
+    (shiftOut)[0] = __shiftright128((Input)[0], (Input)[1], (shift)); \
+    (shiftOut)[1] = (Input)[1] >> (shift);
+
+// 128-bit left shift, max. shift value is 64. NOTE: expands to two unbraced statements
+#define SHIFTL128(Input, shift, shiftOut)                            \
+    (shiftOut)[1] = __shiftleft128((Input)[0], (Input)[1], (shift)); \
+    (shiftOut)[0] = (Input)[0] << (shift);
+
+#define MULADD128(multiplier, multiplicand, addend, carry, result) \
+    ;                                                              \
+    {                                                              \
+        uint128_t product;                                         \
+        MUL128(multiplier, multiplicand, product);                 \
+        ADC128(addend, product, carry, result);                    \
+    }
+
+#elif ((TARGET == TARGET_AMD64 || TARGET == TARGET_ARM64) && OS_TARGET == OS_LINUX)
+
+// Digit multiplication via a 128-bit product: the high digit is stored through the pointer (hi), the low digit is assigned to (lo)
+#define MUL(multiplier, multiplicand, hi, lo)                                    \
+    {                                                                            \
+        uint128_t tempReg = (uint128_t)(multiplier) * (uint128_t)(multiplicand); \
+        *(hi) = (digit_t)(tempReg >> RADIX);                                     \
+        (lo) = (digit_t)tempReg;                                                 \
+    }
+
+// Digit addition with carry: the sum is formed in 128 bits; carryOut is its part above RADIX bits
+#define ADDC(carryIn, addend1, addend2, carryOut, sumOut)                                       \
+    {                                                                                           \
+        uint128_t tempReg = (uint128_t)(addend1) + (uint128_t)(addend2) + (uint128_t)(carryIn); \
+        (carryOut) = (digit_t)(tempReg >> RADIX);                                               \
+        (sumOut) = (digit_t)tempReg;                                                            \
+    }
+
+// Digit subtraction with borrow: borrowOut is the top bit of the 128-bit difference, set when the subtraction wrapped below zero
+#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut)                               \
+    {                                                                                               \
+        uint128_t tempReg = (uint128_t)(minuend) - (uint128_t)(subtrahend) - (uint128_t)(borrowIn); \
+        (borrowOut) = (digit_t)(tempReg >> (sizeof(uint128_t) * 8 - 1));                            \
+        (differenceOut) = (digit_t)tempReg;                                                         \
+    }
+
+// Digit shift right; the DigitSize argument is ignored in this variant (RADIX is used instead)
+#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \
+    (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (RADIX - (shift)));
+
+// Digit shift left; the DigitSize argument is ignored in this variant (RADIX is used instead)
+#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \
+    (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (RADIX - (shift)));
+
+#endif
+
+#endif
diff --git a/SIKE_sw/src/ec_isogeny.c b/SIKE_sw/src/ec_isogeny.c
new file mode 100644
index 0000000..7a0043e
--- /dev/null
+++ b/SIKE_sw/src/ec_isogeny.c
@@ -0,0 +1,416 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: Elliptic curve and isogeny functions
+*********************************************************************************************/
+
+
+void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24)
+{ // Doubling of a Montgomery point in projective coordinates (X:Z).
+  // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A24plus = A+2C and C24 = 4C.
+  // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2). P is fully read before Q is written, so Q may alias P (as in xDBLe()).
+    f2elm_t t0, t1;
+    
+    mp2_sub_p2(P->X, P->Z, t0);                     // t0 = X1-Z1
+    mp2_add(P->X, P->Z, t1);                        // t1 = X1+Z1
+    fp2sqr_mont(t0, t0);                            // t0 = (X1-Z1)^2 
+    fp2sqr_mont(t1, t1);                            // t1 = (X1+Z1)^2 
+    fp2mul_mont(C24, t0, Q->Z);                     // Z2 = C24*(X1-Z1)^2   
+    fp2mul_mont(t1, Q->Z, Q->X);                    // X2 = C24*(X1-Z1)^2*(X1+Z1)^2
+    mp2_sub_p2(t1, t0, t1);                         // t1 = (X1+Z1)^2-(X1-Z1)^2 
+    fp2mul_mont(A24plus, t1, t0);                   // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2]
+    mp2_add(Q->Z, t0, Q->Z);                        // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2
+    fp2mul_mont(Q->Z, t1, Q->Z);                    // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2]
+}
+
+
+void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, const int e)
+{ // Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings.
+  // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A24plus = A+2C and C24 = 4C.
+  // Output: projective Montgomery x-coordinates Q <- (2^e)*P. For e <= 0 the loop does not run and Q is just a copy of P.
+    int i;
+    
+    copy_words((digit_t*)P, (digit_t*)Q, 2*2*NWORDS_FIELD);   // Q = P; word count presumably covers X and Z, 2 field elements each -- TODO confirm layout
+
+    for (i = 0; i < e; i++) {
+        xDBL(Q, Q, A24plus, C24);                   // Double Q in place
+    }
+}
+
+#if (OALICE_BITS % 2 == 1)
+
+void get_2_isog(const point_proj_t P, f2elm_t A, f2elm_t C)
+{ // Computes the corresponding 2-isogeny of a projective Montgomery point (X2:Z2) of order 2.
+  // Input:  projective point of order two P = (X2:Z2).
+  // Output: the 2-isogenous Montgomery curve with projective coefficients A/C, written as A = Z2^2 - X2^2 and C = Z2^2.
+
+    fp2sqr_mont(P->X, A);                           // A = X2^2
+    fp2sqr_mont(P->Z, C);                           // C = Z2^2
+    mp2_sub_p2(C, A, A);                            // A = Z2^2 - X2^2
+}
+
+
+void eval_2_isog(point_proj_t P, point_proj_t Q)
+{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 2-isogeny phi.
+  // Inputs: the projective point P = (X:Z) and the 2-isogeny kernel projective point Q = (X2:Z2). Q is only read.
+  // Output: the projective point P = phi(P) = (X:Z) in the codomain, written in place over P.
+    f2elm_t t0, t1, t2, t3;    
+
+    mp2_add(Q->X, Q->Z, t0);                        // t0 = X2+Z2
+    mp2_sub_p2(Q->X, Q->Z, t1);                     // t1 = X2-Z2
+    mp2_add(P->X, P->Z, t2);                        // t2 = X+Z
+    mp2_sub_p2(P->X, P->Z, t3);                     // t3 = X-Z
+    fp2mul_mont(t0, t3, t0);                        // t0 = (X2+Z2)*(X-Z)
+    fp2mul_mont(t1, t2, t1);                        // t1 = (X2-Z2)*(X+Z)
+    mp2_add(t0, t1, t2);                            // t2 = (X2+Z2)*(X-Z) + (X2-Z2)*(X+Z)   
+    mp2_sub_p2(t0, t1, t3);                         // t3 = (X2+Z2)*(X-Z) - (X2-Z2)*(X+Z)
+    fp2mul_mont(P->X, t2, P->X);                    // Xfinal = X*t2
+    fp2mul_mont(P->Z, t3, P->Z);                    // Zfinal = Z*t3
+}
+
+#endif
+
+void get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff)
+{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4.
+  // Input:  projective point of order four P = (X4:Z4).
+  // Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C (in A24plus/C24) and the 3 coefficients 
+  //         coeff[0..2] that are used to evaluate the isogeny at a point in eval_4_isog().
+    mp2_sub_p2(P->X, P->Z, coeff[1]);               // coeff[1] = X4-Z4
+    mp2_add(P->X, P->Z, coeff[2]);                  // coeff[2] = X4+Z4
+    fp2sqr_mont(P->Z, coeff[0]);                    // coeff[0] = Z4^2
+    mp2_add(coeff[0], coeff[0], coeff[0]);          // coeff[0] = 2*Z4^2
+    fp2sqr_mont(coeff[0], C24);                     // C24 = 4*Z4^4
+    mp2_add(coeff[0], coeff[0], coeff[0]);          // coeff[0] = 4*Z4^2
+    fp2sqr_mont(P->X, A24plus);                     // A24plus = X4^2
+    mp2_add(A24plus, A24plus, A24plus);             // A24plus = 2*X4^2
+    fp2sqr_mont(A24plus, A24plus);                  // A24plus = 4*X4^4
+}
+
+
+void eval_4_isog(point_proj_t P, f2elm_t* coeff)
+{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined 
+  // by the 3 coefficients in coeff (computed in the function get_4_isog()).
+  // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z).
+  // Output: the projective point P = phi(P) = (X:Z) in the codomain, written in place over P.
+    f2elm_t t0, t1;
+    
+    mp2_add(P->X, P->Z, t0);                        // t0 = X+Z
+    mp2_sub_p2(P->X, P->Z, t1);                     // t1 = X-Z
+    fp2mul_mont(t0, coeff[1], P->X);                // X = (X+Z)*coeff[1]
+    fp2mul_mont(t1, coeff[2], P->Z);                // Z = (X-Z)*coeff[2]
+    fp2mul_mont(t0, t1, t0);                        // t0 = (X+Z)*(X-Z)
+    fp2mul_mont(coeff[0], t0, t0);                  // t0 = coeff[0]*(X+Z)*(X-Z)
+    mp2_add(P->X, P->Z, t1);                        // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1]
+    mp2_sub_p2(P->X, P->Z, P->Z);                   // Z = (X+Z)*coeff[1] - (X-Z)*coeff[2]
+    fp2sqr_mont(t1, t1);                            // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
+    fp2sqr_mont(P->Z, P->Z);                        // Z = [(X+Z)*coeff[1] - (X-Z)*coeff[2]]^2
+    mp2_add(t1, t0, P->X);                          // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
+    mp2_sub_p2(P->Z, t0, t0);                       // t0 = [(X+Z)*coeff[1] - (X-Z)*coeff[2]]^2 - coeff[0]*(X+Z)*(X-Z)
+    fp2mul_mont(P->X, t1, P->X);                    // Xfinal = X*t1
+    fp2mul_mont(P->Z, t0, P->Z);                    // Zfinal = Z*t0
+}
+
+
+void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus)              
+{ // Tripling of a Montgomery point in projective coordinates (X:Z).
+  // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
+  // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3). P is fully read before Q is written, so Q may alias P (as in xTPLe()).
+    f2elm_t t0, t1, t2, t3, t4, t5, t6;
+                                    
+    mp2_sub_p2(P->X, P->Z, t0);                     // t0 = X-Z 
+    fp2sqr_mont(t0, t2);                            // t2 = (X-Z)^2           
+    mp2_add(P->X, P->Z, t1);                        // t1 = X+Z 
+    fp2sqr_mont(t1, t3);                            // t3 = (X+Z)^2
+    mp2_add(P->X, P->X, t4);                        // t4 = 2*X
+    mp2_add(P->Z, P->Z, t0);                        // t0 = 2*Z 
+    fp2sqr_mont(t4, t1);                            // t1 = 4*X^2
+    mp2_sub_p2(t1, t3, t1);                         // t1 = 4*X^2 - (X+Z)^2 
+    mp2_sub_p2(t1, t2, t1);                         // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2
+    fp2mul_mont(A24plus, t3, t5);                   // t5 = A24plus*(X+Z)^2 
+    fp2mul_mont(t3, t5, t3);                        // t3 = A24plus*(X+Z)^4
+    fp2mul_mont(A24minus, t2, t6);                  // t6 = A24minus*(X-Z)^2
+    fp2mul_mont(t2, t6, t2);                        // t2 = A24minus*(X-Z)^4
+    mp2_sub_p2(t2, t3, t3);                         // t3 = A24minus*(X-Z)^4 - A24plus*(X+Z)^4
+    mp2_sub_p2(t5, t6, t2);                         // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2
+    fp2mul_mont(t1, t2, t1);                        // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
+    fp2add(t3, t1, t2);                             // t2 = t3 + t1 (sum of the two bracketed quantities above)
+    fp2sqr_mont(t2, t2);                            // t2 = t2^2
+    fp2mul_mont(t4, t2, Q->X);                      // X3 = 2*X*t2
+    fp2sub(t3, t1, t1);                             // t1 = t3 - t1 (difference of the two bracketed quantities above)
+    fp2sqr_mont(t1, t1);                            // t1 = t1^2
+    fp2mul_mont(t0, t1, Q->Z);                      // Z3 = 2*Z*t1
+}
+
+
+void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, const int e)
+{ // Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings.
+  // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
+  // Output: projective Montgomery x-coordinates Q <- (3^e)*P. For e <= 0 the loop does not run and Q is just a copy of P.
+    int i;
+        
+    copy_words((digit_t*)P, (digit_t*)Q, 2*2*NWORDS_FIELD);   // Q = P; word count presumably covers X and Z, 2 field elements each -- TODO confirm layout
+
+    for (i = 0; i < e; i++) {
+        xTPL(Q, Q, A24minus, A24plus);              // Triple Q in place
+    }
+}
+
+
+void get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff)
+{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3.
+  // Input:  projective point of order three P = (X3:Z3), written (X:Z) in the step comments below.
+  // Output: curve constants A24minus and A24plus of the 3-isogenous curve, plus coeff[0] = X-Z and coeff[1] = X+Z as used by eval_3_isog().
+    f2elm_t t0, t1, t2, t3, t4;
+    
+    mp2_sub_p2(P->X, P->Z, coeff[0]);               // coeff0 = X-Z
+    fp2sqr_mont(coeff[0], t0);                      // t0 = (X-Z)^2
+    mp2_add(P->X, P->Z, coeff[1]);                  // coeff1 = X+Z
+    fp2sqr_mont(coeff[1], t1);                      // t1 = (X+Z)^2
+    mp2_add(P->X, P->X, t3);                        // t3 = 2*X
+    fp2sqr_mont(t3, t3);                            // t3 = 4*X^2 
+    fp2sub(t3, t0, t2);                             // t2 = 4*X^2 - (X-Z)^2 
+    fp2sub(t3, t1, t3);                             // t3 = 4*X^2 - (X+Z)^2
+    mp2_add(t0, t3, t4);                            // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2 
+    mp2_add(t4, t4, t4);                            // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2) 
+    mp2_add(t1, t4, t4);                            // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2
+    fp2mul_mont(t2, t4, A24minus);                  // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
+    mp2_add(t1, t2, t4);                            // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2
+    mp2_add(t4, t4, t4);                            // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2) 
+    mp2_add(t0, t4, t4);                            // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2
+    fp2mul_mont(t3, t4, A24plus);                   // A24plus = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
+}
+
+
+void eval_3_isog(point_proj_t Q, const f2elm_t* coeff)
+{ // Evaluates a 3-isogeny phi at the projective point Q = (X:Z) on a Montgomery curve, where phi is described by 
+  // the 2 coefficients in coeff (computed in the function get_3_isog() from a point of order 3).
+  // Inputs: the projective point Q = (X:Z) and the coefficients coeff[0], coeff[1].
+  // Output: the projective point Q <- phi(Q), written in place over Q.
+    f2elm_t t0, t1, t2;
+     
+    mp2_add(Q->X, Q->Z, t0);                      // t0 = X+Z
+    mp2_sub_p2(Q->X, Q->Z, t1);                   // t1 = X-Z
+    fp2mul_mont(coeff[0], t0, t0);                // t0 = coeff0*(X+Z)
+    fp2mul_mont(coeff[1], t1, t1);                // t1 = coeff1*(X-Z)
+    mp2_add(t0, t1, t2);                          // t2 = coeff0*(X+Z) + coeff1*(X-Z)
+    mp2_sub_p2(t1, t0, t0);                       // t0 = coeff1*(X-Z) - coeff0*(X+Z)
+    fp2sqr_mont(t2, t2);                          // t2 = [coeff0*(X+Z) + coeff1*(X-Z)]^2
+    fp2sqr_mont(t0, t0);                          // t0 = [coeff1*(X-Z) - coeff0*(X+Z)]^2
+    fp2mul_mont(Q->X, t2, Q->X);                  // X3final = X*[coeff0*(X+Z) + coeff1*(X-Z)]^2     
+    fp2mul_mont(Q->Z, t0, Q->Z);                  // Z3final = Z*[coeff1*(X-Z) - coeff0*(X+Z)]^2
+}
+
+
+void inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3)
+{ // 3-way simultaneous inversion: one fp2inv_mont call on the product z1*z2*z3 plus 6 multiplications.
+  // Input:  z1,z2,z3 (NOTE(review): presumably all nonzero, since a single inverse of the product is shared -- confirm).
+  // Output: 1/z1,1/z2,1/z3 (override inputs).
+    f2elm_t t0, t1, t2;
+
+    fp2mul_mont(z1, z2, t0);                      // t0 = z1*z2
+    fp2mul_mont(z3, t0, t1);                      // t1 = z1*z2*z3
+    fp2inv_mont(t1);                              // t1 = 1/(z1*z2*z3)
+    fp2mul_mont(z3, t1, t2);                      // t2 = 1/(z1*z2)
+    fp2mul_mont(t0, t1, z3);                      // z3 = 1/z3
+    fp2mul_mont(t2, z2, t0);                      // t0 = 1/z1, parked in t0 because the original z1 is still read on the next line
+    fp2mul_mont(t2, z1, z2);                      // z2 = 1/z2
+    fp2copy(t0, z1);                              // z1 = 1/z1
+}
+
+
+void get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A)
+{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
+  // Input:  the x-coordinates xP, xQ, and xR of the points P, Q and R.
+  // Output: the coefficient A = (xP*xQ + xR*(xP+xQ) - 1)^2 / (4*xP*xQ*xR) - (xP+xQ+xR) of the curve E_A: y^2=x^3+A*x^2+x.
+    f2elm_t t0, t1, one = {0};
+    
+    fpcopy((digit_t*)&Montgomery_one, one[0]);    // one = Montgomery_one + 0*i (one[1] stays 0 from the initializer)
+    fp2add(xP, xQ, t1);                           // t1 = xP+xQ
+    fp2mul_mont(xP, xQ, t0);                      // t0 = xP*xQ
+    fp2mul_mont(xR, t1, A);                       // A = xR*(xP+xQ); xR is read again below, so A must not alias xR
+    fp2add(t0, A, A);                             // A = xP*xQ + xR*(xP+xQ)
+    fp2mul_mont(t0, xR, t0);                      // t0 = xP*xQ*xR
+    fp2sub(A, one, A);                            // A = xP*xQ + xR*(xP+xQ) - 1
+    fp2add(t0, t0, t0);                           // t0 = 2*xP*xQ*xR
+    fp2add(t1, xR, t1);                           // t1 = xP+xQ+xR
+    fp2add(t0, t0, t0);                           // t0 = 4*xP*xQ*xR
+    fp2sqr_mont(A, A);                            // A = [xP*xQ + xR*(xP+xQ) - 1]^2
+    fp2inv_mont(t0);                              // t0 = 1/(4*xP*xQ*xR)
+    fp2mul_mont(A, t0, A);                        // A = [xP*xQ + xR*(xP+xQ) - 1]^2 / (4*xP*xQ*xR)
+    fp2sub(A, t1, A);                             // Afinal = A - (xP+xQ+xR)
+}
+
+
+void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv)
+{ // Computes the j-invariant of a Montgomery curve with projective constant.
+  // Input: A,C in GF(p^2). jinv is written before C is read, so jinv must not alias C.
+  // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x.
+    f2elm_t t0, t1;
+    
+    fp2sqr_mont(A, jinv);                           // jinv = A^2
+    fp2sqr_mont(C, t1);                             // t1 = C^2
+    fp2add(t1, t1, t0);                             // t0 = 2*C^2
+    fp2sub(jinv, t0, t0);                           // t0 = A^2-2*C^2
+    fp2sub(t0, t1, t0);                             // t0 = A^2-3*C^2
+    fp2sub(t0, t1, jinv);                           // jinv = A^2-4*C^2
+    fp2sqr_mont(t1, t1);                            // t1 = C^4
+    fp2mul_mont(jinv, t1, jinv);                    // jinv = C^4*(A^2-4*C^2)
+    fp2add(t0, t0, t0);                             // t0 = 2*(A^2-3*C^2)
+    fp2add(t0, t0, t0);                             // t0 = 4*(A^2-3*C^2)
+    fp2sqr_mont(t0, t1);                            // t1 = 16*(A^2-3*C^2)^2
+    fp2mul_mont(t0, t1, t0);                        // t0 = 64*(A^2-3*C^2)^3
+    fp2add(t0, t0, t0);                             // t0 = 128*(A^2-3*C^2)^3
+    fp2add(t0, t0, t0);                             // t0 = 256*(A^2-3*C^2)^3
+    fp2inv_mont(jinv);                              // jinv = 1/(C^4*(A^2-4*C^2))
+    fp2mul_mont(jinv, t0, jinv);                    // jinv = 256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2))
+}
+
+
+void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t XPQ, const f2elm_t ZPQ, const f2elm_t A24)
+{ // Simultaneous doubling and differential addition.
+  // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, projective difference (XPQ:ZPQ) such that x(P-Q)=XPQ/ZPQ, and Montgomery curve constant A24=(A+2)/4.
+  // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that x(Q+P)=XQP/ZQP.
+    f2elm_t t0, t1, t2;
+
+    mp2_add(P->X, P->Z, t0);                        // t0 = XP+ZP
+    mp2_sub_p2(P->X, P->Z, t1);                     // t1 = XP-ZP
+    fp2sqr_mont(t0, P->X);                          // XP = (XP+ZP)^2
+    mp2_sub_p2(Q->X, Q->Z, t2);                     // t2 = XQ-ZQ
+    mp2_add(Q->X, Q->Z, Q->X);                      // XQ = XQ+ZQ
+    fp2mul_mont(t0, t2, t0);                        // t0 = (XP+ZP)*(XQ-ZQ)
+    fp2sqr_mont(t1, P->Z);                          // ZP = (XP-ZP)^2
+    fp2mul_mont(t1, Q->X, t1);                      // t1 = (XP-ZP)*(XQ+ZQ)
+    mp2_sub_p2(P->X, P->Z, t2);                     // t2 = (XP+ZP)^2-(XP-ZP)^2
+    fp2mul_mont(P->X, P->Z, P->X);                  // XP = (XP+ZP)^2*(XP-ZP)^2
+    fp2mul_mont(A24, t2, Q->X);                     // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] (Q->X reused as scratch)
+    mp2_sub_p2(t0, t1, Q->Z);                       // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)
+    mp2_add(Q->X, P->Z, P->Z);                      // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2
+    mp2_add(t0, t1, Q->X);                          // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)
+    fp2mul_mont(P->Z, t2, P->Z);                    // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]
+    fp2sqr_mont(Q->Z, Q->Z);                        // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+    fp2sqr_mont(Q->X, Q->X);                        // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2
+    fp2mul_mont(Q->Z, XPQ, Q->Z);                   // ZQ = XPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+    fp2mul_mont(Q->X, ZPQ, Q->X);                   // XQ = ZPQ*[(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2
+}
+
+
+static void swap_points(point_proj_t P, point_proj_t Q, const digit_t option)
+{ // Swap points without branching on option: each digit pair is combined through an XOR difference masked by option.
+  // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
+    digit_t temp;
+    unsigned int i;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        temp = option & (P->X[0][i] ^ Q->X[0][i]);   // masked difference of the X[0] digits
+        P->X[0][i] = temp ^ P->X[0][i]; 
+        Q->X[0][i] = temp ^ Q->X[0][i];  
+        temp = option & (P->X[1][i] ^ Q->X[1][i]);   // same for X[1]
+        P->X[1][i] = temp ^ P->X[1][i]; 
+        Q->X[1][i] = temp ^ Q->X[1][i];
+        temp = option & (P->Z[0][i] ^ Q->Z[0][i]);   // same for Z[0]
+        P->Z[0][i] = temp ^ P->Z[0][i]; 
+        Q->Z[0][i] = temp ^ Q->Z[0][i];
+        temp = option & (P->Z[1][i] ^ Q->Z[1][i]);   // same for Z[1]
+        P->Z[1][i] = temp ^ P->Z[1][i]; 
+        Q->Z[1][i] = temp ^ Q->Z[1][i]; 
+    }
+}
+
+
+static void LADDER3PT(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const digit_t* m, const unsigned int AliceOrBob, point_proj_t R, const f2elm_t A)
+{ // Three-point ladder over the bits of m (least significant first): R0 starts at xQ, R2 at xPQ and R at xP; the result is left in R (presumably x(P+[m]Q) -- confirm against the specification).
+    point_proj_t R0 = {0}, R2 = {0};
+    f2elm_t A24 = {0};
+    digit_t mask;
+    int i, nbits, bit, swap, prevbit = 0;
+
+    if (AliceOrBob == ALICE) {
+        nbits = OALICE_BITS;
+    } else {
+        nbits = OBOB_BITS - 1;
+    }
+
+    // Initializing constant
+    fpcopy((digit_t*)&Montgomery_one, A24[0]);       // A24 = 1 (A24[1] stays 0 from the initializer)
+    mp2_add(A24, A24, A24);                          // A24 = 2
+    mp2_add(A, A24, A24);                            // A24 = A+2
+    fp2div2(A24, A24);  
+    fp2div2(A24, A24);  // A24 = (A+2)/4
+
+    // Initializing points
+    fp2copy(xQ, R0->X);
+    fpcopy((digit_t*)&Montgomery_one, (digit_t*)R0->Z);   // R0 = (xQ:1); Z[1] is already 0 from the initializer
+    fp2copy(xPQ, R2->X);
+    fpcopy((digit_t*)&Montgomery_one, (digit_t*)R2->Z);   // R2 = (xPQ:1)
+    fp2copy(xP, R->X);
+    fpcopy((digit_t*)&Montgomery_one, (digit_t*)R->Z);    // R = (xP:1)
+    fpzero((digit_t*)(R->Z)[1]);                          // R is caller-provided, so its Z[1] is cleared explicitly
+
+    // Main loop
+    for (i = 0; i < nbits; i++) {
+        bit = (m[i >> LOG2RADIX] >> (i & (RADIX-1))) & 1;   // bit i of m: digit index i >> LOG2RADIX, position i & (RADIX-1)
+        swap = bit ^ prevbit;                               // swap only when the bit differs from the previous one
+        prevbit = bit;
+        mask = 0 - (digit_t)swap;                           // 0 or all-ones mask for swap_points
+
+        swap_points(R, R2, mask);
+        xDBLADD(R0, R2, R->X, R->Z, A24);                   // R0 <- 2*R0, R2 <- R0+R2, with (R->X:R->Z) as the difference
+    }
+    swap = 0 ^ prevbit;                                     // undo the swap state left by the last processed bit
+    mask = 0 - (digit_t)swap;
+    swap_points(R, R2, mask);
+}
+
+
+void TraverseTree(f2elm_t jinv, point_proj_t R, f2elm_t A24plus, f2elm_t C24, const unsigned int *strat, unsigned int lenstrat, bool keygen,
+                  point_proj_t phiP, point_proj_t phiQ, point_proj_t phiR) 
+{ // Isogeny tree traversal with 4-isogenies (get_4_isog/eval_4_isog), following the strategy strat[]; R, A24plus and C24 are updated in place.
+    point_proj_t pts[MAX_INT_POINTS_ALICE];
+    f2elm_t coeff[3];
+    unsigned int i, m, row, ii = 0, index = 0, npts = 0, pts_index[MAX_INT_POINTS_ALICE];
+    // keygen true: phiP, phiQ, phiR are pushed through every isogeny and their X replaced by X/Z. keygen false: jinv receives the output of j_inv().
+    for (row = 1; row < lenstrat; row++) {
+        while (index < lenstrat - row) {
+            fp2copy(R->X, pts[npts]->X);                // save the current R and its tree index before moving on
+            fp2copy(R->Z, pts[npts]->Z);
+            pts_index[npts++] = index;
+            m = strat[ii++];
+            xDBLe(R, R, A24plus, C24, (int)(2*m));      // 2*m doublings requested (presumably R <- [2^(2m)]R -- confirm in xDBLe)
+            index += m;
+        }
+        get_4_isog(R, A24plus, C24, coeff);
+        if (keygen) {
+            eval_4_isog(phiP, coeff);
+            eval_4_isog(phiQ, coeff);
+            eval_4_isog(phiR, coeff);
+        }
+
+        for (i = 0; i < npts; i++) {                    // carry every saved point through the new isogeny
+            eval_4_isog(pts[i], coeff);
+        }
+
+        fp2copy(pts[npts-1]->X, R->X);                  // resume from the most recently saved point
+        fp2copy(pts[npts-1]->Z, R->Z);
+        index = pts_index[npts-1];
+        npts -= 1;
+    }
+    get_4_isog(R, A24plus, C24, coeff);
+    if (keygen) {
+        eval_4_isog(phiP, coeff);
+        eval_4_isog(phiQ, coeff);
+        eval_4_isog(phiR, coeff);
+        inv_3_way(phiP->Z, phiQ->Z, phiR->Z);           // each Z now holds 1/Z
+        fp2mul_mont(phiP->X, phiP->Z, phiP->X);         // X <- X/Z
+        fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
+        fp2mul_mont(phiR->X, phiR->Z, phiR->X);
+    } else {
+        fp2add(A24plus, A24plus, A24plus);              // A24plus = 2*A24plus
+        fp2sub(A24plus, C24, A24plus);                  // A24plus = 2*A24plus - C24
+        fp2add(A24plus, A24plus, A24plus);              // A24plus = 4*A24plus - 2*C24
+        j_inv(A24plus, C24, jinv);
+    }
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/fpx.c b/SIKE_sw/src/fpx.c
new file mode 100644
index 0000000..1b8e070
--- /dev/null
+++ b/SIKE_sw/src/fpx.c
@@ -0,0 +1,1103 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: Core functions over GF(p) and GF(p^2)
+*********************************************************************************************/
+
+
+int8_t ct_compare(const uint8_t *a, const uint8_t *b, unsigned int len)
+{ // Compare two byte arrays of len bytes in constant time (no data-dependent branch or early exit).
+  // Returns 0 if the byte arrays are equal (including len = 0), -1 otherwise.
+    uint8_t r = 0;
+
+    for (unsigned int i = 0; i < len; i++)
+        r |= a[i] ^ b[i];                   // r collects every differing bit, so r == 0 iff the arrays match
+    // 0 - r wraps to a value with bit 31 set for any nonzero r; unsigned math avoids the implementation-defined right shift of a negative value.
+    return (int8_t)(0 - (int32_t)((uint32_t)(0 - (uint32_t)r) >> 31));
+}
+
+
+void ct_cmov(uint8_t *r, const uint8_t *a, unsigned int len, int8_t selector)
+{ // Branch-free conditional copy of len bytes from a into r.
+  // selector = -1 (all ones) loads r with a; selector = 0 leaves r unchanged.
+    unsigned int pos = 0;
+
+    for (; pos != len; ++pos) r[pos] = (uint8_t)(r[pos] ^ (selector & (a[pos] ^ r[pos])));
+}
+
+
+__inline static void encode_to_bytes(const digit_t* x, unsigned char* enc, int nbytes)
+{ // Encoding digits to bytes according to endianness: writes nbytes bytes taken from the digit array x into enc.
+#ifdef _BIG_ENDIAN_
+    int ndigits = nbytes / sizeof(digit_t);     // number of whole digits covered by nbytes
+    int rem = nbytes % sizeof(digit_t);         // bytes left over in a trailing partial digit
+
+    for (int i = 0; i < ndigits; i++)
+        ((digit_t*)enc)[i] = BSWAP_DIGIT(x[i]); // NOTE(review): stores through a digit_t* -- assumes enc is suitably aligned, confirm
+    if (rem) {
+        digit_t ld = BSWAP_DIGIT(x[ndigits]);   // swap the partial digit, then emit only its first rem bytes
+        memcpy(enc + ndigits*sizeof(digit_t), (unsigned char*)&ld, rem);
+    }
+#else    
+    memcpy(enc, (const unsigned char*)x, nbytes);   // without _BIG_ENDIAN_ the in-memory digits are copied verbatim
+#endif
+}
+
+
+__inline static void decode_to_digits(const unsigned char* x, digit_t* dec, int nbytes, int ndigits)
+{ // Decoding bytes to digits according to endianness: copies nbytes bytes of x into the ndigits-digit array dec.
+  // NOTE(review): only the last digit is pre-cleared, so this presumably expects nbytes > (ndigits-1)*sizeof(digit_t) and ndigits >= 1 -- confirm.
+    dec[ndigits - 1] = 0;                       // clear the top digit so bytes not covered by nbytes read as zero
+    memcpy((unsigned char*)dec, x, nbytes);
+#ifdef _BIG_ENDIAN_
+    for (int i = 0; i < ndigits; i++)
+        dec[i] = BSWAP_DIGIT(dec[i]);           // every digit is passed through BSWAP_DIGIT after the raw copy
+#endif
+}
+
+
+static void fp2_encode(const f2elm_t x, unsigned char *enc)
+{ // Conversion of GF(p^2) element from Montgomery to standard representation, and encoding by removing leading 0 bytes
+    f2elm_t t;
+
+    from_fp2mont(x, t);                                                        // t = x converted out of Montgomery representation
+    encode_to_bytes(t[0], enc, FP2_ENCODED_BYTES / 2);                         // t[0] fills the first half of enc
+    encode_to_bytes(t[1], enc + FP2_ENCODED_BYTES / 2, FP2_ENCODED_BYTES / 2); // t[1] fills the second half
+}
+
+
+static void fp2_decode(const unsigned char *x, f2elm_t dec)
+{ // Parse byte sequence back into GF(p^2) element, and conversion to Montgomery representation
+  // Layout read here: FP2_ENCODED_BYTES/2 bytes for dec[0] followed by FP2_ENCODED_BYTES/2 bytes for dec[1].
+    decode_to_digits(x, dec[0], FP2_ENCODED_BYTES / 2, NWORDS_FIELD);
+    decode_to_digits(x + FP2_ENCODED_BYTES / 2, dec[1], FP2_ENCODED_BYTES / 2, NWORDS_FIELD);
+    to_fp2mont(dec, dec);                       // in-place conversion of both coordinates
+}
+
+
+__inline void fpcopy(const felm_t a, felm_t c)
+{ // Field element assignment c <- a, one digit at a time in ascending index order.
+    unsigned int idx = 0;
+    while (idx < NWORDS_FIELD) {
+        c[idx] = a[idx]; idx++;
+    }
+}
+
+
+__inline void fpzero(felm_t a)
+{ // Clear a field element: each of its NWORDS_FIELD digits is set to 0.
+    unsigned int idx = 0;
+    while (idx < NWORDS_FIELD) {
+        a[idx] = 0; idx++;
+    }
+}
+
+
+void to_mont(const felm_t a, felm_t mc)
+{   // Conversion to Montgomery representation,
+    // mc = a*R^2*R^(-1) mod p = a*R mod p, where a in [0, p-1].
+    // The Montgomery constant R^2 mod p is the global value "Montgomery_R2".
+    // Implemented as a single Montgomery multiplication of a by Montgomery_R2.
+    fpmul_mont(a, (digit_t *)&Montgomery_R2, mc);
+}
+
+
+void from_mont(const felm_t ma, felm_t c)
+{   // Conversion from Montgomery representation to standard representation,
+    // c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
+    digit_t one[NWORDS_FIELD] = {0};
+
+    one[0] = 1;                 // plain integer 1 (not Montgomery_one), so the multiplication only strips the factor R
+    fpmul_mont(ma, one, c);
+    fpcorrection(c);            // final modular correction of c (presumably into [0, p-1] -- confirm in fpcorrection)
+}
+
+
+void copy_words(const digit_t *a, digit_t *c, const unsigned int nwords)
+{ // Word-wise copy c <- a of exactly nwords digits, in ascending index order.
+    const digit_t *src = a;
+    digit_t *dst = c;
+
+    for (unsigned int left = nwords; left != 0; left--) *dst++ = *src++;
+}
+
+
+void fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc)
+{ // Montgomery multiplication, mc = ma*mb*R^(-1) mod p: a full double-length product followed by the reduction rdc_mont.
+    dfelm_t temp = {0};
+
+    mp_mul(ma, mb, temp, NWORDS_FIELD);     // temp = ma*mb (double-length); mc is not written yet, so mc may alias an input
+    rdc_mont(temp, mc);
+}
+
+
+void fpsqr_mont(const felm_t ma, felm_t mc)
+{ // Montgomery squaring, mc = ma^2*R^(-1) mod p; uses the general multiplier mp_mul with both operands equal to ma.
+    dfelm_t temp = {0};
+
+    mp_mul(ma, ma, temp, NWORDS_FIELD);     // temp = ma^2 (double-length)
+    rdc_mont(temp, mc);
+}
+
+
+void fpinv_mont(felm_t a)
+{ // Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p, computed as the power a^(p-2).
+    felm_t tt;
+
+    fpcopy(a, tt);
+    fpinv_chain_mont(tt);       // tt = a^((p-3)/4)
+    fpsqr_mont(tt, tt);         // tt = a^((p-3)/2)
+    fpsqr_mont(tt, tt);         // tt = a^(p-3)
+    fpmul_mont(a, tt, a);       // a  = a^(p-2)
+}
+
+
+void fp2copy(const f2elm_t a, f2elm_t c)
+{ // Copy a GF(p^2) element, c = a, by copying the two GF(p) coordinates a[0] and a[1] with fpcopy.
+    fpcopy(a[0], c[0]);
+    fpcopy(a[1], c[1]);
+}
+
+
+void fp2zero(f2elm_t a)
+{ // Zero a GF(p^2) element, a = 0, by clearing both coordinates a[0] and a[1] with fpzero.
+    fpzero(a[0]);
+    fpzero(a[1]);
+}
+
+
+void fp2neg(f2elm_t a)
+{ // GF(p^2) negation in place, a = -a in GF(p^2): fpneg is applied to each coordinate.
+    fpneg(a[0]);
+    fpneg(a[1]);
+}
+
+
+__inline void fp2add(const f2elm_t a, const f2elm_t b, f2elm_t c)
+{ // GF(p^2) addition, c = a+b in GF(p^2), done coordinate-wise with the modular addition fpadd.
+    fpadd(a[0], b[0], c[0]);
+    fpadd(a[1], b[1], c[1]);
+}
+
+__inline void fp2sub(const f2elm_t a, const f2elm_t b, f2elm_t c)
+{ // GF(p^2) subtraction, c = a-b in GF(p^2), done coordinate-wise with the modular subtraction fpsub.
+    fpsub(a[0], b[0], c[0]);
+    fpsub(a[1], b[1], c[1]);
+}
+
+
+void fp2div2(const f2elm_t a, f2elm_t c)
+{ // GF(p^2) division by two, c = a/2  in GF(p^2), done coordinate-wise with fpdiv2.
+    fpdiv2(a[0], c[0]);
+    fpdiv2(a[1], c[1]);
+}
+
+
+void fp2correction(f2elm_t a)
+{ // Modular correction in place: fpcorrection is applied to each coordinate, so the value of a in GF(p^2) is unchanged.
+    fpcorrection(a[0]);
+    fpcorrection(a[1]);
+}
+
+
+__inline static void mp_addfast(const digit_t *a, const digit_t *b, digit_t *c)
+{ // Multiprecision addition, c = a+b, over NWORDS_FIELD digits; the carry out is discarded.
+#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION)
+    // Portable path: generic word-by-word addition.
+    mp_add(a, b, c, NWORDS_FIELD);
+
+#elif (OS_TARGET == OS_LINUX)
+    // Assembly path. NOTE(review): if neither branch is selected the body is empty and c is left untouched.
+    mp_add_asm(a, b, c);
+
+#endif
+}
+
+
+__inline static void mp2_add(const f2elm_t a, const f2elm_t b, f2elm_t c)       
+{ // GF(p^2) addition without correction, c = a+b in GF(p^2): plain multiprecision addition of each coordinate, no reduction mod p.
+    mp_addfast(a[0], b[0], c[0]);
+    mp_addfast(a[1], b[1], c[1]);
+}
+
+
+__inline static void mp2_sub_p2(const f2elm_t a, const f2elm_t b, f2elm_t c)       
+{ // GF(p^2) subtraction with correction with 2*p, c = a-b+2p in GF(p^2), applied to each coordinate through mp_sub_p2.
+    mp_sub_p2(a[0], b[0], c[0]);  
+    mp_sub_p2(a[1], b[1], c[1]);
+}
+
+
+__inline unsigned int mp_add(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
+{ // Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit.
+    unsigned int i, carry = 0;
+        
+    for (i = 0; i < nwords; i++) {                      
+        ADDC(carry, a[i], b[i], carry, c[i]);       // add digit i with carry-in, updating carry for the next digit
+    }
+
+    return carry;                                   // carry out of the last digit (0 when nwords is 0)
+}
+
+
+void fp2sqr_mont(const f2elm_t a, f2elm_t c)
+{   // GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2), computed as (a0+a1)(a0-a1) + 2*a0*a1*i.
+    // Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1]
+    // Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] 
+    felm_t t1, t2, t3;
+    
+    mp_addfast(a[0], a[1], t1);                      // t1 = a0+a1 
+    sub_p4(a[0], a[1], t2);                          // t2 = a0-a1 (presumably offset by 4*p, going by the helper's name -- confirm)
+    mp_addfast(a[0], a[0], t3);                      // t3 = 2a0, formed before c0 is written so that in-place calls (c == a) stay correct
+    fpmul_mont(t1, t2, c[0]);                        // c0 = (a0+a1)(a0-a1)
+    fpmul_mont(t3, a[1], c[1]);                      // c1 = 2a0*a1
+}
+
+
+__inline unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords)
+{ // Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit.
+    unsigned int i, borrow = 0;
+
+    for (i = 0; i < nwords; i++)
+        SUBC(borrow, a[i], b[i], borrow, c[i]);     // subtract digit i with borrow-in, updating borrow for the next digit
+
+    return borrow;                                  // borrow out of the last digit (0 when nwords is 0)
+}
+
+
+__inline static void mp_subaddfast(const digit_t* a, const digit_t* b, digit_t* c)
+{ // Multiprecision subtraction followed by addition with p*2^MAXBITS_FIELD, c = a-b+(p*2^MAXBITS_FIELD) if a-b < 0, otherwise c=a-b. 
+#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION)
+    felm_t t1;
+
+    digit_t mask = 0 - (digit_t)mp_sub(a, b, c, 2*NWORDS_FIELD);   // all-ones if the double-length subtraction borrowed, else 0
+    for (int i = 0; i < NWORDS_FIELD; i++)
+        t1[i] = ((digit_t*)PRIME)[i] & mask;                       // t1 = PRIME or 0, selected without branching
+    mp_addfast((digit_t*)&c[NWORDS_FIELD], t1, (digit_t*)&c[NWORDS_FIELD]);   // add t1 into the upper NWORDS_FIELD digits of c
+
+#elif (OS_TARGET == OS_LINUX)               
+
+    mp_subaddx2_asm(a, b, c);     
+
+#endif
+}
+
+
+__inline static void mp_dblsubfast(const digit_t *a, const digit_t *b, digit_t *c)
+{   // Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
+    // Inputs should be s.t. c > a and c > b
+#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION)
+    // Portable path: two successive double-length subtractions; the borrows returned by mp_sub are ignored.
+    mp_sub(c, a, c, 2 * NWORDS_FIELD);
+    mp_sub(c, b, c, 2 * NWORDS_FIELD);
+
+#elif (OS_TARGET == OS_LINUX)
+
+    mp_dblsubx2_asm(a, b, c);
+
+#endif
+}
+
+
+void fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c)
+{   // GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2), using three mp_mul products instead of four.
+    // Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1]
+    // Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]; c is written only after all reads of a and b, so c may alias an input.
+    felm_t t1, t2;
+    dfelm_t tt1, tt2, tt3;
+
+    mp_addfast(a[0], a[1], t1);            // t1 = a0+a1
+    mp_addfast(b[0], b[1], t2);            // t2 = b0+b1
+    mp_mul(a[0], b[0], tt1, NWORDS_FIELD); // tt1 = a0*b0
+    mp_mul(a[1], b[1], tt2, NWORDS_FIELD); // tt2 = a1*b1
+    mp_mul(t1, t2, tt3, NWORDS_FIELD);     // tt3 = (a0+a1)*(b0+b1)
+    mp_dblsubfast(tt1, tt2, tt3);          // tt3 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 = a0*b1 + a1*b0
+    mp_subaddfast(tt1, tt2, tt1);          // tt1 = a0*b0 - a1*b1 + p*2^MAXBITS_FIELD if a0*b0 - a1*b1 < 0, else tt1 = a0*b0 - a1*b1
+    rdc_mont(tt3, c[1]);                   // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 
+    rdc_mont(tt1, c[0]);                   // c[0] = a0*b0 - a1*b1
+}
+
+
+void fpinv_chain_mont(felm_t a)
+{ // Chain to compute a^(p-3)/4 using Montgomery arithmetic.
+    unsigned int i, j;
+    
+#if (NBITS_FIELD == 377)
+    felm_t t[15], tt;
+
+    // Precomputed table
+    fpsqr_mont(a, tt);
+    fpmul_mont(a, tt, t[0]);
+    for (i = 0; i <= 13; i++) fpmul_mont(t[i], tt, t[i+1]);
+
+    fpcopy(t[1], tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 11; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[4], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[8], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[8], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[3], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[7], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[14], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[1], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[11], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (j = 0; j < 37; j++) {
+        for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+        fpmul_mont(t[14], tt, tt);
+    }
+    fpcopy(tt, a);    
+    
+#elif (NBITS_FIELD == 434)
+    felm_t t[31], tt;
+
+    // Precomputed table
+    fpsqr_mont(a, tt);
+    fpmul_mont(a, tt, t[0]);
+    for (i = 0; i <= 29; i++) fpmul_mont(t[i], tt, t[i+1]);
+
+    fpcopy(a, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[14], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[3], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[23], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[13], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[24], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[7], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[12], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[30], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[1], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[30], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[21], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[19], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[1], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[24], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[26], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[16], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[20], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[25], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[30], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[26], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[28], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[22], tt, tt);
+    for (j = 0; j < 35; j++) {
+        for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+        fpmul_mont(t[30], tt, tt);
+    }
+    fpcopy(tt, a);   
+    
+#elif (NBITS_FIELD == 503)
+    felm_t t[15], tt;
+
+    // Precomputed table
+    fpsqr_mont(a, tt);
+    fpmul_mont(a, tt, t[0]);
+    for (i = 0; i <= 13; i++) fpmul_mont(t[i], tt, t[i+1]);
+
+    fpcopy(a, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[8], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[8], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[3], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 12; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[12], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[8], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[12], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[11], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[14], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[14], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[8], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[4], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[7], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[11], tt, tt);
+    for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[13], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[1], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[10], tt, tt);
+    for (j = 0; j < 49; j++) {
+        for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
+        fpmul_mont(t[14], tt, tt);
+    }
+    fpcopy(tt, a);   
+    
+#elif (NBITS_FIELD == 546)
+    felm_t t[31], tt;
+
+    // Precomputed table
+    fpsqr_mont(a, tt);
+    fpmul_mont(a, tt, t[0]);
+    for (i = 0; i <= 29; i++) fpmul_mont(t[i], tt, t[i+1]);
+
+    fpcopy(t[0], tt);
+    for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[29], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[29], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[13], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[15], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[14], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[20], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[15], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[1], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[17], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[17], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[8], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[20], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[28], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[22], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[13], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[13], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[28], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[18], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[11], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[13], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[20], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[28], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[16], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[17], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[11], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[29], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[30], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[24], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[29], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[11], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[3], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[16], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[15], tt, tt);
+    for (j = 0; j < 45; j++) {
+        for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+        fpmul_mont(t[30], tt, tt);
+    }
+    fpcopy(tt, a); 
+
+#elif (NBITS_FIELD == 610)
+    felm_t t[31], tt;
+
+    // Precomputed table
+    fpsqr_mont(a, tt);
+    fpmul_mont(a, tt, t[0]);
+    for (i = 0; i <= 29; i++) fpmul_mont(t[i], tt, t[i+1]);
+
+    fpcopy(a, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[30], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[25], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[28], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[7], tt, tt);
+    for (i = 0; i < 11; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[11], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[3], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[16], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[24], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[28], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[16], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[4], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[3], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[20], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[11], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[14], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[15], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[15], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[19], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[27], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[28], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[29], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[1], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[3], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[30], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[25], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[28], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[22], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[3], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[22], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[7], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[4], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[20], tt, tt);
+    for (i = 0; i < 11; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[26], tt, tt);
+    for (i = 0; i < 11; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (j = 0; j < 50; j++) {
+        for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+        fpmul_mont(t[30], tt, tt);
+    }
+    fpcopy(tt, a);  
+
+#elif (NBITS_FIELD == 697)
+    felm_t t[31], tt;
+
+    // Precomputed table
+    fpsqr_mont(a, tt);
+    fpmul_mont(a, tt, t[0]);
+    for (i = 0; i <= 29; i++) fpmul_mont(t[i], tt, t[i+1]);
+
+    fpcopy(t[0], tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[24], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[18], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[27], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[13], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[18], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[16], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[14], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[23], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[13], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[12], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[13], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[11], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[27], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[27], tt, tt);
+    for (i = 0; i < 12; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[27], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[8], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[25], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[7], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[27], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[30], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[18], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[24], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[11], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[4], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[18], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[30], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[7], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[4], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[22], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[19], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[29], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[24], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[8], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[27], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[21], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[28], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[1], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[17], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[30], tt, tt);
+    for (j = 0; j < 58; j++) {
+        for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+        fpmul_mont(t[30], tt, tt);
+    }
+    fpcopy(tt, a);   
+
+#elif (NBITS_FIELD == 751)
+    felm_t t[27], tt;
+    
+    // Precomputed table
+    fpsqr_mont(a, tt);
+    fpmul_mont(a, tt, t[0]);
+    fpmul_mont(t[0], tt, t[1]);
+    fpmul_mont(t[1], tt, t[2]);
+    fpmul_mont(t[2], tt, t[3]); 
+    fpmul_mont(t[3], tt, t[3]);
+    for (i = 3; i <= 8; i++) fpmul_mont(t[i], tt, t[i+1]);
+    fpmul_mont(t[9], tt, t[9]);
+    for (i = 9; i <= 20; i++) fpmul_mont(t[i], tt, t[i+1]);
+    fpmul_mont(t[21], tt, t[21]); 
+    for (i = 21; i <= 24; i++) fpmul_mont(t[i], tt, t[i+1]); 
+    fpmul_mont(t[25], tt, t[25]);
+    fpmul_mont(t[25], tt, t[26]);
+
+    fpcopy(a, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[20], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[24], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[11], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[8], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[23], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[15], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[13], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[26], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[20], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[11], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[14], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[4], tt, tt);
+    for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[18], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[1], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[22], tt, tt);
+    for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[24], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[18], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[17], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(a, tt, tt);
+    for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[16], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[7], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[12], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[19], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[22], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[25], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[22], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[18], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[4], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[14], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[13], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[23], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[21], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[23], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[12], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[3], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[13], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[17], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[26], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[8], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[11], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    fpmul_mont(t[20], tt, tt);
+    for (j = 0; j < 61; j++) {
+        for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+        fpmul_mont(t[26], tt, tt);
+    }
+    fpcopy(tt, a); 
+#else
+    (void)a, (void)i, (void)j;
+#endif
+}
+
+
+void fp2inv_mont(f2elm_t a)
+{ // In-place GF(p^2) inversion using Montgomery arithmetic: a <- (a0-i*a1)/(a0^2+a1^2).
+    f2elm_t sq;                     // scratch: sq[0] and sq[1] hold GF(p) temporaries
+
+    fpsqr_mont(a[1], sq[1]);        // sq1 = a1^2
+    fpsqr_mont(a[0], sq[0]);        // sq0 = a0^2
+    fpadd(sq[0], sq[1], sq[0]);     // sq0 = a0^2+a1^2
+    fpinv_mont(sq[0]);              // sq0 = (a0^2+a1^2)^-1
+    fpneg(a[1]);                    // a = a0-i*a1
+    fpmul_mont(a[1], sq[0], a[1]);  // a1 = -a1*(a0^2+a1^2)^-1
+    fpmul_mont(a[0], sq[0], a[0]);  // a0 =  a0*(a0^2+a1^2)^-1
+}
+
+
+void to_fp2mont(const f2elm_t a, f2elm_t mc)
+{   // Converts both GF(p) coordinates of a GF(p^2) element to Montgomery representation,
+    // one to_mont() call per coordinate: mc_k = a_k*R^2*R^(-1) = a_k*R for k = 0, 1.
+    unsigned int k;
+
+    for (k = 0; k < 2; k++) to_mont(a[k], mc[k]);
+}
+
+
+void from_fp2mont(const f2elm_t ma, f2elm_t c)
+{   // Converts both GF(p) coordinates of a GF(p^2) element from Montgomery representation back to
+    // standard representation, one from_mont() call per coordinate: c_k = ma_k*R^(-1) = a_k for k = 0, 1.
+    unsigned int k;
+
+    for (k = 0; k < 2; k++) from_mont(ma[k], c[k]);
+}
+
+
+void mp_shiftleft(digit_t *x, unsigned int shift, const unsigned int nwords)
+{ // Multiprecision left shift by "shift" bits, done in place on the nwords-digit array x (x[0] is the lowest digit).
+    unsigned int i, j = 0;
+
+    while (shift >= RADIX) {        // j = whole digits to move; ">=" leaves shift in [0, RADIX-1] for the bit shifts below
+        j += 1;
+        shift -= RADIX;
+    }
+    if (j > nwords) j = nwords;     // everything is shifted out: clamp so that nwords-j cannot wrap around
+    for (i = 0; i < nwords-j; i++)  // move digits up by j positions, starting from the top digit
+        x[nwords - 1 - i] = x[nwords - 1 - i - j];
+    for (i = nwords-j; i < nwords; i++)  // clear the j lowest digits
+        x[nwords-1-i] = 0;
+    if (shift != 0 && nwords != 0) {     // remaining sub-digit shift; skipped for an empty operand
+        for (j = nwords-1; j > 0; j--)
+            SHIFTL(x[j], x[j-1], shift, x[j], RADIX);
+        x[0] <<= shift;
+    }
+}
+
+
+void mp_shiftr1(digit_t *x, const unsigned int nwords)
+{ // Multiprecision right shift by one, in place on the nwords-digit array x (x[nwords-1] is the top digit).
+    if (nwords == 0) return;    // empty operand: nwords-1 would wrap around below and index far outside x
+    for (unsigned int i = 0; i < nwords-1; i++) {
+        SHIFTR(x[i + 1], x[i], 1, x[i], RADIX);
+    }
+    x[nwords - 1] >>= 1;
+}
+
+
+void mp_shiftl1(digit_t *x, const unsigned int nwords)
+{ // Multiprecision left shift by one, in place on the nwords-digit array x (x[0] is the lowest digit).
+    if (nwords == 0) return;    // empty operand: there is no x[0] to shift
+    for (unsigned int i = nwords-1; i > 0; i--) {   // unsigned index, matching the type of nwords
+        SHIFTL(x[i], x[i-1], 1, x[i], RADIX);
+    }
+    x[0] <<= 1;
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/internal.h b/SIKE_sw/src/internal.h
new file mode 100644
index 0000000..7fd9da2
--- /dev/null
+++ b/SIKE_sw/src/internal.h
@@ -0,0 +1,116 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: internal header file with function prototypes
+*********************************************************************************************/  
+
+#ifndef INTERNAL_H
+#define INTERNAL_H
+
+
+/**************** Function prototypes ****************/
+/************* Multiprecision functions **************/ 
+
+// Copy wordsize digits, c = a, where lng(a) = nwords
+void copy_words(const digit_t* a, digit_t* c, const unsigned int nwords);
+
+// Compare two byte arrays in constant time
+int8_t ct_compare(const uint8_t *a, const uint8_t *b, unsigned int len) ;
+
+// Conditional move in constant time
+void ct_cmov(uint8_t *r, const uint8_t *a, unsigned int len, int8_t selector);
+
+// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit 
+unsigned int mp_add(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords);
+
+// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit 
+unsigned int mp_sub(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords);
+
+// 2x434-bit multiprecision subtraction followed by addition with p434*2^448, c = a-b+(p434*2^448) if a-b < 0, otherwise c=a-b 
+void mp_subaddx2_asm(const digit_t* a, const digit_t* b, digit_t* c);
+
+// Multiprecision left shift
+void mp_shiftleft(digit_t* x, unsigned int shift, const unsigned int nwords);
+
+// Multiprecision right shift by one
+void mp_shiftr1(digit_t* x, const unsigned int nwords);
+
+// Multiprecision left shift by one
+void mp_shiftl1(digit_t* x, const unsigned int nwords);
+
+// Digit multiplication, digit * digit -> 2-digit result
+void digit_x_digit(const digit_t a, const digit_t b, digit_t* c); 
+
+// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords.
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords);
+
+/************ Montgomery reduction and conversion functions *************/
+
+// Montgomery reduction, c = a mod p
+void rdc_mont(digit_t* a, digit_t* c);
+
+// Conversion to Montgomery representation
+void to_mont(const digit_t* a, digit_t* mc);
+    
+// Conversion from Montgomery representation to standard representation
+void from_mont(const digit_t* ma, digit_t* c);
+    
+// Conversion of a GF(p^2) element to Montgomery representation
+void to_fp2mont(const f2elm_t a, f2elm_t mc);
+
+// Conversion of a GF(p^2) element from Montgomery representation to standard representation
+void from_fp2mont(const f2elm_t ma, f2elm_t c);
+
+// n-way Montgomery inversion
+void mont_n_way_inv(const f2elm_t* vec, const int n, f2elm_t* out);
+
+/************ Elliptic curve and isogeny functions *************/
+
+// Computes the j-invariant of a Montgomery curve with projective constant.
+void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv);
+
+// Simultaneous doubling and differential addition.
+void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t XPQ, const f2elm_t ZPQ, const f2elm_t A24);
+
+// Doubling of a Montgomery point in projective coordinates (X:Z).
+void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24);
+
+// Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings.
+void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, const int e);
+
+// Differential addition.
+void xADD(point_proj_t P, const point_proj_t Q, const f2elm_t xPQ);
+
+// Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4.
+void get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff);
+
+// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny.
+void eval_4_isog(point_proj_t P, f2elm_t* coeff);
+
+// Tripling of a Montgomery point in projective coordinates (X:Z).
+void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus);
+
+// Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings.
+void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, const int e);
+
+// Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3.
+void get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff);
+
+// Evaluates at the point Q = (X:Z) the 3-isogeny phi defined by a point (X3:Z3) of order 3 on a Montgomery curve, using the coefficients given in coeff; the image phi(Q) is written back to Q.
+void eval_3_isog(point_proj_t Q, const f2elm_t* coeff);
+
+// 3-way simultaneous inversion
+void inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3);
+
+// Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
+void get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A);
+
+// Isogeny tree traversal
+void TraverseTree(f2elm_t jinv, point_proj_t R, f2elm_t A24plus, f2elm_t C24, const unsigned int *strat, unsigned int lenstrat, bool keygen, point_proj_t PhiP, point_proj_t PhiQ, point_proj_t PhiR);
+
+
+#endif
diff --git a/SIKE_sw/src/random/random.c b/SIKE_sw/src/random/random.c
new file mode 100644
index 0000000..7f445b8
--- /dev/null
+++ b/SIKE_sw/src/random/random.c
@@ -0,0 +1,61 @@
+/********************************************************************************************
+* Random number generation function backed by the operating system's generator
+*
+* It uses /dev/urandom in Linux and CNG's BCryptGenRandom function in Windows
+*********************************************************************************************/ 
+
+#include "random.h"
+#include <stdlib.h>
+#if defined(__WINDOWS__)
+    #include <windows.h>
+    #include <bcrypt.h>
+#elif defined(__LINUX__)
+    #include <unistd.h>
+    #include <fcntl.h>
+    static int lock = -1;
+#endif
+
+#define passed 0 
+#define failed 1
+
+
+static __inline void delay(unsigned int count)
+{ // Busy-wait for "count" loop iterations; randombytes() uses it to back off before retrying a failed open()/read().
+    for (volatile unsigned int spin = count; spin != 0; spin--) {}  // volatile counter: the empty loop must not be optimized away
+}
+
+
+int randombytes(unsigned char* random_array, unsigned long long nbytes)
+{ // Generation of "nbytes" of random values into random_array. Returns passed (0) on success, failed (1) otherwise.
+#if defined(__WINDOWS__)
+    while (nbytes > 0) {    // BCryptGenRandom takes a 32-bit length: feed it chunks instead of truncating nbytes
+        unsigned long chunk = (nbytes > 0x7FFFFFFFUL) ? 0x7FFFFFFFUL : (unsigned long)nbytes;
+        if (!BCRYPT_SUCCESS(BCryptGenRandom(NULL, random_array, chunk, BCRYPT_USE_SYSTEM_PREFERRED_RNG))) {
+            return failed;
+        }
+        random_array += chunk;
+        nbytes -= chunk;
+    }
+#elif defined(__LINUX__)
+    while (lock == -1) {    // as before, keep retrying until /dev/urandom can be opened; the descriptor is cached in "lock"
+        lock = open("/dev/urandom", O_RDONLY);
+        if (lock == -1) delay(0xFFFFF);
+    }
+
+    while (nbytes > 0) {    // nbytes is consumed at full width (the old int cast could silently skip the fill)
+        size_t want = (nbytes > 0x7FFFFFFFUL) ? (size_t)0x7FFFFFFFUL : (size_t)nbytes;
+        ssize_t r = read(lock, random_array, want);
+
+        if (r == -1) { delay(0xFFFF); continue; }   // read error (e.g. EINTR): back off and retry, as before
+        if (r == 0) return failed;                  // end of file: no progress is possible, do not spin forever
+        random_array += r;
+        nbytes -= (unsigned long long)r;
+    }
+
+#else
+    (void)random_array; (void)nbytes;
+    return failed;          // no random source compiled in: never report success for an untouched buffer
+#endif
+
+    return passed;
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/random/random.h b/SIKE_sw/src/random/random.h
new file mode 100644
index 0000000..8eb1118
--- /dev/null
+++ b/SIKE_sw/src/random/random.h
@@ -0,0 +1,9 @@
+#ifndef RANDOM_H
+#define RANDOM_H
+
+
+// Generate random bytes and output the result to random_array
+int randombytes(unsigned char* random_array, unsigned long long nbytes);
+
+
+#endif
\ No newline at end of file
diff --git a/SIKE_sw/src/sha3/fips202.c b/SIKE_sw/src/sha3/fips202.c
new file mode 100644
index 0000000..8733a9e
--- /dev/null
+++ b/SIKE_sw/src/sha3/fips202.c
@@ -0,0 +1,573 @@
+/********************************************************************************************
+* SHA3-derived functions: SHAKE and cSHAKE
+*
+* Based on the public domain implementation in crypto_hash/keccakc512/simple/ 
+* from http://bench.cr.yp.to/supercop.html by Ronny Van Keer 
+* and the public domain "TweetFips202" implementation from https://twitter.com/tweetfips202 
+* by Gilles Van Assche, Daniel J. Bernstein, and Peter Schwabe
+*
+* See NIST Special Publication 800-185 for more information:
+* http://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-185.pdf
+*
+*********************************************************************************************/  
+
+#include <stdint.h>
+#include <assert.h>
+#include "fips202.h"
+
+#define NROUNDS 24
+#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64-(offset))))  // rotate a 64-bit value left by offset bits (offset in 1..63)
+
+
+static uint64_t load64(const unsigned char *x)
+{ // Reads 8 bytes at x as one little-endian 64-bit word (x[0] is the least significant byte).
+  uint64_t word = 0;
+  int k;
+
+  for (k = 7; k >= 0; k--)
+    word = (word << 8) | (uint64_t)x[k];   // most significant byte first, so x[0] lands in the low 8 bits
+  return word;
+}
+
+
+static void store64(uint8_t *x, uint64_t u)
+{ // Writes u to x[0..7] in little-endian order (x[0] receives the least significant byte).
+  unsigned int k;
+
+  // Byte k of the output is bits 8k..8k+7 of u.
+  for (k = 0; k < 8; k++) {
+    x[k] = (uint8_t)(u >> (8 * k));
+  }
+}
+
+
+static const uint64_t KeccakF_RoundConstants[NROUNDS] =
+{   // One constant per round; KeccakF1600_StatePermute XORs entry [round] into the lane that becomes state[0].
+    0x0000000000000001ULL,  // round  0
+    0x0000000000008082ULL,  // round  1
+    0x800000000000808aULL,  // round  2
+    0x8000000080008000ULL,  // round  3
+    0x000000000000808bULL,  // round  4
+    0x0000000080000001ULL,  // round  5
+    0x8000000080008081ULL,  // round  6
+    0x8000000000008009ULL,  // round  7
+    0x000000000000008aULL,  // round  8
+    0x0000000000000088ULL,  // round  9
+    0x0000000080008009ULL,  // round 10
+    0x000000008000000aULL,  // round 11
+    0x000000008000808bULL,  // round 12
+    0x800000000000008bULL,  // round 13
+    0x8000000000008089ULL,  // round 14
+    0x8000000000008003ULL,  // round 15
+    0x8000000000008002ULL,  // round 16
+    0x8000000000000080ULL,  // round 17
+    0x000000000000800aULL,  // round 18
+    0x800000008000000aULL,  // round 19
+    0x8000000080008081ULL,  // round 20
+    0x8000000000008080ULL,  // round 21
+    0x0000000080000001ULL,  // round 22
+    0x8000000080008008ULL   // round 23
+};
+
+
+void KeccakF1600_StatePermute(uint64_t * state)
+{
+  int round;
+
+        uint64_t Aba, Abe, Abi, Abo, Abu;
+        uint64_t Aga, Age, Agi, Ago, Agu;
+        uint64_t Aka, Ake, Aki, Ako, Aku;
+        uint64_t Ama, Ame, Ami, Amo, Amu;
+        uint64_t Asa, Ase, Asi, Aso, Asu;
+        uint64_t BCa, BCe, BCi, BCo, BCu;
+        uint64_t Da, De, Di, Do, Du;
+        uint64_t Eba, Ebe, Ebi, Ebo, Ebu;
+        uint64_t Ega, Ege, Egi, Ego, Egu;
+        uint64_t Eka, Eke, Eki, Eko, Eku;
+        uint64_t Ema, Eme, Emi, Emo, Emu;
+        uint64_t Esa, Ese, Esi, Eso, Esu;
+
+        //copyFromState(A, state)
+        Aba = state[ 0];
+        Abe = state[ 1];
+        Abi = state[ 2];
+        Abo = state[ 3];
+        Abu = state[ 4];
+        Aga = state[ 5];
+        Age = state[ 6];
+        Agi = state[ 7];
+        Ago = state[ 8];
+        Agu = state[ 9];
+        Aka = state[10];
+        Ake = state[11];
+        Aki = state[12];
+        Ako = state[13];
+        Aku = state[14];
+        Ama = state[15];
+        Ame = state[16];
+        Ami = state[17];
+        Amo = state[18];
+        Amu = state[19];
+        Asa = state[20];
+        Ase = state[21];
+        Asi = state[22];
+        Aso = state[23];
+        Asu = state[24];
+
+        for( round = 0; round < NROUNDS; round += 2 )
+        {
+            //    prepareTheta
+            BCa = Aba^Aga^Aka^Ama^Asa;
+            BCe = Abe^Age^Ake^Ame^Ase;
+            BCi = Abi^Agi^Aki^Ami^Asi;
+            BCo = Abo^Ago^Ako^Amo^Aso;
+            BCu = Abu^Agu^Aku^Amu^Asu;
+
+            //thetaRhoPiChiIotaPrepareTheta(round  , A, E)
+            Da = BCu^ROL(BCe, 1);
+            De = BCa^ROL(BCi, 1);
+            Di = BCe^ROL(BCo, 1);
+            Do = BCi^ROL(BCu, 1);
+            Du = BCo^ROL(BCa, 1);
+
+            Aba ^= Da;
+            BCa = Aba;
+            Age ^= De;
+            BCe = ROL(Age, 44);
+            Aki ^= Di;
+            BCi = ROL(Aki, 43);
+            Amo ^= Do;
+            BCo = ROL(Amo, 21);
+            Asu ^= Du;
+            BCu = ROL(Asu, 14);
+            Eba =   BCa ^((~BCe)&  BCi );
+            Eba ^= (uint64_t)KeccakF_RoundConstants[round];
+            Ebe =   BCe ^((~BCi)&  BCo );
+            Ebi =   BCi ^((~BCo)&  BCu );
+            Ebo =   BCo ^((~BCu)&  BCa );
+            Ebu =   BCu ^((~BCa)&  BCe );
+
+            Abo ^= Do;
+            BCa = ROL(Abo, 28);
+            Agu ^= Du;
+            BCe = ROL(Agu, 20);
+            Aka ^= Da;
+            BCi = ROL(Aka,  3);
+            Ame ^= De;
+            BCo = ROL(Ame, 45);
+            Asi ^= Di;
+            BCu = ROL(Asi, 61);
+            Ega =   BCa ^((~BCe)&  BCi );
+            Ege =   BCe ^((~BCi)&  BCo );
+            Egi =   BCi ^((~BCo)&  BCu );
+            Ego =   BCo ^((~BCu)&  BCa );
+            Egu =   BCu ^((~BCa)&  BCe );
+
+            Abe ^= De;
+            BCa = ROL(Abe,  1);
+            Agi ^= Di;
+            BCe = ROL(Agi,  6);
+            Ako ^= Do;
+            BCi = ROL(Ako, 25);
+            Amu ^= Du;
+            BCo = ROL(Amu,  8);
+            Asa ^= Da;
+            BCu = ROL(Asa, 18);
+            Eka =   BCa ^((~BCe)&  BCi );
+            Eke =   BCe ^((~BCi)&  BCo );
+            Eki =   BCi ^((~BCo)&  BCu );
+            Eko =   BCo ^((~BCu)&  BCa );
+            Eku =   BCu ^((~BCa)&  BCe );
+
+            Abu ^= Du;
+            BCa = ROL(Abu, 27);
+            Aga ^= Da;
+            BCe = ROL(Aga, 36);
+            Ake ^= De;
+            BCi = ROL(Ake, 10);
+            Ami ^= Di;
+            BCo = ROL(Ami, 15);
+            Aso ^= Do;
+            BCu = ROL(Aso, 56);
+            Ema =   BCa ^((~BCe)&  BCi );
+            Eme =   BCe ^((~BCi)&  BCo );
+            Emi =   BCi ^((~BCo)&  BCu );
+            Emo =   BCo ^((~BCu)&  BCa );
+            Emu =   BCu ^((~BCa)&  BCe );
+
+            Abi ^= Di;
+            BCa = ROL(Abi, 62);
+            Ago ^= Do;
+            BCe = ROL(Ago, 55);
+            Aku ^= Du;
+            BCi = ROL(Aku, 39);
+            Ama ^= Da;
+            BCo = ROL(Ama, 41);
+            Ase ^= De;
+            BCu = ROL(Ase,  2);
+            Esa =   BCa ^((~BCe)&  BCi );
+            Ese =   BCe ^((~BCi)&  BCo );
+            Esi =   BCi ^((~BCo)&  BCu );
+            Eso =   BCo ^((~BCu)&  BCa );
+            Esu =   BCu ^((~BCa)&  BCe );
+
+            //    prepareTheta
+            BCa = Eba^Ega^Eka^Ema^Esa;
+            BCe = Ebe^Ege^Eke^Eme^Ese;
+            BCi = Ebi^Egi^Eki^Emi^Esi;
+            BCo = Ebo^Ego^Eko^Emo^Eso;
+            BCu = Ebu^Egu^Eku^Emu^Esu;
+
+            //thetaRhoPiChiIotaPrepareTheta(round+1, E, A)
+            Da = BCu^ROL(BCe, 1);
+            De = BCa^ROL(BCi, 1);
+            Di = BCe^ROL(BCo, 1);
+            Do = BCi^ROL(BCu, 1);
+            Du = BCo^ROL(BCa, 1);
+
+            Eba ^= Da;
+            BCa = Eba;
+            Ege ^= De;
+            BCe = ROL(Ege, 44);
+            Eki ^= Di;
+            BCi = ROL(Eki, 43);
+            Emo ^= Do;
+            BCo = ROL(Emo, 21);
+            Esu ^= Du;
+            BCu = ROL(Esu, 14);
+            Aba =   BCa ^((~BCe)&  BCi );
+            Aba ^= (uint64_t)KeccakF_RoundConstants[round+1];
+            Abe =   BCe ^((~BCi)&  BCo );
+            Abi =   BCi ^((~BCo)&  BCu );
+            Abo =   BCo ^((~BCu)&  BCa );
+            Abu =   BCu ^((~BCa)&  BCe );
+
+            Ebo ^= Do;
+            BCa = ROL(Ebo, 28);
+            Egu ^= Du;
+            BCe = ROL(Egu, 20);
+            Eka ^= Da;
+            BCi = ROL(Eka, 3);
+            Eme ^= De;
+            BCo = ROL(Eme, 45);
+            Esi ^= Di;
+            BCu = ROL(Esi, 61);
+            Aga =   BCa ^((~BCe)&  BCi );
+            Age =   BCe ^((~BCi)&  BCo );
+            Agi =   BCi ^((~BCo)&  BCu );
+            Ago =   BCo ^((~BCu)&  BCa );
+            Agu =   BCu ^((~BCa)&  BCe );
+
+            Ebe ^= De;
+            BCa = ROL(Ebe, 1);
+            Egi ^= Di;
+            BCe = ROL(Egi, 6);
+            Eko ^= Do;
+            BCi = ROL(Eko, 25);
+            Emu ^= Du;
+            BCo = ROL(Emu, 8);
+            Esa ^= Da;
+            BCu = ROL(Esa, 18);
+            Aka =   BCa ^((~BCe)&  BCi );
+            Ake =   BCe ^((~BCi)&  BCo );
+            Aki =   BCi ^((~BCo)&  BCu );
+            Ako =   BCo ^((~BCu)&  BCa );
+            Aku =   BCu ^((~BCa)&  BCe );
+
+            Ebu ^= Du;
+            BCa = ROL(Ebu, 27);
+            Ega ^= Da;
+            BCe = ROL(Ega, 36);
+            Eke ^= De;
+            BCi = ROL(Eke, 10);
+            Emi ^= Di;
+            BCo = ROL(Emi, 15);
+            Eso ^= Do;
+            BCu = ROL(Eso, 56);
+            Ama =   BCa ^((~BCe)&  BCi );
+            Ame =   BCe ^((~BCi)&  BCo );
+            Ami =   BCi ^((~BCo)&  BCu );
+            Amo =   BCo ^((~BCu)&  BCa );
+            Amu =   BCu ^((~BCa)&  BCe );
+
+            Ebi ^= Di;
+            BCa = ROL(Ebi, 62);
+            Ego ^= Do;
+            BCe = ROL(Ego, 55);
+            Eku ^= Du;
+            BCi = ROL(Eku, 39);
+            Ema ^= Da;
+            BCo = ROL(Ema, 41);
+            Ese ^= De;
+            BCu = ROL(Ese, 2);
+            Asa =   BCa ^((~BCe)&  BCi );
+            Ase =   BCe ^((~BCi)&  BCo );
+            Asi =   BCi ^((~BCo)&  BCu );
+            Aso =   BCo ^((~BCu)&  BCa );
+            Asu =   BCu ^((~BCa)&  BCe );
+        }
+
+        //copyToState(state, A)
+        state[ 0] = Aba;
+        state[ 1] = Abe;
+        state[ 2] = Abi;
+        state[ 3] = Abo;
+        state[ 4] = Abu;
+        state[ 5] = Aga;
+        state[ 6] = Age;
+        state[ 7] = Agi;
+        state[ 8] = Ago;
+        state[ 9] = Agu;
+        state[10] = Aka;
+        state[11] = Ake;
+        state[12] = Aki;
+        state[13] = Ako;
+        state[14] = Aku;
+        state[15] = Ama;
+        state[16] = Ame;
+        state[17] = Ami;
+        state[18] = Amo;
+        state[19] = Amu;
+        state[20] = Asa;
+        state[21] = Ase;
+        state[22] = Asi;
+        state[23] = Aso;
+        state[24] = Asu;
+
+        #undef    round
+}
+
+#include <string.h>
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+
+static void keccak_absorb(uint64_t *s, unsigned int r, const unsigned char *m, unsigned long long int mlen, unsigned char p)
+{ // Absorb mlen bytes of m into state s at rate r (bytes), then XOR in the final padded block.
+  // p is the padding/suffix byte stored right after the message tail. The state is NOT
+  // permuted after the final block; keccak_squeezeblocks permutes before each output block.
+  const unsigned int lanes = r / 8;   // 64-bit lanes per rate-sized block
+  unsigned char last[200];            // scratch for the final padded block (r must be <= 200)
+  unsigned long long k;
+
+  // Full blocks: XOR into the state lane by lane, then permute
+  for (; mlen >= r; mlen -= r, m += r) {
+    for (k = 0; k < lanes; k++)
+      s[k] ^= load64(&m[8 * k]);
+    KeccakF1600_StatePermute(s);
+  }
+
+  // Final block: message tail, zero fill, padding byte p, and the top bit of the last byte
+  for (k = 0; k < r; k++)
+    last[k] = (k < mlen) ? m[k] : 0;
+  last[mlen] = p;
+  last[r - 1] |= 128;
+
+  for (k = 0; k < lanes; k++)
+    s[k] ^= load64(&last[8 * k]);
+}
+
+
+static void keccak_squeezeblocks(unsigned char *h, unsigned long long int nblocks, uint64_t *s, unsigned int r)
+{ // Squeeze nblocks blocks of r bytes each from state s into h.
+  // The state is permuted before every block is written out.
+  const unsigned int lanes = r / 8;   // 64-bit lanes per output block
+  unsigned long long blk;
+  unsigned int k;
+
+  for (blk = 0; blk < nblocks; blk++) {
+    KeccakF1600_StatePermute(s);
+
+    for (k = 0; k < lanes; k++)
+      store64(&h[8 * k], s[k]);
+    h += r;
+  }
+}
+
+
+/********** SHAKE128 ***********/
+
+void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen)
+{ // Absorb inputByteLen bytes of input into state s at the SHAKE128 rate with padding byte 0x1F; s is used as-is (not zeroed here)
+	keccak_absorb(s, SHAKE128_RATE, input, inputByteLen, 0x1F);
+}
+
+
+void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s)
+{ // Write nblocks blocks of SHAKE128_RATE bytes each to output, permuting state s before every block
+	keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE);
+}
+
+
+void shake128(unsigned char *output, unsigned long long outlen, const unsigned char *input,  unsigned long long inlen)
+{ // SHAKE128 XOF: writes outlen bytes derived from the inlen-byte input to output.
+  const unsigned long long full = outlen / SHAKE128_RATE;   // whole rate-sized output blocks
+  const unsigned long long tail = outlen % SHAKE128_RATE;   // leftover bytes, < SHAKE128_RATE
+  unsigned char last[SHAKE128_RATE];
+  uint64_t state[25] = {0};
+  size_t k;
+
+  // Absorb the whole message with the 0x1F padding byte
+  keccak_absorb(state, SHAKE128_RATE, input, inlen, 0x1F);
+
+  // Whole blocks are squeezed straight into the caller's buffer
+  keccak_squeezeblocks(output, full, state, SHAKE128_RATE);
+  output += full * SHAKE128_RATE;
+
+  // A partial final block goes through a scratch buffer so output is never overrun
+  if (tail != 0) {
+    keccak_squeezeblocks(last, 1, state, SHAKE128_RATE);
+
+    for (k = 0; k < tail; k++)
+      output[k] = last[k];
+  }
+}
+
+
+/********** cSHAKE128 ***********/
+
+void cshake128_simple_absorb(uint64_t s[25], uint16_t cstm, const unsigned char *in, unsigned long long inlen)
+{ // cSHAKE128 absorb with a 16-bit customization value cstm: clears s, absorbs the 8-byte
+  // prefix 01 a8 01 00 01 10 <cstm lo> <cstm hi>, permutes, then absorbs in[0..inlen-1]
+  // with padding byte 0x04.
+  // The prefix is assembled as a little-endian lane value instead of being written through
+  // an unsigned char alias of s, so the result does not depend on host byte order.
+  uint64_t prefix;
+  unsigned int i;
+
+  for (i = 0; i < 25; i++)
+    s[i] = 0;
+
+  /* Absorb customization (domain-separation) string */
+  prefix  = (uint64_t)0x01;                  // byte 0
+  prefix |= (uint64_t)0xa8 << 8;             // byte 1: 0xa8 = 168 = SHAKE128_RATE
+  prefix |= (uint64_t)0x01 << 16;            // byte 2 (byte 3 stays 0x00)
+  prefix |= (uint64_t)0x01 << 32;            // byte 4
+  prefix |= (uint64_t)16 << 40;              // byte 5: fixed bitlen of cstm
+  prefix |= (uint64_t)cstm << 48;            // bytes 6-7: cstm, low byte first
+  s[0] = prefix;
+  KeccakF1600_StatePermute(s);
+  keccak_absorb(s, SHAKE128_RATE, in, inlen, 0x04);   /* Absorb input */
+}
+
+
+void cshake128_simple_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s)
+{ // Write nblocks blocks of SHAKE128_RATE bytes each to output, permuting state s before every block
+  keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE);
+}
+
+
+void cshake128_simple(unsigned char *output, unsigned long long outlen, uint16_t cstm, const unsigned char *in, unsigned long long inlen)
+{ // One-shot cSHAKE128 with 16-bit customization cstm: outlen bytes of output from in[0..inlen-1].
+  const unsigned long long full = outlen / SHAKE128_RATE;   // whole output blocks
+  const unsigned long long tail = outlen % SHAKE128_RATE;   // leftover bytes
+  unsigned char last[SHAKE128_RATE];
+  uint64_t state[25];                                       // initialized by the absorb call
+  unsigned int k;
+
+  cshake128_simple_absorb(state, cstm, in, inlen);
+
+  // Whole blocks go directly to the caller, a partial block via the scratch buffer
+  keccak_squeezeblocks(output, full, state, SHAKE128_RATE);
+  output += full * SHAKE128_RATE;
+  if (tail != 0) {
+    keccak_squeezeblocks(last, 1, state, SHAKE128_RATE);
+    for (k = 0; k < tail; k++)
+      output[k] = last[k];
+  }
+}
+
+
+/********** SHAKE256 ***********/
+
+void shake256_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen)
+{ // Absorb inputByteLen bytes of input into state s at the SHAKE256 rate with padding byte 0x1F; s is used as-is (not zeroed here)
+	keccak_absorb(s, SHAKE256_RATE, input, inputByteLen, 0x1F);
+}
+
+
+void shake256_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s)
+{ // Write nblocks blocks of SHAKE256_RATE bytes each to output, permuting state s before every block
+	keccak_squeezeblocks(output, nblocks, s, SHAKE256_RATE);
+}
+
+
+void shake256(unsigned char *output, unsigned long long outlen, const unsigned char *input,  unsigned long long inlen)
+{ // SHAKE256 XOF: writes outlen bytes derived from the inlen-byte input to output.
+  const unsigned long long full = outlen / SHAKE256_RATE;   // whole rate-sized output blocks
+  const unsigned long long tail = outlen % SHAKE256_RATE;   // leftover bytes, < SHAKE256_RATE
+  unsigned char last[SHAKE256_RATE];
+  uint64_t state[25];
+  size_t k;
+
+  // Start from the all-zero state
+  for (k = 0; k < 25; k++)
+    state[k] = 0;
+
+  // Absorb the whole message with the 0x1F padding byte
+  keccak_absorb(state, SHAKE256_RATE, input, inlen, 0x1F);
+
+  // Whole blocks are squeezed straight into the caller's buffer
+  keccak_squeezeblocks(output, full, state, SHAKE256_RATE);
+  output += full * SHAKE256_RATE;
+
+  // A partial final block goes through a scratch buffer so output is never overrun
+  if (tail != 0) {
+    keccak_squeezeblocks(last, 1, state, SHAKE256_RATE);
+    for (k = 0; k < tail; k++)
+      output[k] = last[k];
+  }
+}
+
+
+/********** cSHAKE256 ***********/
+
+void cshake256_simple_absorb(uint64_t s[25], uint16_t cstm, const unsigned char *in, unsigned long long inlen)
+{ // cSHAKE256 absorb with a 16-bit customization value cstm: clears s, absorbs the 8-byte
+  // prefix 01 88 01 00 01 10 <cstm lo> <cstm hi>, permutes, then absorbs in[0..inlen-1]
+  // with padding byte 0x04.
+  // The prefix is assembled as a little-endian lane value instead of being written through
+  // an unsigned char alias of s, so the result does not depend on host byte order.
+  uint64_t prefix;
+  unsigned int i;
+
+  for (i = 0; i < 25; i++)
+    s[i] = 0;
+
+  /* Absorb customization (domain-separation) string */
+  prefix  = (uint64_t)0x01;                  // byte 0
+  prefix |= (uint64_t)0x88 << 8;             // byte 1: 0x88 = 136 = SHAKE256_RATE
+  prefix |= (uint64_t)0x01 << 16;            // byte 2 (byte 3 stays 0x00)
+  prefix |= (uint64_t)0x01 << 32;            // byte 4
+  prefix |= (uint64_t)16 << 40;              // byte 5: fixed bitlen of cstm
+  prefix |= (uint64_t)cstm << 48;            // bytes 6-7: cstm, low byte first
+  s[0] = prefix;
+  KeccakF1600_StatePermute(s);
+  keccak_absorb(s, SHAKE256_RATE, in, inlen, 0x04);   /* Absorb input */
+}
+
+
+void cshake256_simple_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s)
+{ // Write nblocks blocks of SHAKE256_RATE bytes each to output, permuting state s before every block
+  keccak_squeezeblocks(output, nblocks, s, SHAKE256_RATE);
+}
+
+
+void cshake256_simple(unsigned char *output, unsigned long long outlen, uint16_t cstm, const unsigned char *in, unsigned long long inlen)
+{ // One-shot cSHAKE256 with 16-bit customization cstm: outlen bytes of output from in[0..inlen-1].
+  const unsigned long long full = outlen / SHAKE256_RATE;   // whole output blocks
+  const unsigned long long tail = outlen % SHAKE256_RATE;   // leftover bytes
+  unsigned char last[SHAKE256_RATE];
+  uint64_t state[25];                                       // initialized by the absorb call
+  unsigned int k;
+
+  cshake256_simple_absorb(state, cstm, in, inlen);
+
+  // Whole blocks go directly to the caller, a partial block via the scratch buffer
+  keccak_squeezeblocks(output, full, state, SHAKE256_RATE);
+  output += full * SHAKE256_RATE;
+  if (tail != 0) {
+    keccak_squeezeblocks(last, 1, state, SHAKE256_RATE);
+    for (k = 0; k < tail; k++)
+      output[k] = last[k];
+  }
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/sha3/fips202.h b/SIKE_sw/src/sha3/fips202.h
new file mode 100644
index 0000000..55b400a
--- /dev/null
+++ b/SIKE_sw/src/sha3/fips202.h
@@ -0,0 +1,27 @@
+#ifndef FIPS202_H
+#define FIPS202_H
+
+#include <stdint.h>
+
+// Block sizes ("rates") in bytes; fips202.c walks the state in 8-byte lanes, so both are multiples of 8
+#define SHAKE128_RATE 168
+#define SHAKE256_RATE 136
+// SHAKE128: absorb XORs the padded input into the caller-supplied 25-word state s (it does not clear s); squeezeblocks writes nblocks*SHAKE128_RATE bytes
+void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen);
+void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s);
+void shake128(unsigned char *output, unsigned long long outlen, const unsigned char *input,  unsigned long long inlen);
+// cSHAKE128 with a 16-bit customization value cstm: absorb clears s itself before absorbing the customization prefix and the input
+void cshake128_simple_absorb(uint64_t *s, uint16_t cstm, const unsigned char *in, unsigned long long inlen);
+void cshake128_simple_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s);
+void cshake128_simple(unsigned char *output, unsigned long long outlen, uint16_t cstm, const unsigned char *in, unsigned long long inlen);
+// SHAKE256: same interface as SHAKE128, with SHAKE256_RATE-byte blocks
+void shake256_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen);
+void shake256_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s);
+void shake256(unsigned char *output, unsigned long long outlen, const unsigned char *input,  unsigned long long inlen);
+// cSHAKE256: same interface as cSHAKE128, with SHAKE256_RATE-byte blocks
+void cshake256_simple_absorb(uint64_t *s, uint16_t cstm, const unsigned char *in, unsigned long long inlen);
+void cshake256_simple_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s);
+void cshake256_simple(unsigned char *output, unsigned long long outlen, uint16_t cstm, const unsigned char *in, unsigned long long inlen);
+
+
+#endif
diff --git a/SIKE_sw/src/sidh.c b/SIKE_sw/src/sidh.c
new file mode 100644
index 0000000..2258cac
--- /dev/null
+++ b/SIKE_sw/src/sidh.c
@@ -0,0 +1,263 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: Ephemeral supersingular isogeny Diffie-Hellman key exchange (SIDH)
+*********************************************************************************************/ 
+
+#include "random/random.h"
+
+
+static void init_basis(digit_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR)
+{ // Unpack the basis: gen holds six consecutive field elements of NWORDS_FIELD words each,
+  // copied in order into XP[0], XP[1], XQ[0], XQ[1], XR[0], XR[1]
+    digit_t *word = gen;
+
+    fpcopy(word, XP[0]);  word += NWORDS_FIELD;   fpcopy(word, XP[1]);  word += NWORDS_FIELD;
+    fpcopy(word, XQ[0]);  word += NWORDS_FIELD;   fpcopy(word, XQ[1]);  word += NWORDS_FIELD;
+    fpcopy(word, XR[0]);  word += NWORDS_FIELD;   fpcopy(word, XR[1]);
+}
+
+
+void random_mod_order_A(unsigned char* random_digits)
+{  // Draws Alice's secret key: SECRETKEY_A_BYTES random bytes whose final byte is masked
+   // with MASK_ALICE, giving a random value in [0, 2^eA - 1]
+    unsigned char* const top = random_digits + (SECRETKEY_A_BYTES - 1);
+
+    randombytes(random_digits, SECRETKEY_A_BYTES);
+    *top = (unsigned char)(*top & MASK_ALICE);
+}
+
+
+void random_mod_order_B(unsigned char* random_digits)
+{  // Draws Bob's secret key: SECRETKEY_B_BYTES random bytes whose final byte is masked
+   // with MASK_BOB, giving a random value in [0, 2^Floor(Log(2, oB)) - 1]
+    unsigned char* const top = random_digits + (SECRETKEY_B_BYTES - 1);
+
+    randombytes(random_digits, SECRETKEY_B_BYTES);
+    *top = (unsigned char)(*top & MASK_BOB);
+}
+
+
+int EphemeralKeyGeneration_A(const unsigned char* PrivateKeyA, unsigned char* PublicKeyA)
+{ // Alice's ephemeral public key generation. Always returns 0.
+  // Input:  a private key PrivateKeyA in the range [0, 2^eA - 1]. 
+  // Output: the public key PublicKeyA consisting of 3 elements in GF(p^2) which are encoded by removing leading 0 bytes.
+    point_proj_t R, phiP = {0}, phiQ = {0}, phiR = {0};
+    f2elm_t XPA, XQA, XRA, A24plus = {0}, C24 = {0}, A = {0}, unused;   // "unused" only receives TraverseTree's first output, which is discarded here
+    digit_t SecretKeyA[NWORDS_ORDER] = {0};
+
+    // Initialize basis points: Alice's generators go to XPA/XQA/XRA; Bob's generators become phiP/phiQ/phiR with Z = Montgomery_one
+    init_basis((digit_t*)A_gen, XPA, XQA, XRA);
+    init_basis((digit_t*)B_gen, phiP->X, phiQ->X, phiR->X);
+    fpcopy((digit_t*)&Montgomery_one, (phiP->Z)[0]);
+    fpcopy((digit_t*)&Montgomery_one, (phiQ->Z)[0]);
+    fpcopy((digit_t*)&Montgomery_one, (phiR->Z)[0]);
+
+    // Initialize constants: A24plus = A+2C, C24 = 4C, where A=6, C=1
+    fpcopy((digit_t*)&Montgomery_one, A24plus[0]);                      // A24plus = 1
+    fp2add(A24plus, A24plus, A24plus);                                  // A24plus = 2
+    fp2add(A24plus, A24plus, C24);                                      // C24     = 4 = 4C
+    fp2add(A24plus, C24, A);                                            // A       = 2+4 = 6
+    fp2add(C24, C24, A24plus);                                          // A24plus = 8 = A+2C
+
+    // Retrieve kernel point: R is produced by LADDER3PT from the basis and the decoded secret scalar
+    decode_to_digits(PrivateKeyA, SecretKeyA, SECRETKEY_A_BYTES, NWORDS_ORDER);
+    LADDER3PT(XPA, XQA, XRA, SecretKeyA, ALICE, R, A);         
+
+#if (OALICE_BITS % 2 == 1)
+    point_proj_t S;
+    // Odd OALICE_BITS only: one 2-isogeny step (kernel S from OALICE_BITS-1 doublings of R) before TraverseTree -- NOTE(review): presumably because the tree handles the rest in 4-isogeny steps; confirm
+    xDBLe(R, S, A24plus, C24, (int)(OALICE_BITS-1));
+    get_2_isog(S, A24plus, C24); 
+    eval_2_isog(phiP, S); 
+    eval_2_isog(phiQ, S); 
+    eval_2_isog(phiR, S);
+    eval_2_isog(R, S);
+#endif
+
+    // Traverse tree; the flag "true" is passed together with phiP/phiQ/phiR, whose X coordinates are encoded below
+    TraverseTree(unused, R, A24plus, C24, strat_Alice, (unsigned int)MAX_Alice, true, phiP, phiQ, phiR);
+    // Format public key: three encoded GF(p^2) elements of FP2_ENCODED_BYTES bytes each, in the order P, Q, R
+    fp2_encode(phiP->X, PublicKeyA);
+    fp2_encode(phiQ->X, PublicKeyA + FP2_ENCODED_BYTES);
+    fp2_encode(phiR->X, PublicKeyA + 2*FP2_ENCODED_BYTES);
+
+    return 0;
+}
+
+
+int EphemeralKeyGeneration_B(const unsigned char* PrivateKeyB, unsigned char* PublicKeyB)
+{ // Bob's ephemeral public key generation. Always returns 0.
+  // Input:  a private key PrivateKeyB in the range [0, 2^Floor(Log(2,oB)) - 1]. 
+  // Output: the public key PublicKeyB consisting of 3 elements in GF(p^2) which are encoded by removing leading 0 bytes.
+    point_proj_t R, phiP = {0}, phiQ = {0}, phiR = {0}, pts[MAX_INT_POINTS_BOB];
+    f2elm_t XPB, XQB, XRB, coeff[3], A24plus = {0}, A24minus = {0}, A = {0};
+    unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;   // pts/pts_index form a stack of saved points; ii walks strat_Bob
+    digit_t SecretKeyB[NWORDS_ORDER] = {0};
+
+    // Initialize basis points: Bob's generators go to XPB/XQB/XRB; Alice's generators become phiP/phiQ/phiR with Z = Montgomery_one
+    init_basis((digit_t*)B_gen, XPB, XQB, XRB);
+    init_basis((digit_t*)A_gen, phiP->X, phiQ->X, phiR->X);
+    fpcopy((digit_t*)&Montgomery_one, (phiP->Z)[0]);
+    fpcopy((digit_t*)&Montgomery_one, (phiQ->Z)[0]);
+    fpcopy((digit_t*)&Montgomery_one, (phiR->Z)[0]);
+
+    // Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1
+    fpcopy((digit_t*)&Montgomery_one, A24plus[0]);                      // A24plus  = 1
+    fp2add(A24plus, A24plus, A24plus);                                  // A24plus  = 2
+    fp2add(A24plus, A24plus, A24minus);                                 // A24minus = 4 = A-2C
+    fp2add(A24plus, A24minus, A);                                       // A        = 2+4 = 6
+    fp2add(A24minus, A24minus, A24plus);                                // A24plus  = 8 = A+2C
+
+    // Retrieve kernel point: R is produced by LADDER3PT from the basis and the decoded secret scalar
+    decode_to_digits(PrivateKeyB, SecretKeyB, SECRETKEY_B_BYTES, NWORDS_ORDER);
+    LADDER3PT(XPB, XQB, XRB, SecretKeyB, BOB, R, A);
+    
+    // Traverse tree: MAX_Bob-1 rows here plus one final step after the loop
+    index = 0;  
+    for (row = 1; row < MAX_Bob; row++) {
+        while (index < MAX_Bob-row) {                 // save R, then advance it by the next stride m taken from strat_Bob
+            fp2copy(R->X, pts[npts]->X);
+            fp2copy(R->Z, pts[npts]->Z);
+            pts_index[npts++] = index;
+            m = strat_Bob[ii++];
+            xTPLe(R, R, A24minus, A24plus, (int)m);
+            index += m;
+        } 
+        get_3_isog(R, A24minus, A24plus, coeff);      // 3-isogeny from R; coeff is reused by the evaluations below
+
+        for (i = 0; i < npts; i++) {                  // push every saved point through the isogeny
+            eval_3_isog(pts[i], coeff);
+        }     
+        eval_3_isog(phiP, coeff);
+        eval_3_isog(phiQ, coeff);
+        eval_3_isog(phiR, coeff);
+
+        fp2copy(pts[npts-1]->X, R->X);                // pop the most recently saved point back into R
+        fp2copy(pts[npts-1]->Z, R->Z);
+        index = pts_index[npts-1];
+        npts -= 1;
+    }
+    
+    get_3_isog(R, A24minus, A24plus, coeff);          // final step: only the three basis images still need evaluating
+    eval_3_isog(phiP, coeff);
+    eval_3_isog(phiQ, coeff);
+    eval_3_isog(phiR, coeff);
+
+    inv_3_way(phiP->Z, phiQ->Z, phiR->Z);             // NOTE(review): presumably replaces the three Z values by their inverses -- confirm
+    fp2mul_mont(phiP->X, phiP->Z, phiP->X);           // X <- X * Z, i.e. the affine x-coordinate if Z now holds 1/Z
+    fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
+    fp2mul_mont(phiR->X, phiR->Z, phiR->X);
+
+    // Format public key: three encoded GF(p^2) elements of FP2_ENCODED_BYTES bytes each, in the order P, Q, R
+    fp2_encode(phiP->X, PublicKeyB);
+    fp2_encode(phiQ->X, PublicKeyB + FP2_ENCODED_BYTES);
+    fp2_encode(phiR->X, PublicKeyB + 2*FP2_ENCODED_BYTES);
+
+    return 0;
+}
+
+
+int EphemeralSecretAgreement_A(const unsigned char* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA)
+{ // Alice's ephemeral shared secret computation. Always returns 0.
+  // It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB
+  // Inputs: Alice's PrivateKeyA is an integer in the range [0, oA-1]. 
+  //         Bob's PublicKeyB consists of 3 elements in GF(p^2) encoded by removing leading 0 bytes.
+  // Output: a shared secret SharedSecretA that consists of one element in GF(p^2) encoded by removing leading 0 bytes.
+    point_proj_t R, unused1, unused2, unused3;        // unused1..3 only fill TraverseTree's point parameters (flag is false)
+    f2elm_t PKB[3], jinv, A24plus = {0}, C24 = {0}, A = {0};
+    digit_t SecretKeyA[NWORDS_ORDER] = {0};
+      
+    // Initialize images of Bob's basis: three FP2_ENCODED_BYTES-byte elements decoded from PublicKeyB
+    fp2_decode(PublicKeyB, PKB[0]);
+    fp2_decode(PublicKeyB + FP2_ENCODED_BYTES, PKB[1]);
+    fp2_decode(PublicKeyB + 2*FP2_ENCODED_BYTES, PKB[2]);
+
+    // Initialize constants: A24plus = A+2C, C24 = 4C, where C=1
+    get_A(PKB[0], PKB[1], PKB[2], A);                                       // A is derived from the three public-key elements
+    fpadd((digit_t*)&Montgomery_one, (digit_t*)&Montgomery_one, C24[0]);    // C24     = 2 (temporarily)
+    fp2add(A, C24, A24plus);                                                // A24plus = A+2
+    fpadd(C24[0], C24[0], C24[0]);                                          // C24     = 4
+
+    // Retrieve kernel point: R is produced by LADDER3PT from the decoded elements and the secret scalar
+    decode_to_digits(PrivateKeyA, SecretKeyA, SECRETKEY_A_BYTES, NWORDS_ORDER);
+    LADDER3PT(PKB[0], PKB[1], PKB[2], SecretKeyA, ALICE, R, A);       
+
+#if (OALICE_BITS % 2 == 1)
+    point_proj_t S;
+    // Odd OALICE_BITS only: one 2-isogeny step (kernel S from OALICE_BITS-1 doublings of R) applied to R before TraverseTree
+    xDBLe(R, S, A24plus, C24, (int)(OALICE_BITS-1));
+    get_2_isog(S, A24plus, C24);
+    eval_2_isog(R, S);
+#endif
+
+    // Traverse tree; with the flag set to false only the first output (jinv) is used afterwards
+    TraverseTree(jinv, R, A24plus, C24, strat_Alice, (unsigned int)MAX_Alice, false, unused1, unused2, unused3);
+    // Format shared secret
+    fp2_encode(jinv, SharedSecretA);    
+
+    return 0;
+}
+
+
+int EphemeralSecretAgreement_B(const unsigned char* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB)
+{ // Bob's ephemeral shared secret computation. Always returns 0.
+  // It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA
+  // Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,oB)) - 1]. 
+  //         Alice's PublicKeyA consists of 3 elements in GF(p^2) encoded by removing leading 0 bytes.
+  // Output: a shared secret SharedSecretB that consists of one element in GF(p^2) encoded by removing leading 0 bytes.  
+    point_proj_t R, pts[MAX_INT_POINTS_BOB];
+    f2elm_t coeff[3], PKB[3], jinv;                   // despite its name, PKB holds the elements decoded from Alice's PublicKeyA
+    f2elm_t A24plus = {0}, A24minus = {0}, A = {0};
+    unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;   // pts/pts_index form a stack of saved points; ii walks strat_Bob
+    digit_t SecretKeyB[NWORDS_ORDER] = {0};
+      
+    // Initialize images of Alice's basis: three FP2_ENCODED_BYTES-byte elements decoded from PublicKeyA
+    fp2_decode(PublicKeyA, PKB[0]);
+    fp2_decode(PublicKeyA + FP2_ENCODED_BYTES, PKB[1]);
+    fp2_decode(PublicKeyA + 2*FP2_ENCODED_BYTES, PKB[2]);
+
+    // Initialize constants: A24plus = A+2C, A24minus = A-2C, where C=1
+    get_A(PKB[0], PKB[1], PKB[2], A);                                            // A is derived from the three public-key elements
+    fpadd((digit_t*)&Montgomery_one, (digit_t*)&Montgomery_one, A24minus[0]);    // A24minus = 2 (temporarily)
+    fp2add(A, A24minus, A24plus);                                                // A24plus  = A+2
+    fp2sub(A, A24minus, A24minus);                                               // A24minus = A-2
+
+    // Retrieve kernel point: R is produced by LADDER3PT from the decoded elements and the secret scalar
+    decode_to_digits(PrivateKeyB, SecretKeyB, SECRETKEY_B_BYTES, NWORDS_ORDER);
+    LADDER3PT(PKB[0], PKB[1], PKB[2], SecretKeyB, BOB, R, A);
+    
+    // Traverse tree: MAX_Bob-1 rows here plus one final get_3_isog after the loop
+    index = 0;  
+    for (row = 1; row < MAX_Bob; row++) {
+        while (index < MAX_Bob-row) {                 // save R, then advance it by the next stride m taken from strat_Bob
+            fp2copy(R->X, pts[npts]->X);
+            fp2copy(R->Z, pts[npts]->Z);
+            pts_index[npts++] = index;
+            m = strat_Bob[ii++];
+            xTPLe(R, R, A24minus, A24plus, (int)m);
+            index += m;
+        }
+        get_3_isog(R, A24minus, A24plus, coeff);      // 3-isogeny from R; coeff is reused by the evaluations below
+
+        for (i = 0; i < npts; i++) {                  // push every saved point through the isogeny
+            eval_3_isog(pts[i], coeff);
+        } 
+
+        fp2copy(pts[npts-1]->X, R->X);                // pop the most recently saved point back into R
+        fp2copy(pts[npts-1]->Z, R->Z);
+        index = pts_index[npts-1];
+        npts -= 1;
+    }
+     
+    get_3_isog(R, A24minus, A24plus, coeff);          // final step; no points are left to evaluate
+    fp2add(A24plus, A24minus, A);                     // A       = A24plus + A24minus  (2A if A24plus = A+2C and A24minus = A-2C)
+    fp2add(A, A, A);                                  // A       = 2 * that sum        (4A under the same reading)
+    fp2sub(A24plus, A24minus, A24plus);               // A24plus = A24plus - A24minus  (4C under the same reading)
+    j_inv(A, A24plus, jinv);                          // j-invariant computed from the pair (A, A24plus) prepared above
+    fp2_encode(jinv, SharedSecretB);    // Format shared secret
+
+    return 0;
+}
\ No newline at end of file
diff --git a/SIKE_sw/src/sike.c b/SIKE_sw/src/sike.c
new file mode 100644
index 0000000..e99aa74
--- /dev/null
+++ b/SIKE_sw/src/sike.c
@@ -0,0 +1,98 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: supersingular isogeny key encapsulation (SIKE) protocol
+*********************************************************************************************/ 
+
+#include <string.h>
+#include "sha3/fips202.h"
+
+
+int crypto_kem_keypair(unsigned char *pk, unsigned char *sk)
+{ // SIKE key generation. Always returns 0.
+  // Outputs: public key pk (CRYPTO_PUBLICKEYBYTES bytes)
+  //          secret key sk = s || SK || pk (CRYPTO_SECRETKEYBYTES = MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes)
+    unsigned char *const sk_part = sk + MSG_BYTES;                  // private key SK, stored after s
+    unsigned char *const pk_part = sk_part + SECRETKEY_B_BYTES;     // copy of pk, stored after SK
+
+    // s <- MSG_BYTES random bytes, SK <- random private key from random_mod_order_B
+    randombytes(sk, MSG_BYTES);
+    random_mod_order_B(sk_part);
+
+    // pk <- public key generated from SK, then appended to sk as well
+    EphemeralKeyGeneration_B(sk_part, pk);
+    memcpy(pk_part, pk, CRYPTO_PUBLICKEYBYTES);
+
+    return 0;
+}
+
+
+int crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk)
+{ // SIKE encapsulation. Always returns 0.
+  // Input:   pk, the public key            (CRYPTO_PUBLICKEYBYTES bytes)
+  // Outputs: ct = c0 || c1, the ciphertext (CRYPTO_CIPHERTEXTBYTES = CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes)
+  //          ss, the shared secret         (CRYPTO_BYTES bytes)
+    unsigned char buf[CRYPTO_CIPHERTEXTBYTES+MSG_BYTES];   // m || pk first, m || ct afterwards
+    unsigned char skA[SECRETKEY_A_BYTES];                  // ephemeral private key
+    unsigned char jinv[FP2_ENCODED_BYTES];                 // encoded output of the secret agreement
+    unsigned char mask[MSG_BYTES];                         // hash of jinv, XORed onto m
+    unsigned char *const m = buf, *const tail = buf + MSG_BYTES;
+    unsigned char *const c1 = ct + CRYPTO_PUBLICKEYBYTES;
+
+    // m <- random message; skA <- G(m||pk) with its last byte masked by MASK_ALICE
+    randombytes(m, MSG_BYTES);
+    memcpy(tail, pk, CRYPTO_PUBLICKEYBYTES);
+    shake256(skA, SECRETKEY_A_BYTES, buf, CRYPTO_PUBLICKEYBYTES+MSG_BYTES);
+    skA[SECRETKEY_A_BYTES - 1] &= MASK_ALICE;
+
+    // Encrypt: c0 <- ephemeral public key, c1 <- m XOR hash of the agreed value
+    EphemeralKeyGeneration_A(skA, ct);
+    EphemeralSecretAgreement_A(skA, pk, jinv);
+    shake256(mask, MSG_BYTES, jinv, FP2_ENCODED_BYTES);
+    for (int k = 0; k < MSG_BYTES; k++)
+        c1[k] = m[k] ^ mask[k];
+
+    // ss <- H(m||ct)
+    memcpy(tail, ct, CRYPTO_CIPHERTEXTBYTES);
+    shake256(ss, CRYPTO_BYTES, buf, CRYPTO_CIPHERTEXTBYTES+MSG_BYTES);
+    return 0;
+}
+
+
+int crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk)
+{ // SIKE decapsulation. Always returns 0; a ciphertext that fails verification yields H(s||ct) instead of an error.
+  // Inputs:  sk = s || SK || pk (CRYPTO_SECRETKEYBYTES = MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes)
+  //          ct = c0 || c1      (CRYPTO_CIPHERTEXTBYTES = CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes)
+  // Output:  shared secret ss   (CRYPTO_BYTES bytes)
+    unsigned char buf[CRYPTO_CIPHERTEXTBYTES+MSG_BYTES];   // m' || pk first, then (m' or s) || ct
+    unsigned char skA[SECRETKEY_A_BYTES];                  // re-derived ephemeral private key
+    unsigned char c0_check[CRYPTO_PUBLICKEYBYTES];         // c0 recomputed from skA
+    unsigned char jinv[FP2_ENCODED_BYTES];
+    unsigned char mask[MSG_BYTES];
+    const unsigned char *const c1 = ct + CRYPTO_PUBLICKEYBYTES;
+    const unsigned char *const stored_pk = sk + (MSG_BYTES + SECRETKEY_B_BYTES);
+    int8_t selector;
+
+    // Decrypt: m' <- c1 XOR hash of the value agreed from SK and ct
+    EphemeralSecretAgreement_B(sk + MSG_BYTES, ct, jinv);
+    shake256(mask, MSG_BYTES, jinv, FP2_ENCODED_BYTES);
+    for (int k = 0; k < MSG_BYTES; k++)
+        buf[k] = c1[k] ^ mask[k];
+
+    // skA <- G(m'||pk) with its last byte masked by MASK_ALICE, then recompute c0 from it
+    memcpy(buf + MSG_BYTES, stored_pk, CRYPTO_PUBLICKEYBYTES);
+    shake256(skA, SECRETKEY_A_BYTES, buf, CRYPTO_PUBLICKEYBYTES+MSG_BYTES);
+    skA[SECRETKEY_A_BYTES - 1] &= MASK_ALICE;
+    EphemeralKeyGeneration_A(skA, c0_check);
+
+    // If selector = 0 then do ss = H(m'||ct), else if selector = -1 load s to do ss = H(s||ct)
+    selector = ct_compare(c0_check, ct, CRYPTO_PUBLICKEYBYTES);
+    ct_cmov(buf, sk, MSG_BYTES, selector);
+    memcpy(buf + MSG_BYTES, ct, CRYPTO_CIPHERTEXTBYTES);
+    shake256(ss, CRYPTO_BYTES, buf, CRYPTO_CIPHERTEXTBYTES+MSG_BYTES);
+    return 0;
+}
\ No newline at end of file
diff --git a/SIKE_sw/tests/arith_tests-p377.c b/SIKE_sw/tests/arith_tests-p377.c
new file mode 100644
index 0000000..3ad7052
--- /dev/null
+++ b/SIKE_sw/tests/arith_tests-p377.c
@@ -0,0 +1,616 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: testing code for field arithmetic, elliptic curve and isogeny functions
+*********************************************************************************************/
+
+#include "../src/config.h"
+#include "../src/P377/P377_internal.h"
+#include "../src/internal.h"
+#include "test_extras.h"
+#include <stdio.h>
+
+
+// Benchmark and test parameters: lower iteration counts when GENERIC_IMPLEMENTATION is defined or TARGET is TARGET_ARM
+#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) 
+    #define BENCH_LOOPS           100       // Number of iterations per bench
+    #define SMALL_BENCH_LOOPS     100       // Number of iterations per bench
+    #define TEST_LOOPS             10       // Number of iterations per test
+#else
+    #define BENCH_LOOPS        100000       // Number of iterations per bench
+    #define SMALL_BENCH_LOOPS   10000       // Number of iterations per bench
+    #define TEST_LOOPS            100       // Number of iterations per test
+#endif
+
+
+bool fp_test()
+{ // Tests for the field arithmetic over GF(p377): TEST_LOOPS random iterations per operation; returns false as soon as one group fails
+    bool OK = true;                                            // never modified: every failure path returns false directly
+    int n, passed;
+    felm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;           // ma..mf hold the values passed to the *_mont routines
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Testing field arithmetic over GF(p377): \n\n"); 
+
+    // Field addition over the prime p377: associativity, commutativity, identity, additive inverse
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom377_test(a); fprandom377_test(b); fprandom377_test(c); fprandom377_test(d); fprandom377_test(e); fprandom377_test(f); 
+
+        fpadd377(a, b, d); fpadd377(d, c, e);                 // e = (a+b)+c
+        fpadd377(b, c, d); fpadd377(d, a, f);                 // f = a+(b+c)
+        fpcorrection377(e);                                    // applied to both sides before the word-wise comparison
+        fpcorrection377(f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpadd377(a, b, d);                                     // d = a+b 
+        fpadd377(b, a, e);                                     // e = b+a
+        fpcorrection377(d);
+        fpcorrection377(e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero377(b);
+        fpadd377(a, b, d);                                     // d = a+0 
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero377(b);
+        fpcopy377(a, d);     
+        fpneg377(d);                      
+        fpadd377(a, d, e);                                     // e = a+(-a)
+        fpcorrection377(e);
+        if (compare_words(e, b, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) addition tests ............................................ PASSED");
+    else { printf("  GF(p) addition tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field subtraction over the prime p377: (a-b)-c = a-(b+c), a-b = -(b-a), a-0 = a, a-a = 0
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom377_test(a); fprandom377_test(b); fprandom377_test(c); fprandom377_test(d); fprandom377_test(e); fprandom377_test(f); 
+
+        fpsub377(a, b, d); fpsub377(d, c, e);                 // e = (a-b)-c
+        fpadd377(b, c, d); fpsub377(a, d, f);                 // f = a-(b+c)
+        fpcorrection377(e);
+        fpcorrection377(f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpsub377(a, b, d);                                     // d = a-b 
+        fpsub377(b, a, e);                                         
+        fpneg377(e);                                           // e = -(b-a)
+        fpcorrection377(d);
+        fpcorrection377(e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero377(b);
+        fpsub377(a, b, d);                                     // d = a-0 
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero377(b);
+        fpcopy377(a, d);                 
+        fpsub377(a, d, e);                                     // e = a-a (d is a copy of a)
+        fpcorrection377(e);
+        if (compare_words(e, b, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) subtraction tests ......................................... PASSED");
+    else { printf("  GF(p) subtraction tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    // Field multiplication over the prime p377: to_mont/from_mont round trip, associativity, distributivity, commutativity, a*1, a*0
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {    
+        fprandom377_test(a); fprandom377_test(b); fprandom377_test(c);  
+        fprandom377_test(ma); fprandom377_test(mb); fprandom377_test(mc); fprandom377_test(md); fprandom377_test(me); fprandom377_test(mf); 
+
+        to_mont(a, ma);                                        // c = from_mont(to_mont(a)) must equal a
+        fpcopy377(ma, mc);
+        from_mont(mc, c);
+        if (compare_words(a, c, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_mont(a, ma); to_mont(b, mb); to_mont(c, mc); 
+        fpmul377_mont(ma, mb, md); fpmul377_mont(md, mc, me);                          // e = (a*b)*c
+        fpmul377_mont(mb, mc, md); fpmul377_mont(md, ma, mf);                          // f = a*(b*c)
+        from_mont(me, e);
+        from_mont(mf, f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+      
+        to_mont(a, ma); to_mont(b, mb); to_mont(c, mc); 
+        fpadd377(mb, mc, md); fpmul377_mont(ma, md, me);                               // e = a*(b+c)
+        fpmul377_mont(ma, mb, md); fpmul377_mont(ma, mc, mf); fpadd377(md, mf, mf);    // f = a*b+a*c
+        from_mont(me, e);
+        from_mont(mf, f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+       
+        to_mont(a, ma); to_mont(b, mb);
+        fpmul377_mont(ma, mb, md);                                                      // d = a*b 
+        fpmul377_mont(mb, ma, me);                                                      // e = b*a 
+        from_mont(md, d);
+        from_mont(me, e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_mont(a, ma);
+        fpzero377(b); b[0] = 1; to_mont(b, mb);
+        fpmul377_mont(ma, mb, md);                                                      // d = a*1  
+        from_mont(ma, a);
+        from_mont(md, d);                
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero377(b); to_mont(b, mb);
+        fpmul377_mont(ma, mb, md);                                                      // d = a*0  
+        from_mont(mb, b);
+        from_mont(md, d);                
+        if (compare_words(b, d, NWORDS_FIELD)!=0) { passed=0; break; } 
+    }
+    if (passed==1) printf("  GF(p) multiplication tests ...................................... PASSED");
+    else { printf("  GF(p) multiplication tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field squaring over the prime p377: sqr(a) = a*a and sqr(0) = 0, compared without converting back with from_mont
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom377_test(a);
+        
+        to_mont(a, ma);
+        fpsqr377_mont(ma, mb);                                 // b = a^2
+        fpmul377_mont(ma, ma, mc);                             // c = a*a 
+        if (compare_words(mb, mc, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero377(a); to_mont(a, ma);
+        fpsqr377_mont(ma, md);                                 // d = 0^2 
+        if (compare_words(ma, md, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) squaring tests............................................. PASSED");
+    else { printf("  GF(p) squaring tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    // Field inversion over the prime p377: a * a^-1 must equal 1
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom377_test(a); 
+        to_mont(a, ma);
+        fpzero377(d); d[0]=1; to_mont(d, md);                  // md = 1 after to_mont, the expected product
+        fpcopy377(ma, mb);                                     // keep a copy: the inversion below works in place on ma
+        fpinv377_mont(ma);                                
+        fpmul377_mont(ma, mb, mc);                             // c = a*a^-1 
+        if (compare_words(mc, md, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) inversion tests............................................ PASSED");
+    else { printf("  GF(p) inversion tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    return OK;
+}
+
+
+bool fp2_test()
+{ // Tests for the quadratic extension field arithmetic over GF(p377^2). Prints one PASSED/FAILED line per group of checks; returns false at the first failing group, true when every group passes
+    bool OK = true;
+    int n, passed;                                              // passed is cleared, and the loop left, on the first mismatch
+    f2elm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;           // ma..mf are the operands/results handed to the *_mont routines
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Testing quadratic extension arithmetic over GF(p377^2): \n\n"); 
+
+    // Addition over GF(p377^2): associativity, commutativity, a+0 and a+(-a)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random377_test((digit_t*)a); fp2random377_test((digit_t*)b); fp2random377_test((digit_t*)c); fp2random377_test((digit_t*)d); fp2random377_test((digit_t*)e); fp2random377_test((digit_t*)f); 
+
+        fp2add377(a, b, d); fp2add377(d, c, e);                 // e = (a+b)+c
+        fp2add377(b, c, d); fp2add377(d, a, f);                 // f = a+(b+c)
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2add377(a, b, d);                                     // d = a+b 
+        fp2add377(b, a, e);                                     // e = b+a
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero377(b);
+        fp2add377(a, b, d);                                     // d = a+0 
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero377(b);                                          // b = 0 is the expected result of the next check
+        fp2copy377(a, d);     
+        fp2neg377(d);                                           // d = -a
+        fp2add377(a, d, e);                                     // e = a+(-a)
+        if (compare_words((digit_t*)e, (digit_t*)b, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) addition tests .......................................... PASSED");
+    else { printf("  GF(p^2) addition tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Subtraction over GF(p377^2): (a-b)-c = a-(b+c), a-b = -(b-a), a-0 and a-a
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random377_test((digit_t*)a); fp2random377_test((digit_t*)b); fp2random377_test((digit_t*)c); fp2random377_test((digit_t*)d); fp2random377_test((digit_t*)e); fp2random377_test((digit_t*)f); 
+
+        fp2sub377(a, b, d); fp2sub377(d, c, e);                 // e = (a-b)-c
+        fp2add377(b, c, d); fp2sub377(a, d, f);                 // f = a-(b+c)
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2sub377(a, b, d);                                     // d = a-b 
+        fp2sub377(b, a, e);                                         
+        fp2neg377(e);                                           // e = -(b-a)
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero377(b);
+        fp2sub377(a, b, d);                                     // d = a-0 
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero377(b);                                          // b = 0 is the expected result of the next check
+        fp2copy377(a, d);                 
+        fp2sub377(a, d, e);                                     // e = a-a (d is a copy of a)
+        if (compare_words((digit_t*)e, (digit_t*)b, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) subtraction tests ....................................... PASSED");
+    else { printf("  GF(p^2) subtraction tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Multiplication over GF(p377^2): conversion round trip, associativity, distributivity, commutativity, a*1 and a*0
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {    
+        fp2random377_test((digit_t*)a); fp2random377_test((digit_t*)b); fp2random377_test((digit_t*)c);  
+        fp2random377_test((digit_t*)ma); fp2random377_test((digit_t*)mb); fp2random377_test((digit_t*)mc); fp2random377_test((digit_t*)md); fp2random377_test((digit_t*)me); fp2random377_test((digit_t*)mf); 
+
+        to_fp2mont(a, ma);                                      // round trip: to_fp2mont followed by from_fp2mont must give back a
+        fp2copy377(ma, mc);
+        from_fp2mont(mc, d);                                    // written to scratch d, not c, so the random c is kept for the checks below
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_fp2mont(a, ma); to_fp2mont(b, mb); to_fp2mont(c, mc); 
+        fp2mul377_mont(ma, mb, md); fp2mul377_mont(md, mc, me);                          // e = (a*b)*c
+        fp2mul377_mont(mb, mc, md); fp2mul377_mont(md, ma, mf);                          // f = a*(b*c)
+        from_fp2mont(me, e);
+        from_fp2mont(mf, f);
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+      
+        to_fp2mont(a, ma); to_fp2mont(b, mb); to_fp2mont(c, mc); 
+        fp2add377(mb, mc, md); fp2mul377_mont(ma, md, me);                               // e = a*(b+c)
+        fp2mul377_mont(ma, mb, md); fp2mul377_mont(ma, mc, mf); fp2add377(md, mf, mf);   // f = a*b+a*c
+        from_fp2mont(me, e);
+        from_fp2mont(mf, f);
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+       
+        to_fp2mont(a, ma); to_fp2mont(b, mb);
+        fp2mul377_mont(ma, mb, md);                                                      // d = a*b 
+        fp2mul377_mont(mb, ma, me);                                                      // e = b*a 
+        from_fp2mont(md, d);
+        from_fp2mont(me, e);
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_fp2mont(a, ma);
+        fp2zero377(b); b[0][0] = 1; to_fp2mont(b, mb);                                   // b = 1: only the lowest digit of b[0] is set
+        fp2mul377_mont(ma, mb, md);                                                      // d = a*1  
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero377(b); to_fp2mont(b, mb);                                                // b = 0; ma still holds to_fp2mont(a) from the a*1 check
+        fp2mul377_mont(ma, mb, md);                                                      // d = a*0 
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)b, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; } 
+    }
+    if (passed==1) printf("  GF(p^2) multiplication tests .................................... PASSED");
+    else { printf("  GF(p^2) multiplication tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Squaring over GF(p377^2): a^2 against a*a, and 0^2 against 0
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random377_test((digit_t*)a);
+        
+        to_fp2mont(a, ma);
+        fp2sqr377_mont(ma, mb);                                 // b = a^2
+        fp2mul377_mont(ma, ma, mc);                             // c = a*a 
+        from_fp2mont(mb, b);               
+        from_fp2mont(mc, c);               
+        if (compare_words((digit_t*)b, (digit_t*)c, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero377(a); to_fp2mont(a, ma);
+        fp2sqr377_mont(ma, md);                                 // d = 0^2 
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) squaring tests........................................... PASSED");
+    else { printf("  GF(p^2) squaring tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    // Inversion over GF(p377^2): a * a^-1 must convert back to 1
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random377_test((digit_t*)a);    
+        
+        to_fp2mont(a, ma);
+        fp2zero377(d); d[0][0]=1; to_fp2mont(d, md);            // d = 1; md is computed but the check below compares the plain values c and d
+        fp2copy377(ma, mb);                                     // keep a copy: fp2inv377_mont overwrites its argument
+        fp2inv377_mont(ma);                                
+        fp2mul377_mont(ma, mb, mc);                             // c = a*a^-1              
+        from_fp2mont(mc, c);  
+        if (compare_words((digit_t*)c, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) inversion tests.......................................... PASSED");
+    else { printf("  GF(p^2) inversion tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    return OK;
+}
+
+
+bool fp_run()
+{ // Benchmarks field arithmetic over GF(p377): prints the average cpucycles() delta per call for addition, subtraction, multiplication, reduction and inversion; always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;                // running total and the two cpucycles() readings taken around each call
+    felm_t a, b, c;
+    dfelm_t aa;                                                 // mp_mul() output that rdc_mont() consumes
+        
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking field arithmetic over GF(p377): \n\n"); 
+        
+    fprandom377_test(a); fprandom377_test(b); fprandom377_test(c);
+
+    // GF(p) addition using p377
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpadd377(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) addition runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;   // %llu: the average is unsigned long long
+    printf("\n");
+
+    // GF(p) subtraction using p377
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpsub377(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) subtraction runs in ....................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) multiplication using p377
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpmul377_mont(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) multiplication runs in .................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) reduction using p377
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        mp_mul(a, b, aa, NWORDS_FIELD);                         // input is prepared outside the timed region
+
+        cycles1 = cpucycles(); 
+        rdc_mont(aa, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) reduction runs in ......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) inversion (fewer iterations: SMALL_BENCH_LOOPS)
+    cycles = 0;
+    for (n=0; n<SMALL_BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpinv377_mont(a);                                       // single in/out argument: each iteration works on the previous result
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) inversion (exponentiation) runs in ........................ %7llu ", cycles/SMALL_BENCH_LOOPS); print_unit;
+    printf("\n"); 
+    
+    return OK;
+}
+
+
+bool fp2_run()
+{ // Benchmarks quadratic extension arithmetic over GF(p377^2): prints the average cpucycles() delta per call for addition, subtraction, multiplication, squaring and inversion; always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;                // running total and the two cpucycles() readings taken around each call
+    f2elm_t a, b, c;
+        
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking quadratic extension arithmetic over GF(p377^2): \n\n"); 
+    
+    fp2random377_test((digit_t*)a); fp2random377_test((digit_t*)b); fp2random377_test((digit_t*)c);
+
+    // GF(p^2) addition
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2add377(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) addition runs in ........................................ %7llu ", cycles/BENCH_LOOPS); print_unit;   // %llu: the average is unsigned long long
+    printf("\n");
+
+    // GF(p^2) subtraction
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2sub377(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) subtraction runs in ..................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) multiplication
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2mul377_mont(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) multiplication runs in .................................. %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) squaring
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2sqr377_mont(a, b);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) squaring runs in ........................................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) inversion (fewer iterations: SMALL_BENCH_LOOPS)
+    cycles = 0;
+    for (n=0; n<SMALL_BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2inv377_mont(a);                                      // single in/out argument: each iteration works on the previous result
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) inversion (exponentiation) runs in ...................... %7llu ", cycles/SMALL_BENCH_LOOPS); print_unit;
+    printf("\n");
+    
+    return OK;
+}
+
+
+bool ecisog_run()
+{ // Benchmarks the elliptic curve and isogeny routines (xDBL, get_4_isog, eval_4_isog, xTPL, get_3_isog, eval_3_isog): prints the average cpucycles() delta per call; always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;                // running total and the two cpucycles() readings taken around each call
+    f2elm_t A24, C24, A4, A, C, coeff[5];
+    point_proj_t P = {0}, Q = {0};                              // zero-initialised so the timed routines never read indeterminate stack data (nothing below assigns P before its first use)
+        
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking elliptic curve and isogeny functions: \n\n"); 
+
+    // Point doubling
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random377_test((digit_t*)A24); fp2random377_test((digit_t*)C24);             // fresh operands, drawn outside the timed region
+
+        cycles1 = cpucycles(); 
+        xDBL(P, Q, A24, C24);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  Point doubling runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;   // %llu: the average is unsigned long long
+    printf("\n");
+
+    // 4-isogeny of a projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random377_test((digit_t*)A); fp2random377_test((digit_t*)coeff[0]); fp2random377_test((digit_t*)coeff[1]); fp2random377_test((digit_t*)coeff[2]);
+
+        cycles1 = cpucycles(); 
+        get_4_isog(P, A, C, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  4-isogeny of projective point runs in ........................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 4-isogeny evaluation at projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random377_test((digit_t*)A); fp2random377_test((digit_t*)coeff[0]); fp2random377_test((digit_t*)coeff[1]); fp2random377_test((digit_t*)coeff[2]);
+
+        cycles1 = cpucycles(); 
+        eval_4_isog(P, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  4-isogeny evaluation at projective point runs in ................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // Point tripling
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random377_test((digit_t*)A4); fp2random377_test((digit_t*)C);
+
+        cycles1 = cpucycles(); 
+        xTPL(P, Q, A4, C);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  Point tripling runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 3-isogeny of a projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random377_test((digit_t*)A); fp2random377_test((digit_t*)C);
+
+        cycles1 = cpucycles(); 
+        get_3_isog(P, A, C, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  3-isogeny of projective point runs in ........................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 3-isogeny evaluation at projective point (reuses Q and coeff as left by the previous benchmarks)
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        eval_3_isog(Q, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  3-isogeny evaluation at projective point runs in ................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+    
+    return OK;
+}
+
+
+int main()
+{ // Runs the field, extension-field and curve/isogeny tests and benchmarks in order; exit status is 0 when every stage succeeds and 1 otherwise
+    bool OK = true;
+
+    OK = OK && fp_test();          // Test field operations using p377
+    OK = OK && fp_run();           // Benchmark field operations using p377
+
+    OK = OK && fp2_test();         // Test arithmetic functions over GF(p377^2)
+    OK = OK && fp2_run();          // Benchmark arithmetic functions over GF(p377^2)
+    
+    OK = OK && ecisog_run();       // Benchmark elliptic curve and isogeny functions
+
+    return OK ? 0 : 1;             // 0 = success for the calling shell; the && chain above skips later stages once one has failed
+}
diff --git a/SIKE_sw/tests/arith_tests-p434.c b/SIKE_sw/tests/arith_tests-p434.c
new file mode 100644
index 0000000..fbd3e69
--- /dev/null
+++ b/SIKE_sw/tests/arith_tests-p434.c
@@ -0,0 +1,616 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: testing code for field arithmetic, elliptic curve and isogeny functions
+*********************************************************************************************/
+
+#include "../src/config.h"
+#include "../src/P434/P434_internal.h"
+#include "../src/internal.h"
+#include "test_extras.h"
+#include <stdio.h>
+
+
+// Benchmark and test parameters: smaller iteration counts when GENERIC_IMPLEMENTATION is defined or TARGET is TARGET_ARM, larger counts otherwise
+#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) 
+    #define BENCH_LOOPS           100       // Number of iterations per bench
+    #define SMALL_BENCH_LOOPS     100       // Number of iterations per bench
+    #define TEST_LOOPS             10       // Number of iterations per test
+#else
+    #define BENCH_LOOPS        100000       // Number of iterations per bench
+    #define SMALL_BENCH_LOOPS   10000       // Number of iterations per bench
+    #define TEST_LOOPS            100       // Number of iterations per test
+#endif
+
+
+bool fp_test()
+{ // Tests for the field arithmetic over GF(p434). Prints one PASSED/FAILED line per group of checks; returns false at the first failing group, true when every group passes
+    bool OK = true;
+    int n, passed;                                            // passed is cleared, and the loop left, on the first mismatch
+    felm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;          // ma..mf are the operands/results handed to the *_mont routines
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Testing field arithmetic over GF(p434): \n\n"); 
+
+    // Field addition over the prime p434: associativity, commutativity, a+0 and a+(-a)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom434_test(a); fprandom434_test(b); fprandom434_test(c); fprandom434_test(d); fprandom434_test(e); fprandom434_test(f); 
+
+        fpadd434(a, b, d); fpadd434(d, c, e);                 // e = (a+b)+c
+        fpadd434(b, c, d); fpadd434(d, a, f);                 // f = a+(b+c)
+        fpcorrection434(e);                                   // NOTE(review): presumably brings the result to its canonical representative before the word-wise compare - confirm
+        fpcorrection434(f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpadd434(a, b, d);                                     // d = a+b 
+        fpadd434(b, a, e);                                     // e = b+a
+        fpcorrection434(d);
+        fpcorrection434(e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero434(b);
+        fpadd434(a, b, d);                                     // d = a+0 
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero434(b);                                          // b = 0 is the expected result of the next check
+        fpcopy434(a, d);     
+        fpneg434(d);                                           // d = -a
+        fpadd434(a, d, e);                                     // e = a+(-a)
+        fpcorrection434(e);
+        if (compare_words(e, b, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) addition tests ............................................ PASSED");
+    else { printf("  GF(p) addition tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field subtraction over the prime p434: (a-b)-c = a-(b+c), a-b = -(b-a), a-0 and a-a
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom434_test(a); fprandom434_test(b); fprandom434_test(c); fprandom434_test(d); fprandom434_test(e); fprandom434_test(f); 
+
+        fpsub434(a, b, d); fpsub434(d, c, e);                 // e = (a-b)-c
+        fpadd434(b, c, d); fpsub434(a, d, f);                 // f = a-(b+c)
+        fpcorrection434(e);
+        fpcorrection434(f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpsub434(a, b, d);                                     // d = a-b 
+        fpsub434(b, a, e);                                         
+        fpneg434(e);                                           // e = -(b-a)
+        fpcorrection434(d);
+        fpcorrection434(e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero434(b);
+        fpsub434(a, b, d);                                     // d = a-0 
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero434(b);                                          // b = 0 is the expected result of the next check
+        fpcopy434(a, d);                 
+        fpsub434(a, d, e);                                     // e = a-a (d is a copy of a)
+        fpcorrection434(e);
+        if (compare_words(e, b, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) subtraction tests ......................................... PASSED");
+    else { printf("  GF(p) subtraction tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    // Field multiplication over the prime p434: conversion round trip, associativity, distributivity, commutativity, a*1 and a*0
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {    
+        fprandom434_test(a); fprandom434_test(b); fprandom434_test(c);  
+        fprandom434_test(ma); fprandom434_test(mb); fprandom434_test(mc); fprandom434_test(md); fprandom434_test(me); fprandom434_test(mf); 
+
+        to_mont(a, ma);                                        // round trip: to_mont followed by from_mont must give back a
+        fpcopy434(ma, mc);
+        from_mont(mc, d);                                      // written to scratch d, not c, so the random c is kept for the checks below
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_mont(a, ma); to_mont(b, mb); to_mont(c, mc); 
+        fpmul434_mont(ma, mb, md); fpmul434_mont(md, mc, me);                          // e = (a*b)*c
+        fpmul434_mont(mb, mc, md); fpmul434_mont(md, ma, mf);                          // f = a*(b*c)
+        from_mont(me, e);
+        from_mont(mf, f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+      
+        to_mont(a, ma); to_mont(b, mb); to_mont(c, mc); 
+        fpadd434(mb, mc, md); fpmul434_mont(ma, md, me);                               // e = a*(b+c)
+        fpmul434_mont(ma, mb, md); fpmul434_mont(ma, mc, mf); fpadd434(md, mf, mf);    // f = a*b+a*c
+        from_mont(me, e);
+        from_mont(mf, f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+       
+        to_mont(a, ma); to_mont(b, mb);
+        fpmul434_mont(ma, mb, md);                                                      // d = a*b 
+        fpmul434_mont(mb, ma, me);                                                      // e = b*a 
+        from_mont(md, d);
+        from_mont(me, e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_mont(a, ma);
+        fpzero434(b); b[0] = 1; to_mont(b, mb);                                         // b = 1: only the lowest digit is set
+        fpmul434_mont(ma, mb, md);                                                      // d = a*1  
+        from_mont(ma, a);                                                               // a is rebuilt from ma before being compared with d
+        from_mont(md, d);                
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero434(b); to_mont(b, mb);                                                   // b = 0; ma still holds to_mont(a) from the a*1 check
+        fpmul434_mont(ma, mb, md);                                                      // d = a*0  
+        from_mont(mb, b);
+        from_mont(md, d);                
+        if (compare_words(b, d, NWORDS_FIELD)!=0) { passed=0; break; } 
+    }
+    if (passed==1) printf("  GF(p) multiplication tests ...................................... PASSED");
+    else { printf("  GF(p) multiplication tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field squaring over the prime p434: results are compared directly, without from_mont
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom434_test(a);
+        
+        to_mont(a, ma);
+        fpsqr434_mont(ma, mb);                                 // b = a^2
+        fpmul434_mont(ma, ma, mc);                             // c = a*a 
+        if (compare_words(mb, mc, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero434(a); to_mont(a, ma);
+        fpsqr434_mont(ma, md);                                 // d = 0^2 
+        if (compare_words(ma, md, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) squaring tests............................................. PASSED");
+    else { printf("  GF(p) squaring tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    // Field inversion over the prime p434: a * a^-1 must equal to_mont(1)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom434_test(a); 
+        to_mont(a, ma);
+        fpzero434(d); d[0]=1; to_mont(d, md);                  // md = to_mont(1), the expected product
+        fpcopy434(ma, mb);                                     // keep a copy: fpinv434_mont overwrites its argument
+        fpinv434_mont(ma);                                
+        fpmul434_mont(ma, mb, mc);                             // c = a*a^-1 
+        if (compare_words(mc, md, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) inversion tests............................................ PASSED");
+    else { printf("  GF(p) inversion tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    return OK;
+}
+
+
+bool fp2_test()
+{ // Tests for the quadratic extension field arithmetic
+    bool OK = true;
+    int n, passed;
+    f2elm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Testing quadratic extension arithmetic over GF(p434^2): \n\n"); 
+
+    // Addition over GF(p434^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random434_test((digit_t*)a); fp2random434_test((digit_t*)b); fp2random434_test((digit_t*)c); fp2random434_test((digit_t*)d); fp2random434_test((digit_t*)e); fp2random434_test((digit_t*)f); 
+
+        fp2add434(a, b, d); fp2add434(d, c, e);                 // e = (a+b)+c
+        fp2add434(b, c, d); fp2add434(d, a, f);                 // f = a+(b+c)
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2add434(a, b, d);                                     // d = a+b 
+        fp2add434(b, a, e);                                     // e = b+a
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero434(b);
+        fp2add434(a, b, d);                                     // d = a+0 
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero434(b);
+        fp2copy434(a, d);     
+        fp2neg434(d);                      
+        fp2add434(a, d, e);                                     // e = a+(-a)
+        if (compare_words((digit_t*)e, (digit_t*)b, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) addition tests .......................................... PASSED");
+    else { printf("  GF(p^2) addition tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Subtraction over GF(p434^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random434_test((digit_t*)a); fp2random434_test((digit_t*)b); fp2random434_test((digit_t*)c); fp2random434_test((digit_t*)d); fp2random434_test((digit_t*)e); fp2random434_test((digit_t*)f); 
+
+        fp2sub434(a, b, d); fp2sub434(d, c, e);                 // e = (a-b)-c
+        fp2add434(b, c, d); fp2sub434(a, d, f);                 // f = a-(b+c)
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2sub434(a, b, d);                                     // d = a-b 
+        fp2sub434(b, a, e);                                         
+        fp2neg434(e);                                           // e = -(b-a)
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero434(b);
+        fp2sub434(a, b, d);                                     // d = a-0 
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero434(b);
+        fp2copy434(a, d);                 
+        fp2sub434(a, d, e);                                     // e = a+(-a)
+        if (compare_words((digit_t*)e, (digit_t*)b, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) subtraction tests ....................................... PASSED");
+    else { printf("  GF(p^2) subtraction tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Multiplication over GF(p434^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {    
+        fp2random434_test((digit_t*)a); fp2random434_test((digit_t*)b); fp2random434_test((digit_t*)c);  
+        fp2random434_test((digit_t*)ma); fp2random434_test((digit_t*)mb); fp2random434_test((digit_t*)mc); fp2random434_test((digit_t*)md); fp2random434_test((digit_t*)me); fp2random434_test((digit_t*)mf); 
+
+        to_fp2mont(a, ma);
+        fp2copy434(ma, mc);
+        from_fp2mont(mc, c);
+        if (compare_words((digit_t*)a, (digit_t*)c, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_fp2mont(a, ma); to_fp2mont(b, mb); to_fp2mont(c, mc); 
+        fp2mul434_mont(ma, mb, md); fp2mul434_mont(md, mc, me);                          // e = (a*b)*c
+        fp2mul434_mont(mb, mc, md); fp2mul434_mont(md, ma, mf);                          // f = a*(b*c)
+        from_fp2mont(me, e);
+        from_fp2mont(mf, f);
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+      
+        to_fp2mont(a, ma); to_fp2mont(b, mb); to_fp2mont(c, mc); 
+        fp2add434(mb, mc, md); fp2mul434_mont(ma, md, me);                               // e = a*(b+c)
+        fp2mul434_mont(ma, mb, md); fp2mul434_mont(ma, mc, mf); fp2add434(md, mf, mf);   // f = a*b+a*c
+        from_fp2mont(me, e);
+        from_fp2mont(mf, f);
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+       
+        to_fp2mont(a, ma); to_fp2mont(b, mb);
+        fp2mul434_mont(ma, mb, md);                                                      // d = a*b 
+        fp2mul434_mont(mb, ma, me);                                                      // e = b*a 
+        from_fp2mont(md, d);
+        from_fp2mont(me, e);
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_fp2mont(a, ma);
+        fp2zero434(b); b[0][0] = 1; to_fp2mont(b, mb);
+        fp2mul434_mont(ma, mb, md);                                                      // d = a*1  
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero434(b); to_fp2mont(b, mb);
+        fp2mul434_mont(ma, mb, md);                                                      // d = a*0 
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)b, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; } 
+    }
+    if (passed==1) printf("  GF(p^2) multiplication tests .................................... PASSED");
+    else { printf("  GF(p^2) multiplication tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Squaring over GF(p434^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random434_test((digit_t*)a);
+        
+        to_fp2mont(a, ma);
+        fp2sqr434_mont(ma, mb);                                 // b = a^2
+        fp2mul434_mont(ma, ma, mc);                             // c = a*a 
+        from_fp2mont(mb, b);               
+        from_fp2mont(mc, c);               
+        if (compare_words((digit_t*)b, (digit_t*)c, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero434(a); to_fp2mont(a, ma);
+        fp2sqr434_mont(ma, md);                                 // d = 0^2 
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) squaring tests........................................... PASSED");
+    else { printf("  GF(p^2) squaring tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    // Inversion over GF(p434^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random434_test((digit_t*)a);    
+        
+        to_fp2mont(a, ma);
+        fp2zero434(d); d[0][0]=1; to_fp2mont(d, md);
+        fp2copy434(ma, mb);                            
+        fp2inv434_mont(ma);                                
+        fp2mul434_mont(ma, mb, mc);                             // c = a*a^-1              
+        from_fp2mont(mc, c);  
+        if (compare_words((digit_t*)c, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) inversion tests.......................................... PASSED");
+    else { printf("  GF(p^2) inversion tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    return OK;
+}
+
+
+bool fp_run()
+{ // Benchmarks for the field arithmetic over GF(p434): prints the average cpucycles() delta per call. Always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;               // running total, start reading and end reading of cpucycles()
+    felm_t a, b, c;
+    dfelm_t aa;                                                // receives the mp_mul output that is fed to rdc_mont
+        
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking field arithmetic over GF(p434): \n\n"); 
+        
+    fprandom434_test(a); fprandom434_test(b); fprandom434_test(c);
+
+    // GF(p) addition using p434
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpadd434(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) addition runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;   // %llu: the argument is unsigned long long
+    printf("\n");
+
+    // GF(p) subtraction using p434
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpsub434(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) subtraction runs in ....................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) multiplication using p434
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpmul434_mont(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) multiplication runs in .................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) reduction using p434
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        mp_mul(a, b, aa, NWORDS_FIELD);                        // input is prepared outside the timed region, so only rdc_mont is measured
+
+        cycles1 = cpucycles(); 
+        rdc_mont(aa, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) reduction runs in ......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) inversion (uses the smaller SMALL_BENCH_LOOPS count; a is the sole argument and is reused on every iteration)
+    cycles = 0;
+    for (n=0; n<SMALL_BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpinv434_mont(a);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) inversion (exponentiation) runs in ........................ %7llu ", cycles/SMALL_BENCH_LOOPS); print_unit;
+    printf("\n"); 
+    
+    return OK;
+}
+
+
+bool fp2_run()
+{ // Benchmarks for the quadratic extension arithmetic over GF(p434^2): prints the average cpucycles() delta per call. Always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;               // running total, start reading and end reading of cpucycles()
+    f2elm_t a, b, c;
+        
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking quadratic extension arithmetic over GF(p434^2): \n\n"); 
+    
+    fp2random434_test((digit_t*)a); fp2random434_test((digit_t*)b); fp2random434_test((digit_t*)c);
+
+    // GF(p^2) addition
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2add434(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) addition runs in ........................................ %7llu ", cycles/BENCH_LOOPS); print_unit;   // %llu: the argument is unsigned long long
+    printf("\n");
+
+    // GF(p^2) subtraction
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2sub434(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) subtraction runs in ..................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) multiplication
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2mul434_mont(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) multiplication runs in .................................. %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) squaring
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2sqr434_mont(a, b);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) squaring runs in ........................................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) inversion (uses the smaller SMALL_BENCH_LOOPS count; a is the sole argument and is reused on every iteration)
+    cycles = 0;
+    for (n=0; n<SMALL_BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2inv434_mont(a);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) inversion (exponentiation) runs in ...................... %7llu ", cycles/SMALL_BENCH_LOOPS); print_unit;
+    printf("\n");
+    
+    return OK;
+}
+
+
+bool ecisog_run()
+{ // Benchmarks for the elliptic curve and isogeny functions: prints the average cpucycles() delta per call. Always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;               // running total, start reading and end reading of cpucycles()
+    f2elm_t A24, C24, A4, A, C, coeff[5];
+    point_proj_t P = {0}, Q = {0};                             // zero-initialized: P is handed to xDBL before anything in this function assigns it
+        
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking elliptic curve and isogeny functions: \n\n"); 
+
+    // Point doubling
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random434_test((digit_t*)A24); fp2random434_test((digit_t*)C24);   // fresh inputs each iteration, drawn outside the timed region
+
+        cycles1 = cpucycles(); 
+        xDBL(P, Q, A24, C24);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  Point doubling runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;   // %llu: the argument is unsigned long long
+    printf("\n");
+
+    // 4-isogeny of a projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random434_test((digit_t*)A); fp2random434_test((digit_t*)coeff[0]); fp2random434_test((digit_t*)coeff[1]); fp2random434_test((digit_t*)coeff[2]);
+
+        cycles1 = cpucycles(); 
+        get_4_isog(P, A, C, coeff);                            // NOTE(review): C is not assigned before this call - presumably an output of get_4_isog; confirm
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  4-isogeny of projective point runs in ........................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 4-isogeny evaluation at projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random434_test((digit_t*)A); fp2random434_test((digit_t*)coeff[0]); fp2random434_test((digit_t*)coeff[1]); fp2random434_test((digit_t*)coeff[2]);
+
+        cycles1 = cpucycles(); 
+        eval_4_isog(P, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  4-isogeny evaluation at projective point runs in ................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // Point tripling
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random434_test((digit_t*)A4); fp2random434_test((digit_t*)C);
+
+        cycles1 = cpucycles(); 
+        xTPL(P, Q, A4, C);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  Point tripling runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 3-isogeny of a projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random434_test((digit_t*)A); fp2random434_test((digit_t*)C);
+
+        cycles1 = cpucycles(); 
+        get_3_isog(P, A, C, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  3-isogeny of projective point runs in ........................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 3-isogeny evaluation at projective point (no inputs are re-randomized here: Q and coeff are whatever the previous loops left behind)
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        eval_3_isog(Q, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  3-isogeny evaluation at projective point runs in ................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+    
+    return OK;
+}
+
+
+int main()
+{ // Runs the p434 tests and benchmarks in order. Exit status: 0 if every step returned true, 1 otherwise
+    bool OK = true;
+                                   // "OK && f()" short-circuits: once a step returns false, the remaining steps are not run
+    OK = OK && fp_test();          // Test field operations using p434
+    OK = OK && fp_run();           // Benchmark field operations using p434
+
+    OK = OK && fp2_test();         // Test arithmetic functions over GF(p434^2)
+    OK = OK && fp2_run();          // Benchmark arithmetic functions over GF(p434^2)
+    
+    OK = OK && ecisog_run();       // Benchmark elliptic curve and isogeny functions
+
+    return OK ? 0 : 1;             // returning OK directly would report success as exit status 1 and failure as 0
+}
diff --git a/SIKE_sw/tests/arith_tests-p503.c b/SIKE_sw/tests/arith_tests-p503.c
new file mode 100644
index 0000000..8d4e491
--- /dev/null
+++ b/SIKE_sw/tests/arith_tests-p503.c
@@ -0,0 +1,616 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: testing code for field arithmetic, elliptic curve and isogeny functions
+*********************************************************************************************/
+
+#include "../src/config.h"
+#include "../src/P503/P503_internal.h"
+#include "../src/internal.h"
+#include "test_extras.h"
+#include <stdio.h>
+
+
+// Benchmark and test parameters (reduced iteration counts for the generic implementation and for ARM targets)
+#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) 
+    #define BENCH_LOOPS           100       // Number of iterations per bench
+    #define SMALL_BENCH_LOOPS     100       // Number of iterations per bench for the inversion benchmarks
+    #define TEST_LOOPS             10       // Number of iterations per test
+#else
+    #define BENCH_LOOPS        100000       // Number of iterations per bench
+    #define SMALL_BENCH_LOOPS   10000       // Number of iterations per bench for the inversion benchmarks
+    #define TEST_LOOPS            100       // Number of iterations per test
+#endif
+
+
+bool fp_test()
+{ // Tests for the field arithmetic over GF(p503). Returns false as soon as one test group fails, true if all groups pass
+    bool OK = true;
+    int n, passed;                                             // passed is cleared by the first failing check of a group
+    felm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;           // the m* variables hold the to_mont() images used by the *_mont routines
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Testing field arithmetic over GF(p503): \n\n"); 
+
+    // Field addition over the prime p503
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom503_test(a); fprandom503_test(b); fprandom503_test(c); fprandom503_test(d); fprandom503_test(e); fprandom503_test(f); 
+
+        fpadd503(a, b, d); fpadd503(d, c, e);                 // e = (a+b)+c
+        fpadd503(b, c, d); fpadd503(d, a, f);                 // f = a+(b+c)
+        fpcorrection503(e);                                    // presumably brings the value to its canonical representative before the word-wise compare - confirm
+        fpcorrection503(f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpadd503(a, b, d);                                     // d = a+b 
+        fpadd503(b, a, e);                                     // e = b+a
+        fpcorrection503(d);
+        fpcorrection503(e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero503(b);
+        fpadd503(a, b, d);                                     // d = a+0 
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero503(b);
+        fpcopy503(a, d);     
+        fpneg503(d);                                           // d = -a
+        fpadd503(a, d, e);                                     // e = a+(-a)
+        fpcorrection503(e);
+        if (compare_words(e, b, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) addition tests ............................................ PASSED");
+    else { printf("  GF(p) addition tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field subtraction over the prime p503
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom503_test(a); fprandom503_test(b); fprandom503_test(c); fprandom503_test(d); fprandom503_test(e); fprandom503_test(f); 
+
+        fpsub503(a, b, d); fpsub503(d, c, e);                 // e = (a-b)-c
+        fpadd503(b, c, d); fpsub503(a, d, f);                 // f = a-(b+c)
+        fpcorrection503(e);
+        fpcorrection503(f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpsub503(a, b, d);                                     // d = a-b 
+        fpsub503(b, a, e);                                         
+        fpneg503(e);                                           // e = -(b-a)
+        fpcorrection503(d);
+        fpcorrection503(e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero503(b);
+        fpsub503(a, b, d);                                     // d = a-0 
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero503(b);
+        fpcopy503(a, d);                                       // d = a
+        fpsub503(a, d, e);                                     // e = a-a
+        fpcorrection503(e);
+        if (compare_words(e, b, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) subtraction tests ......................................... PASSED");
+    else { printf("  GF(p) subtraction tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field multiplication over the prime p503
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {    
+        fprandom503_test(a); fprandom503_test(b); fprandom503_test(c);  
+        fprandom503_test(ma); fprandom503_test(mb); fprandom503_test(mc); fprandom503_test(md); fprandom503_test(me); fprandom503_test(mf); 
+
+        to_mont(a, ma);
+        fpcopy503(ma, mc);
+        from_mont(mc, c);                                      // round trip: c = from_mont(to_mont(a)) must equal a
+        if (compare_words(a, c, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_mont(a, ma); to_mont(b, mb); to_mont(c, mc); 
+        fpmul503_mont(ma, mb, md); fpmul503_mont(md, mc, me);                          // e = (a*b)*c
+        fpmul503_mont(mb, mc, md); fpmul503_mont(md, ma, mf);                          // f = a*(b*c)
+        from_mont(me, e);
+        from_mont(mf, f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+      
+        to_mont(a, ma); to_mont(b, mb); to_mont(c, mc); 
+        fpadd503(mb, mc, md); fpmul503_mont(ma, md, me);                               // e = a*(b+c)
+        fpmul503_mont(ma, mb, md); fpmul503_mont(ma, mc, mf); fpadd503(md, mf, mf);    // f = a*b+a*c
+        from_mont(me, e);
+        from_mont(mf, f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+       
+        to_mont(a, ma); to_mont(b, mb);
+        fpmul503_mont(ma, mb, md);                                                      // d = a*b 
+        fpmul503_mont(mb, ma, me);                                                      // e = b*a 
+        from_mont(md, d);
+        from_mont(me, e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_mont(a, ma);
+        fpzero503(b); b[0] = 1; to_mont(b, mb);                                         // b = 1
+        fpmul503_mont(ma, mb, md);                                                      // d = a*1  
+        from_mont(ma, a);
+        from_mont(md, d);                
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero503(b); to_mont(b, mb);                                                   // b = 0
+        fpmul503_mont(ma, mb, md);                                                      // d = a*0  
+        from_mont(mb, b);
+        from_mont(md, d);                
+        if (compare_words(b, d, NWORDS_FIELD)!=0) { passed=0; break; } 
+    }
+    if (passed==1) printf("  GF(p) multiplication tests ...................................... PASSED");
+    else { printf("  GF(p) multiplication tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field squaring over the prime p503 (results are compared directly, without calling from_mont)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom503_test(a);
+        
+        to_mont(a, ma);
+        fpsqr503_mont(ma, mb);                                 // b = a^2
+        fpmul503_mont(ma, ma, mc);                             // c = a*a 
+        if (compare_words(mb, mc, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero503(a); to_mont(a, ma);
+        fpsqr503_mont(ma, md);                                 // d = 0^2 
+        if (compare_words(ma, md, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) squaring tests............................................. PASSED");
+    else { printf("  GF(p) squaring tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    // Field inversion over the prime p503 (results are compared directly, without calling from_mont)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom503_test(a); 
+        to_mont(a, ma);
+        fpzero503(d); d[0]=1; to_mont(d, md);                  // md = to_mont(1), the expected product
+        fpcopy503(ma, mb);                                     // keep a copy of a: ma is the only argument of fpinv503_mont
+        fpinv503_mont(ma);                                
+        fpmul503_mont(ma, mb, mc);                             // c = a*a^-1 
+        if (compare_words(mc, md, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) inversion tests............................................ PASSED");
+    else { printf("  GF(p) inversion tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    return OK;
+}
+
+
+bool fp2_test()
+{ // Tests for the quadratic extension field arithmetic over GF(p503^2). Returns false as soon as one test group fails, true if all groups pass
+    bool OK = true;
+    int n, passed;                                             // passed is cleared by the first failing check of a group
+    f2elm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;          // the m* variables hold the to_fp2mont() images used by the *_mont routines
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Testing quadratic extension arithmetic over GF(p503^2): \n\n"); 
+
+    // Addition over GF(p503^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random503_test((digit_t*)a); fp2random503_test((digit_t*)b); fp2random503_test((digit_t*)c); fp2random503_test((digit_t*)d); fp2random503_test((digit_t*)e); fp2random503_test((digit_t*)f); 
+
+        fp2add503(a, b, d); fp2add503(d, c, e);                 // e = (a+b)+c
+        fp2add503(b, c, d); fp2add503(d, a, f);                 // f = a+(b+c)
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }   // 2*NWORDS_FIELD words: both components are compared
+
+        fp2add503(a, b, d);                                     // d = a+b 
+        fp2add503(b, a, e);                                     // e = b+a
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero503(b);
+        fp2add503(a, b, d);                                     // d = a+0 
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero503(b);
+        fp2copy503(a, d);     
+        fp2neg503(d);                                           // d = -a
+        fp2add503(a, d, e);                                     // e = a+(-a)
+        if (compare_words((digit_t*)e, (digit_t*)b, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) addition tests .......................................... PASSED");
+    else { printf("  GF(p^2) addition tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Subtraction over GF(p503^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random503_test((digit_t*)a); fp2random503_test((digit_t*)b); fp2random503_test((digit_t*)c); fp2random503_test((digit_t*)d); fp2random503_test((digit_t*)e); fp2random503_test((digit_t*)f); 
+
+        fp2sub503(a, b, d); fp2sub503(d, c, e);                 // e = (a-b)-c
+        fp2add503(b, c, d); fp2sub503(a, d, f);                 // f = a-(b+c)
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2sub503(a, b, d);                                     // d = a-b 
+        fp2sub503(b, a, e);                                         
+        fp2neg503(e);                                           // e = -(b-a)
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero503(b);
+        fp2sub503(a, b, d);                                     // d = a-0 
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero503(b);
+        fp2copy503(a, d);                                       // d = a
+        fp2sub503(a, d, e);                                     // e = a-a
+        if (compare_words((digit_t*)e, (digit_t*)b, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) subtraction tests ....................................... PASSED");
+    else { printf("  GF(p^2) subtraction tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Multiplication over GF(p503^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {    
+        fp2random503_test((digit_t*)a); fp2random503_test((digit_t*)b); fp2random503_test((digit_t*)c);  
+        fp2random503_test((digit_t*)ma); fp2random503_test((digit_t*)mb); fp2random503_test((digit_t*)mc); fp2random503_test((digit_t*)md); fp2random503_test((digit_t*)me); fp2random503_test((digit_t*)mf); 
+
+        to_fp2mont(a, ma);
+        fp2copy503(ma, mc);
+        from_fp2mont(mc, c);                                    // round trip: c = from_fp2mont(to_fp2mont(a)) must equal a
+        if (compare_words((digit_t*)a, (digit_t*)c, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_fp2mont(a, ma); to_fp2mont(b, mb); to_fp2mont(c, mc); 
+        fp2mul503_mont(ma, mb, md); fp2mul503_mont(md, mc, me);                          // e = (a*b)*c
+        fp2mul503_mont(mb, mc, md); fp2mul503_mont(md, ma, mf);                          // f = a*(b*c)
+        from_fp2mont(me, e);
+        from_fp2mont(mf, f);
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+      
+        to_fp2mont(a, ma); to_fp2mont(b, mb); to_fp2mont(c, mc); 
+        fp2add503(mb, mc, md); fp2mul503_mont(ma, md, me);                               // e = a*(b+c)
+        fp2mul503_mont(ma, mb, md); fp2mul503_mont(ma, mc, mf); fp2add503(md, mf, mf);   // f = a*b+a*c
+        from_fp2mont(me, e);
+        from_fp2mont(mf, f);
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+       
+        to_fp2mont(a, ma); to_fp2mont(b, mb);
+        fp2mul503_mont(ma, mb, md);                                                      // d = a*b 
+        fp2mul503_mont(mb, ma, me);                                                      // e = b*a 
+        from_fp2mont(md, d);
+        from_fp2mont(me, e);
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_fp2mont(a, ma);
+        fp2zero503(b); b[0][0] = 1; to_fp2mont(b, mb);                                   // b = 1 (first component 1, second component 0)
+        fp2mul503_mont(ma, mb, md);                                                      // d = a*1  
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero503(b); to_fp2mont(b, mb);                                                // b = 0
+        fp2mul503_mont(ma, mb, md);                                                      // d = a*0 
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)b, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; } 
+    }
+    if (passed==1) printf("  GF(p^2) multiplication tests .................................... PASSED");
+    else { printf("  GF(p^2) multiplication tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Squaring over GF(p503^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random503_test((digit_t*)a);
+        
+        to_fp2mont(a, ma);
+        fp2sqr503_mont(ma, mb);                                 // b = a^2
+        fp2mul503_mont(ma, ma, mc);                             // c = a*a 
+        from_fp2mont(mb, b);               
+        from_fp2mont(mc, c);               
+        if (compare_words((digit_t*)b, (digit_t*)c, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero503(a); to_fp2mont(a, ma);
+        fp2sqr503_mont(ma, md);                                 // d = 0^2 
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) squaring tests........................................... PASSED");
+    else { printf("  GF(p^2) squaring tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    // Inversion over GF(p503^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random503_test((digit_t*)a);    
+        
+        to_fp2mont(a, ma);
+        fp2zero503(d); d[0][0]=1; to_fp2mont(d, md);            // d = 1, the expected product; md is not read again in this loop
+        fp2copy503(ma, mb);                                     // keep a copy of a: ma is the only argument of fp2inv503_mont
+        fp2inv503_mont(ma);                                
+        fp2mul503_mont(ma, mb, mc);                             // c = a*a^-1              
+        from_fp2mont(mc, c);  
+        if (compare_words((digit_t*)c, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) inversion tests.......................................... PASSED");
+    else { printf("  GF(p^2) inversion tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    return OK;
+}
+
+
+bool fp_run()
+{ // Benchmarks for the field arithmetic over GF(p503): prints the average cpucycles() delta per call. Always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;               // running total, start reading and end reading of cpucycles()
+    felm_t a, b, c;
+    dfelm_t aa;                                                // receives the mp_mul output that is fed to rdc_mont
+        
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking field arithmetic over GF(p503): \n\n"); 
+        
+    fprandom503_test(a); fprandom503_test(b); fprandom503_test(c);
+
+    // GF(p) addition using p503
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpadd503(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) addition runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;   // %llu: the argument is unsigned long long
+    printf("\n");
+
+    // GF(p) subtraction using p503
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpsub503(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) subtraction runs in ....................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) multiplication using p503
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpmul503_mont(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) multiplication runs in .................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) reduction using p503
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        mp_mul(a, b, aa, NWORDS_FIELD);                        // input is prepared outside the timed region, so only rdc_mont is measured
+
+        cycles1 = cpucycles(); 
+        rdc_mont(aa, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) reduction runs in ......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) inversion (uses the smaller SMALL_BENCH_LOOPS count; a is the sole argument and is reused on every iteration)
+    cycles = 0;
+    for (n=0; n<SMALL_BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpinv503_mont(a);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) inversion (exponentiation) runs in ........................ %7llu ", cycles/SMALL_BENCH_LOOPS); print_unit;
+    printf("\n"); 
+    
+    return OK;
+}
+
+
+bool fp2_run()
+{ // Benchmarks for the quadratic extension arithmetic over GF(p503^2): prints the average cpucycles() delta per call. Always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;               // running total, start reading and end reading of cpucycles()
+    f2elm_t a, b, c;
+        
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking quadratic extension arithmetic over GF(p503^2): \n\n"); 
+    
+    fp2random503_test((digit_t*)a); fp2random503_test((digit_t*)b); fp2random503_test((digit_t*)c);
+
+    // GF(p^2) addition
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2add503(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) addition runs in ........................................ %7llu ", cycles/BENCH_LOOPS); print_unit;   // %llu: the argument is unsigned long long
+    printf("\n");
+
+    // GF(p^2) subtraction
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2sub503(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) subtraction runs in ..................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) multiplication
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2mul503_mont(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) multiplication runs in .................................. %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) squaring
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2sqr503_mont(a, b);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) squaring runs in ........................................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) inversion (uses the smaller SMALL_BENCH_LOOPS count; a is the sole argument and is reused on every iteration)
+    cycles = 0;
+    for (n=0; n<SMALL_BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2inv503_mont(a);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) inversion (exponentiation) runs in ...................... %7llu ", cycles/SMALL_BENCH_LOOPS); print_unit;
+    printf("\n");
+    
+    return OK;
+}
+
+
+bool ecisog_run()
+{ // Benchmarks xDBL, get_4_isog, eval_4_isog, xTPL, get_3_isog and eval_3_isog; prints the mean cpucycles() delta per call and always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;
+    f2elm_t A24 = {0}, C24 = {0}, A4 = {0}, A = {0}, C = {0}, coeff[5] = {0};
+    point_proj_t P = {0}, Q = {0};
+    // Every operand starts zeroed: P, Q, C and coeff[3..4] are handed to the callees without being assigned here, so they would otherwise hold indeterminate stack data
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
+    printf("Benchmarking elliptic curve and isogeny functions: \n\n");
+
+    // Point doubling
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random503_test((digit_t*)A24); fp2random503_test((digit_t*)C24);   // fresh inputs, generated outside the timed window
+
+        cycles1 = cpucycles();
+        xDBL(P, Q, A24, C24);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  Point doubling runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;   // %llu matches the unsigned long long accumulator
+    printf("\n");
+
+    // 4-isogeny of a projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random503_test((digit_t*)A); fp2random503_test((digit_t*)coeff[0]); fp2random503_test((digit_t*)coeff[1]); fp2random503_test((digit_t*)coeff[2]);
+
+        cycles1 = cpucycles();
+        get_4_isog(P, A, C, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  4-isogeny of projective point runs in ........................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 4-isogeny evaluation at projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random503_test((digit_t*)A); fp2random503_test((digit_t*)coeff[0]); fp2random503_test((digit_t*)coeff[1]); fp2random503_test((digit_t*)coeff[2]);
+
+        cycles1 = cpucycles();
+        eval_4_isog(P, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  4-isogeny evaluation at projective point runs in ................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // Point tripling
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random503_test((digit_t*)A4); fp2random503_test((digit_t*)C);
+
+        cycles1 = cpucycles();
+        xTPL(P, Q, A4, C);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  Point tripling runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 3-isogeny of a projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random503_test((digit_t*)A); fp2random503_test((digit_t*)C);
+
+        cycles1 = cpucycles();
+        get_3_isog(P, A, C, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  3-isogeny of projective point runs in ........................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 3-isogeny evaluation at projective point (reuses Q and coeff as left by the calls above)
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles();
+        eval_3_isog(Q, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  3-isogeny evaluation at projective point runs in ................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    return OK;
+}
+
+
+int main()
+{ // Runs the p503 tests and benchmarks in order; once a stage fails the && short-circuit skips the remaining ones
+    bool OK = true;
+
+    OK = OK && fp_test();          // Test field operations using p503
+    OK = OK && fp_run();           // Benchmark field operations using p503
+
+    OK = OK && fp2_test();         // Test arithmetic functions over GF(p503^2)
+    OK = OK && fp2_run();          // Benchmark arithmetic functions over GF(p503^2)
+
+    OK = OK && ecisog_run();       // Benchmark elliptic curve and isogeny functions
+
+    return OK ? 0 : 1;             // exit status 0 when everything passed; returning the bool itself reported 1 (failure) on success
+}
diff --git a/SIKE_sw/tests/arith_tests-p546.c b/SIKE_sw/tests/arith_tests-p546.c
new file mode 100644
index 0000000..3ff1dc6
--- /dev/null
+++ b/SIKE_sw/tests/arith_tests-p546.c
@@ -0,0 +1,616 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: testing code for field arithmetic, elliptic curve and isogeny functions
+*********************************************************************************************/
+
+#include "../src/config.h"
+#include "../src/P546/P546_internal.h"
+#include "../src/internal.h"
+#include "test_extras.h"
+#include <stdio.h>
+
+
+// Benchmark and test parameters: iteration counts used by the test and benchmark loops in this file
+#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) // smaller counts when GENERIC_IMPLEMENTATION is defined or TARGET is TARGET_ARM
+    #define BENCH_LOOPS           100       // Number of iterations per bench
+    #define SMALL_BENCH_LOOPS     100       // Number of iterations for the inversion benchmarks
+    #define TEST_LOOPS             10       // Number of iterations per test
+#else
+    #define BENCH_LOOPS        100000 // Number of iterations per bench
+    #define SMALL_BENCH_LOOPS   10000       // Number of iterations for the inversion benchmarks
+    #define TEST_LOOPS            100   // Number of iterations per test
+#endif
+
+
+bool fp_test()
+{ // Tests for the field arithmetic: checks algebraic identities of the GF(p546) routines on TEST_LOOPS random inputs; returns false at the first failing group, true otherwise
+    bool OK = true;
+    int n, passed;
+    felm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
+    printf("Testing field arithmetic over GF(p546): \n\n");
+
+    // Field addition over the prime p546
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom546_test(a); fprandom546_test(b); fprandom546_test(c); fprandom546_test(d); fprandom546_test(e); fprandom546_test(f);
+
+        fpadd546(a, b, d); fpadd546(d, c, e);                 // e = (a+b)+c
+        fpadd546(b, c, d); fpadd546(d, a, f);                 // f = a+(b+c)
+        fpcorrection546(e);
+        fpcorrection546(f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpadd546(a, b, d);                                     // d = a+b
+        fpadd546(b, a, e);                                     // e = b+a
+        fpcorrection546(d);
+        fpcorrection546(e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero546(b);
+        fpadd546(a, b, d);                                     // d = a+0
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero546(b);                                          // b = 0 is the expected result below
+        fpcopy546(a, d);
+        fpneg546(d);
+        fpadd546(a, d, e);                                     // e = a+(-a)
+        fpcorrection546(e);
+        if (compare_words(e, b, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) addition tests ............................................ PASSED");
+    else { printf("  GF(p) addition tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field subtraction over the prime p546
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom546_test(a); fprandom546_test(b); fprandom546_test(c); fprandom546_test(d); fprandom546_test(e); fprandom546_test(f);
+
+        fpsub546(a, b, d); fpsub546(d, c, e);                 // e = (a-b)-c
+        fpadd546(b, c, d); fpsub546(a, d, f);                 // f = a-(b+c)
+        fpcorrection546(e);
+        fpcorrection546(f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpsub546(a, b, d);                                     // d = a-b
+        fpsub546(b, a, e);
+        fpneg546(e);                                           // e = -(b-a)
+        fpcorrection546(d);
+        fpcorrection546(e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero546(b);
+        fpsub546(a, b, d);                                     // d = a-0
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero546(b);                                          // b = 0 is the expected result below
+        fpcopy546(a, d);
+        fpsub546(a, d, e);                                     // e = a-a
+        fpcorrection546(e);
+        if (compare_words(e, b, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) subtraction tests ......................................... PASSED");
+    else { printf("  GF(p) subtraction tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field multiplication over the prime p546
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom546_test(a); fprandom546_test(b); fprandom546_test(c);
+        fprandom546_test(ma); fprandom546_test(mb); fprandom546_test(mc); fprandom546_test(md); fprandom546_test(me); fprandom546_test(mf);
+
+        to_mont(a, ma);
+        fpcopy546(ma, mc);
+        from_mont(mc, d);                                                               // round trip lands in scratch d so that c stays an independent random operand for the checks below
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        to_mont(a, ma); to_mont(b, mb); to_mont(c, mc);
+        fpmul546_mont(ma, mb, md); fpmul546_mont(md, mc, me);                          // e = (a*b)*c
+        fpmul546_mont(mb, mc, md); fpmul546_mont(md, ma, mf);                          // f = a*(b*c)
+        from_mont(me, e);
+        from_mont(mf, f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        to_mont(a, ma); to_mont(b, mb); to_mont(c, mc);
+        fpadd546(mb, mc, md); fpmul546_mont(ma, md, me);                               // e = a*(b+c)
+        fpmul546_mont(ma, mb, md); fpmul546_mont(ma, mc, mf); fpadd546(md, mf, mf);    // f = a*b+a*c
+        from_mont(me, e);
+        from_mont(mf, f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        to_mont(a, ma); to_mont(b, mb);
+        fpmul546_mont(ma, mb, md);                                                      // d = a*b
+        fpmul546_mont(mb, ma, me);                                                      // e = b*a
+        from_mont(md, d);
+        from_mont(me, e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        to_mont(a, ma);
+        fpzero546(b); b[0] = 1; to_mont(b, mb);
+        fpmul546_mont(ma, mb, md);                                                      // d = a*1
+        from_mont(ma, a);
+        from_mont(md, d);
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero546(b); to_mont(b, mb);
+        fpmul546_mont(ma, mb, md);                                                      // d = a*0
+        from_mont(mb, b);
+        from_mont(md, d);
+        if (compare_words(b, d, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) multiplication tests ...................................... PASSED");
+    else { printf("  GF(p) multiplication tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field squaring over the prime p546
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom546_test(a);
+
+        to_mont(a, ma);
+        fpsqr546_mont(ma, mb);                                 // b = a^2
+        fpmul546_mont(ma, ma, mc);                             // c = a*a
+        if (compare_words(mb, mc, NWORDS_FIELD)!=0) { passed=0; break; }   // compared directly on the *_mont outputs, without from_mont
+
+        fpzero546(a); to_mont(a, ma);
+        fpsqr546_mont(ma, md);                                 // d = 0^2
+        if (compare_words(ma, md, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) squaring tests............................................. PASSED");
+    else { printf("  GF(p) squaring tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field inversion over the prime p546
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom546_test(a);
+        to_mont(a, ma);
+        fpzero546(d); d[0]=1; to_mont(d, md);                  // md = to_mont(1), the expected product
+        fpcopy546(ma, mb);                                     // keep a copy: fpinv546_mont works in place on ma
+        fpinv546_mont(ma);
+        fpmul546_mont(ma, mb, mc);                             // c = a*a^-1
+        if (compare_words(mc, md, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) inversion tests............................................ PASSED");
+    else { printf("  GF(p) inversion tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    return OK;
+}
+
+
+bool fp2_test()
+{ // Tests for the quadratic extension field arithmetic: checks algebraic identities of the GF(p546^2) routines on TEST_LOOPS random inputs; returns false at the first failing group, true otherwise
+    bool OK = true;
+    int n, passed;
+    f2elm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
+    printf("Testing quadratic extension arithmetic over GF(p546^2): \n\n");
+
+    // Addition over GF(p546^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random546_test((digit_t*)a); fp2random546_test((digit_t*)b); fp2random546_test((digit_t*)c); fp2random546_test((digit_t*)d); fp2random546_test((digit_t*)e); fp2random546_test((digit_t*)f);
+
+        fp2add546(a, b, d); fp2add546(d, c, e);                 // e = (a+b)+c
+        fp2add546(b, c, d); fp2add546(d, a, f);                 // f = a+(b+c)
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2add546(a, b, d);                                     // d = a+b
+        fp2add546(b, a, e);                                     // e = b+a
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero546(b);
+        fp2add546(a, b, d);                                     // d = a+0
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero546(b);                                          // b = 0 is the expected result below
+        fp2copy546(a, d);
+        fp2neg546(d);
+        fp2add546(a, d, e);                                     // e = a+(-a)
+        if (compare_words((digit_t*)e, (digit_t*)b, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) addition tests .......................................... PASSED");
+    else { printf("  GF(p^2) addition tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Subtraction over GF(p546^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random546_test((digit_t*)a); fp2random546_test((digit_t*)b); fp2random546_test((digit_t*)c); fp2random546_test((digit_t*)d); fp2random546_test((digit_t*)e); fp2random546_test((digit_t*)f);
+
+        fp2sub546(a, b, d); fp2sub546(d, c, e);                 // e = (a-b)-c
+        fp2add546(b, c, d); fp2sub546(a, d, f);                 // f = a-(b+c)
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2sub546(a, b, d);                                     // d = a-b
+        fp2sub546(b, a, e);
+        fp2neg546(e);                                           // e = -(b-a)
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero546(b);
+        fp2sub546(a, b, d);                                     // d = a-0
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero546(b);                                          // b = 0 is the expected result below
+        fp2copy546(a, d);
+        fp2sub546(a, d, e);                                     // e = a-a
+        if (compare_words((digit_t*)e, (digit_t*)b, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) subtraction tests ....................................... PASSED");
+    else { printf("  GF(p^2) subtraction tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Multiplication over GF(p546^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random546_test((digit_t*)a); fp2random546_test((digit_t*)b); fp2random546_test((digit_t*)c);
+        fp2random546_test((digit_t*)ma); fp2random546_test((digit_t*)mb); fp2random546_test((digit_t*)mc); fp2random546_test((digit_t*)md); fp2random546_test((digit_t*)me); fp2random546_test((digit_t*)mf);
+
+        to_fp2mont(a, ma);
+        fp2copy546(ma, mc);
+        from_fp2mont(mc, d);                                                             // round trip lands in scratch d so that c stays an independent random operand for the checks below
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        to_fp2mont(a, ma); to_fp2mont(b, mb); to_fp2mont(c, mc);
+        fp2mul546_mont(ma, mb, md); fp2mul546_mont(md, mc, me);                          // e = (a*b)*c
+        fp2mul546_mont(mb, mc, md); fp2mul546_mont(md, ma, mf);                          // f = a*(b*c)
+        from_fp2mont(me, e);
+        from_fp2mont(mf, f);
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        to_fp2mont(a, ma); to_fp2mont(b, mb); to_fp2mont(c, mc);
+        fp2add546(mb, mc, md); fp2mul546_mont(ma, md, me);                               // e = a*(b+c)
+        fp2mul546_mont(ma, mb, md); fp2mul546_mont(ma, mc, mf); fp2add546(md, mf, mf);   // f = a*b+a*c
+        from_fp2mont(me, e);
+        from_fp2mont(mf, f);
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        to_fp2mont(a, ma); to_fp2mont(b, mb);
+        fp2mul546_mont(ma, mb, md);                                                      // d = a*b
+        fp2mul546_mont(mb, ma, me);                                                      // e = b*a
+        from_fp2mont(md, d);
+        from_fp2mont(me, e);
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        to_fp2mont(a, ma);
+        fp2zero546(b); b[0][0] = 1; to_fp2mont(b, mb);
+        fp2mul546_mont(ma, mb, md);                                                      // d = a*1
+        from_fp2mont(md, d);
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero546(b); to_fp2mont(b, mb);
+        fp2mul546_mont(ma, mb, md);                                                      // d = a*0
+        from_fp2mont(md, d);
+        if (compare_words((digit_t*)b, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) multiplication tests .................................... PASSED");
+    else { printf("  GF(p^2) multiplication tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Squaring over GF(p546^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random546_test((digit_t*)a);
+
+        to_fp2mont(a, ma);
+        fp2sqr546_mont(ma, mb);                                 // b = a^2
+        fp2mul546_mont(ma, ma, mc);                             // c = a*a
+        from_fp2mont(mb, b);
+        from_fp2mont(mc, c);
+        if (compare_words((digit_t*)b, (digit_t*)c, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero546(a); to_fp2mont(a, ma);
+        fp2sqr546_mont(ma, md);                                 // d = 0^2
+        from_fp2mont(md, d);
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) squaring tests........................................... PASSED");
+    else { printf("  GF(p^2) squaring tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Inversion over GF(p546^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random546_test((digit_t*)a);
+
+        to_fp2mont(a, ma);
+        fp2zero546(d); d[0][0]=1; to_fp2mont(d, md);            // d = 1 is the expected product
+        fp2copy546(ma, mb);                                     // keep a copy: fp2inv546_mont works in place on ma
+        fp2inv546_mont(ma);
+        fp2mul546_mont(ma, mb, mc);                             // c = a*a^-1
+        from_fp2mont(mc, c);
+        if (compare_words((digit_t*)c, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) inversion tests.......................................... PASSED");
+    else { printf("  GF(p^2) inversion tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    return OK;
+}
+
+
+bool fp_run()
+{ // Benchmarks fpadd546, fpsub546, fpmul546_mont, rdc_mont and fpinv546_mont; prints the mean cpucycles() delta per call and always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;
+    felm_t a, b, c;
+    dfelm_t aa;
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
+    printf("Benchmarking field arithmetic over GF(p546): \n\n");
+    // Operands are drawn once and reused by every loop below
+    fprandom546_test(a); fprandom546_test(b); fprandom546_test(c);
+
+    // GF(p) addition using p546
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles();
+        fpadd546(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) addition runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;   // %llu matches the unsigned long long accumulator
+    printf("\n");
+
+    // GF(p) subtraction using p546
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles();
+        fpsub546(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) subtraction runs in ....................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) multiplication using p546
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles();
+        fpmul546_mont(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) multiplication runs in .................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) reduction using p546
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        mp_mul(a, b, aa, NWORDS_FIELD);                        // builds the input for rdc_mont; outside the timed window
+
+        cycles1 = cpucycles();
+        rdc_mont(aa, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) reduction runs in ......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) inversion (uses the smaller SMALL_BENCH_LOOPS count; a is inverted in place on every iteration)
+    cycles = 0;
+    for (n=0; n<SMALL_BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles();
+        fpinv546_mont(a);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) inversion (exponentiation) runs in ........................ %7llu ", cycles/SMALL_BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    return OK;
+}
+
+
+bool fp2_run()
+{ // Benchmarks fp2add546, fp2sub546, fp2mul546_mont, fp2sqr546_mont and fp2inv546_mont; prints the mean cpucycles() delta per call and always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;
+    f2elm_t a, b, c;
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
+    printf("Benchmarking quadratic extension arithmetic over GF(p546^2): \n\n");
+    // Operands are drawn once and reused by every loop below
+    fp2random546_test((digit_t*)a); fp2random546_test((digit_t*)b); fp2random546_test((digit_t*)c);
+
+    // GF(p^2) addition
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles();
+        fp2add546(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) addition runs in ........................................ %7llu ", cycles/BENCH_LOOPS); print_unit;   // %llu matches the unsigned long long accumulator
+    printf("\n");
+
+    // GF(p^2) subtraction
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles();
+        fp2sub546(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) subtraction runs in ..................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) multiplication
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles();
+        fp2mul546_mont(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) multiplication runs in .................................. %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) squaring
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles();
+        fp2sqr546_mont(a, b);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) squaring runs in ........................................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) inversion (uses the smaller SMALL_BENCH_LOOPS count; a is inverted in place on every iteration)
+    cycles = 0;
+    for (n=0; n<SMALL_BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles();
+        fp2inv546_mont(a);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) inversion (exponentiation) runs in ...................... %7llu ", cycles/SMALL_BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    return OK;
+}
+
+
+bool ecisog_run()
+{ // Benchmarks xDBL, get_4_isog, eval_4_isog, xTPL, get_3_isog and eval_3_isog; prints the mean cpucycles() delta per call and always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;
+    f2elm_t A24 = {0}, C24 = {0}, A4 = {0}, A = {0}, C = {0}, coeff[5] = {0};
+    point_proj_t P = {0}, Q = {0};
+    // Every operand starts zeroed: P, Q, C and coeff[3..4] are handed to the callees without being assigned here, so they would otherwise hold indeterminate stack data
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
+    printf("Benchmarking elliptic curve and isogeny functions: \n\n");
+
+    // Point doubling
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random546_test((digit_t*)A24); fp2random546_test((digit_t*)C24);   // fresh inputs, generated outside the timed window
+
+        cycles1 = cpucycles();
+        xDBL(P, Q, A24, C24);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  Point doubling runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;   // %llu matches the unsigned long long accumulator
+    printf("\n");
+
+    // 4-isogeny of a projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random546_test((digit_t*)A); fp2random546_test((digit_t*)coeff[0]); fp2random546_test((digit_t*)coeff[1]); fp2random546_test((digit_t*)coeff[2]);
+
+        cycles1 = cpucycles();
+        get_4_isog(P, A, C, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  4-isogeny of projective point runs in ........................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 4-isogeny evaluation at projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random546_test((digit_t*)A); fp2random546_test((digit_t*)coeff[0]); fp2random546_test((digit_t*)coeff[1]); fp2random546_test((digit_t*)coeff[2]);
+
+        cycles1 = cpucycles();
+        eval_4_isog(P, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  4-isogeny evaluation at projective point runs in ................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // Point tripling
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random546_test((digit_t*)A4); fp2random546_test((digit_t*)C);
+
+        cycles1 = cpucycles();
+        xTPL(P, Q, A4, C);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  Point tripling runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 3-isogeny of a projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random546_test((digit_t*)A); fp2random546_test((digit_t*)C);
+
+        cycles1 = cpucycles();
+        get_3_isog(P, A, C, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  3-isogeny of projective point runs in ........................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 3-isogeny evaluation at projective point (reuses Q and coeff as left by the calls above)
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles();
+        eval_3_isog(Q, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  3-isogeny evaluation at projective point runs in ................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    return OK;
+}
+
+
+int main()
+{ // Runs the p546 tests and benchmarks in order; once a stage fails the && short-circuit skips the remaining ones
+    bool OK = true;
+
+    OK = OK && fp_test();          // Test field operations using p546
+    OK = OK && fp_run();           // Benchmark field operations using p546
+
+    OK = OK && fp2_test();         // Test arithmetic functions over GF(p546^2)
+    OK = OK && fp2_run();          // Benchmark arithmetic functions over GF(p546^2)
+
+    OK = OK && ecisog_run();       // Benchmark elliptic curve and isogeny functions
+
+    return OK ? 0 : 1;             // exit status 0 when everything passed; returning the bool itself reported 1 (failure) on success
+}
diff --git a/SIKE_sw/tests/arith_tests-p610.c b/SIKE_sw/tests/arith_tests-p610.c
new file mode 100644
index 0000000..b4a841d
--- /dev/null
+++ b/SIKE_sw/tests/arith_tests-p610.c
@@ -0,0 +1,617 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: testing code for field arithmetic, elliptic curve and isogeny functions
+*********************************************************************************************/
+
+#include "../src/config.h"
+#include "../src/P610/P610_internal.h"
+#include "../src/internal.h"
+#include "test_extras.h"
+#include <stdio.h>
+
+
+// Benchmark and test parameters: small loop counts when GENERIC_IMPLEMENTATION is defined or TARGET is TARGET_ARM, larger ones otherwise
+#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) 
+    #define BENCH_LOOPS           100       // Number of iterations per bench
+    #define SMALL_BENCH_LOOPS     100       // Number of iterations per bench (used by the inversion benchmarks in this file)
+    #define TEST_LOOPS             10       // Number of iterations per test
+#else
+    #define BENCH_LOOPS        100000       // Number of iterations per bench
+    #define SMALL_BENCH_LOOPS   10000       // Number of iterations per bench (used by the inversion benchmarks in this file)
+    #define TEST_LOOPS            100       // Number of iterations per test
+#endif
+
+
+bool fp_test()
+{ // Checks the GF(p610) field routines against basic algebraic identities; returns false at the first failing group
+    bool status = true;
+    int iter, good;
+    felm_t u, v, w, t0, t1, t2, mu, mv, mw, mt0, mt1, mt2;
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Testing field arithmetic over GF(p610): \n\n"); 
+
+    // Addition: associativity, commutativity, zero as identity, u+(-u) = 0
+    good = 1;
+    for (iter = 0; iter < TEST_LOOPS; iter++)
+    {
+        fprandom610_test(u); fprandom610_test(v); fprandom610_test(w); fprandom610_test(t0); fprandom610_test(t1); fprandom610_test(t2);
+
+        fpadd610(u, v, t0); fpadd610(t0, w, t1);               // t1 = (u+v)+w
+        fpadd610(v, w, t0); fpadd610(t0, u, t2);               // t2 = u+(v+w)
+        fpcorrection610(t1);
+        fpcorrection610(t2);
+        if (compare_words(t1, t2, NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fpadd610(u, v, t0);                                    // t0 = u+v
+        fpadd610(v, u, t1);                                    // t1 = v+u
+        fpcorrection610(t0);
+        fpcorrection610(t1);
+        if (compare_words(t0, t1, NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fpzero610(v);
+        fpadd610(u, v, t0);                                    // t0 = u+0
+        if (compare_words(u, t0, NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fpzero610(v);
+        fpcopy610(u, t0);
+        fpneg610(t0);
+        fpadd610(u, t0, t1);                                   // t1 = u+(-u)
+        fpcorrection610(t1);
+        if (compare_words(t1, v, NWORDS_FIELD) != 0) { good = 0; break; }
+    }
+    if (good != 1) { printf("  GF(p) addition tests... FAILED"); printf("\n"); return false; }
+    printf("  GF(p) addition tests ............................................ PASSED");
+    printf("\n");
+
+    // Subtraction: (u-v)-w = u-(v+w), u-v = -(v-u), zero as identity, u-u = 0
+    good = 1;
+    for (iter = 0; iter < TEST_LOOPS; iter++)
+    {
+        fprandom610_test(u); fprandom610_test(v); fprandom610_test(w); fprandom610_test(t0); fprandom610_test(t1); fprandom610_test(t2);
+
+        fpsub610(u, v, t0); fpsub610(t0, w, t1);               // t1 = (u-v)-w
+        fpadd610(v, w, t0); fpsub610(u, t0, t2);               // t2 = u-(v+w)
+        fpcorrection610(t1);
+        fpcorrection610(t2);
+        if (compare_words(t1, t2, NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fpsub610(u, v, t0);                                    // t0 = u-v
+        fpsub610(v, u, t1);
+        fpneg610(t1);                                          // t1 = -(v-u)
+        fpcorrection610(t0);
+        fpcorrection610(t1);
+        if (compare_words(t0, t1, NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fpzero610(v);
+        fpsub610(u, v, t0);                                    // t0 = u-0
+        if (compare_words(u, t0, NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fpzero610(v);
+        fpcopy610(u, t0);
+        fpsub610(u, t0, t1);                                   // t1 = u-u
+        fpcorrection610(t1);
+        if (compare_words(t1, v, NWORDS_FIELD) != 0) { good = 0; break; }
+    }
+    if (good != 1) { printf("  GF(p) subtraction tests... FAILED"); printf("\n"); return false; }
+    printf("  GF(p) subtraction tests ......................................... PASSED");
+    printf("\n");
+
+    // Multiplication: to_mont/from_mont round trip, associativity, distributivity, commutativity, times 1, times 0
+    good = 1;
+    for (iter = 0; iter < TEST_LOOPS; iter++)
+    {
+        fprandom610_test(u); fprandom610_test(v); fprandom610_test(w);
+        fprandom610_test(mu); fprandom610_test(mv); fprandom610_test(mw); fprandom610_test(mt0); fprandom610_test(mt1); fprandom610_test(mt2);
+
+        to_mont(u, mu);
+        fpcopy610(mu, mw);
+        from_mont(mw, w);                                      // w = u after the round trip
+        if (compare_words(u, w, NWORDS_FIELD) != 0)
+        { good = 0; break; }
+
+        to_mont(u, mu); to_mont(v, mv); to_mont(w, mw);
+        fpmul610_mont(mu, mv, mt0); fpmul610_mont(mt0, mw, mt1);                        // t1 = (u*v)*w
+        fpmul610_mont(mv, mw, mt0); fpmul610_mont(mt0, mu, mt2);                        // t2 = u*(v*w)
+        from_mont(mt1, t1);
+        from_mont(mt2, t2);
+        if (compare_words(t1, t2, NWORDS_FIELD) != 0) { good = 0; break; }
+
+        to_mont(u, mu); to_mont(v, mv); to_mont(w, mw);
+        fpadd610(mv, mw, mt0); fpmul610_mont(mu, mt0, mt1);                             // t1 = u*(v+w)
+        fpmul610_mont(mu, mv, mt0); fpmul610_mont(mu, mw, mt2); fpadd610(mt0, mt2, mt2); // t2 = u*v+u*w
+        from_mont(mt1, t1);
+        from_mont(mt2, t2);
+        if (compare_words(t1, t2, NWORDS_FIELD) != 0) { good = 0; break; }
+
+        to_mont(u, mu); to_mont(v, mv);
+        fpmul610_mont(mu, mv, mt0);                                                     // t0 = u*v
+        fpmul610_mont(mv, mu, mt1);                                                     // t1 = v*u
+        from_mont(mt0, t0);
+        from_mont(mt1, t1);
+        if (compare_words(t0, t1, NWORDS_FIELD) != 0) { good = 0; break; }
+
+        to_mont(u, mu);
+        fpzero610(v); v[0] = 1; to_mont(v, mv);
+        fpmul610_mont(mu, mv, mt0);                                                     // t0 = u*1
+        from_mont(mu, u);
+        from_mont(mt0, t0);
+        if (compare_words(u, t0, NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fpzero610(v); to_mont(v, mv);
+        fpmul610_mont(mu, mv, mt0);                                                     // t0 = u*0
+        from_mont(mv, v);
+        from_mont(mt0, t0);
+        if (compare_words(v, t0, NWORDS_FIELD) != 0) { good = 0; break; }
+    }
+    if (good != 1) { printf("  GF(p) multiplication tests... FAILED"); printf("\n"); return false; }
+    printf("  GF(p) multiplication tests ...................................... PASSED");
+    printf("\n");
+
+    // Squaring: fpsqr610_mont must agree with fpmul610_mont(x, x), and 0^2 must stay 0
+    good = 1;
+    for (iter = 0; iter < TEST_LOOPS; iter++)
+    {
+        fprandom610_test(u);
+
+        to_mont(u, mu);
+        fpsqr610_mont(mu, mv);                                 // mv = u^2
+        fpmul610_mont(mu, mu, mw);                             // mw = u*u
+        if (compare_words(mv, mw, NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fpzero610(u); to_mont(u, mu);
+        fpsqr610_mont(mu, mt0);                                // mt0 = 0^2
+        if (compare_words(mu, mt0, NWORDS_FIELD) != 0) { good = 0; break; }
+    }
+    if (good != 1) { printf("  GF(p) squaring tests... FAILED"); printf("\n"); return false; }
+    printf("  GF(p) squaring tests............................................. PASSED");
+    printf("\n");
+
+    // Inversion: u * u^-1 must equal the to_mont image of 1
+    good = 1;
+    for (iter = 0; iter < TEST_LOOPS; iter++)
+    {
+        fprandom610_test(u);
+        to_mont(u, mu);
+        fpzero610(t0); t0[0] = 1; to_mont(t0, mt0);
+        fpcopy610(mu, mv);                                     // keep a copy, the inversion works in place
+        fpinv610_mont(mu);
+        fpmul610_mont(mu, mv, mw);                             // mw = u*u^-1
+        if (compare_words(mw, mt0, NWORDS_FIELD) != 0) { good = 0; break; }
+    }
+    if (good != 1) { printf("  GF(p) inversion tests... FAILED"); printf("\n"); return false; }
+    printf("  GF(p) inversion tests............................................ PASSED");
+    printf("\n");
+
+    return status;
+}
+
+
+bool fp2_test()
+{ // Checks the GF(p610^2) routines against basic algebraic identities; returns false at the first failing group
+    bool status = true;
+    int iter, good;
+    f2elm_t u, v, w, t0, t1, t2, mu, mv, mw, mt0, mt1, mt2;
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Testing quadratic extension arithmetic over GF(p610^2): \n\n"); 
+
+    // Addition: associativity, commutativity, zero as identity, u+(-u) = 0
+    good = 1;
+    for (iter = 0; iter < TEST_LOOPS; iter++)
+    {
+        fp2random610_test((digit_t*)u); fp2random610_test((digit_t*)v); fp2random610_test((digit_t*)w); fp2random610_test((digit_t*)t0); fp2random610_test((digit_t*)t1); fp2random610_test((digit_t*)t2);
+
+        fp2add610(u, v, t0); fp2add610(t0, w, t1);              // t1 = (u+v)+w
+        fp2add610(v, w, t0); fp2add610(t0, u, t2);              // t2 = u+(v+w)
+        if (compare_words((digit_t*)t1, (digit_t*)t2, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fp2add610(u, v, t0);                                    // t0 = u+v
+        fp2add610(v, u, t1);                                    // t1 = v+u
+        if (compare_words((digit_t*)t0, (digit_t*)t1, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fp2zero610(v);
+        fp2add610(u, v, t0);                                    // t0 = u+0
+        if (compare_words((digit_t*)u, (digit_t*)t0, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fp2zero610(v);
+        fp2copy610(u, t0);
+        fp2neg610(t0);
+        fp2add610(u, t0, t1);                                   // t1 = u+(-u)
+        if (compare_words((digit_t*)t1, (digit_t*)v, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+    }
+    if (good != 1) { printf("  GF(p^2) addition tests... FAILED"); printf("\n"); return false; }
+    printf("  GF(p^2) addition tests .......................................... PASSED");
+    printf("\n");
+
+    // Subtraction: (u-v)-w = u-(v+w), u-v = -(v-u), zero as identity, u-u = 0
+    good = 1;
+    for (iter = 0; iter < TEST_LOOPS; iter++)
+    {
+        fp2random610_test((digit_t*)u); fp2random610_test((digit_t*)v); fp2random610_test((digit_t*)w); fp2random610_test((digit_t*)t0); fp2random610_test((digit_t*)t1); fp2random610_test((digit_t*)t2);
+
+        fp2sub610(u, v, t0); fp2sub610(t0, w, t1);              // t1 = (u-v)-w
+        fp2add610(v, w, t0); fp2sub610(u, t0, t2);              // t2 = u-(v+w)
+        if (compare_words((digit_t*)t1, (digit_t*)t2, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fp2sub610(u, v, t0);                                    // t0 = u-v
+        fp2sub610(v, u, t1);
+        fp2neg610(t1);                                          // t1 = -(v-u)
+        if (compare_words((digit_t*)t0, (digit_t*)t1, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fp2zero610(v);
+        fp2sub610(u, v, t0);                                    // t0 = u-0
+        if (compare_words((digit_t*)u, (digit_t*)t0, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fp2zero610(v);
+        fp2copy610(u, t0);
+        fp2sub610(u, t0, t1);                                   // t1 = u-u
+        if (compare_words((digit_t*)t1, (digit_t*)v, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+    }
+    if (good != 1) { printf("  GF(p^2) subtraction tests... FAILED"); printf("\n"); return false; }
+    printf("  GF(p^2) subtraction tests ....................................... PASSED");
+    printf("\n");
+
+    // Multiplication: to_fp2mont/from_fp2mont round trip, associativity, distributivity, commutativity, times 1, times 0
+    good = 1;
+    for (iter = 0; iter < TEST_LOOPS; iter++)
+    {
+        fp2random610_test((digit_t*)u); fp2random610_test((digit_t*)v); fp2random610_test((digit_t*)w);
+        fp2random610_test((digit_t*)mu); fp2random610_test((digit_t*)mv); fp2random610_test((digit_t*)mw); fp2random610_test((digit_t*)mt0); fp2random610_test((digit_t*)mt1); fp2random610_test((digit_t*)mt2);
+
+        to_fp2mont(u, mu);
+        fp2copy610(mu, mw);
+        from_fp2mont(mw, w);                                    // w = u after the round trip
+        if (compare_words((digit_t*)u, (digit_t*)w, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+
+        to_fp2mont(u, mu); to_fp2mont(v, mv); to_fp2mont(w, mw);
+        fp2mul610_mont(mu, mv, mt0); fp2mul610_mont(mt0, mw, mt1);                        // t1 = (u*v)*w
+        fp2mul610_mont(mv, mw, mt0); fp2mul610_mont(mt0, mu, mt2);                        // t2 = u*(v*w)
+        from_fp2mont(mt1, t1);
+        from_fp2mont(mt2, t2);
+        if (compare_words((digit_t*)t1, (digit_t*)t2, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+
+        to_fp2mont(u, mu); to_fp2mont(v, mv); to_fp2mont(w, mw);
+        fp2add610(mv, mw, mt0); fp2mul610_mont(mu, mt0, mt1);                             // t1 = u*(v+w)
+        fp2mul610_mont(mu, mv, mt0); fp2mul610_mont(mu, mw, mt2); fp2add610(mt0, mt2, mt2); // t2 = u*v+u*w
+        from_fp2mont(mt1, t1);
+        from_fp2mont(mt2, t2);
+        if (compare_words((digit_t*)t1, (digit_t*)t2, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+
+        to_fp2mont(u, mu); to_fp2mont(v, mv);
+        fp2mul610_mont(mu, mv, mt0);                                                      // t0 = u*v
+        fp2mul610_mont(mv, mu, mt1);                                                      // t1 = v*u
+        from_fp2mont(mt0, t0);
+        from_fp2mont(mt1, t1);
+        if (compare_words((digit_t*)t0, (digit_t*)t1, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+
+        to_fp2mont(u, mu);
+        fp2zero610(v); v[0][0] = 1; to_fp2mont(v, mv);
+        fp2mul610_mont(mu, mv, mt0);                                                      // t0 = u*1
+        from_fp2mont(mt0, t0);
+        if (compare_words((digit_t*)u, (digit_t*)t0, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fp2zero610(v); to_fp2mont(v, mv);
+        fp2mul610_mont(mu, mv, mt0);                                                      // t0 = u*0
+        from_fp2mont(mt0, t0);
+        if (compare_words((digit_t*)v, (digit_t*)t0, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+    }
+    if (good != 1) { printf("  GF(p^2) multiplication tests... FAILED"); printf("\n"); return false; }
+    printf("  GF(p^2) multiplication tests .................................... PASSED");
+    printf("\n");
+
+    // Squaring: fp2sqr610_mont must agree with fp2mul610_mont(x, x), and 0^2 must stay 0
+    good = 1;
+    for (iter = 0; iter < TEST_LOOPS; iter++)
+    {
+        fp2random610_test((digit_t*)u);
+
+        to_fp2mont(u, mu);
+        fp2sqr610_mont(mu, mv);                                 // v = u^2
+        fp2mul610_mont(mu, mu, mw);                             // w = u*u
+        from_fp2mont(mv, v);
+        from_fp2mont(mw, w);
+        if (compare_words((digit_t*)v, (digit_t*)w, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+
+        fp2zero610(u); to_fp2mont(u, mu);
+        fp2sqr610_mont(mu, mt0);                                // t0 = 0^2
+        from_fp2mont(mt0, t0);
+        if (compare_words((digit_t*)u, (digit_t*)t0, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+    }
+    if (good != 1) { printf("  GF(p^2) squaring tests... FAILED"); printf("\n"); return false; }
+    printf("  GF(p^2) squaring tests........................................... PASSED");
+    printf("\n");
+
+    // Inversion: u * u^-1, converted back with from_fp2mont, must equal 1
+    good = 1;
+    for (iter = 0; iter < TEST_LOOPS; iter++)
+    {
+        fp2random610_test((digit_t*)u);
+
+        to_fp2mont(u, mu);
+        fp2zero610(t0); t0[0][0] = 1; to_fp2mont(t0, mt0);
+        fp2copy610(mu, mv);                                     // keep a copy, the inversion works in place
+        fp2inv610_mont(mu);
+        fp2mul610_mont(mu, mv, mw);                             // w = u*u^-1
+        from_fp2mont(mw, w);
+        if (compare_words((digit_t*)w, (digit_t*)t0, 2*NWORDS_FIELD) != 0) { good = 0; break; }
+    }
+    if (good != 1) { printf("  GF(p^2) inversion tests... FAILED"); printf("\n"); return false; }
+    printf("  GF(p^2) inversion tests.......................................... PASSED");
+    printf("\n");
+
+    return status;
+}
+
+
+bool fp_run()
+{ // Benchmarks the GF(p610) field routines: prints the average cpucycles() delta per call; always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;             // Unsigned accumulators, hence the %llu conversions below
+    felm_t a, b, c;
+    dfelm_t aa;
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking field arithmetic over GF(p610): \n\n"); 
+
+    fprandom610_test(a); fprandom610_test(b); fprandom610_test(c);
+
+    // GF(p) addition using p610
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpadd610(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) addition runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) subtraction using p610
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpsub610(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) subtraction runs in ....................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) multiplication using p610
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpmul610_mont(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) multiplication runs in .................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) reduction using p610
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        mp_mul(a, b, aa, NWORDS_FIELD);                       // Rebuild the double-length input outside the timed region
+
+        cycles1 = cpucycles(); 
+        rdc_mont(aa, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) reduction runs in ......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) inversion (fewer iterations: SMALL_BENCH_LOOPS)
+    cycles = 0;
+    for (n=0; n<SMALL_BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpinv610_mont(a);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) inversion (exponentiation) runs in ........................ %7llu ", cycles/SMALL_BENCH_LOOPS); print_unit;
+    printf("\n"); 
+
+    return OK;
+}
+
+
+bool fp2_run()
+{ // Benchmarks the GF(p610^2) routines: prints the average cpucycles() delta per call; always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;             // Unsigned accumulators, hence the %llu conversions below
+    f2elm_t a, b, c;
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking quadratic extension arithmetic over GF(p610^2): \n\n"); 
+
+    fp2random610_test((digit_t*)a); fp2random610_test((digit_t*)b); fp2random610_test((digit_t*)c);
+
+    // GF(p^2) addition
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2add610(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) addition runs in ........................................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) subtraction
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2sub610(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) subtraction runs in ..................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) multiplication
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2mul610_mont(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) multiplication runs in .................................. %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) squaring
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2sqr610_mont(a, b);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) squaring runs in ........................................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) inversion (fewer iterations: SMALL_BENCH_LOOPS)
+    cycles = 0;
+    for (n=0; n<SMALL_BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2inv610_mont(a);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) inversion (exponentiation) runs in ...................... %7llu ", cycles/SMALL_BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    return OK;
+}
+
+
+bool ecisog_run()
+{ // Benchmarks the curve and isogeny routines: prints the average cpucycles() delta per call; always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;             // Unsigned accumulators, hence the %llu conversions below
+    f2elm_t A24, C24, A4, A, C = {0}, coeff[5];              // C reaches get_4_isog before anything here assigns it, so start it at zero
+    point_proj_t P = {0}, Q = {0};                           // Never assigned in this function before use: zeroed so no indeterminate stack data is read
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking elliptic curve and isogeny functions: \n\n"); 
+
+    // Point doubling
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random610_test((digit_t*)A24); fp2random610_test((digit_t*)C24);
+
+        cycles1 = cpucycles(); 
+        xDBL(P, Q, A24, C24);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  Point doubling runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 4-isogeny of a projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random610_test((digit_t*)A); fp2random610_test((digit_t*)coeff[0]); fp2random610_test((digit_t*)coeff[1]); fp2random610_test((digit_t*)coeff[2]);
+
+        cycles1 = cpucycles(); 
+        get_4_isog(P, A, C, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  4-isogeny of projective point runs in ........................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 4-isogeny evaluation at projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random610_test((digit_t*)A); fp2random610_test((digit_t*)coeff[0]); fp2random610_test((digit_t*)coeff[1]); fp2random610_test((digit_t*)coeff[2]);
+
+        cycles1 = cpucycles(); 
+        eval_4_isog(P, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  4-isogeny evaluation at projective point runs in ................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // Point tripling
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random610_test((digit_t*)A4); fp2random610_test((digit_t*)C);
+
+        cycles1 = cpucycles(); 
+        xTPL(P, Q, A4, C);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  Point tripling runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 3-isogeny of a projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random610_test((digit_t*)A); fp2random610_test((digit_t*)C);
+
+        cycles1 = cpucycles(); 
+        get_3_isog(P, A, C, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  3-isogeny of projective point runs in ........................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 3-isogeny evaluation at projective point (reuses coeff as left by the get_3_isog loop above)
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        eval_3_isog(Q, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  3-isogeny evaluation at projective point runs in ................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    return OK;
+}
+
+
+int main()
+{ // Runs the p610 test and benchmark stages in order; && short-circuits, so stages after a failing one are skipped
+    bool OK = true;
+
+    OK = OK && fp_test();          // Test field operations using p610
+    OK = OK && fp_run();           // Benchmark field operations using p610
+
+    OK = OK && fp2_test();         // Test arithmetic functions over GF(p610^2)
+    OK = OK && fp2_run();          // Benchmark arithmetic functions over GF(p610^2)
+    
+    OK = OK && ecisog_run();       // Benchmark elliptic curve and isogeny functions
+
+    return OK ? 0 : 1;             // Exit status: 0 when every stage succeeded, 1 otherwise (was the raw bool, i.e. inverted)
+}
diff --git a/SIKE_sw/tests/arith_tests-p697.c b/SIKE_sw/tests/arith_tests-p697.c
new file mode 100644
index 0000000..44318fb
--- /dev/null
+++ b/SIKE_sw/tests/arith_tests-p697.c
@@ -0,0 +1,616 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: testing code for field arithmetic, elliptic curve and isogeny functions
+*********************************************************************************************/
+
+#include "../src/config.h"
+#include "../src/P697/P697_internal.h"
+#include "../src/internal.h"
+#include "test_extras.h"
+#include <stdio.h>
+
+
+// Benchmark and test parameters (smaller counts are used for generic builds and ARM targets)
+#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) 
+    #define BENCH_LOOPS           100       // Number of iterations per bench
+    #define SMALL_BENCH_LOOPS     100       // Number of iterations per bench for the inversion benchmarks
+    #define TEST_LOOPS             10       // Number of iterations per test
+#else
+    #define BENCH_LOOPS        100000       // Number of iterations per bench
+    #define SMALL_BENCH_LOOPS   10000       // Number of iterations per bench for the inversion benchmarks
+    #define TEST_LOOPS            100       // Number of iterations per test
+#endif
+
+
+bool fp_test()
+{ // Tests for the field arithmetic over GF(p697). Returns true if every test group passes, false at the first failing group
+    bool OK = true;
+    int n, passed;
+    felm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;          // mx holds the Montgomery representation of x
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Testing field arithmetic over GF(p697): \n\n"); 
+
+    // Field addition over the prime p697
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom697_test(a); fprandom697_test(b); fprandom697_test(c); fprandom697_test(d); fprandom697_test(e); fprandom697_test(f); 
+
+        fpadd697(a, b, d); fpadd697(d, c, e);                 // e = (a+b)+c
+        fpadd697(b, c, d); fpadd697(d, a, f);                 // f = a+(b+c)
+        fpcorrection697(e);
+        fpcorrection697(f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpadd697(a, b, d);                                     // d = a+b 
+        fpadd697(b, a, e);                                     // e = b+a
+        fpcorrection697(d);
+        fpcorrection697(e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero697(b);
+        fpadd697(a, b, d);                                     // d = a+0 
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero697(b);
+        fpcopy697(a, d);     
+        fpneg697(d);                      
+        fpadd697(a, d, e);                                     // e = a+(-a)
+        fpcorrection697(e);
+        if (compare_words(e, b, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) addition tests ............................................ PASSED");
+    else { printf("  GF(p) addition tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field subtraction over the prime p697
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom697_test(a); fprandom697_test(b); fprandom697_test(c); fprandom697_test(d); fprandom697_test(e); fprandom697_test(f); 
+
+        fpsub697(a, b, d); fpsub697(d, c, e);                 // e = (a-b)-c
+        fpadd697(b, c, d); fpsub697(a, d, f);                 // f = a-(b+c)
+        fpcorrection697(e);
+        fpcorrection697(f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpsub697(a, b, d);                                     // d = a-b 
+        fpsub697(b, a, e);                                         
+        fpneg697(e);                                           // e = -(b-a)
+        fpcorrection697(d);
+        fpcorrection697(e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero697(b);
+        fpsub697(a, b, d);                                     // d = a-0 
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero697(b);
+        fpcopy697(a, d);                 
+        fpsub697(a, d, e);                                     // e = a-a
+        fpcorrection697(e);
+        if (compare_words(e, b, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) subtraction tests ......................................... PASSED");
+    else { printf("  GF(p) subtraction tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field multiplication over the prime p697
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {    
+        fprandom697_test(a); fprandom697_test(b); fprandom697_test(c);  
+        fprandom697_test(ma); fprandom697_test(mb); fprandom697_test(mc); fprandom697_test(md); fprandom697_test(me); fprandom697_test(mf); 
+
+        to_mont(a, ma);
+        fpcopy697(ma, md);                                                              // Round trip uses d/md as scratch so that c stays
+        from_mont(md, d);                                                               // an independent random operand for the checks below
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_mont(a, ma); to_mont(b, mb); to_mont(c, mc); 
+        fpmul697_mont(ma, mb, md); fpmul697_mont(md, mc, me);                          // e = (a*b)*c
+        fpmul697_mont(mb, mc, md); fpmul697_mont(md, ma, mf);                          // f = a*(b*c)
+        from_mont(me, e);
+        from_mont(mf, f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+      
+        to_mont(a, ma); to_mont(b, mb); to_mont(c, mc); 
+        fpadd697(mb, mc, md); fpmul697_mont(ma, md, me);                               // e = a*(b+c)
+        fpmul697_mont(ma, mb, md); fpmul697_mont(ma, mc, mf); fpadd697(md, mf, mf);    // f = a*b+a*c
+        from_mont(me, e);
+        from_mont(mf, f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+       
+        to_mont(a, ma); to_mont(b, mb);
+        fpmul697_mont(ma, mb, md);                                                      // d = a*b 
+        fpmul697_mont(mb, ma, me);                                                      // e = b*a 
+        from_mont(md, d);
+        from_mont(me, e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_mont(a, ma);
+        fpzero697(b); b[0] = 1; to_mont(b, mb);
+        fpmul697_mont(ma, mb, md);                                                      // d = a*1  
+        from_mont(ma, a);
+        from_mont(md, d);                
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero697(b); to_mont(b, mb);
+        fpmul697_mont(ma, mb, md);                                                      // d = a*0  
+        from_mont(mb, b);
+        from_mont(md, d);                
+        if (compare_words(b, d, NWORDS_FIELD)!=0) { passed=0; break; } 
+    }
+    if (passed==1) printf("  GF(p) multiplication tests ...................................... PASSED");
+    else { printf("  GF(p) multiplication tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field squaring over the prime p697 (results are converted out of Montgomery form before comparing, as in fp2_test)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom697_test(a);
+        
+        to_mont(a, ma);
+        fpsqr697_mont(ma, mb); from_mont(mb, b);               // b = a^2
+        fpmul697_mont(ma, ma, mc); from_mont(mc, c);           // c = a*a 
+        if (compare_words(b, c, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero697(a); to_mont(a, ma);
+        fpsqr697_mont(ma, md); from_mont(md, d);               // d = 0^2 
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) squaring tests............................................. PASSED");
+    else { printf("  GF(p) squaring tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    // Field inversion over the prime p697 (result is converted out of Montgomery form before comparing, as in fp2_test)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom697_test(a); 
+        to_mont(a, ma);
+        fpzero697(d); d[0]=1;                                  // d = 1 (plain, non-Montgomery representation)
+        fpcopy697(ma, mb);                            
+        fpinv697_mont(ma);                                
+        fpmul697_mont(ma, mb, mc); from_mont(mc, c);           // c = a*a^-1 
+        if (compare_words(c, d, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) inversion tests............................................ PASSED");
+    else { printf("  GF(p) inversion tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    return OK;
+}
+
+
+bool fp2_test()
+{ // Tests for the quadratic extension field arithmetic over GF(p697^2). Returns true if every test group passes, false at the first failing group
+    bool OK = true;
+    int n, passed;
+    f2elm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;         // mx holds the Montgomery representation of x
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Testing quadratic extension arithmetic over GF(p697^2): \n\n"); 
+
+    // Addition over GF(p697^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random697_test((digit_t*)a); fp2random697_test((digit_t*)b); fp2random697_test((digit_t*)c); fp2random697_test((digit_t*)d); fp2random697_test((digit_t*)e); fp2random697_test((digit_t*)f); 
+
+        fp2add697(a, b, d); fp2add697(d, c, e);                 // e = (a+b)+c
+        fp2add697(b, c, d); fp2add697(d, a, f);                 // f = a+(b+c)
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2add697(a, b, d);                                     // d = a+b 
+        fp2add697(b, a, e);                                     // e = b+a
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero697(b);
+        fp2add697(a, b, d);                                     // d = a+0 
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero697(b);
+        fp2copy697(a, d);     
+        fp2neg697(d);                      
+        fp2add697(a, d, e);                                     // e = a+(-a)
+        if (compare_words((digit_t*)e, (digit_t*)b, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) addition tests .......................................... PASSED");
+    else { printf("  GF(p^2) addition tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Subtraction over GF(p697^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random697_test((digit_t*)a); fp2random697_test((digit_t*)b); fp2random697_test((digit_t*)c); fp2random697_test((digit_t*)d); fp2random697_test((digit_t*)e); fp2random697_test((digit_t*)f); 
+
+        fp2sub697(a, b, d); fp2sub697(d, c, e);                 // e = (a-b)-c
+        fp2add697(b, c, d); fp2sub697(a, d, f);                 // f = a-(b+c)
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2sub697(a, b, d);                                     // d = a-b 
+        fp2sub697(b, a, e);                                         
+        fp2neg697(e);                                           // e = -(b-a)
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero697(b);
+        fp2sub697(a, b, d);                                     // d = a-0 
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero697(b);
+        fp2copy697(a, d);                 
+        fp2sub697(a, d, e);                                     // e = a-a
+        if (compare_words((digit_t*)e, (digit_t*)b, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) subtraction tests ....................................... PASSED");
+    else { printf("  GF(p^2) subtraction tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Multiplication over GF(p697^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {    
+        fp2random697_test((digit_t*)a); fp2random697_test((digit_t*)b); fp2random697_test((digit_t*)c);  
+        fp2random697_test((digit_t*)ma); fp2random697_test((digit_t*)mb); fp2random697_test((digit_t*)mc); fp2random697_test((digit_t*)md); fp2random697_test((digit_t*)me); fp2random697_test((digit_t*)mf); 
+
+        to_fp2mont(a, ma);
+        fp2copy697(ma, md);                                                              // Round trip uses d/md as scratch so that c stays
+        from_fp2mont(md, d);                                                             // an independent random operand for the checks below
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_fp2mont(a, ma); to_fp2mont(b, mb); to_fp2mont(c, mc); 
+        fp2mul697_mont(ma, mb, md); fp2mul697_mont(md, mc, me);                          // e = (a*b)*c
+        fp2mul697_mont(mb, mc, md); fp2mul697_mont(md, ma, mf);                          // f = a*(b*c)
+        from_fp2mont(me, e);
+        from_fp2mont(mf, f);
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+      
+        to_fp2mont(a, ma); to_fp2mont(b, mb); to_fp2mont(c, mc); 
+        fp2add697(mb, mc, md); fp2mul697_mont(ma, md, me);                               // e = a*(b+c)
+        fp2mul697_mont(ma, mb, md); fp2mul697_mont(ma, mc, mf); fp2add697(md, mf, mf);   // f = a*b+a*c
+        from_fp2mont(me, e);
+        from_fp2mont(mf, f);
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+       
+        to_fp2mont(a, ma); to_fp2mont(b, mb);
+        fp2mul697_mont(ma, mb, md);                                                      // d = a*b 
+        fp2mul697_mont(mb, ma, me);                                                      // e = b*a 
+        from_fp2mont(md, d);
+        from_fp2mont(me, e);
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_fp2mont(a, ma);
+        fp2zero697(b); b[0][0] = 1; to_fp2mont(b, mb);
+        fp2mul697_mont(ma, mb, md);                                                      // d = a*1  
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero697(b); to_fp2mont(b, mb);
+        fp2mul697_mont(ma, mb, md);                                                      // d = a*0 
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)b, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; } 
+    }
+    if (passed==1) printf("  GF(p^2) multiplication tests .................................... PASSED");
+    else { printf("  GF(p^2) multiplication tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Squaring over GF(p697^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random697_test((digit_t*)a);
+        
+        to_fp2mont(a, ma);
+        fp2sqr697_mont(ma, mb);                                 // b = a^2
+        fp2mul697_mont(ma, ma, mc);                             // c = a*a 
+        from_fp2mont(mb, b);               
+        from_fp2mont(mc, c);               
+        if (compare_words((digit_t*)b, (digit_t*)c, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero697(a); to_fp2mont(a, ma);
+        fp2sqr697_mont(ma, md);                                 // d = 0^2 
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) squaring tests........................................... PASSED");
+    else { printf("  GF(p^2) squaring tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    // Inversion over GF(p697^2)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random697_test((digit_t*)a);    
+        
+        to_fp2mont(a, ma);
+        fp2zero697(d); d[0][0]=1;                               // d = 1 (plain, non-Montgomery representation)
+        fp2copy697(ma, mb);                            
+        fp2inv697_mont(ma);                                
+        fp2mul697_mont(ma, mb, mc);                             // c = a*a^-1              
+        from_fp2mont(mc, c);  
+        if (compare_words((digit_t*)c, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) inversion tests.......................................... PASSED");
+    else { printf("  GF(p^2) inversion tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    return OK;
+}
+
+
+bool fp_run()
+{ // Benchmarks for the field arithmetic over GF(p697). Prints the average cost per call of each operation; always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;              // Accumulated count and the two counter readings around each call
+    felm_t a, b, c;
+    dfelm_t aa;
+        
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking field arithmetic over GF(p697): \n\n"); 
+        
+    fprandom697_test(a); fprandom697_test(b); fprandom697_test(c);
+
+    // GF(p) addition using p697
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpadd697(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) addition runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) subtraction using p697
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpsub697(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) subtraction runs in ....................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) multiplication using p697
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpmul697_mont(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) multiplication runs in .................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) reduction using p697
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        mp_mul(a, b, aa, NWORDS_FIELD);                        // Prepare the double-length input outside the timed region
+
+        cycles1 = cpucycles(); 
+        rdc_mont(aa, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) reduction runs in ......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) inversion
+    cycles = 0;
+    for (n=0; n<SMALL_BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpinv697_mont(a);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) inversion (exponentiation) runs in ........................ %7llu ", cycles/SMALL_BENCH_LOOPS); print_unit;
+    printf("\n"); 
+    
+    return OK;
+}
+
+
+bool fp2_run()
+{ // Benchmarks for the arithmetic over GF(p697^2). Prints the average cost per call of each operation; always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;              // Accumulated count and the two counter readings around each call
+    f2elm_t a, b, c;
+        
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking quadratic extension arithmetic over GF(p697^2): \n\n"); 
+    
+    fp2random697_test((digit_t*)a); fp2random697_test((digit_t*)b); fp2random697_test((digit_t*)c);
+
+    // GF(p^2) addition
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2add697(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) addition runs in ........................................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) subtraction
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2sub697(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) subtraction runs in ..................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) multiplication
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2mul697_mont(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) multiplication runs in .................................. %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) squaring
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2sqr697_mont(a, b);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) squaring runs in ........................................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) inversion
+    cycles = 0;
+    for (n=0; n<SMALL_BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2inv697_mont(a);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) inversion (exponentiation) runs in ...................... %7llu ", cycles/SMALL_BENCH_LOOPS); print_unit;
+    printf("\n");
+    
+    return OK;
+}
+
+
+bool ecisog_run()
+{ // Benchmarks for the elliptic curve and isogeny functions. Prints the average cost per call; always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;              // Accumulated count and the two counter readings around each call
+    f2elm_t A24 = {0}, C24 = {0}, A4 = {0}, A = {0}, C = {0}, coeff[5] = {0};    // Zero-initialized: some are passed on before being written here
+    point_proj_t P = {0}, Q = {0};                            // Zero-initialized so no call reads indeterminate stack contents
+        
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking elliptic curve and isogeny functions: \n\n"); 
+
+    // Point doubling
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random697_test((digit_t*)A24); fp2random697_test((digit_t*)C24);
+
+        cycles1 = cpucycles(); 
+        xDBL(P, Q, A24, C24);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  Point doubling runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 4-isogeny of a projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random697_test((digit_t*)A); fp2random697_test((digit_t*)coeff[0]); fp2random697_test((digit_t*)coeff[1]); fp2random697_test((digit_t*)coeff[2]);
+
+        cycles1 = cpucycles(); 
+        get_4_isog(P, A, C, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  4-isogeny of projective point runs in ........................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 4-isogeny evaluation at projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random697_test((digit_t*)A); fp2random697_test((digit_t*)coeff[0]); fp2random697_test((digit_t*)coeff[1]); fp2random697_test((digit_t*)coeff[2]);
+
+        cycles1 = cpucycles(); 
+        eval_4_isog(P, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  4-isogeny evaluation at projective point runs in ................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // Point tripling
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random697_test((digit_t*)A4); fp2random697_test((digit_t*)C);
+
+        cycles1 = cpucycles(); 
+        xTPL(P, Q, A4, C);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  Point tripling runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 3-isogeny of a projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random697_test((digit_t*)A); fp2random697_test((digit_t*)C);
+
+        cycles1 = cpucycles(); 
+        get_3_isog(P, A, C, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  3-isogeny of projective point runs in ........................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 3-isogeny evaluation at projective point (reuses Q and coeff as left by the benchmarks above)
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        eval_3_isog(Q, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  3-isogeny evaluation at projective point runs in ................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+    
+    return OK;
+}
+
+
+int main()
+{ // Runs all p697 tests and benchmarks. Exit status: 0 if everything passed, 1 otherwise
+    bool OK = true;
+
+    OK = OK && fp_test();          // Test field operations using p697
+    OK = OK && fp_run();           // Benchmark field operations using p697
+
+    OK = OK && fp2_test();         // Test arithmetic functions over GF(p697^2)
+    OK = OK && fp2_run();          // Benchmark arithmetic functions over GF(p697^2)
+    
+    OK = OK && ecisog_run();       // Benchmark elliptic curve and isogeny functions
+
+    return OK ? 0 : 1;             // Returning the bool directly would report success as exit status 1
+}
diff --git a/SIKE_sw/tests/arith_tests-p751.c b/SIKE_sw/tests/arith_tests-p751.c
new file mode 100644
index 0000000..3e43711
--- /dev/null
+++ b/SIKE_sw/tests/arith_tests-p751.c
@@ -0,0 +1,617 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: testing code for field arithmetic, elliptic curve and isogeny functions
+*********************************************************************************************/
+
+#include "../src/config.h"
+#include "../src/P751/P751_internal.h"
+#include "../src/internal.h"
+#include "test_extras.h"
+#include <stdio.h>
+
+
+// Benchmark and test parameters: smaller counts when GENERIC_IMPLEMENTATION is defined or TARGET is TARGET_ARM
+#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) 
+    #define BENCH_LOOPS           100       // Iterations per benchmark (all operations except the inversions)
+    #define SMALL_BENCH_LOOPS     100       // Iterations per inversion benchmark
+    #define TEST_LOOPS             10       // Iterations per correctness test
+#else
+    #define BENCH_LOOPS        100000       // Iterations per benchmark (all operations except the inversions)
+    #define SMALL_BENCH_LOOPS   10000       // Iterations per inversion benchmark
+    #define TEST_LOOPS            100       // Iterations per correctness test
+#endif
+
+
+bool fp_test()
+{ // Checks the GF(p751) routines against algebraic identities on random inputs. Returns true if every group passes, false at the first failing group
+    bool OK = true;
+    int n, passed;
+    felm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;       // The m-prefixed elements are the ones handed to the *_mont routines
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Testing field arithmetic over GF(p751): \n\n"); 
+
+    // Field addition over the prime p751: associativity, commutativity, identity, additive inverse
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom751_test(a); fprandom751_test(b); fprandom751_test(c); fprandom751_test(d); fprandom751_test(e); fprandom751_test(f); 
+
+        fpadd751(a, b, d); fpadd751(d, c, e);                 // e = (a+b)+c
+        fpadd751(b, c, d); fpadd751(d, a, f);                 // f = a+(b+c)
+        fpcorrection751(e);                                   // presumably brings e to its canonical representative before the word-wise compare -- TODO confirm
+        fpcorrection751(f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpadd751(a, b, d);                                     // d = a+b 
+        fpadd751(b, a, e);                                     // e = b+a
+        fpcorrection751(d);
+        fpcorrection751(e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero751(b);                                          // b = 0 for the rest of this iteration
+        fpadd751(a, b, d);                                     // d = a+0 
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero751(b);                                          // b is zeroed again (it was not modified since the previous fpzero751)
+        fpcopy751(a, d);     
+        fpneg751(d);                      
+        fpadd751(a, d, e);                                     // e = a+(-a)
+        fpcorrection751(e);
+        if (compare_words(e, b, NWORDS_FIELD)!=0) { passed=0; break; }    // e is compared against b = 0
+    }
+    if (passed==1) printf("  GF(p) addition tests ............................................ PASSED");
+    else { printf("  GF(p) addition tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field subtraction over the prime p751: same structure as the addition group
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom751_test(a); fprandom751_test(b); fprandom751_test(c); fprandom751_test(d); fprandom751_test(e); fprandom751_test(f); 
+
+        fpsub751(a, b, d); fpsub751(d, c, e);                 // e = (a-b)-c
+        fpadd751(b, c, d); fpsub751(a, d, f);                 // f = a-(b+c)
+        fpcorrection751(e);
+        fpcorrection751(f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpsub751(a, b, d);                                     // d = a-b 
+        fpsub751(b, a, e);                                         
+        fpneg751(e);                                           // e = -(b-a)
+        fpcorrection751(d);
+        fpcorrection751(e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero751(b);                                          // b = 0 for the rest of this iteration
+        fpsub751(a, b, d);                                     // d = a-0 
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero751(b);
+        fpcopy751(a, d);                 
+        fpsub751(a, d, e);                                     // e = a-a  (d is a copy of a)
+        fpcorrection751(e);
+        if (compare_words(e, b, NWORDS_FIELD)!=0) { passed=0; break; }    // e is compared against b = 0
+    }
+    if (passed==1) printf("  GF(p) subtraction tests ......................................... PASSED");
+    else { printf("  GF(p) subtraction tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field multiplication over the prime p751: to_mont/from_mont round trip, associativity, distributivity, commutativity, a*1, a*0
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {    
+        fprandom751_test(a); fprandom751_test(b); fprandom751_test(c);  
+        fprandom751_test(ma); fprandom751_test(mb); fprandom751_test(mc); fprandom751_test(md); fprandom751_test(me); fprandom751_test(mf); 
+
+        to_mont(a, ma);
+        fpcopy751(ma, mc);
+        from_mont(mc, c);                                      // c = from_mont(to_mont(a)), expected to equal a
+        if (compare_words(a, c, NWORDS_FIELD)!=0) { 
+        passed=0; break; }
+        
+        to_mont(a, ma); to_mont(b, mb); to_mont(c, mc); 
+        fpmul751_mont(ma, mb, md); fpmul751_mont(md, mc, me);                          // e = (a*b)*c
+        fpmul751_mont(mb, mc, md); fpmul751_mont(md, ma, mf);                          // f = a*(b*c)
+        from_mont(me, e);
+        from_mont(mf, f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+      
+        to_mont(a, ma); to_mont(b, mb); to_mont(c, mc); 
+        fpadd751(mb, mc, md); fpmul751_mont(ma, md, me);                               // e = a*(b+c)
+        fpmul751_mont(ma, mb, md); fpmul751_mont(ma, mc, mf); fpadd751(md, mf, mf);    // f = a*b+a*c
+        from_mont(me, e);
+        from_mont(mf, f);
+        if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
+       
+        to_mont(a, ma); to_mont(b, mb);
+        fpmul751_mont(ma, mb, md);                                                      // d = a*b 
+        fpmul751_mont(mb, ma, me);                                                      // e = b*a 
+        from_mont(md, d);
+        from_mont(me, e);
+        if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_mont(a, ma);
+        fpzero751(b); b[0] = 1; to_mont(b, mb);                                         // b = 1
+        fpmul751_mont(ma, mb, md);                                                      // d = a*1  
+        from_mont(ma, a);                                                               // a is rewritten from ma before the compare
+        from_mont(md, d);                
+        if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fpzero751(b); to_mont(b, mb);                                                   // b = 0
+        fpmul751_mont(ma, mb, md);                                                      // d = a*0  
+        from_mont(mb, b);
+        from_mont(md, d);                
+        if (compare_words(b, d, NWORDS_FIELD)!=0) { passed=0; break; } 
+    }
+    if (passed==1) printf("  GF(p) multiplication tests ...................................... PASSED");
+    else { printf("  GF(p) multiplication tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Field squaring over the prime p751: results are compared directly, without from_mont
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom751_test(a);
+        
+        to_mont(a, ma);
+        fpsqr751_mont(ma, mb);                                 // b = a^2
+        fpmul751_mont(ma, ma, mc);                             // c = a*a 
+        if (compare_words(mb, mc, NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fpzero751(a); to_mont(a, ma);
+        fpsqr751_mont(ma, md);                                 // d = 0^2 
+        if (compare_words(ma, md, NWORDS_FIELD)!=0) { passed=0; break; }    // 0^2 is compared against ma = to_mont(0)
+    }
+    if (passed==1) printf("  GF(p) squaring tests............................................. PASSED");
+    else { printf("  GF(p) squaring tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    // Field inversion over the prime p751: a*a^-1 is compared against md = to_mont(1)
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fprandom751_test(a); 
+        to_mont(a, ma);
+        fpzero751(d); d[0]=1; to_mont(d, md);
+        fpcopy751(ma, mb);                                     // keep a copy: fpinv751_mont takes a single argument, ma
+        fpinv751_mont(ma);                                
+        fpmul751_mont(ma, mb, mc);                             // c = a*a^-1 
+        if (compare_words(mc, md, NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p) inversion tests............................................ PASSED");
+    else { printf("  GF(p) inversion tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    return OK;                                                 // OK is never cleared; every failure path returned false above
+}
+
+
+bool fp2_test()
+{ // Checks the GF(p751^2) routines against algebraic identities on random inputs. Returns true if every group passes, false at the first failing group
+    bool OK = true;
+    int n, passed;
+    f2elm_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;      // The m-prefixed elements are the ones handed to the *_mont routines
+
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Testing quadratic extension arithmetic over GF(p751^2): \n\n"); 
+
+    // Addition over GF(p751^2): associativity, commutativity, identity, additive inverse
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random751_test((digit_t*)a); fp2random751_test((digit_t*)b); fp2random751_test((digit_t*)c); fp2random751_test((digit_t*)d); fp2random751_test((digit_t*)e); fp2random751_test((digit_t*)f); 
+
+        fp2add751(a, b, d); fp2add751(d, c, e);                 // e = (a+b)+c
+        fp2add751(b, c, d); fp2add751(d, a, f);                 // f = a+(b+c)
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }    // 2*NWORDS_FIELD words: the whole f2elm_t is compared
+
+        fp2add751(a, b, d);                                     // d = a+b 
+        fp2add751(b, a, e);                                     // e = b+a
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero751(b);                                          // b = 0 for the rest of this iteration
+        fp2add751(a, b, d);                                     // d = a+0 
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero751(b);
+        fp2copy751(a, d);     
+        fp2neg751(d);                      
+        fp2add751(a, d, e);                                     // e = a+(-a)
+        if (compare_words((digit_t*)e, (digit_t*)b, 2*NWORDS_FIELD)!=0) { passed=0; break; }    // e is compared against b = 0
+    }
+    if (passed==1) printf("  GF(p^2) addition tests .......................................... PASSED");
+    else { printf("  GF(p^2) addition tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Subtraction over GF(p751^2): same structure as the addition group
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random751_test((digit_t*)a); fp2random751_test((digit_t*)b); fp2random751_test((digit_t*)c); fp2random751_test((digit_t*)d); fp2random751_test((digit_t*)e); fp2random751_test((digit_t*)f); 
+
+        fp2sub751(a, b, d); fp2sub751(d, c, e);                 // e = (a-b)-c
+        fp2add751(b, c, d); fp2sub751(a, d, f);                 // f = a-(b+c)
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2sub751(a, b, d);                                     // d = a-b 
+        fp2sub751(b, a, e);                                         
+        fp2neg751(e);                                           // e = -(b-a)
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero751(b);                                          // b = 0 for the rest of this iteration
+        fp2sub751(a, b, d);                                     // d = a-0 
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero751(b);
+        fp2copy751(a, d);                 
+        fp2sub751(a, d, e);                                     // e = a-a  (d is a copy of a)
+        if (compare_words((digit_t*)e, (digit_t*)b, 2*NWORDS_FIELD)!=0) { passed=0; break; }    // e is compared against b = 0
+    }
+    if (passed==1) printf("  GF(p^2) subtraction tests ....................................... PASSED");
+    else { printf("  GF(p^2) subtraction tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Multiplication over GF(p751^2): to_fp2mont/from_fp2mont round trip, associativity, distributivity, commutativity, a*1, a*0
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {    
+        fp2random751_test((digit_t*)a); fp2random751_test((digit_t*)b); fp2random751_test((digit_t*)c);  
+        fp2random751_test((digit_t*)ma); fp2random751_test((digit_t*)mb); fp2random751_test((digit_t*)mc); fp2random751_test((digit_t*)md); fp2random751_test((digit_t*)me); fp2random751_test((digit_t*)mf); 
+
+        to_fp2mont(a, ma);
+        fp2copy751(ma, mc);
+        from_fp2mont(mc, c);                                    // c = from_fp2mont(to_fp2mont(a)), expected to equal a
+        if (compare_words((digit_t*)a, (digit_t*)c, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_fp2mont(a, ma); to_fp2mont(b, mb); to_fp2mont(c, mc); 
+        fp2mul751_mont(ma, mb, md); fp2mul751_mont(md, mc, me);                          // e = (a*b)*c
+        fp2mul751_mont(mb, mc, md); fp2mul751_mont(md, ma, mf);                          // f = a*(b*c)
+        from_fp2mont(me, e);
+        from_fp2mont(mf, f);
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+      
+        to_fp2mont(a, ma); to_fp2mont(b, mb); to_fp2mont(c, mc); 
+        fp2add751(mb, mc, md); fp2mul751_mont(ma, md, me);                               // e = a*(b+c)
+        fp2mul751_mont(ma, mb, md); fp2mul751_mont(ma, mc, mf); fp2add751(md, mf, mf);   // f = a*b+a*c
+        from_fp2mont(me, e);
+        from_fp2mont(mf, f);
+        if (compare_words((digit_t*)e, (digit_t*)f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+       
+        to_fp2mont(a, ma); to_fp2mont(b, mb);
+        fp2mul751_mont(ma, mb, md);                                                      // d = a*b 
+        fp2mul751_mont(mb, ma, me);                                                      // e = b*a 
+        from_fp2mont(md, d);
+        from_fp2mont(me, e);
+        if (compare_words((digit_t*)d, (digit_t*)e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        to_fp2mont(a, ma);
+        fp2zero751(b); b[0][0] = 1; to_fp2mont(b, mb);                                   // b = 1: only word 0 of the first component is set
+        fp2mul751_mont(ma, mb, md);                                                      // d = a*1  
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+        
+        fp2zero751(b); to_fp2mont(b, mb);                                                // b = 0
+        fp2mul751_mont(ma, mb, md);                                                      // d = a*0 
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)b, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; } 
+    }
+    if (passed==1) printf("  GF(p^2) multiplication tests .................................... PASSED");
+    else { printf("  GF(p^2) multiplication tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+
+    // Squaring over GF(p751^2): fp2sqr751_mont against fp2mul751_mont of an element with itself, then 0^2
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random751_test((digit_t*)a);
+        
+        to_fp2mont(a, ma);
+        fp2sqr751_mont(ma, mb);                                 // b = a^2
+        fp2mul751_mont(ma, ma, mc);                             // c = a*a 
+        from_fp2mont(mb, b);               
+        from_fp2mont(mc, c);               
+        if (compare_words((digit_t*)b, (digit_t*)c, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+
+        fp2zero751(a); to_fp2mont(a, ma);
+        fp2sqr751_mont(ma, md);                                 // d = 0^2 
+        from_fp2mont(md, d);               
+        if (compare_words((digit_t*)a, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }    // d is compared against a = 0
+    }
+    if (passed==1) printf("  GF(p^2) squaring tests........................................... PASSED");
+    else { printf("  GF(p^2) squaring tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    // Inversion over GF(p751^2): from_fp2mont(a*a^-1) is compared against d = 1
+    passed = 1;
+    for (n=0; n<TEST_LOOPS; n++)
+    {
+        fp2random751_test((digit_t*)a);    
+        
+        to_fp2mont(a, ma);
+        fp2zero751(d); d[0][0]=1; to_fp2mont(d, md);            // d = 1; md is computed but the compare below uses d
+        fp2copy751(ma, mb);                                     // keep a copy: fp2inv751_mont takes a single argument, ma
+        fp2inv751_mont(ma);                                
+        fp2mul751_mont(ma, mb, mc);                             // c = a*a^-1              
+        from_fp2mont(mc, c);  
+        if (compare_words((digit_t*)c, (digit_t*)d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
+    }
+    if (passed==1) printf("  GF(p^2) inversion tests.......................................... PASSED");
+    else { printf("  GF(p^2) inversion tests... FAILED"); printf("\n"); return false; }
+    printf("\n");
+    
+    return OK;                                                  // OK is never cleared; every failure path returned false above
+}
+
+
+bool fp_run()
+{ // Benchmarks the GF(p751) routines: prints, per operation, the average cpucycles() difference around one call. Always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;           // unsigned accumulators, hence the %llu conversions below
+    felm_t a, b, c;
+    dfelm_t aa;
+        
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking field arithmetic over GF(p751): \n\n"); 
+        
+    fprandom751_test(a); fprandom751_test(b); fprandom751_test(c);
+
+    // GF(p) addition using p751
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpadd751(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) addition runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) subtraction using p751
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpsub751(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) subtraction runs in ....................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) multiplication using p751
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpmul751_mont(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) multiplication runs in .................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) reduction using p751
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        mp_mul(a, b, aa, NWORDS_FIELD);                        // aa is recomputed outside the timed region on every iteration
+
+        cycles1 = cpucycles(); 
+        rdc_mont(aa, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) reduction runs in ......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p) inversion (fewer iterations: SMALL_BENCH_LOOPS); a is inverted in place each time
+    cycles = 0;
+    for (n=0; n<SMALL_BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fpinv751_mont(a);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p) inversion (exponentiation) runs in ........................ %7llu ", cycles/SMALL_BENCH_LOOPS); print_unit;
+    printf("\n");
+    
+    return OK;
+}
+
+
+bool fp2_run()
+{ // Benchmarks the GF(p751^2) routines: prints, per operation, the average cpucycles() difference around one call. Always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;           // unsigned accumulators, hence the %llu conversions below
+    f2elm_t a, b, c;
+        
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking quadratic extension arithmetic over GF(p751^2): \n\n"); 
+    
+    fp2random751_test((digit_t*)a); fp2random751_test((digit_t*)b); fp2random751_test((digit_t*)c);
+
+    // GF(p^2) addition
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2add751(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) addition runs in ........................................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) subtraction
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2sub751(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) subtraction runs in ..................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) multiplication
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2mul751_mont(a, b, c);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) multiplication runs in .................................. %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) squaring (result goes to b, so a is left unchanged for the inversion benchmark)
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2sqr751_mont(a, b);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) squaring runs in ........................................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // GF(p^2) inversion (fewer iterations: SMALL_BENCH_LOOPS); a is inverted in place each time
+    cycles = 0;
+    for (n=0; n<SMALL_BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        fp2inv751_mont(a);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  GF(p^2) inversion (exponentiation) runs in ...................... %7llu ", cycles/SMALL_BENCH_LOOPS); print_unit;
+    printf("\n"); 
+    
+    return OK;
+}
+
+
+bool ecisog_run()
+{ // Benchmarks the curve/isogeny routines: prints, per operation, the average cpucycles() difference around one call. Always returns true
+    bool OK = true;
+    int n;
+    unsigned long long cycles, cycles1, cycles2;           // unsigned accumulators, hence the %llu conversions below
+    f2elm_t A24, C24, A4, A, C = {0}, coeff[5] = {0};      // C and coeff[3..4] reach library calls before this function writes them
+    point_proj_t P = {0}, Q = {0};                         // likewise P and Q: zeroed so no call receives indeterminate stack contents
+        
+    printf("\n--------------------------------------------------------------------------------------------------------\n\n"); 
+    printf("Benchmarking elliptic curve and isogeny functions: \n\n"); 
+
+    // Point doubling (curve constants re-randomized outside the timed region)
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random751_test((digit_t*)A24); fp2random751_test((digit_t*)C24);
+
+        cycles1 = cpucycles(); 
+        xDBL(P, Q, A24, C24);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  Point doubling runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 4-isogeny of a projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random751_test((digit_t*)A); fp2random751_test((digit_t*)coeff[0]); fp2random751_test((digit_t*)coeff[1]); fp2random751_test((digit_t*)coeff[2]);
+
+        cycles1 = cpucycles(); 
+        get_4_isog(P, A, C, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  4-isogeny of projective point runs in ........................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 4-isogeny evaluation at projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random751_test((digit_t*)A); fp2random751_test((digit_t*)coeff[0]); fp2random751_test((digit_t*)coeff[1]); fp2random751_test((digit_t*)coeff[2]);
+
+        cycles1 = cpucycles(); 
+        eval_4_isog(P, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  4-isogeny evaluation at projective point runs in ................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // Point tripling
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random751_test((digit_t*)A4); fp2random751_test((digit_t*)C);
+
+        cycles1 = cpucycles(); 
+        xTPL(P, Q, A4, C);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  Point tripling runs in .......................................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 3-isogeny of a projective point
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        fp2random751_test((digit_t*)A); fp2random751_test((digit_t*)C);
+
+        cycles1 = cpucycles(); 
+        get_3_isog(P, A, C, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  3-isogeny of projective point runs in ........................... %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    // 3-isogeny evaluation at projective point (reuses Q and coeff as left by the loops above; nothing is re-randomized)
+    cycles = 0;
+    for (n=0; n<BENCH_LOOPS; n++)
+    {
+        cycles1 = cpucycles(); 
+        eval_3_isog(Q, coeff);
+        cycles2 = cpucycles();
+        cycles = cycles+(cycles2-cycles1);
+    }
+    printf("  3-isogeny evaluation at projective point runs in ................ %7llu ", cycles/BENCH_LOOPS); print_unit;
+    printf("\n");
+    
+    return OK;
+}
+
+
+int main()
+{ // Runs the p751 tests and benchmarks in order. Exit status: 0 if every stage returned true, 1 otherwise
+    bool OK = true;
+    // && short-circuits: once a stage returns false, none of the later stages is called
+    OK = OK && fp_test();          // Test field operations using p751
+    OK = OK && fp_run();           // Benchmark field operations using p751
+
+    OK = OK && fp2_test();         // Test arithmetic functions over GF(p751^2)
+    OK = OK && fp2_run();          // Benchmark arithmetic functions over GF(p751^2)
+    
+    OK = OK && ecisog_run();       // Benchmark elliptic curve and isogeny functions
+
+    return OK ? 0 : 1;             // Returning the bool itself made a fully passing run exit with status 1
+}
diff --git a/SIKE_sw/tests/test_SIKEp377.c b/SIKE_sw/tests/test_SIKEp377.c
new file mode 100644
index 0000000..e273879
--- /dev/null
+++ b/SIKE_sw/tests/test_SIKEp377.c
@@ -0,0 +1,17 @@
+/********************************************************************************************
+* NEW benchmarking/testing isogeny-based key encapsulation mechanism SIKEp377
+*********************************************************************************************/ 
+
+#include <stdio.h>
+#include <string.h>
+#include "test_extras.h"
+#include "../src/P377/P377_api.h"
+
+
+#define SCHEME_NAME    "SIKEp377"    // Scheme label; presumably consumed by the shared test source included below
+
+#define crypto_kem_keypair            crypto_kem_keypair_SIKEp377    // Generic KEM names are rebound to the SIKEp377-suffixed symbols
+#define crypto_kem_enc                crypto_kem_enc_SIKEp377
+#define crypto_kem_dec                crypto_kem_dec_SIKEp377
+
+#include "test_sike.c"    // Shared test source, included textually after the macros above so that it sees them
\ No newline at end of file
diff --git a/SIKE_sw/tests/test_SIKEp434.c b/SIKE_sw/tests/test_SIKEp434.c
new file mode 100644
index 0000000..fce38b4
--- /dev/null
+++ b/SIKE_sw/tests/test_SIKEp434.c
@@ -0,0 +1,19 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: benchmarking/testing isogeny-based key encapsulation mechanism SIKEp434
+*********************************************************************************************/ 
+
+#include <stdio.h>
+#include <string.h>
+#include "test_extras.h"
+#include "../src/P434/P434_api.h"
+
+
+#define SCHEME_NAME    "SIKEp434"    // Scheme label; presumably consumed by the shared test source included below
+
+#define crypto_kem_keypair            crypto_kem_keypair_SIKEp434    // Generic KEM names are rebound to the SIKEp434-suffixed symbols
+#define crypto_kem_enc                crypto_kem_enc_SIKEp434
+#define crypto_kem_dec                crypto_kem_dec_SIKEp434
+
+#include "test_sike.c"    // Shared test source, included textually after the macros above so that it sees them
\ No newline at end of file
diff --git a/SIKE_sw/tests/test_SIKEp503.c b/SIKE_sw/tests/test_SIKEp503.c
new file mode 100644
index 0000000..e1e9d0e
--- /dev/null
+++ b/SIKE_sw/tests/test_SIKEp503.c
@@ -0,0 +1,19 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: benchmarking/testing isogeny-based key encapsulation mechanism SIKEp503
+*********************************************************************************************/ 
+
+#include <stdio.h>
+#include <string.h>
+#include "test_extras.h"
+#include "../src/P503/P503_api.h"
+
+
+#define SCHEME_NAME    "SIKEp503"    // Scheme label; presumably consumed by the shared test source included below
+
+#define crypto_kem_keypair            crypto_kem_keypair_SIKEp503    // Generic KEM names are rebound to the SIKEp503-suffixed symbols
+#define crypto_kem_enc                crypto_kem_enc_SIKEp503
+#define crypto_kem_dec                crypto_kem_dec_SIKEp503
+
+#include "test_sike.c"    // Shared test source, included textually after the macros above so that it sees them
\ No newline at end of file
diff --git a/SIKE_sw/tests/test_SIKEp546.c b/SIKE_sw/tests/test_SIKEp546.c
new file mode 100644
index 0000000..2735d1e
--- /dev/null
+++ b/SIKE_sw/tests/test_SIKEp546.c
@@ -0,0 +1,17 @@
+/********************************************************************************************
+* NEW benchmarking/testing isogeny-based key encapsulation mechanism SIKEp546
+*********************************************************************************************/ 
+
+#include <stdio.h>
+#include <string.h>
+#include "test_extras.h"
+#include "../src/P546/P546_api.h"
+
+
+#define SCHEME_NAME    "SIKEp546"    // Scheme label; presumably consumed by the shared test source included below
+
+#define crypto_kem_keypair            crypto_kem_keypair_SIKEp546    // Generic KEM names are rebound to the SIKEp546-suffixed symbols
+#define crypto_kem_enc                crypto_kem_enc_SIKEp546
+#define crypto_kem_dec                crypto_kem_dec_SIKEp546
+
+#include "test_sike.c"    // Shared test source, included textually after the macros above so that it sees them
\ No newline at end of file
diff --git a/SIKE_sw/tests/test_SIKEp610.c b/SIKE_sw/tests/test_SIKEp610.c
new file mode 100644
index 0000000..b52b0d1
--- /dev/null
+++ b/SIKE_sw/tests/test_SIKEp610.c
@@ -0,0 +1,19 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: benchmarking/testing isogeny-based key encapsulation mechanism SIKEp610
+*********************************************************************************************/ 
+
+#include <stdio.h>
+#include <string.h>
+#include "test_extras.h"
+#include "../src/P610/P610_api.h"
+
+
+#define SCHEME_NAME    "SIKEp610"    // Scheme label; presumably consumed by the shared test source included below
+
+#define crypto_kem_keypair            crypto_kem_keypair_SIKEp610    // Generic KEM names are rebound to the SIKEp610-suffixed symbols
+#define crypto_kem_enc                crypto_kem_enc_SIKEp610
+#define crypto_kem_dec                crypto_kem_dec_SIKEp610
+
+#include "test_sike.c"    // Shared test source, included textually after the macros above so that it sees them
\ No newline at end of file
diff --git a/SIKE_sw/tests/test_SIKEp697.c b/SIKE_sw/tests/test_SIKEp697.c
new file mode 100644
index 0000000..fd6a4ee
--- /dev/null
+++ b/SIKE_sw/tests/test_SIKEp697.c
@@ -0,0 +1,17 @@
+/********************************************************************************************
+* NEW benchmarking/testing isogeny-based key encapsulation mechanism SIKEp697
+*********************************************************************************************/ 
+
+#include <stdio.h>
+#include <string.h>
+#include "test_extras.h"
+#include "../src/P697/P697_api.h"
+
+
+#define SCHEME_NAME    "SIKEp697"    // Scheme label; presumably consumed by the shared test source included below
+
+#define crypto_kem_keypair            crypto_kem_keypair_SIKEp697    // Generic KEM names are rebound to the SIKEp697-suffixed symbols
+#define crypto_kem_enc                crypto_kem_enc_SIKEp697
+#define crypto_kem_dec                crypto_kem_dec_SIKEp697
+
+#include "test_sike.c"    // Shared test source, included textually after the macros above so that it sees them
\ No newline at end of file
diff --git a/SIKE_sw/tests/test_SIKEp751.c b/SIKE_sw/tests/test_SIKEp751.c
new file mode 100644
index 0000000..1a1bfaf
--- /dev/null
+++ b/SIKE_sw/tests/test_SIKEp751.c
@@ -0,0 +1,19 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: benchmarking/testing isogeny-based key encapsulation mechanism SIKEp751
+*********************************************************************************************/ 
+
+#include <stdio.h>
+#include <string.h>
+#include "test_extras.h"
+#include "../src/P751/P751_api.h"
+
+
+#define SCHEME_NAME    "SIKEp751"    // Scheme label; presumably consumed by the shared test source included below
+
+#define crypto_kem_keypair            crypto_kem_keypair_SIKEp751    // Generic KEM names are rebound to the SIKEp751-suffixed symbols
+#define crypto_kem_enc                crypto_kem_enc_SIKEp751
+#define crypto_kem_dec                crypto_kem_dec_SIKEp751
+
+#include "test_sike.c"    // Shared test source, included textually after the macros above so that it sees them
\ No newline at end of file
diff --git a/SIKE_sw/tests/test_extras.c b/SIKE_sw/tests/test_extras.c
new file mode 100644
index 0000000..643edeb
--- /dev/null
+++ b/SIKE_sw/tests/test_extras.c
@@ -0,0 +1,283 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license
+*
+* Abstract: utility functions for testing and benchmarking
+*********************************************************************************************/
+
+#include "test_extras.h"
+#if (OS_TARGET == OS_WIN)
+    #include <intrin.h>
+    #include <windows.h>
+#elif (OS_TARGET == OS_LINUX)
+    #if (TARGET == TARGET_ARM64)
+        #include <time.h>
+    #endif
+    #include <unistd.h>
+#endif
+#include <stdlib.h>     
+// Field primes as 64-bit words, least significant word first. NOTE(review): they are read through (digit_t*) casts below, which presumably assumes a little-endian target when digit_t is 32 bits - confirm
+static uint64_t p377[6]  = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x0B46D546BC2A5699, 0xA879CC6988CE7CF5, 0x015B702E0C542196 };
+static uint64_t p434[7]  = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFDC1767AE2FFFFFF, 
+                             0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344 };
+static uint64_t p503[8]  = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xABFFFFFFFFFFFFFF, 
+                             0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E };
+static uint64_t p546[9]  = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xC1CCF59098E1FFFF, 
+                             0x91CA3591A0810F4F, 0xC3A747738CBAAD7D, 0x3E568459654D5F6B, 0x000000030F5EBA42 };
+static uint64_t p610[10] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x6E01FFFFFFFFFFFF, 
+                             0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768 };
+static uint64_t p697[11] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x604054AFFFFFFFFF,
+                             0xDF4970CF7313736F, 0x719AEC973BF54225, 0x40E474DA88B90FFE, 0x9A0E279D6CEB3C8E, 0x01B39F97671708CF };
+static uint64_t p751[12] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF,
+                             0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C };
+// Bit length of each field prime; the generators below use it to derive the digit count and the top-digit mask
+#define NBITS_FIELD377    377
+#define NBITS_FIELD434    434
+#define NBITS_FIELD503    503
+#define NBITS_FIELD546    546
+#define NBITS_FIELD610    610
+#define NBITS_FIELD697    697
+#define NBITS_FIELD751    751
+
+
+int64_t cpucycles(void)
+{ // Access system counter for benchmarking: TSC ticks on x86/x64, nanoseconds on ARM64 Linux, 0 on any other target
+#if (OS_TARGET == OS_WIN) && (TARGET == TARGET_AMD64 || TARGET == TARGET_x86)
+    return __rdtsc();
+#elif (OS_TARGET == OS_LINUX) && (TARGET == TARGET_AMD64 || TARGET == TARGET_x86)
+    unsigned int hi, lo;
+
+    __asm volatile ("rdtsc\n\t" : "=a" (lo), "=d"(hi));
+    return ((int64_t)lo) | (((int64_t)hi) << 32);
+#elif (OS_TARGET == OS_LINUX) && (TARGET == TARGET_ARM64)
+    struct timespec time;
+
+    clock_gettime(CLOCK_REALTIME, &time);    // Combine in 64-bit integers: as a double, tv_sec*1e9 exceeds 2^53 and loses the low nanosecond bits
+    return (int64_t)time.tv_sec*1000000000 + (int64_t)time.tv_nsec;
+#else
+    return 0;
+#endif
+}
+
+
+int compare_words(digit_t* a, digit_t* b, unsigned int nwords)
+{ // Compares two "nwords"-digit values (most significant digit at the highest index): returns 1 if a>b, 0 if a=b, -1 if a<b
+  // SECURITY NOTE: the running time depends on the operands. TO BE USED FOR TESTING ONLY.
+    int idx = nwords-1;
+
+    while (idx >= 0) {                                      // Walk down from the most significant digit
+        if (a[idx] != b[idx]) {
+            return (a[idx] > b[idx]) ? 1 : -1;              // The first differing digit decides the result
+        }
+        idx--;
+    }
+    return 0;
+}
+
+
+static void sub_test(digit_t* a, digit_t* b, digit_t* c, unsigned int nwords)
+{ // Multiprecision subtraction c = a-b over "nwords" digits; the final borrow is discarded, so callers are expected to pass a >= b
+  // SECURITY NOTE: this function does not have constant-time execution. It is for TESTING ONLY.
+    unsigned int i;
+    digit_t res, carry, borrow = 0;
+  // a[i] and b[i] are read before c[i] is written, so c may alias a (the callers in this file pass c == a)
+    for (i = 0; i < nwords; i++)
+    {
+        res = a[i] - b[i];                                  // Digit difference, wraps around when a[i] < b[i]
+        carry = (a[i] < b[i]);                              // Borrow produced by the digit difference itself
+        c[i] = res - borrow;                                // Apply the borrow coming from the previous digit
+        borrow = carry || (res < borrow);                   // Borrow out: from either of the two subtractions
+    } 
+}
+
+
+void fprandom377_test(digit_t* a)
+{ // Writes a pseudo-random field element in [0, p377-1] into "a"
+  // SECURITY NOTE: relies on rand() and the distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+    const unsigned int nwords = NBITS_TO_NWORDS(NBITS_FIELD377), shift = 384-NBITS_FIELD377;
+    unsigned char* bytes = (unsigned char*)a;
+    unsigned int k;
+
+    for (k = 0; k < sizeof(digit_t)*nwords; k++) {          // Randomize every byte of the buffer (384-bit number)
+        bytes[k] = (unsigned char)rand();
+    }
+    a[nwords-1] &= (((digit_t)(-1) << shift) >> shift);     // Clear the top "shift" bits of the most significant digit
+
+    while (compare_words((digit_t*)p377, a, nwords) < 1) {  // While a >= p377, subtract p377 to reach [0, p377-1]
+        sub_test(a, (digit_t*)p377, a, nwords);
+    }
+}
+
+
+void fprandom434_test(digit_t* a)
+{ // Writes a pseudo-random field element in [0, p434-1] into "a"
+  // SECURITY NOTE: relies on rand() and the distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+    const unsigned int nwords = NBITS_TO_NWORDS(NBITS_FIELD434), shift = 448-NBITS_FIELD434;
+    unsigned char* bytes = (unsigned char*)a;
+    unsigned int k;
+
+    for (k = 0; k < sizeof(digit_t)*nwords; k++) {          // Randomize every byte of the buffer (448-bit number)
+        bytes[k] = (unsigned char)rand();
+    }
+    a[nwords-1] &= (((digit_t)(-1) << shift) >> shift);     // Clear the top "shift" bits of the most significant digit
+
+    while (compare_words((digit_t*)p434, a, nwords) < 1) {  // While a >= p434, subtract p434 to reach [0, p434-1]
+        sub_test(a, (digit_t*)p434, a, nwords);
+    }
+}
+
+
+void fprandom503_test(digit_t* a)
+{ // Writes a pseudo-random field element in [0, p503-1] into "a"
+  // SECURITY NOTE: relies on rand() and the distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+    const unsigned int nwords = NBITS_TO_NWORDS(NBITS_FIELD503), shift = 512-NBITS_FIELD503;
+    unsigned char* bytes = (unsigned char*)a;
+    unsigned int k;
+
+    for (k = 0; k < sizeof(digit_t)*nwords; k++) {          // Randomize every byte of the buffer (512-bit number)
+        bytes[k] = (unsigned char)rand();
+    }
+    a[nwords-1] &= (((digit_t)(-1) << shift) >> shift);     // Clear the top "shift" bits of the most significant digit
+
+    while (compare_words((digit_t*)p503, a, nwords) < 1) {  // While a >= p503, subtract p503 to reach [0, p503-1]
+        sub_test(a, (digit_t*)p503, a, nwords);
+    }
+}
+
+
+void fprandom546_test(digit_t* a)
+{ // Writes a pseudo-random field element in [0, p546-1] into "a"
+  // SECURITY NOTE: relies on rand() and the distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+    const unsigned int nwords = NBITS_TO_NWORDS(NBITS_FIELD546), shift = 576-NBITS_FIELD546;
+    unsigned char* bytes = (unsigned char*)a;
+    unsigned int k;
+
+    for (k = 0; k < sizeof(digit_t)*nwords; k++) {          // Randomize every byte of the buffer (576-bit number)
+        bytes[k] = (unsigned char)rand();
+    }
+    a[nwords-1] &= (((digit_t)(-1) << shift) >> shift);     // Clear the top "shift" bits of the most significant digit
+
+    while (compare_words((digit_t*)p546, a, nwords) < 1) {  // While a >= p546, subtract p546 to reach [0, p546-1]
+        sub_test(a, (digit_t*)p546, a, nwords);
+    }
+}
+
+
+void fprandom610_test(digit_t* a)
+{ // Writes a pseudo-random field element in [0, p610-1] into "a"
+  // SECURITY NOTE: relies on rand() and the distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+    const unsigned int nwords = NBITS_TO_NWORDS(NBITS_FIELD610), shift = 640-NBITS_FIELD610;
+    unsigned char* bytes = (unsigned char*)a;
+    unsigned int k;
+
+    for (k = 0; k < sizeof(digit_t)*nwords; k++) {          // Randomize every byte of the buffer (640-bit number)
+        bytes[k] = (unsigned char)rand();
+    }
+    a[nwords-1] &= (((digit_t)(-1) << shift) >> shift);     // Clear the top "shift" bits of the most significant digit
+
+    while (compare_words((digit_t*)p610, a, nwords) < 1) {  // While a >= p610, subtract p610 to reach [0, p610-1]
+        sub_test(a, (digit_t*)p610, a, nwords);
+    }
+}
+
+
+void fprandom697_test(digit_t* a)
+{ // Writes a pseudo-random field element in [0, p697-1] into "a"
+  // SECURITY NOTE: relies on rand() and the distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+    const unsigned int nwords = NBITS_TO_NWORDS(NBITS_FIELD697), shift = 704-NBITS_FIELD697;
+    unsigned char* bytes = (unsigned char*)a;
+    unsigned int k;
+
+    for (k = 0; k < sizeof(digit_t)*nwords; k++) {          // Randomize every byte of the buffer (704-bit number)
+        bytes[k] = (unsigned char)rand();
+    }
+    a[nwords-1] &= (((digit_t)(-1) << shift) >> shift);     // Clear the top "shift" bits of the most significant digit
+
+    while (compare_words((digit_t*)p697, a, nwords) < 1) {  // While a >= p697, subtract p697 to reach [0, p697-1]
+        sub_test(a, (digit_t*)p697, a, nwords);
+    }
+}
+
+
+void fprandom751_test(digit_t* a)
+{ // Writes a pseudo-random field element in [0, p751-1] into "a"
+  // SECURITY NOTE: relies on rand() and the distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+    const unsigned int nwords = NBITS_TO_NWORDS(NBITS_FIELD751), shift = 768-NBITS_FIELD751;
+    unsigned char* bytes = (unsigned char*)a;
+    unsigned int k;
+
+    for (k = 0; k < sizeof(digit_t)*nwords; k++) {          // Randomize every byte of the buffer (768-bit number)
+        bytes[k] = (unsigned char)rand();
+    }
+    a[nwords-1] &= (((digit_t)(-1) << shift) >> shift);     // Clear the top "shift" bits of the most significant digit
+
+    while (compare_words((digit_t*)p751, a, nwords) < 1) {  // While a >= p751, subtract p751 to reach [0, p751-1]
+        sub_test(a, (digit_t*)p751, a, nwords);
+    }
+}
+
+
+void fp2random377_test(digit_t* a)
+{ // Writes a pseudo-random element of GF(p377^2) into "a" as two consecutive field elements
+  // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+    digit_t* second = a + NBITS_TO_NWORDS(NBITS_FIELD377);  // Second coefficient starts right after the first
+    fprandom377_test(a);
+    fprandom377_test(second);
+}
+
+
+void fp2random434_test(digit_t* a)
+{ // Writes a pseudo-random element of GF(p434^2) into "a" as two consecutive field elements
+  // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+    digit_t* second = a + NBITS_TO_NWORDS(NBITS_FIELD434);  // Second coefficient starts right after the first
+    fprandom434_test(a);
+    fprandom434_test(second);
+}
+
+
+void fp2random503_test(digit_t* a)
+{ // Writes a pseudo-random element of GF(p503^2) into "a" as two consecutive field elements
+  // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+    digit_t* second = a + NBITS_TO_NWORDS(NBITS_FIELD503);  // Second coefficient starts right after the first
+    fprandom503_test(a);
+    fprandom503_test(second);
+}
+
+
+void fp2random546_test(digit_t* a)
+{ // Writes a pseudo-random element of GF(p546^2) into "a" as two consecutive field elements
+  // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+    digit_t* second = a + NBITS_TO_NWORDS(NBITS_FIELD546);  // Second coefficient starts right after the first
+    fprandom546_test(a);
+    fprandom546_test(second);
+}
+
+
+void fp2random610_test(digit_t* a)
+{ // Writes a pseudo-random element of GF(p610^2) into "a" as two consecutive field elements
+  // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+    digit_t* second = a + NBITS_TO_NWORDS(NBITS_FIELD610);  // Second coefficient starts right after the first
+    fprandom610_test(a);
+    fprandom610_test(second);
+}
+
+
+void fp2random697_test(digit_t* a)
+{ // Writes a pseudo-random element of GF(p697^2) into "a" as two consecutive field elements
+  // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+    digit_t* second = a + NBITS_TO_NWORDS(NBITS_FIELD697);  // Second coefficient starts right after the first
+    fprandom697_test(a);
+    fprandom697_test(second);
+}
+
+
+void fp2random751_test(digit_t* a)
+{ // Writes a pseudo-random element of GF(p751^2) into "a" as two consecutive field elements
+  // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
+    digit_t* second = a + NBITS_TO_NWORDS(NBITS_FIELD751);  // Second coefficient starts right after the first
+    fprandom751_test(a);
+    fprandom751_test(second);
+}
\ No newline at end of file
diff --git a/SIKE_sw/tests/test_extras.h b/SIKE_sw/tests/test_extras.h
new file mode 100644
index 0000000..0a784e1
--- /dev/null
+++ b/SIKE_sw/tests/test_extras.h
@@ -0,0 +1,76 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license 
+*
+* Abstract: utility header file for tests
+*********************************************************************************************/  
+
+#ifndef TEST_EXTRAS_H
+#define TEST_EXTRAS_H
+    
+#include "../src/config.h"
+
+#define PASSED    0
+#define FAILURE   1
+
+
+#if (TARGET == TARGET_ARM64)
+    #define print_unit printf("nsec");
+#else
+    #define print_unit printf("cycles");
+#endif
+
+    
+// Access system counter for benchmarking
+int64_t cpucycles(void);
+
+// Comparing "nwords" elements, a=b? : (1) a>b, (0) a=b, (-1) a<b. Not constant-time: for testing only
+int compare_words(digit_t* a, digit_t* b, unsigned int nwords);
+
+// Generating a pseudo-random field element in [0, p377-1] 
+void fprandom377_test(digit_t* a);
+
+// Generating a pseudo-random element in GF(p377^2)
+void fp2random377_test(digit_t* a);
+
+// Generating a pseudo-random field element in [0, p434-1] 
+void fprandom434_test(digit_t* a);
+
+// Generating a pseudo-random element in GF(p434^2)
+void fp2random434_test(digit_t* a);
+
+// Generating a pseudo-random field element in [0, p503-1] 
+void fprandom503_test(digit_t* a);
+
+// Generating a pseudo-random element in GF(p503^2)
+void fp2random503_test(digit_t* a);
+
+// Generating a pseudo-random field element in [0, p546-1] 
+void fprandom546_test(digit_t* a);
+
+// Generating a pseudo-random element in GF(p546^2)
+void fp2random546_test(digit_t* a);
+
+// Generating a pseudo-random field element in [0, p610-1] 
+void fprandom610_test(digit_t* a);
+
+// Generating a pseudo-random element in GF(p610^2)
+void fp2random610_test(digit_t* a);
+
+// Generating a pseudo-random field element in [0, p697-1] 
+void fprandom697_test(digit_t* a);
+
+// Generating a pseudo-random element in GF(p697^2)
+void fp2random697_test(digit_t* a);
+
+// Generating a pseudo-random field element in [0, p751-1] 
+void fprandom751_test(digit_t* a);
+
+// Generating a pseudo-random element in GF(p751^2)
+void fp2random751_test(digit_t* a);
+
+
+#endif
\ No newline at end of file
diff --git a/SIKE_sw/tests/test_sike.c b/SIKE_sw/tests/test_sike.c
new file mode 100644
index 0000000..6bb42fe
--- /dev/null
+++ b/SIKE_sw/tests/test_sike.c
@@ -0,0 +1,132 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library 
+* Copyright (c) Microsoft Corporation
+*
+* Website: https://github.com/microsoft/PQCrypto-SIDH
+* Released under MIT license 
+*
+* Abstract: benchmarking/testing isogeny-based key encapsulation mechanism
+*********************************************************************************************/ 
+
+#include "../src/random/random.h"
+
+
+// Benchmark and test parameters  
+#if defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) 
+    #define BENCH_LOOPS        5      // Number of iterations per bench 
+    #define TEST_LOOPS         5      // Number of iterations per test
+#else
+    #define BENCH_LOOPS       100
+    #define TEST_LOOPS        10      
+#endif
+
+
+int cryptotest_kem()
+{ // Testing KEM: decapsulation must reproduce the encapsulated shared secret, and must NOT reproduce it
+  // once a single bit of the ciphertext has been flipped. Returns PASSED, or FAILURE on the first mismatch.
+    unsigned int i;
+    unsigned char sk[CRYPTO_SECRETKEYBYTES] = {0};
+    unsigned char pk[CRYPTO_PUBLICKEYBYTES] = {0};
+    unsigned char ct[CRYPTO_CIPHERTEXTBYTES] = {0};
+    unsigned char ss[CRYPTO_BYTES] = {0};
+    unsigned char ss_[CRYPTO_BYTES] = {0};
+    uint32_t pos = 0;                      // Random ciphertext byte index; a real uint32_t avoids aliasing a char buffer through uint32_t*
+    bool passed = true;
+
+    printf("\n\nTESTING ISOGENY-BASED KEY ENCAPSULATION MECHANISM %s\n", SCHEME_NAME);
+    printf("--------------------------------------------------------------------------------------------------------\n\n");
+
+    for (i = 0; i < TEST_LOOPS; i++) 
+    {
+        crypto_kem_keypair(pk, sk);
+        crypto_kem_enc(ct, ss, pk);
+        crypto_kem_dec(ss_, ct, sk);
+        
+        if (memcmp(ss, ss_, CRYPTO_BYTES) != 0) {           // Honest run: both sides must agree
+            passed = false;
+            break;
+        }
+
+        // Testing decapsulation after changing one bit of ct
+        randombytes((unsigned char*)&pos, sizeof(pos));     // Byte-wise write into pos is well-defined
+        pos %= CRYPTO_CIPHERTEXTBYTES;
+        ct[pos] ^= 1;
+        crypto_kem_dec(ss_, ct, sk);
+        
+        if (memcmp(ss, ss_, CRYPTO_BYTES) == 0) {           // Corrupted ciphertext must not yield the same secret
+            passed = false;
+            break;
+        }
+    }
+
+    if (passed == true) printf("  KEM tests .................................................... PASSED");
+    else { printf("  KEM tests ... FAILURE"); printf("\n"); return FAILURE; }
+    printf("\n"); 
+
+    return PASSED;
+}
+
+
+int cryptorun_kem()
+{ // Benchmarking KEM: prints the average cpucycles() delta of keygen, encapsulation and decapsulation over BENCH_LOOPS runs
+    unsigned int n;
+    unsigned char sk[CRYPTO_SECRETKEYBYTES] = {0};
+    unsigned char pk[CRYPTO_PUBLICKEYBYTES] = {0};
+    unsigned char ct[CRYPTO_CIPHERTEXTBYTES] = {0};
+    unsigned char ss[CRYPTO_BYTES] = {0};
+    unsigned char ss_[CRYPTO_BYTES] = {0};
+    unsigned long long cycles_keygen = 0, cycles_encaps = 0, cycles_decaps = 0, cycles1, cycles2;
+
+    printf("\n\nBENCHMARKING ISOGENY-BASED KEY ENCAPSULATION MECHANISM %s\n", SCHEME_NAME);
+    printf("--------------------------------------------------------------------------------------------------------\n\n");
+
+    for (n = 0; n < BENCH_LOOPS; n++)
+    {
+        // Benchmarking key generation
+        cycles1 = cpucycles();
+        crypto_kem_keypair(pk, sk);
+        cycles2 = cpucycles();
+        cycles_keygen += cycles2-cycles1;
+        
+        // Benchmarking encapsulation    
+        cycles1 = cpucycles();
+        crypto_kem_enc(ct, ss, pk);
+        cycles2 = cpucycles();
+        cycles_encaps += cycles2-cycles1;
+
+        // Benchmarking decapsulation
+        cycles1 = cpucycles();
+        crypto_kem_dec(ss_, ct, sk);   
+        cycles2 = cpucycles();
+        cycles_decaps += cycles2-cycles1;
+    }
+    // The counters are unsigned long long, so they are printed with %llu rather than %lld
+    printf("  Key generation runs in ....................................... %10llu ", cycles_keygen/BENCH_LOOPS); print_unit;
+    printf("\n");
+    printf("  Encapsulation runs in ........................................ %10llu ", cycles_encaps/BENCH_LOOPS); print_unit;
+    printf("\n");        
+    printf("  Decapsulation runs in ........................................ %10llu ", cycles_decaps/BENCH_LOOPS); print_unit;
+    printf("\n");
+
+    return PASSED;
+}
+
+
+int main()
+{ // Entry point: runs the KEM functional test and then the KEM benchmark.
+  // Returns PASSED when both stages succeed, FAILURE as soon as one of them fails.
+
+    // Test key encapsulation mechanism
+    if (cryptotest_kem() != PASSED) {
+        printf("\n\n   Error detected: KEM_ERROR_SHARED_KEY \n\n");
+        return FAILURE;
+    }
+
+    // Benchmark key encapsulation mechanism
+    if (cryptorun_kem() != PASSED) {
+        printf("\n\n   Error detected: KEM_ERROR_SHARED_KEY \n\n");
+        return FAILURE;
+    }
+
+    return PASSED;
+}
\ No newline at end of file
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_add.v b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_add.v
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_add.v
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_add.v
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_sub.v b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_sub.v
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_sub.v
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_sub.v
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/.gitignore b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/.gitignore
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/.gitignore
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/.gitignore
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Makefile b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Makefile
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Makefile
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Makefile
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Montgomery_multiplier_tb.v b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Montgomery_multiplier_tb.v
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Montgomery_multiplier_tb.v
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/Montgomery_multiplier_tb.v
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_add.sage b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_add.sage
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_add.sage
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_add.sage
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_sub.sage b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_sub.sage
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_sub.sage
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Montgomery_multiplier_tb/gen_test_sub.sage
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/README b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/README
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/README
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/README
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/.gitignore
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/.gitignore
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/.gitignore
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/Makefile
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/Makefile
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/Makefile
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.tcl
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.tcl
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.tcl
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.xdc
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.xdc
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/board.xdc
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/program.tcl
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/program.tcl
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/program.tcl
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/proj.src
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/proj.src
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/Vivado/proj.src
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/multiplier.v b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/multiplier.v
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/multiplier.v
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/multiplier.v
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/step_add.v b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/step_add.v
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/step_add.v
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/step_add.v
diff --git a/src/hardware/Montgomery_multiplier_two_cycle_pipeline/step_sub.v b/SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/step_sub.v
similarity index 100%
rename from src/hardware/Montgomery_multiplier_two_cycle_pipeline/step_sub.v
rename to SIKE_vOW_hw-sw/hardware/Montgomery_multiplier_two_cycle_pipeline/step_sub.v
diff --git a/src/hardware/README b/SIKE_vOW_hw-sw/hardware/README
similarity index 100%
rename from src/hardware/README
rename to SIKE_vOW_hw-sw/hardware/README
diff --git a/src/hardware/controller_eval_4_isog/README b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/README
similarity index 100%
rename from src/hardware/controller_eval_4_isog/README
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/README
diff --git a/src/hardware/controller_eval_4_isog/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/.gitignore
similarity index 100%
rename from src/hardware/controller_eval_4_isog/Vivado/.gitignore
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/.gitignore
diff --git a/src/hardware/controller_eval_4_isog/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/Makefile
similarity index 100%
rename from src/hardware/controller_eval_4_isog/Vivado/Makefile
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/Makefile
diff --git a/src/hardware/controller_eval_4_isog/Vivado/batch-synth.sh b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/batch-synth.sh
similarity index 100%
rename from src/hardware/controller_eval_4_isog/Vivado/batch-synth.sh
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/batch-synth.sh
diff --git a/src/hardware/controller_eval_4_isog/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/board.tcl
similarity index 100%
rename from src/hardware/controller_eval_4_isog/Vivado/board.tcl
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/board.tcl
diff --git a/src/hardware/controller_eval_4_isog/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/board.xdc
similarity index 100%
rename from src/hardware/controller_eval_4_isog/Vivado/board.xdc
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/board.xdc
diff --git a/src/hardware/controller_eval_4_isog/Vivado/params.mk b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/params.mk
similarity index 100%
rename from src/hardware/controller_eval_4_isog/Vivado/params.mk
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/params.mk
diff --git a/src/hardware/controller_eval_4_isog/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/program.tcl
similarity index 100%
rename from src/hardware/controller_eval_4_isog/Vivado/program.tcl
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/program.tcl
diff --git a/src/hardware/controller_eval_4_isog/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/proj.src
similarity index 100%
rename from src/hardware/controller_eval_4_isog/Vivado/proj.src
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/Vivado/proj.src
diff --git a/src/hardware/controller_eval_4_isog/controller.v b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller.v
similarity index 100%
rename from src/hardware/controller_eval_4_isog/controller.v
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller.v
diff --git a/src/hardware/controller_eval_4_isog/controller_tb/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/.gitignore
similarity index 100%
rename from src/hardware/controller_eval_4_isog/controller_tb/.gitignore
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/.gitignore
diff --git a/src/hardware/controller_eval_4_isog/controller_tb/Makefile b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/Makefile
similarity index 100%
rename from src/hardware/controller_eval_4_isog/controller_tb/Makefile
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/Makefile
diff --git a/src/hardware/controller_eval_4_isog/controller_tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/batch-sim.sh
similarity index 100%
rename from src/hardware/controller_eval_4_isog/controller_tb/batch-sim.sh
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/batch-sim.sh
diff --git a/src/hardware/controller_eval_4_isog/controller_tb/controller_tb.v b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/controller_tb.v
similarity index 100%
rename from src/hardware/controller_eval_4_isog/controller_tb/controller_tb.v
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/controller_tb.v
diff --git a/src/hardware/controller_eval_4_isog/controller_tb/gen_test.sage b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/gen_test.sage
similarity index 100%
rename from src/hardware/controller_eval_4_isog/controller_tb/gen_test.sage
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/controller_tb/gen_test.sage
diff --git a/src/hardware/controller_eval_4_isog/eval_4_isog_FSM.v b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/eval_4_isog_FSM.v
similarity index 100%
rename from src/hardware/controller_eval_4_isog/eval_4_isog_FSM.v
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/eval_4_isog_FSM.v
diff --git a/src/hardware/controller_eval_4_isog/single_to_double_memory_wrapper.v b/SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/single_to_double_memory_wrapper.v
similarity index 100%
rename from src/hardware/controller_eval_4_isog/single_to_double_memory_wrapper.v
rename to SIKE_vOW_hw-sw/hardware/controller_eval_4_isog/single_to_double_memory_wrapper.v
diff --git a/src/hardware/controller_get_4_isog/README b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/README
similarity index 100%
rename from src/hardware/controller_get_4_isog/README
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/README
diff --git a/src/hardware/controller_get_4_isog/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/.gitignore
similarity index 100%
rename from src/hardware/controller_get_4_isog/Vivado/.gitignore
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/.gitignore
diff --git a/src/hardware/controller_get_4_isog/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/Makefile
similarity index 100%
rename from src/hardware/controller_get_4_isog/Vivado/Makefile
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/Makefile
diff --git a/src/hardware/controller_get_4_isog/Vivado/batch-synth.sh b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/batch-synth.sh
similarity index 100%
rename from src/hardware/controller_get_4_isog/Vivado/batch-synth.sh
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/batch-synth.sh
diff --git a/src/hardware/controller_get_4_isog/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/board.tcl
similarity index 100%
rename from src/hardware/controller_get_4_isog/Vivado/board.tcl
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/board.tcl
diff --git a/src/hardware/controller_get_4_isog/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/board.xdc
similarity index 100%
rename from src/hardware/controller_get_4_isog/Vivado/board.xdc
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/board.xdc
diff --git a/src/hardware/controller_get_4_isog/Vivado/params.mk b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/params.mk
similarity index 100%
rename from src/hardware/controller_get_4_isog/Vivado/params.mk
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/params.mk
diff --git a/src/hardware/controller_get_4_isog/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/program.tcl
similarity index 100%
rename from src/hardware/controller_get_4_isog/Vivado/program.tcl
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/program.tcl
diff --git a/src/hardware/controller_get_4_isog/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/proj.src
similarity index 100%
rename from src/hardware/controller_get_4_isog/Vivado/proj.src
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/Vivado/proj.src
diff --git a/src/hardware/controller_get_4_isog/controller.v b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller.v
similarity index 100%
rename from src/hardware/controller_get_4_isog/controller.v
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller.v
diff --git a/src/hardware/controller_get_4_isog/controller_tb/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/.gitignore
similarity index 100%
rename from src/hardware/controller_get_4_isog/controller_tb/.gitignore
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/.gitignore
diff --git a/src/hardware/controller_get_4_isog/controller_tb/Makefile b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/Makefile
similarity index 100%
rename from src/hardware/controller_get_4_isog/controller_tb/Makefile
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/Makefile
diff --git a/src/hardware/controller_get_4_isog/controller_tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/batch-sim.sh
similarity index 100%
rename from src/hardware/controller_get_4_isog/controller_tb/batch-sim.sh
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/batch-sim.sh
diff --git a/src/hardware/controller_get_4_isog/controller_tb/controller_tb.v b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/controller_tb.v
similarity index 100%
rename from src/hardware/controller_get_4_isog/controller_tb/controller_tb.v
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/controller_tb.v
diff --git a/src/hardware/controller_get_4_isog/controller_tb/gen_test.sage b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/gen_test.sage
similarity index 100%
rename from src/hardware/controller_get_4_isog/controller_tb/gen_test.sage
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/controller_tb/gen_test.sage
diff --git a/src/hardware/controller_get_4_isog/double_to_single_memory_wrapper.v b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/double_to_single_memory_wrapper.v
similarity index 100%
rename from src/hardware/controller_get_4_isog/double_to_single_memory_wrapper.v
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/double_to_single_memory_wrapper.v
diff --git a/src/hardware/controller_get_4_isog/get_4_isog_FSM.v b/SIKE_vOW_hw-sw/hardware/controller_get_4_isog/get_4_isog_FSM.v
similarity index 100%
rename from src/hardware/controller_get_4_isog/get_4_isog_FSM.v
rename to SIKE_vOW_hw-sw/hardware/controller_get_4_isog/get_4_isog_FSM.v
diff --git a/src/hardware/controller_xADD/README b/SIKE_vOW_hw-sw/hardware/controller_xADD/README
similarity index 100%
rename from src/hardware/controller_xADD/README
rename to SIKE_vOW_hw-sw/hardware/controller_xADD/README
diff --git a/src/hardware/controller_xADD/controller.v b/SIKE_vOW_hw-sw/hardware/controller_xADD/controller.v
similarity index 100%
rename from src/hardware/controller_xADD/controller.v
rename to SIKE_vOW_hw-sw/hardware/controller_xADD/controller.v
diff --git a/src/hardware/controller_xADD/controller_tb/Makefile b/SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/Makefile
similarity index 100%
rename from src/hardware/controller_xADD/controller_tb/Makefile
rename to SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/Makefile
diff --git a/src/hardware/controller_xADD/controller_tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/batch-sim.sh
similarity index 100%
rename from src/hardware/controller_xADD/controller_tb/batch-sim.sh
rename to SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/batch-sim.sh
diff --git a/src/hardware/controller_xADD/controller_tb/controller_tb.v b/SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/controller_tb.v
similarity index 100%
rename from src/hardware/controller_xADD/controller_tb/controller_tb.v
rename to SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/controller_tb.v
diff --git a/src/hardware/controller_xADD/controller_tb/gen_test.sage b/SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/gen_test.sage
similarity index 100%
rename from src/hardware/controller_xADD/controller_tb/gen_test.sage
rename to SIKE_vOW_hw-sw/hardware/controller_xADD/controller_tb/gen_test.sage
diff --git a/src/hardware/controller_xADD/double_to_single_memory_wrapper.v b/SIKE_vOW_hw-sw/hardware/controller_xADD/double_to_single_memory_wrapper.v
similarity index 100%
rename from src/hardware/controller_xADD/double_to_single_memory_wrapper.v
rename to SIKE_vOW_hw-sw/hardware/controller_xADD/double_to_single_memory_wrapper.v
diff --git a/src/hardware/controller_xADD/xADD_FSM.v b/SIKE_vOW_hw-sw/hardware/controller_xADD/xADD_FSM.v
similarity index 100%
rename from src/hardware/controller_xADD/xADD_FSM.v
rename to SIKE_vOW_hw-sw/hardware/controller_xADD/xADD_FSM.v
diff --git a/src/hardware/controller_xDBL/README b/SIKE_vOW_hw-sw/hardware/controller_xDBL/README
similarity index 100%
rename from src/hardware/controller_xDBL/README
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/README
diff --git a/src/hardware/controller_xDBL/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/.gitignore
similarity index 100%
rename from src/hardware/controller_xDBL/Vivado/.gitignore
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/.gitignore
diff --git a/src/hardware/controller_xDBL/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/Makefile
similarity index 100%
rename from src/hardware/controller_xDBL/Vivado/Makefile
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/Makefile
diff --git a/src/hardware/controller_xDBL/Vivado/batch-synth.sh b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/batch-synth.sh
similarity index 100%
rename from src/hardware/controller_xDBL/Vivado/batch-synth.sh
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/batch-synth.sh
diff --git a/src/hardware/controller_xDBL/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/board.tcl
similarity index 100%
rename from src/hardware/controller_xDBL/Vivado/board.tcl
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/board.tcl
diff --git a/src/hardware/controller_xDBL/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/board.xdc
similarity index 100%
rename from src/hardware/controller_xDBL/Vivado/board.xdc
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/board.xdc
diff --git a/src/hardware/controller_xDBL/Vivado/params.mk b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/params.mk
similarity index 100%
rename from src/hardware/controller_xDBL/Vivado/params.mk
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/params.mk
diff --git a/src/hardware/controller_xDBL/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/program.tcl
similarity index 100%
rename from src/hardware/controller_xDBL/Vivado/program.tcl
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/program.tcl
diff --git a/src/hardware/controller_xDBL/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/proj.src
similarity index 100%
rename from src/hardware/controller_xDBL/Vivado/proj.src
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/Vivado/proj.src
diff --git a/src/hardware/controller_xDBL/controller.v b/SIKE_vOW_hw-sw/hardware/controller_xDBL/controller.v
similarity index 100%
rename from src/hardware/controller_xDBL/controller.v
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/controller.v
diff --git a/src/hardware/controller_xDBL/controller_tb/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/.gitignore
similarity index 100%
rename from src/hardware/controller_xDBL/controller_tb/.gitignore
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/.gitignore
diff --git a/src/hardware/controller_xDBL/controller_tb/Makefile b/SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/Makefile
similarity index 100%
rename from src/hardware/controller_xDBL/controller_tb/Makefile
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/Makefile
diff --git a/src/hardware/controller_xDBL/controller_tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/batch-sim.sh
similarity index 100%
rename from src/hardware/controller_xDBL/controller_tb/batch-sim.sh
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/batch-sim.sh
diff --git a/src/hardware/controller_xDBL/controller_tb/controller_tb.v b/SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/controller_tb.v
similarity index 100%
rename from src/hardware/controller_xDBL/controller_tb/controller_tb.v
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/controller_tb.v
diff --git a/src/hardware/controller_xDBL/controller_tb/gen_test.sage b/SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/gen_test.sage
similarity index 100%
rename from src/hardware/controller_xDBL/controller_tb/gen_test.sage
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/controller_tb/gen_test.sage
diff --git a/src/hardware/controller_xDBL/double_to_single_memory_wrapper.v b/SIKE_vOW_hw-sw/hardware/controller_xDBL/double_to_single_memory_wrapper.v
similarity index 100%
rename from src/hardware/controller_xDBL/double_to_single_memory_wrapper.v
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/double_to_single_memory_wrapper.v
diff --git a/src/hardware/controller_xDBL/xDBL_FSM.v b/SIKE_vOW_hw-sw/hardware/controller_xDBL/xDBL_FSM.v
similarity index 100%
rename from src/hardware/controller_xDBL/xDBL_FSM.v
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL/xDBL_FSM.v
diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/README b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/README
similarity index 100%
rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/README
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/README
diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/.gitignore
similarity index 100%
rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/.gitignore
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/.gitignore
diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/Makefile
similarity index 100%
rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/Makefile
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/Makefile
diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/batch-synth.sh b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/batch-synth.sh
similarity index 100%
rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/batch-synth.sh
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/batch-synth.sh
diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.tcl
similarity index 100%
rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.tcl
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.tcl
diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.xdc
similarity index 100%
rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.xdc
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/board.xdc
diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/program.tcl
similarity index 100%
rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/program.tcl
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/program.tcl
diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/proj.src
similarity index 100%
rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/proj.src
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/Vivado/proj.src
diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller.v b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller.v
similarity index 100%
rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller.v
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller.v
diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/.gitignore b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/.gitignore
similarity index 100%
rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/.gitignore
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/.gitignore
diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/Makefile b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/Makefile
similarity index 100%
rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/Makefile
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/Makefile
diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/batch-sim.sh
similarity index 100%
rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/batch-sim.sh
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/batch-sim.sh
diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/controller_tb.v b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/controller_tb.v
similarity index 100%
rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/controller_tb.v
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/controller_tb/controller_tb.v
diff --git a/src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/single_to_double_memory_wrapper.v b/SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/single_to_double_memory_wrapper.v
similarity index 100%
rename from src/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/single_to_double_memory_wrapper.v
rename to SIKE_vOW_hw-sw/hardware/controller_xDBL_get_4_isog_xADD_eval_4_isog/single_to_double_memory_wrapper.v
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/README b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/README
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/README
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/README
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/.gitignore
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/.gitignore
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/.gitignore
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/Makefile
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/Makefile
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/Makefile
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/batch-synth.sh b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/batch-synth.sh
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/batch-synth.sh
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/batch-synth.sh
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.tcl
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.tcl
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.tcl
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.xdc
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.xdc
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/board.xdc
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/params.mk b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/params.mk
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/params.mk
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/params.mk
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/program.tcl
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/program.tcl
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/program.tcl
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/proj.src
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/proj.src
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/Vivado/proj.src
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul.v b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul.v
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul.v
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul.v
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/.gitignore b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/.gitignore
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/.gitignore
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/.gitignore
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/Makefile b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/Makefile
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/Makefile
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/Makefile
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/batch-sim.sh
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/batch-sim.sh
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/batch-sim.sh
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/fp2_mont_mul_tb.v b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/fp2_mont_mul_tb.v
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/fp2_mont_mul_tb.v
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/fp2_mont_mul_tb.v
diff --git a/src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/gen_input.sage b/SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/gen_input.sage
similarity index 100%
rename from src/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/gen_input.sage
rename to SIKE_vOW_hw-sw/hardware/fp2_mont_mul_one_cycle_pipeline/fp2_mont_mul_tb/gen_input.sage
diff --git a/src/hardware/fp2_sub_add_correction/.gitignore b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/.gitignore
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/.gitignore
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/.gitignore
diff --git a/src/hardware/fp2_sub_add_correction/README b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/README
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/README
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/README
diff --git a/src/hardware/fp2_sub_add_correction/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/.gitignore
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/Vivado/.gitignore
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/.gitignore
diff --git a/src/hardware/fp2_sub_add_correction/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/Makefile
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/Vivado/Makefile
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/Makefile
diff --git a/src/hardware/fp2_sub_add_correction/Vivado/batch-synth.sh b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/batch-synth.sh
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/Vivado/batch-synth.sh
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/batch-synth.sh
diff --git a/src/hardware/fp2_sub_add_correction/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/board.tcl
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/Vivado/board.tcl
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/board.tcl
diff --git a/src/hardware/fp2_sub_add_correction/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/board.xdc
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/Vivado/board.xdc
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/board.xdc
diff --git a/src/hardware/fp2_sub_add_correction/Vivado/params.mk b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/params.mk
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/Vivado/params.mk
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/params.mk
diff --git a/src/hardware/fp2_sub_add_correction/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/program.tcl
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/Vivado/program.tcl
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/program.tcl
diff --git a/src/hardware/fp2_sub_add_correction/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/proj.src
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/Vivado/proj.src
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/Vivado/proj.src
diff --git a/src/hardware/fp2_sub_add_correction/fp2_sub_add_correction.v b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction.v
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/fp2_sub_add_correction.v
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction.v
diff --git a/src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/.gitignore b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/.gitignore
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/.gitignore
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/.gitignore
diff --git a/src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/Makefile b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/Makefile
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/Makefile
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/Makefile
diff --git a/src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/batch-sim.sh
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/batch-sim.sh
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/batch-sim.sh
diff --git a/src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/fp2_sub_add_correction_tb.v b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/fp2_sub_add_correction_tb.v
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/fp2_sub_add_correction_tb.v
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/fp2_sub_add_correction_tb.v
diff --git a/src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/gen_test.sage b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/gen_test.sage
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/gen_test.sage
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/fp2_sub_add_correction_tb/gen_test.sage
diff --git a/src/hardware/fp2_sub_add_correction/gen.mk b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/gen.mk
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/gen.mk
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/gen.mk
diff --git a/src/hardware/fp2_sub_add_correction/gen_serial_comparator.py b/SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/gen_serial_comparator.py
similarity index 100%
rename from src/hardware/fp2_sub_add_correction/gen_serial_comparator.py
rename to SIKE_vOW_hw-sw/hardware/fp2_sub_add_correction/gen_serial_comparator.py
diff --git a/src/hardware/fp_sub_and_add/README b/SIKE_vOW_hw-sw/hardware/fp_sub_and_add/README
similarity index 100%
rename from src/hardware/fp_sub_and_add/README
rename to SIKE_vOW_hw-sw/hardware/fp_sub_and_add/README
diff --git a/src/hardware/fp_sub_and_add/fp_add_and_compare.v b/SIKE_vOW_hw-sw/hardware/fp_sub_and_add/fp_add_and_compare.v
similarity index 100%
rename from src/hardware/fp_sub_and_add/fp_add_and_compare.v
rename to SIKE_vOW_hw-sw/hardware/fp_sub_and_add/fp_add_and_compare.v
diff --git a/src/hardware/fp_sub_and_add/fp_adder.v b/SIKE_vOW_hw-sw/hardware/fp_sub_and_add/fp_adder.v
similarity index 100%
rename from src/hardware/fp_sub_and_add/fp_adder.v
rename to SIKE_vOW_hw-sw/hardware/fp_sub_and_add/fp_adder.v
diff --git a/src/hardware/fp_sub_and_add/fp_sub_and_compare.v b/SIKE_vOW_hw-sw/hardware/fp_sub_and_add/fp_sub_and_compare.v
similarity index 100%
rename from src/hardware/fp_sub_and_add/fp_sub_and_compare.v
rename to SIKE_vOW_hw-sw/hardware/fp_sub_and_add/fp_sub_and_compare.v
diff --git a/src/hardware/fp_sub_and_add/gen.mk b/SIKE_vOW_hw-sw/hardware/fp_sub_and_add/gen.mk
similarity index 100%
rename from src/hardware/fp_sub_and_add/gen.mk
rename to SIKE_vOW_hw-sw/hardware/fp_sub_and_add/gen.mk
diff --git a/src/hardware/fp_sub_and_add/gen_serial_comparator.py b/SIKE_vOW_hw-sw/hardware/fp_sub_and_add/gen_serial_comparator.py
similarity index 100%
rename from src/hardware/fp_sub_and_add/gen_serial_comparator.py
rename to SIKE_vOW_hw-sw/hardware/fp_sub_and_add/gen_serial_comparator.py
diff --git a/src/hardware/fp_sub_and_add/unit_adder.v b/SIKE_vOW_hw-sw/hardware/fp_sub_and_add/unit_adder.v
similarity index 100%
rename from src/hardware/fp_sub_and_add/unit_adder.v
rename to SIKE_vOW_hw-sw/hardware/fp_sub_and_add/unit_adder.v
diff --git a/src/hardware/top_controller/README b/SIKE_vOW_hw-sw/hardware/top_controller/README
similarity index 100%
rename from src/hardware/top_controller/README
rename to SIKE_vOW_hw-sw/hardware/top_controller/README
diff --git a/src/hardware/top_controller/Vivado/.gitignore b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/.gitignore
similarity index 100%
rename from src/hardware/top_controller/Vivado/.gitignore
rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/.gitignore
diff --git a/src/hardware/top_controller/Vivado/Makefile b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/Makefile
similarity index 100%
rename from src/hardware/top_controller/Vivado/Makefile
rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/Makefile
diff --git a/src/hardware/top_controller/Vivado/batch-synth.sh b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/batch-synth.sh
similarity index 100%
rename from src/hardware/top_controller/Vivado/batch-synth.sh
rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/batch-synth.sh
diff --git a/src/hardware/top_controller/Vivado/board.tcl b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/board.tcl
similarity index 100%
rename from src/hardware/top_controller/Vivado/board.tcl
rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/board.tcl
diff --git a/src/hardware/top_controller/Vivado/board.xdc b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/board.xdc
similarity index 100%
rename from src/hardware/top_controller/Vivado/board.xdc
rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/board.xdc
diff --git a/src/hardware/top_controller/Vivado/gen.mk b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/gen.mk
similarity index 100%
rename from src/hardware/top_controller/Vivado/gen.mk
rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/gen.mk
diff --git a/src/hardware/top_controller/Vivado/gen_p_mem.sage b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/gen_p_mem.sage
similarity index 100%
rename from src/hardware/top_controller/Vivado/gen_p_mem.sage
rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/gen_p_mem.sage
diff --git a/src/hardware/top_controller/Vivado/program.tcl b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/program.tcl
similarity index 100%
rename from src/hardware/top_controller/Vivado/program.tcl
rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/program.tcl
diff --git a/src/hardware/top_controller/Vivado/proj.src b/SIKE_vOW_hw-sw/hardware/top_controller/Vivado/proj.src
similarity index 100%
rename from src/hardware/top_controller/Vivado/proj.src
rename to SIKE_vOW_hw-sw/hardware/top_controller/Vivado/proj.src
diff --git a/src/hardware/top_controller/gen_mem_wrapper.py b/SIKE_vOW_hw-sw/hardware/top_controller/gen_mem_wrapper.py
similarity index 100%
rename from src/hardware/top_controller/gen_mem_wrapper.py
rename to SIKE_vOW_hw-sw/hardware/top_controller/gen_mem_wrapper.py
diff --git a/src/hardware/top_controller/opt/top_controller.v b/SIKE_vOW_hw-sw/hardware/top_controller/opt/top_controller.v
similarity index 100%
rename from src/hardware/top_controller/opt/top_controller.v
rename to SIKE_vOW_hw-sw/hardware/top_controller/opt/top_controller.v
diff --git a/src/hardware/top_controller/tb/.gitignore b/SIKE_vOW_hw-sw/hardware/top_controller/tb/.gitignore
similarity index 100%
rename from src/hardware/top_controller/tb/.gitignore
rename to SIKE_vOW_hw-sw/hardware/top_controller/tb/.gitignore
diff --git a/src/hardware/top_controller/tb/Makefile b/SIKE_vOW_hw-sw/hardware/top_controller/tb/Makefile
similarity index 100%
rename from src/hardware/top_controller/tb/Makefile
rename to SIKE_vOW_hw-sw/hardware/top_controller/tb/Makefile
diff --git a/src/hardware/top_controller/tb/batch-sim.sh b/SIKE_vOW_hw-sw/hardware/top_controller/tb/batch-sim.sh
similarity index 100%
rename from src/hardware/top_controller/tb/batch-sim.sh
rename to SIKE_vOW_hw-sw/hardware/top_controller/tb/batch-sim.sh
diff --git a/src/hardware/top_controller/tb/gen_test.sage b/SIKE_vOW_hw-sw/hardware/top_controller/tb/gen_test.sage
similarity index 100%
rename from src/hardware/top_controller/tb/gen_test.sage
rename to SIKE_vOW_hw-sw/hardware/top_controller/tb/gen_test.sage
diff --git a/src/hardware/top_controller/tb/top_tb.v b/SIKE_vOW_hw-sw/hardware/top_controller/tb/top_tb.v
similarity index 100%
rename from src/hardware/top_controller/tb/top_tb.v
rename to SIKE_vOW_hw-sw/hardware/top_controller/tb/top_tb.v
diff --git a/src/hardware/util/clog2.v b/SIKE_vOW_hw-sw/hardware/util/clog2.v
similarity index 100%
rename from src/hardware/util/clog2.v
rename to SIKE_vOW_hw-sw/hardware/util/clog2.v
diff --git a/src/hardware/util/delay.v b/SIKE_vOW_hw-sw/hardware/util/delay.v
similarity index 100%
rename from src/hardware/util/delay.v
rename to SIKE_vOW_hw-sw/hardware/util/delay.v
diff --git a/src/hardware/util/single_port_mem.v b/SIKE_vOW_hw-sw/hardware/util/single_port_mem.v
similarity index 100%
rename from src/hardware/util/single_port_mem.v
rename to SIKE_vOW_hw-sw/hardware/util/single_port_mem.v
diff --git a/src/murax/README b/SIKE_vOW_hw-sw/murax/README
similarity index 100%
rename from src/murax/README
rename to SIKE_vOW_hw-sw/murax/README
diff --git a/src/murax/software/README.md b/SIKE_vOW_hw-sw/murax/software/README.md
similarity index 100%
rename from src/murax/software/README.md
rename to SIKE_vOW_hw-sw/murax/software/README.md
diff --git a/src/murax/software/VexRiscvSocSoftware/README.md b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/README.md
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/README.md
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/README.md
diff --git a/src/murax/software/VexRiscvSocSoftware/libs/gpio.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/gpio.h
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/libs/gpio.h
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/gpio.h
diff --git a/src/murax/software/VexRiscvSocSoftware/libs/hex.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/hex.h
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/libs/hex.h
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/hex.h
diff --git a/src/murax/software/VexRiscvSocSoftware/libs/interrupt.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/interrupt.h
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/libs/interrupt.h
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/interrupt.h
diff --git a/src/murax/software/VexRiscvSocSoftware/libs/prescaler.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/prescaler.h
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/libs/prescaler.h
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/prescaler.h
diff --git a/src/murax/software/VexRiscvSocSoftware/libs/timer.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/timer.h
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/libs/timer.h
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/timer.h
diff --git a/src/murax/software/VexRiscvSocSoftware/libs/uart.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/uart.h
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/libs/uart.h
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/uart.h
diff --git a/src/murax/software/VexRiscvSocSoftware/libs/vga.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/vga.h
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/libs/vga.h
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/libs/vga.h
diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/hex/cmd.gbd b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/cmd.gbd
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/projects/murax/hex/cmd.gbd
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/cmd.gbd
diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/hex/makefile b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/makefile
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/projects/murax/hex/makefile
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/makefile
diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/crt.S b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/crt.S
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/crt.S
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/crt.S
diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/main.c b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/main.c
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/main.c
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/hex/src/main.c
diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/libs/linker.ld b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/linker.ld
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/projects/murax/libs/linker.ld
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/linker.ld
diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/libs/makefile b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/makefile
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/projects/murax/libs/makefile
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/makefile
diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax.h
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax.h
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax.h
diff --git a/src/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax_hex.h b/SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax_hex.h
similarity index 100%
rename from src/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax_hex.h
rename to SIKE_vOW_hw-sw/murax/software/VexRiscvSocSoftware/projects/murax/libs/murax_hex.h