DiskANN/apps/utils/CMakeLists.txt

110 строки
4.2 KiB
CMake
Исходник Обычный вид История

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
# Defaults for targets created after this point in this directory scope.
# Treat compiler warnings as errors.
set(CMAKE_COMPILE_WARNING_AS_ERROR ON)
# Request the C++17 language standard.
set(CMAKE_CXX_STANDARD 17)
2020-09-05 10:25:44 +03:00
# fvecs_to_bin: executable built from fvecs_to_bin.cpp.
add_executable(
  fvecs_to_bin
  fvecs_to_bin.cpp
)
2022-06-24 03:18:12 +03:00
# fvecs_to_bvecs: executable built from fvecs_to_bvecs.cpp.
add_executable(
  fvecs_to_bvecs
  fvecs_to_bvecs.cpp
)
# rand_data_gen: executable built from rand_data_gen.cpp.
add_executable(rand_data_gen rand_data_gen.cpp)
# PRIVATE: an executable has no consumers, so its link dependencies do not
# need to propagate. The keyword form also avoids the legacy plain signature.
target_link_libraries(rand_data_gen
  PRIVATE
    ${PROJECT_NAME}
    Boost::program_options
)
# float_bin_to_int8: executable built from float_bin_to_int8.cpp.
add_executable(
  float_bin_to_int8
  float_bin_to_int8.cpp
)
2020-09-05 10:25:44 +03:00
# ivecs_to_bin: executable built from ivecs_to_bin.cpp.
add_executable(
  ivecs_to_bin
  ivecs_to_bin.cpp
)
2022-09-08 03:23:05 +03:00
# count_bfs_levels: executable built from count_bfs_levels.cpp.
add_executable(count_bfs_levels count_bfs_levels.cpp)
# PRIVATE: an executable has no consumers, so its link dependencies do not
# need to propagate. The keyword form also avoids the legacy plain signature.
target_link_libraries(count_bfs_levels
  PRIVATE
    ${PROJECT_NAME}
    Boost::program_options
)
2021-05-15 11:55:41 +03:00
# tsv_to_bin and bin_to_tsv: one executable per source file of the same name.
add_executable(
  tsv_to_bin
  tsv_to_bin.cpp
)
add_executable(
  bin_to_tsv
  bin_to_tsv.cpp
)
2020-09-05 10:25:44 +03:00
# int8_to_float: executable built from int8_to_float.cpp, linked against the
# target named by PROJECT_NAME.
add_executable(int8_to_float int8_to_float.cpp)
# PRIVATE: executables have no consumers, so nothing needs to propagate.
target_link_libraries(int8_to_float PRIVATE ${PROJECT_NAME})
2020-09-05 10:25:44 +03:00
# int8_to_float_scale: executable built from int8_to_float_scale.cpp, linked
# against the target named by PROJECT_NAME.
add_executable(int8_to_float_scale int8_to_float_scale.cpp)
# PRIVATE: executables have no consumers, so nothing needs to propagate.
target_link_libraries(int8_to_float_scale PRIVATE ${PROJECT_NAME})
Revised inner product (#10) * working towards inner product in memory indices * done with in-memory code * made the inner product distance function return std::float_max if negative * more changes for disk index support * on the way to disk index support for MIPS * works now, need to change the PQ generation for MIPS * now incorporated disk+memory search for inner product * support for mips and l2 * changed inner product to -IP rather than 1/IP * towards adding support for storing PQ vectors in disk index for very large data * towards adding support for storing PQ vectors in disk index for very large data * halfway through PQ-based disk search option * code compiles for disk index pq * fixed some bug * shards are written as and when necessary * sharding is now on demand * minor changes * fixed one malloc bug in parameters * added a vector analyzer util * added missing file * fixed a bug which used L2 instead of inner product in cached beam search * now setting up the normalizing approach * towards pre-processing data * working towards newer inner product * more changes to do MIPS by reducing to L2 with extra coordinate * cleaned up code a bit, need to test everything again * testing underway * added back saturate graph to create denser indices * now we dont sample a new test dataset every iteration for estimating sharding * now num_parts increases by 2 * cleaned up warnings in Debug mode compiler * working towards inner product in memory indices * done with in-memory code * made the inner product distance function return std::float_max if negative * more changes for disk index support * on the way to disk index support for MIPS * works now, need to change the PQ generation for MIPS * now incorporated disk+memory search for inner product * support for mips and l2 * changed inner product to -IP rather than 1/IP * towards adding support for storing PQ vectors in disk index for very large data * towards adding support for storing PQ vectors in disk index for very large 
data * halfway through PQ-based disk search option * code compiles for disk index pq * fixed some bug * shards are written as and when necessary * sharding is now on demand * minor changes * fixed one malloc bug in parameters * added a vector analyzer util * added missing file * fixed a bug which used L2 instead of inner product in cached beam search * now setting up the normalizing approach * towards pre-processing data * working towards newer inner product * more changes to do MIPS by reducing to L2 with extra coordinate * cleaned up code a bit, need to test everything again * testing underway * added back saturate graph to create denser indices * now we dont sample a new test dataset every iteration for estimating sharding * now num_parts increases by 2 * cleaned up warnings in Debug mode compiler * added a normalizer to vector analysis * fixed one bug for MIPS * addressed all comments of PR * fixed minor typos. now running unit tests * ran clang-format as it doesnt run by default due to LINUX flag not set anywhere * clang introduced a bug in distance.h, fixed itt * added unit tester partially * minor bugfix * finished unit tester * changed back training size to 100K for now, we can increase to 1M later if necessary * added comments for unit_tester.sh * added auto tuning parameters for unit tester * re-ran clang formatting * small change to unit tester * fixed minor bug in unit tester * fixed some formatting on unit tester * started code for range search support in pq_flash_index * added more code for range search in disk index * added range search support * tested range search on small dataset * Update memory_mapper.h * minor edits Co-authored-by: ravishankar <rakri@microsoft.com>
2021-08-12 02:36:44 +03:00
# uint8_to_float: executable built from uint8_to_float.cpp, linked against the
# target named by PROJECT_NAME.
add_executable(uint8_to_float uint8_to_float.cpp)
# PRIVATE: executables have no consumers, so nothing needs to propagate.
target_link_libraries(uint8_to_float PRIVATE ${PROJECT_NAME})
Revised inner product (#10) * working towards inner product in memory indices * done with in-memory code * made the inner product distance function return std::float_max if negative * more changes for disk index support * on the way to disk index support for MIPS * works now, need to change the PQ generation for MIPS * now incorporated disk+memory search for inner product * support for mips and l2 * changed inner product to -IP rather than 1/IP * towards adding support for storing PQ vectors in disk index for very large data * towards adding support for storing PQ vectors in disk index for very large data * halfway through PQ-based disk search option * code compiles for disk index pq * fixed some bug * shards are written as and when necessary * sharding is now on demand * minor changes * fixed one malloc bug in parameters * added a vector analyzer util * added missing file * fixed a bug which used L2 instead of inner product in cached beam search * now setting up the normalizing approach * towards pre-processing data * working towards newer inner product * more changes to do MIPS by reducing to L2 with extra coordinate * cleaned up code a bit, need to test everything again * testing underway * added back saturate graph to create denser indices * now we dont sample a new test dataset every iteration for estimating sharding * now num_parts increases by 2 * cleaned up warnings in Debug mode compiler * working towards inner product in memory indices * done with in-memory code * made the inner product distance function return std::float_max if negative * more changes for disk index support * on the way to disk index support for MIPS * works now, need to change the PQ generation for MIPS * now incorporated disk+memory search for inner product * support for mips and l2 * changed inner product to -IP rather than 1/IP * towards adding support for storing PQ vectors in disk index for very large data * towards adding support for storing PQ vectors in disk index for very large 
data * halfway through PQ-based disk search option * code compiles for disk index pq * fixed some bug * shards are written as and when necessary * sharding is now on demand * minor changes * fixed one malloc bug in parameters * added a vector analyzer util * added missing file * fixed a bug which used L2 instead of inner product in cached beam search * now setting up the normalizing approach * towards pre-processing data * working towards newer inner product * more changes to do MIPS by reducing to L2 with extra coordinate * cleaned up code a bit, need to test everything again * testing underway * added back saturate graph to create denser indices * now we dont sample a new test dataset every iteration for estimating sharding * now num_parts increases by 2 * cleaned up warnings in Debug mode compiler * added a normalizer to vector analysis * fixed one bug for MIPS * addressed all comments of PR * fixed minor typos. now running unit tests * ran clang-format as it doesnt run by default due to LINUX flag not set anywhere * clang introduced a bug in distance.h, fixed itt * added unit tester partially * minor bugfix * finished unit tester * changed back training size to 100K for now, we can increase to 1M later if necessary * added comments for unit_tester.sh * added auto tuning parameters for unit tester * re-ran clang formatting * small change to unit tester * fixed minor bug in unit tester * fixed some formatting on unit tester * started code for range search support in pq_flash_index * added more code for range search in disk index * added range search support * tested range search on small dataset * Update memory_mapper.h * minor edits Co-authored-by: ravishankar <rakri@microsoft.com>
2021-08-12 02:36:44 +03:00
2020-09-05 10:25:44 +03:00
# uint32_to_uint8: executable built from uint32_to_uint8.cpp, linked against
# the target named by PROJECT_NAME.
add_executable(uint32_to_uint8 uint32_to_uint8.cpp)
# PRIVATE: executables have no consumers, so nothing needs to propagate.
target_link_libraries(uint32_to_uint8 PRIVATE ${PROJECT_NAME})
Revised inner product (#10) * working towards inner product in memory indices * done with in-memory code * made the inner product distance function return std::float_max if negative * more changes for disk index support * on the way to disk index support for MIPS * works now, need to change the PQ generation for MIPS * now incorporated disk+memory search for inner product * support for mips and l2 * changed inner product to -IP rather than 1/IP * towards adding support for storing PQ vectors in disk index for very large data * towards adding support for storing PQ vectors in disk index for very large data * halfway through PQ-based disk search option * code compiles for disk index pq * fixed some bug * shards are written as and when necessary * sharding is now on demand * minor changes * fixed one malloc bug in parameters * added a vector analyzer util * added missing file * fixed a bug which used L2 instead of inner product in cached beam search * now setting up the normalizing approach * towards pre-processing data * working towards newer inner product * more changes to do MIPS by reducing to L2 with extra coordinate * cleaned up code a bit, need to test everything again * testing underway * added back saturate graph to create denser indices * now we dont sample a new test dataset every iteration for estimating sharding * now num_parts increases by 2 * cleaned up warnings in Debug mode compiler * working towards inner product in memory indices * done with in-memory code * made the inner product distance function return std::float_max if negative * more changes for disk index support * on the way to disk index support for MIPS * works now, need to change the PQ generation for MIPS * now incorporated disk+memory search for inner product * support for mips and l2 * changed inner product to -IP rather than 1/IP * towards adding support for storing PQ vectors in disk index for very large data * towards adding support for storing PQ vectors in disk index for very large 
data * halfway through PQ-based disk search option * code compiles for disk index pq * fixed some bug * shards are written as and when necessary * sharding is now on demand * minor changes * fixed one malloc bug in parameters * added a vector analyzer util * added missing file * fixed a bug which used L2 instead of inner product in cached beam search * now setting up the normalizing approach * towards pre-processing data * working towards newer inner product * more changes to do MIPS by reducing to L2 with extra coordinate * cleaned up code a bit, need to test everything again * testing underway * added back saturate graph to create denser indices * now we dont sample a new test dataset every iteration for estimating sharding * now num_parts increases by 2 * cleaned up warnings in Debug mode compiler * added a normalizer to vector analysis * fixed one bug for MIPS * addressed all comments of PR * fixed minor typos. now running unit tests * ran clang-format as it doesnt run by default due to LINUX flag not set anywhere * clang introduced a bug in distance.h, fixed itt * added unit tester partially * minor bugfix * finished unit tester * changed back training size to 100K for now, we can increase to 1M later if necessary * added comments for unit_tester.sh * added auto tuning parameters for unit tester * re-ran clang formatting * small change to unit tester * fixed minor bug in unit tester * fixed some formatting on unit tester * started code for range search support in pq_flash_index * added more code for range search in disk index * added range search support * tested range search on small dataset * Update memory_mapper.h * minor edits Co-authored-by: ravishankar <rakri@microsoft.com>
2021-08-12 02:36:44 +03:00
# vector_analysis: executable built from vector_analysis.cpp.
add_executable(vector_analysis vector_analysis.cpp)
# Links the PROJECT_NAME target plus whatever
# DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS expands to (set outside this file).
# PRIVATE: executables have no consumers, so nothing needs to propagate.
target_link_libraries(vector_analysis
  PRIVATE
    ${PROJECT_NAME}
    ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}
)
Revised inner product (#10) * working towards inner product in memory indices * done with in-memory code * made the inner product distance function return std::float_max if negative * more changes for disk index support * on the way to disk index support for MIPS * works now, need to change the PQ generation for MIPS * now incorporated disk+memory search for inner product * support for mips and l2 * changed inner product to -IP rather than 1/IP * towards adding support for storing PQ vectors in disk index for very large data * towards adding support for storing PQ vectors in disk index for very large data * halfway through PQ-based disk search option * code compiles for disk index pq * fixed some bug * shards are written as and when necessary * sharding is now on demand * minor changes * fixed one malloc bug in parameters * added a vector analyzer util * added missing file * fixed a bug which used L2 instead of inner product in cached beam search * now setting up the normalizing approach * towards pre-processing data * working towards newer inner product * more changes to do MIPS by reducing to L2 with extra coordinate * cleaned up code a bit, need to test everything again * testing underway * added back saturate graph to create denser indices * now we dont sample a new test dataset every iteration for estimating sharding * now num_parts increases by 2 * cleaned up warnings in Debug mode compiler * working towards inner product in memory indices * done with in-memory code * made the inner product distance function return std::float_max if negative * more changes for disk index support * on the way to disk index support for MIPS * works now, need to change the PQ generation for MIPS * now incorporated disk+memory search for inner product * support for mips and l2 * changed inner product to -IP rather than 1/IP * towards adding support for storing PQ vectors in disk index for very large data * towards adding support for storing PQ vectors in disk index for very large 
data * halfway through PQ-based disk search option * code compiles for disk index pq * fixed some bug * shards are written as and when necessary * sharding is now on demand * minor changes * fixed one malloc bug in parameters * added a vector analyzer util * added missing file * fixed a bug which used L2 instead of inner product in cached beam search * now setting up the normalizing approach * towards pre-processing data * working towards newer inner product * more changes to do MIPS by reducing to L2 with extra coordinate * cleaned up code a bit, need to test everything again * testing underway * added back saturate graph to create denser indices * now we dont sample a new test dataset every iteration for estimating sharding * now num_parts increases by 2 * cleaned up warnings in Debug mode compiler * added a normalizer to vector analysis * fixed one bug for MIPS * addressed all comments of PR * fixed minor typos. now running unit tests * ran clang-format as it doesnt run by default due to LINUX flag not set anywhere * clang introduced a bug in distance.h, fixed itt * added unit tester partially * minor bugfix * finished unit tester * changed back training size to 100K for now, we can increase to 1M later if necessary * added comments for unit_tester.sh * added auto tuning parameters for unit tester * re-ran clang formatting * small change to unit tester * fixed minor bug in unit tester * fixed some formatting on unit tester * started code for range search support in pq_flash_index * added more code for range search in disk index * added range search support * tested range search on small dataset * Update memory_mapper.h * minor edits Co-authored-by: ravishankar <rakri@microsoft.com>
2021-08-12 02:36:44 +03:00
2020-09-05 10:25:44 +03:00
# gen_random_slice: executable built from gen_random_slice.cpp.
add_executable(gen_random_slice gen_random_slice.cpp)
# Links the PROJECT_NAME target plus whatever
# DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS expands to (set outside this file).
# PRIVATE: executables have no consumers, so nothing needs to propagate.
target_link_libraries(gen_random_slice
  PRIVATE
    ${PROJECT_NAME}
    ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}
)
2020-09-05 10:25:44 +03:00
# simulate_aggregate_recall: executable built from simulate_aggregate_recall.cpp.
add_executable(
  simulate_aggregate_recall
  simulate_aggregate_recall.cpp
)
2020-09-05 10:25:44 +03:00
# calculate_recall: executable built from calculate_recall.cpp.
add_executable(calculate_recall calculate_recall.cpp)
# DISKANN_ASYNC_LIB and DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS are set outside
# this file. PRIVATE: executables have no consumers, so nothing needs to
# propagate.
target_link_libraries(calculate_recall
  PRIVATE
    ${PROJECT_NAME}
    ${DISKANN_ASYNC_LIB}
    ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}
)
2020-09-05 10:25:44 +03:00
# Ground-truth computation tools: built outside the main DiskANN sources, and dependent on MKL.
2020-09-05 10:25:44 +03:00
# compute_groundtruth: executable built from compute_groundtruth.cpp.
add_executable(compute_groundtruth compute_groundtruth.cpp)
# MKL headers are needed only to compile this tool, hence PRIVATE.
target_include_directories(compute_groundtruth
  PRIVATE
    ${DISKANN_MKL_INCLUDE_DIRECTORIES}
)
# The DISKANN_* variables are set outside this file.
# PRIVATE: executables have no consumers, so nothing needs to propagate.
target_link_libraries(compute_groundtruth
  PRIVATE
    ${PROJECT_NAME}
    ${DISKANN_MKL_LINK_LIBRARIES}
    ${DISKANN_ASYNC_LIB}
    Boost::program_options
)
2020-09-05 10:25:44 +03:00
# compute_groundtruth_for_filters: executable built from
# compute_groundtruth_for_filters.cpp.
add_executable(compute_groundtruth_for_filters compute_groundtruth_for_filters.cpp)
# MKL headers are needed only to compile this tool, hence PRIVATE.
target_include_directories(compute_groundtruth_for_filters
  PRIVATE
    ${DISKANN_MKL_INCLUDE_DIRECTORIES}
)
# The DISKANN_* variables are set outside this file.
# PRIVATE: executables have no consumers, so nothing needs to propagate.
target_link_libraries(compute_groundtruth_for_filters
  PRIVATE
    ${PROJECT_NAME}
    ${DISKANN_MKL_LINK_LIBRARIES}
    ${DISKANN_ASYNC_LIB}
    Boost::program_options
)
2020-09-05 10:25:44 +03:00
# generate_pq: executable built from generate_pq.cpp.
add_executable(generate_pq generate_pq.cpp)
# Links the PROJECT_NAME target plus whatever
# DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS expands to (set outside this file).
# PRIVATE: executables have no consumers, so nothing needs to propagate.
target_link_libraries(generate_pq
  PRIVATE
    ${PROJECT_NAME}
    ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}
)
2020-09-05 10:25:44 +03:00
# partition_data: executable built from partition_data.cpp.
add_executable(partition_data partition_data.cpp)
# Links the PROJECT_NAME target plus whatever
# DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS expands to (set outside this file).
# PRIVATE: executables have no consumers, so nothing needs to propagate.
target_link_libraries(partition_data
  PRIVATE
    ${PROJECT_NAME}
    ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}
)
# partition_with_ram_budget: executable built from partition_with_ram_budget.cpp.
add_executable(partition_with_ram_budget partition_with_ram_budget.cpp)
# Links the PROJECT_NAME target plus whatever
# DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS expands to (set outside this file).
# PRIVATE: executables have no consumers, so nothing needs to propagate.
target_link_libraries(partition_with_ram_budget
  PRIVATE
    ${PROJECT_NAME}
    ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}
)
# merge_shards: executable built from merge_shards.cpp.
add_executable(merge_shards merge_shards.cpp)
# DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS and DISKANN_ASYNC_LIB are set outside
# this file; their original order on the link line is kept.
# PRIVATE: executables have no consumers, so nothing needs to propagate.
target_link_libraries(merge_shards
  PRIVATE
    ${PROJECT_NAME}
    ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}
    ${DISKANN_ASYNC_LIB}
)
# create_disk_layout: executable built from create_disk_layout.cpp.
add_executable(create_disk_layout create_disk_layout.cpp)
# DISKANN_ASYNC_LIB and DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS are set outside
# this file; their original order on the link line is kept.
# PRIVATE: executables have no consumers, so nothing needs to propagate.
target_link_libraries(create_disk_layout
  PRIVATE
    ${PROJECT_NAME}
    ${DISKANN_ASYNC_LIB}
    ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}
)
# generate_synthetic_labels: executable built from generate_synthetic_labels.cpp.
add_executable(generate_synthetic_labels generate_synthetic_labels.cpp)
# PRIVATE: an executable has no consumers, so its link dependencies do not
# need to propagate. The keyword form also avoids the legacy plain signature.
target_link_libraries(generate_synthetic_labels
  PRIVATE
    ${PROJECT_NAME}
    Boost::program_options
)
# stats_label_data: executable built from stats_label_data.cpp.
add_executable(stats_label_data stats_label_data.cpp)
# PRIVATE: an executable has no consumers, so its link dependencies do not
# need to propagate. The keyword form also avoids the legacy plain signature.
target_link_libraries(stats_label_data
  PRIVATE
    ${PROJECT_NAME}
    Boost::program_options
)
# Install rules for the utility executables; skipped when MSVC is set.
if(NOT MSVC)
  # Provides the CMAKE_INSTALL_* directory variables.
  include(GNUInstallDirs)

  # No explicit DESTINATION is given for the RUNTIME artifacts, so CMake's
  # default runtime destination applies.
  install(
    TARGETS
      fvecs_to_bin
      fvecs_to_bvecs
      rand_data_gen
      float_bin_to_int8
      ivecs_to_bin
      count_bfs_levels
      tsv_to_bin
      bin_to_tsv
      int8_to_float
      int8_to_float_scale
      uint8_to_float
      uint32_to_uint8
      vector_analysis
      gen_random_slice
      simulate_aggregate_recall
      calculate_recall
      compute_groundtruth
      compute_groundtruth_for_filters
      generate_pq
      partition_data
      partition_with_ram_budget
      merge_shards
      create_disk_layout
      generate_synthetic_labels
      stats_label_data
    RUNTIME
  )
endif()