From 6529a77c3875d325d55e6812f57ab5dd6116e49a Mon Sep 17 00:00:00 2001 From: chenqi Date: Tue, 22 Mar 2022 11:56:18 +0000 Subject: [PATCH] support pip package --- AnnService/inc/Core/Common/TruthSet.h | 3 +- AnnService/inc/Core/Common/cuda/KNN.hxx | 27 ++++-- AnnService/src/Core/VectorIndex.cpp | 42 +++++++-- MANIFEST.in | 1 + Wrappers/PythonClient.vcxproj | 12 +-- Wrappers/PythonCore.vcxproj | 9 +- Wrappers/packages.config | 2 +- setup.py | 108 ++++++++++++++++++++++++ setup.txt | 3 + 9 files changed, 178 insertions(+), 29 deletions(-) create mode 100644 MANIFEST.in create mode 100644 setup.py create mode 100644 setup.txt diff --git a/AnnService/inc/Core/Common/TruthSet.h b/AnnService/inc/Core/Common/TruthSet.h index 267104a..ce03fe6 100644 --- a/AnnService/inc/Core/Common/TruthSet.h +++ b/AnnService/inc/Core/Common/TruthSet.h @@ -165,6 +165,7 @@ namespace SPTAG exit(-1); } + LOG(Helper::LogLevel::LL_Info, "Begin to generate truth for query(%d,%d) and doc(%d,%d)...\n", querySet->Count(), querySet->Dimension(), vectorSet->Count(), vectorSet->Dimension()); std::vector< std::vector > truthset(querySet->Count(), std::vector(K, 0)); std::vector< std::vector > distset(querySet->Count(), std::vector(K, 0)); #pragma omp parallel for @@ -185,7 +186,7 @@ namespace SPTAG } } - + LOG(Helper::LogLevel::LL_Info, "Start to write truth file...\n"); writeTruthFile(truthFile, querySet->Count(), K, truthset, distset, p_truthFileType); auto ptr = SPTAG::f_createIO(); diff --git a/AnnService/inc/Core/Common/cuda/KNN.hxx b/AnnService/inc/Core/Common/cuda/KNN.hxx index b9efd2f..99b94ec 100644 --- a/AnnService/inc/Core/Common/cuda/KNN.hxx +++ b/AnnService/inc/Core/Common/cuda/KNN.hxx @@ -990,12 +990,6 @@ void buildGraph(SPTAG::VectorIndex* index, int m_iGraphSize, int m_iNeighborhood int m_iFeatureDim = index->GetFeatureDim(); int m_disttype = (int)index->GetDistCalcMethod(); - // Make sure that neighborhood size is a power of 2 - if(m_iNeighborhoodSize == 0 || (m_iNeighborhoodSize & 
(m_iNeighborhoodSize-1)) != 0) { - LOG(SPTAG::Helper::LogLevel::LL_Error, "NeighborhoodSize (with scaling factor applied) is %d but must be a power of 2 for GPU construction.\n", m_iNeighborhoodSize); - exit(1); - } - // Have to give compiler-time known bounds on dimensions so that we can store points in registers // This significantly speeds up distance comparisons. // Create other options here for other commonly-used dimension values. @@ -1016,6 +1010,15 @@ void buildGraph(SPTAG::VectorIndex* index, int m_iGraphSize, int m_iNeighborhood else if (m_iFeatureDim <= 768) { buildGraphGPU_Batch(index, (size_t)m_iGraphSize, (size_t)m_iNeighborhoodSize, trees, results, graph, leafSize, NUM_GPUS, balanceFactor); } + else if (m_iFeatureDim <= 1024) { + buildGraphGPU_Batch(index, (size_t)m_iGraphSize, (size_t)m_iNeighborhoodSize, trees, results, graph, leafSize, NUM_GPUS, balanceFactor); + } + else if (m_iFeatureDim <= 2048) { + buildGraphGPU_Batch(index, (size_t)m_iGraphSize, (size_t)m_iNeighborhoodSize, trees, results, graph, leafSize, NUM_GPUS, balanceFactor); + } + else if (m_iFeatureDim <= 4096) { + buildGraphGPU_Batch(index, (size_t)m_iGraphSize, (size_t)m_iNeighborhoodSize, trees, results, graph, leafSize, NUM_GPUS, balanceFactor); + } else { LOG(SPTAG::Helper::LogLevel::LL_Error, "%d dimensions not currently supported for GPU construction.\n"); exit(1); @@ -1036,7 +1039,17 @@ void buildGraph(SPTAG::VectorIndex* index, int m_iGraphSize, int m_iNeighborhood } else if (m_iFeatureDim <= 768) { buildGraphGPU_Batch(index, (size_t)m_iGraphSize, (size_t)m_iNeighborhoodSize, trees, results, graph, leafSize, NUM_GPUS, balanceFactor); - } else { + } + else if (m_iFeatureDim <= 1024) { + buildGraphGPU_Batch(index, (size_t)m_iGraphSize, (size_t)m_iNeighborhoodSize, trees, results, graph, leafSize, NUM_GPUS, balanceFactor); + } + else if (m_iFeatureDim <= 2048) { + buildGraphGPU_Batch(index, (size_t)m_iGraphSize, (size_t)m_iNeighborhoodSize, trees, results, graph, leafSize, 
NUM_GPUS, balanceFactor); + } + else if (m_iFeatureDim <= 4096) { + buildGraphGPU_Batch(index, (size_t)m_iGraphSize, (size_t)m_iNeighborhoodSize, trees, results, graph, leafSize, NUM_GPUS, balanceFactor); + } + else { LOG(SPTAG::Helper::LogLevel::LL_Error, "%d dimensions not currently supported for GPU construction.\n"); exit(1); } diff --git a/AnnService/src/Core/VectorIndex.cpp b/AnnService/src/Core/VectorIndex.cpp index 2ff368f..75295ee 100644 --- a/AnnService/src/Core/VectorIndex.cpp +++ b/AnnService/src/Core/VectorIndex.cpp @@ -822,11 +822,23 @@ void VectorIndex::ApproximateRNG(std::shared_ptr& fullVectors, std::u #define DefineVectorValueType(Name, Type) \ case VectorValueType::Name: \ if(fullVectors->Dimension() <= 64) { \ - getTailNeighborsTPT((Type*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, 64, replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); \ + getTailNeighborsTPT((Type*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); \ } else if (fullVectors->Dimension() <= 100) { \ - getTailNeighborsTPT((Type*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, 100, replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); \ + getTailNeighborsTPT((Type*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); \ + } else if (fullVectors->Dimension() <= 128) { \ + getTailNeighborsTPT((Type*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); \ + } else if (fullVectors->Dimension() <= 200) { \ + getTailNeighborsTPT((Type*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, 
numGPUs, selections); \ + } else if (fullVectors->Dimension() <= 768) { \ + getTailNeighborsTPT((Type*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); \ + } else if (fullVectors->Dimension() <= 1024) { \ + getTailNeighborsTPT((Type*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); \ + } else if (fullVectors->Dimension() <= 2048) { \ + getTailNeighborsTPT((Type*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); \ + } else if (fullVectors->Dimension() <= 4096) { \ + getTailNeighborsTPT((Type*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); \ + } else { \ - LOG(Helper::LogLevel::LL_Error, "Datasets of >100 dimensions not currently supported for GPU Index build\n"); \ + LOG(Helper::LogLevel::LL_Error, "Datasets of >4096 dimensions not currently supported for GPU Index build\n"); \ exit(1); \ } \ break; \ @@ -841,13 +853,31 @@ void VectorIndex::ApproximateRNG(std::shared_ptr& fullVectors, std::u typedef float SUMTYPE; if (fullVectors->Dimension() <= 64) { - getTailNeighborsTPT((float*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, 64, replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); + getTailNeighborsTPT((float*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); } else if (fullVectors->Dimension() <= 100) { - getTailNeighborsTPT((float*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, 100, replicaCount, numThreads, numTrees, leafSize, metric, 
numGPUs, selections); + getTailNeighborsTPT((float*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); + } + else if (fullVectors->Dimension() <= 128) { + getTailNeighborsTPT((float*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); + } + else if (fullVectors->Dimension() <= 200) { + getTailNeighborsTPT((float*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); + } + else if (fullVectors->Dimension() <= 768) { + getTailNeighborsTPT((float*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); + } + else if (fullVectors->Dimension() <= 1024) { + getTailNeighborsTPT((float*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); + } + else if (fullVectors->Dimension() <= 2048) { + getTailNeighborsTPT((float*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); + } + else if (fullVectors->Dimension() <= 4096) { + getTailNeighborsTPT((float*)fullVectors->GetData(), fullVectors->Count(), this, exceptIDS, fullVectors->Dimension(), replicaCount, numThreads, numTrees, leafSize, metric, numGPUs, selections); } else { - LOG(Helper::LogLevel::LL_Error, "Datasets of >100 dimensions not currently supported for GPU Index build\n"); + LOG(Helper::LogLevel::LL_Error, "Datasets of >4096 dimensions not currently supported for GPU Index build\n"); exit(1); } } diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 
index 0000000..0be181b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-include sptag *.py _SPTAG* Server.exe server \ No newline at end of file diff --git a/Wrappers/PythonClient.vcxproj b/Wrappers/PythonClient.vcxproj index e3f22b3..cf37678 100644 --- a/Wrappers/PythonClient.vcxproj +++ b/Wrappers/PythonClient.vcxproj @@ -1,6 +1,6 @@  - + Debug @@ -83,14 +83,9 @@ - CoreLibrary.lib;SocketLib.lib;$(SolutionDir)packages\python.3.10.1\tools\libs\python310.lib;%(AdditionalDependencies) + CoreLibrary.lib;SocketLib.lib;%(AdditionalDependencies) - - - $(SolutionDir)packages\python.3.10.1\tools\include\;%(AdditionalIncludeDirectories) - - Level3 @@ -123,7 +118,6 @@ _WINDLL;_SCL_SECURE_NO_WARNINGS;SWIG_PYTHON_INTERPRETER_NO_DEBUG;%(PreprocessorDefinitions) Guard ProgramDatabase - MultiThreadedDebugDLL /guard:cf %(AdditionalOptions) @@ -181,11 +175,11 @@ This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - + \ No newline at end of file diff --git a/Wrappers/PythonCore.vcxproj b/Wrappers/PythonCore.vcxproj index 5b0ece2..2785fb4 100644 --- a/Wrappers/PythonCore.vcxproj +++ b/Wrappers/PythonCore.vcxproj @@ -1,5 +1,6 @@  + Debug @@ -82,13 +83,12 @@ - CoreLibrary.lib;$(SolutionDir)packages\python.3.10.1\tools\libs\python310.lib;%(AdditionalDependencies) + CoreLibrary.lib;%(AdditionalDependencies) _WINDLL;SWIG_PYTHON_INTERPRETER_NO_DEBUG;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - $(SolutionDir)packages\python.3.10.1\tools\include\;%(AdditionalIncludeDirectories) Guard ProgramDatabase _WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) @@ -113,8 +113,7 @@ - - + @@ -126,7 +125,7 @@ This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. 
The missing file is {0}. - + \ No newline at end of file diff --git a/Wrappers/packages.config b/Wrappers/packages.config index 2b70154..784b338 100644 --- a/Wrappers/packages.config +++ b/Wrappers/packages.config @@ -7,6 +7,6 @@ - + \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..3c7229e --- /dev/null +++ b/setup.py @@ -0,0 +1,108 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +""" +Script for installation and distribution. +You can use environment variable `SPTAG_RELEASE` to set release version. +If release version is not set, default to a development build whose version string will be `0.0.0.dev`. +## Prepare Environment ## +Install dependencies: + $ pip install -U -r setup.txt +## Development ## +Build and install for development: + $ python setup.py develop +Uninstall: + $ pip uninstall sptag +Remove generated files: (use "--all" to remove toolchain and built wheel) + $ python setup.py clean [--all] +## Release ## +Build wheel package: + $ SPTAG_RELEASE=1.0 python setup.py bdist_wheel -p win_amd64 +Where "1.0" is version string and "win_amd64" is platform. +The platform may also be "manylinux1_x86_64". 
+""" + +from distutils.cmd import Command +from distutils.command.build import build +from distutils.command.clean import clean +import glob +import os +import shutil +import sys + +import setuptools +from setuptools.command.develop import develop + +release = os.environ.get('SPTAG_RELEASE') +python_version = "%d.%d" % (sys.version_info.major, sys.version_info.minor) +print ("Python version:%s" % python_version) + +def _setup(): + setuptools.setup( + name = 'sptag', + version = release or '0.0.0.dev', + description = 'SPTAG: A library for fast approximate nearest neighbor search', + long_description = open('README.md', encoding='utf-8').read(), + long_description_content_type = 'text/markdown', + url = 'https://github.com/Microsoft/SPTAG', + author = 'Microsoft SPTAG Team', + author_email = 'cheqi@microsoft.com', + license = 'MIT', + include_package_data=True, + classifiers = [ + 'License :: OSI Approved :: MIT License', + 'Operating System :: Microsoft :: Windows :: Windows 10', + 'Operating System :: POSIX :: Linux', + 'Programming Language :: Python :: 3', + 'Intended Audience :: Science/Research', + ], + + packages = _find_python_packages(), + python_requires = '>=3.7', + install_requires = ['numpy'], + + cmdclass = { + 'build': Build, + 'clean': Clean, + 'develop': Develop, + } + ) + +def _find_python_packages(): + if os.path.exists('sptag'): shutil.rmtree('sptag') + + if os.path.exists('Release'): + shutil.copytree('Release', 'sptag') + elif os.path.exists(os.path.join('x64', 'Release')): + shutil.copytree(os.path.join('x64', 'Release'), 'sptag') + f = open(os.path.join('sptag', '__init__.py'), 'w') + f.close() + return ['sptag'] + +class Build(build): + def run(self): + if not release: + sys.exit('Please set environment variable "SPTAG_RELEASE="') + + open('sptag/version.py', 'w').write(f"__version__ = '{release}'") + super().run() + +class Develop(develop): + def run(self): + open('sptag/version.py', 'w').write("__version__ = '0.0.0.dev'") + super().run() + 
+class Clean(clean): + def finalize_options(self): + self._all = self.all + self.all = True # always use `clean --all` + super().finalize_options() + + def run(self): + super().run() + shutil.rmtree('sptag.egg-info', ignore_errors=True) + if self._all: + shutil.rmtree('dist', ignore_errors=True) + +if __name__ == '__main__': + _setup() \ No newline at end of file diff --git a/setup.txt b/setup.txt new file mode 100644 index 0000000..07ad3f8 --- /dev/null +++ b/setup.txt @@ -0,0 +1,3 @@ +pip < 21.4 +setuptools < 61 +wheel < 0.38 \ No newline at end of file