Query: Fixes plumbing of VectorEmbeddingPolicy to ServiceInterop so that the correct default distance function is chosen (#4538)

* Plumb the collection VectorEmbeddingPolicy to ServiceInterop

* Add query plan baseline tests for vector search

* Correct typo in the query for baseline test

* Fix build errors

* Fix runtime issue in mock setup due to the extra argument for vector embedding policy
This commit is contained in:
neildsh 2024-06-11 06:46:12 -07:00 коммит произвёл GitHub
Родитель 5994b1608b
Коммит 8c8d3e955c
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
24 изменённых файлов: 414 добавлений и 72 удалений

Просмотреть файл

@ -609,6 +609,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.ExecutionContext
inputParameters.SqlQuerySpec,
cosmosQueryContext.ResourceTypeEnum,
partitionKeyDefinition,
containerQueryProperties.VectorEmbeddingPolicy,
inputParameters.PartitionKey != null,
containerQueryProperties.GeospatialType,
cosmosQueryContext.UseSystemPrefix,

Просмотреть файл

@ -14,11 +14,13 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryClient
string resourceId,
IReadOnlyList<Range<string>> effectivePartitionKeyRanges,
PartitionKeyDefinition partitionKeyDefinition,
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
Cosmos.GeospatialType geospatialType)
{
this.ResourceId = resourceId;
this.EffectiveRangesForPartitionKey = effectivePartitionKeyRanges;
this.PartitionKeyDefinition = partitionKeyDefinition;
this.VectorEmbeddingPolicy = vectorEmbeddingPolicy;
this.GeospatialType = geospatialType;
}
@ -27,7 +29,11 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryClient
//A PartitionKey has one range when it is a full PartitionKey value.
//It can span many ranges when it is a prefix PartitionKey for a sub-partitioned container.
public IReadOnlyList<Range<string>> EffectiveRangesForPartitionKey { get; }
public PartitionKeyDefinition PartitionKeyDefinition { get; }
public Cosmos.VectorEmbeddingPolicy VectorEmbeddingPolicy { get; }
public Cosmos.GeospatialType GeospatialType { get; }
}
}

Просмотреть файл

@ -41,6 +41,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryClient
SqlQuerySpec sqlQuerySpec,
Documents.ResourceType resourceType,
Documents.PartitionKeyDefinition partitionKeyDefinition,
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,

Просмотреть файл

@ -120,6 +120,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
public TryCatch<PartitionedQueryExecutionInfo> TryGetPartitionedQueryExecutionInfo(
string querySpecJsonString,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,
@ -131,6 +132,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
TryCatch<PartitionedQueryExecutionInfoInternal> tryGetInternalQueryInfo = this.TryGetPartitionedQueryExecutionInfoInternal(
querySpecJsonString: querySpecJsonString,
partitionKeyDefinition: partitionKeyDefinition,
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
requireFormattableOrderByQuery: requireFormattableOrderByQuery,
isContinuationExpected: isContinuationExpected,
allowNonValueAggregateQuery: allowNonValueAggregateQuery,
@ -180,6 +182,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
internal TryCatch<PartitionedQueryExecutionInfoInternal> TryGetPartitionedQueryExecutionInfoInternal(
string querySpecJsonString,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,
@ -224,6 +227,10 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
uint errorCode;
uint serializedQueryExecutionInfoResultLength;
string vectorEmbeddingPolicyString = vectorEmbeddingPolicy != null ?
JsonConvert.SerializeObject(vectorEmbeddingPolicy) :
null;
unsafe
{
ServiceInteropWrapper.PartitionKeyRangesApiOptions partitionKeyRangesApiOptions =
@ -241,13 +248,15 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
fixed (byte* bytePtr = buffer)
{
errorCode = ServiceInteropWrapper.GetPartitionKeyRangesFromQuery3(
errorCode = ServiceInteropWrapper.GetPartitionKeyRangesFromQuery4(
this.serviceProvider,
querySpecJsonString,
partitionKeyRangesApiOptions,
allParts,
partsLengths,
(uint)partitionKeyDefinition.Paths.Count,
vectorEmbeddingPolicyString,
vectorEmbeddingPolicyString?.Length ?? 0,
new IntPtr(bytePtr),
(uint)buffer.Length,
out serializedQueryExecutionInfoResultLength);

Просмотреть файл

@ -26,6 +26,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
SqlQuerySpec sqlQuerySpec,
Documents.ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
QueryFeatures supportedQueryFeatures,
bool hasLogicalPartitionKey,
bool useSystemPrefix,
@ -48,6 +49,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
sqlQuerySpec,
resourceType,
partitionKeyDefinition,
vectorEmbeddingPolicy,
hasLogicalPartitionKey,
useSystemPrefix,
geospatialType,
@ -76,6 +78,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
SqlQuerySpec sqlQuerySpec,
Documents.ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool hasLogicalPartitionKey,
bool useSystemPrefix,
GeospatialType geospatialType,
@ -97,6 +100,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
sqlQuerySpec,
resourceType,
partitionKeyDefinition,
vectorEmbeddingPolicy,
hasLogicalPartitionKey,
useSystemPrefix,
geospatialType,
@ -116,6 +120,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
SqlQuerySpec sqlQuerySpec,
Documents.ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool hasLogicalPartitionKey,
bool useSystemPrefix,
Cosmos.GeospatialType geospatialType,
@ -127,6 +132,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
sqlQuerySpec: sqlQuerySpec,
resourceType: resourceType,
partitionKeyDefinition: partitionKeyDefinition,
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,

Просмотреть файл

@ -51,6 +51,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
SqlQuerySpec sqlQuerySpec,
Documents.ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool hasLogicalPartitionKey,
GeospatialType geospatialType,
bool useSystemPrefix,
@ -82,6 +83,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
sqlQuerySpec,
resourceType,
partitionKeyDefinition,
vectorEmbeddingPolicy,
QueryPlanRetriever.SupportedQueryFeatures,
hasLogicalPartitionKey,
useSystemPrefix,

Просмотреть файл

@ -317,6 +317,7 @@ namespace Microsoft.Azure.Cosmos.Query
allowNonValueAggregates: false,
useSystemPrefix: false,
partitionKeyDefinition: partitionKeyDefinition,
vectorEmbeddingPolicy: collection.VectorEmbeddingPolicy,
queryPartitionProvider: queryPartitionProvider,
clientApiVersion: version,
geospatialType: collection.GeospatialConfig.GeospatialType,

Просмотреть файл

@ -165,6 +165,7 @@ namespace Microsoft.Azure.Cosmos.Query
public async Task<PartitionedQueryExecutionInfo> GetPartitionedQueryExecutionInfoAsync(
PartitionKeyDefinition partitionKeyDefinition,
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,
@ -180,6 +181,7 @@ namespace Microsoft.Azure.Cosmos.Query
TryCatch<PartitionedQueryExecutionInfo> tryGetPartitionedQueryExecutionInfo = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: JsonConvert.SerializeObject(this.QuerySpec),
partitionKeyDefinition: partitionKeyDefinition,
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
requireFormattableOrderByQuery: requireFormattableOrderByQuery,
isContinuationExpected: isContinuationExpected,
allowNonValueAggregateQuery: allowNonValueAggregateQuery,

Просмотреть файл

@ -99,6 +99,7 @@ namespace Microsoft.Azure.Cosmos.Query
//need to make it not rely on information from collection cache.
PartitionedQueryExecutionInfo partitionedQueryExecutionInfo = await queryExecutionContext.GetPartitionedQueryExecutionInfoAsync(
partitionKeyDefinition: collection.PartitionKey,
vectorEmbeddingPolicy: collection.VectorEmbeddingPolicy,
requireFormattableOrderByQuery: true,
isContinuationExpected: isContinuationExpected,
allowNonValueAggregateQuery: true,

Просмотреть файл

@ -86,6 +86,7 @@ namespace Microsoft.Azure.Cosmos
containerProperties.ResourceId,
effectivePartitionKeyRange,
containerProperties.PartitionKey,
containerProperties.VectorEmbeddingPolicy,
containerProperties.GeospatialConfig.GeospatialType);
}
@ -93,6 +94,7 @@ namespace Microsoft.Azure.Cosmos
SqlQuerySpec sqlQuerySpec,
ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,
@ -117,6 +119,7 @@ namespace Microsoft.Azure.Cosmos
return (await this.documentClient.QueryPartitionProvider).TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: queryString,
partitionKeyDefinition: partitionKeyDefinition,
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
requireFormattableOrderByQuery: requireFormattableOrderByQuery,
isContinuationExpected: isContinuationExpected,
allowNonValueAggregateQuery: allowNonValueAggregateQuery,

Просмотреть файл

@ -35,6 +35,7 @@ namespace Microsoft.Azure.Cosmos.Routing
bool allowNonValueAggregates,
bool useSystemPrefix,
PartitionKeyDefinition partitionKeyDefinition,
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
QueryPartitionProvider queryPartitionProvider,
string clientApiVersion,
Cosmos.GeospatialType geospatialType,
@ -58,6 +59,7 @@ namespace Microsoft.Azure.Cosmos.Routing
TryCatch<PartitionedQueryExecutionInfo> tryGetPartitionQueryExecutionInfo = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: querySpecJsonString,
partitionKeyDefinition: partitionKeyDefinition,
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
requireFormattableOrderByQuery: VersionUtility.IsLaterThan(clientApiVersion, HttpConstants.VersionDates.v2016_11_14),
isContinuationExpected: isContinuationExpected,
allowNonValueAggregateQuery: allowNonValueAggregates,

Просмотреть файл

@ -1723,9 +1723,10 @@ namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests
ContainerQueryProperties containerQueryProperties = new ContainerQueryProperties(
containerResponse.Resource.ResourceId,
null,
effectivePartitionKeyRanges: null,
//new List<Documents.Routing.Range<string>> { new Documents.Routing.Range<string>("AA", "AA", true, true) },
containerResponse.Resource.PartitionKey,
vectorEmbeddingPolicy: null,
containerResponse.Resource.GeospatialConfig.GeospatialType);
// There should only be one range since the EPK option is set.

Просмотреть файл

@ -54,8 +54,9 @@ namespace Microsoft.Azure.Cosmos.EmulatorTests.Query
ContainerQueryProperties containerQueryProperties = new ContainerQueryProperties(
containerResponse.Resource.ResourceId,
null,
effectivePartitionKeyRanges: null,
containerResponse.Resource.PartitionKey,
vectorEmbeddingPolicy: null,
containerResponse.Resource.GeospatialConfig.GeospatialType);
IReadOnlyList<FeedRange> feedTokens = await container.GetFeedRangesAsync();

Просмотреть файл

@ -121,6 +121,7 @@ namespace Microsoft.Azure.Cosmos.Query
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryPlan = QueryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: JsonConvert.SerializeObject(sqlQuerySpec),
partitionKeyDefinition: PartitionKeyDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,

Просмотреть файл

@ -0,0 +1,176 @@
<Results>
<Result>
<Input>
<Description>Euclidean Distance</Description>
<Query>SELECT TOP 10 c.title AS Title, VectorDistance(c.embedding, @vectorEmbedding, true) AS SimilarityScore
FROM c
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)</Query>
<PartitionKeys>
<Key>/PartitionKey</Key>
</PartitionKeys>
<PartitionKeyType>Hash</PartitionKeyType>
<GeospatialType>Geography</GeospatialType>
<QueryParameters><![CDATA[[
{
"name": "@vectorEmbedding",
"value": [
0.0039695268496870995,
0.027338456362485886,
-0.005676387343555689,
-0.013547309674322605,
-0.002445543883368373,
0.01579204574227333,
-0.016796082258224487,
-0.012471556663513184
]
}
]]]></QueryParameters>
</Input>
<Output>
<PartitionedQueryExecutionInfoInternal>
<QueryInfo>
<DistinctType>None</DistinctType>
<Top>10</Top>
<Offset />
<Limit />
<GroupByExpressions />
<OrderBy>
<SortOrder>Ascending</SortOrder>
</OrderBy>
<OrderByExpressions>
<OrderByExpression>VectorDistance(c.embedding, @vectorEmbedding, true)</OrderByExpression>
</OrderByExpressions>
<Aggregates />
<GroupByAliasToAggregateType />
<GroupByAliases />
<HasSelectValue>False</HasSelectValue>
</QueryInfo>
<QueryRanges>
<Range>
<Range>[[],"Infinity")</Range>
</Range>
</QueryRanges>
<RewrittenQuery><![CDATA[SELECT TOP 10 c._rid, [{"item": VectorDistance(c.embedding, @vectorEmbedding, true)}] AS orderByItems, {"Title": c.title, "SimilarityScore": VectorDistance(c.embedding, @vectorEmbedding, true)} AS payload
FROM c
WHERE ({documentdb-formattableorderbyquery-filter})
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)]]></RewrittenQuery>
</PartitionedQueryExecutionInfoInternal>
</Output>
</Result>
<Result>
<Input>
<Description>Cosine Similarity</Description>
<Query>SELECT TOP 10 c.title AS Title, VectorDistance(c.embedding, @vectorEmbedding, true) AS SimilarityScore
FROM c
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)</Query>
<PartitionKeys>
<Key>/PartitionKey</Key>
</PartitionKeys>
<PartitionKeyType>Hash</PartitionKeyType>
<GeospatialType>Geography</GeospatialType>
<QueryParameters><![CDATA[[
{
"name": "@vectorEmbedding",
"value": [
0.0039695268496870995,
0.027338456362485886,
-0.005676387343555689,
-0.013547309674322605,
-0.002445543883368373,
0.01579204574227333,
-0.016796082258224487,
-0.012471556663513184
]
}
]]]></QueryParameters>
</Input>
<Output>
<PartitionedQueryExecutionInfoInternal>
<QueryInfo>
<DistinctType>None</DistinctType>
<Top>10</Top>
<Offset />
<Limit />
<GroupByExpressions />
<OrderBy>
<SortOrder>Descending</SortOrder>
</OrderBy>
<OrderByExpressions>
<OrderByExpression>VectorDistance(c.embedding, @vectorEmbedding, true)</OrderByExpression>
</OrderByExpressions>
<Aggregates />
<GroupByAliasToAggregateType />
<GroupByAliases />
<HasSelectValue>False</HasSelectValue>
</QueryInfo>
<QueryRanges>
<Range>
<Range>[[],"Infinity")</Range>
</Range>
</QueryRanges>
<RewrittenQuery><![CDATA[SELECT TOP 10 c._rid, [{"item": VectorDistance(c.embedding, @vectorEmbedding, true)}] AS orderByItems, {"Title": c.title, "SimilarityScore": VectorDistance(c.embedding, @vectorEmbedding, true)} AS payload
FROM c
WHERE ({documentdb-formattableorderbyquery-filter})
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)]]></RewrittenQuery>
</PartitionedQueryExecutionInfoInternal>
</Output>
</Result>
<Result>
<Input>
<Description>Dot Product</Description>
<Query>SELECT TOP 10 c.title AS Title, VectorDistance(c.embedding, @vectorEmbedding, true) AS SimilarityScore
FROM c
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)</Query>
<PartitionKeys>
<Key>/PartitionKey</Key>
</PartitionKeys>
<PartitionKeyType>Hash</PartitionKeyType>
<GeospatialType>Geography</GeospatialType>
<QueryParameters><![CDATA[[
{
"name": "@vectorEmbedding",
"value": [
0.0039695268496870995,
0.027338456362485886,
-0.005676387343555689,
-0.013547309674322605,
-0.002445543883368373,
0.01579204574227333,
-0.016796082258224487,
-0.012471556663513184
]
}
]]]></QueryParameters>
</Input>
<Output>
<PartitionedQueryExecutionInfoInternal>
<QueryInfo>
<DistinctType>None</DistinctType>
<Top>10</Top>
<Offset />
<Limit />
<GroupByExpressions />
<OrderBy>
<SortOrder>Descending</SortOrder>
</OrderBy>
<OrderByExpressions>
<OrderByExpression>VectorDistance(c.embedding, @vectorEmbedding, true)</OrderByExpression>
</OrderByExpressions>
<Aggregates />
<GroupByAliasToAggregateType />
<GroupByAliases />
<HasSelectValue>False</HasSelectValue>
</QueryInfo>
<QueryRanges>
<Range>
<Range>[[],"Infinity")</Range>
</Range>
</QueryRanges>
<RewrittenQuery><![CDATA[SELECT TOP 10 c._rid, [{"item": VectorDistance(c.embedding, @vectorEmbedding, true)}] AS orderByItems, {"Title": c.title, "SimilarityScore": VectorDistance(c.embedding, @vectorEmbedding, true)} AS payload
FROM c
WHERE ({documentdb-formattableorderbyquery-filter})
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)]]></RewrittenQuery>
</PartitionedQueryExecutionInfoInternal>
</Output>
</Result>
</Results>

Просмотреть файл

@ -260,6 +260,9 @@
<None Update="BaselineTest\TestBaseline\QueryPlanBaselineTests.Top.xml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="BaselineTest\TestBaseline\QueryPlanBaselineTests.VectorSearch.xml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="BaselineTest\TestBaseline\OptimisticDirectExecutionQueryBaselineTests.PositiveOptimisticDirectExecutionOutput.xml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>

Просмотреть файл

@ -30,7 +30,6 @@
using Microsoft.Azure.Documents;
using Microsoft.Azure.Documents.Routing;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Moq;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
@ -821,6 +820,7 @@
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryPlan = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: querySpecJsonString,
partitionKeyDefinition: pkDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: true,
allowNonValueAggregateQuery: true,
@ -1311,6 +1311,7 @@
true)
},
new PartitionKeyDefinition(),
vectorEmbeddingPolicy: null,
Cosmos.GeospatialType.Geometry));
}
@ -1334,7 +1335,19 @@
throw new NotImplementedException();
}
public override Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetPartitionedQueryExecutionInfoAsync(SqlQuerySpec sqlQuerySpec, ResourceType resourceType, PartitionKeyDefinition partitionKeyDefinition, bool requireFormattableOrderByQuery, bool isContinuationExpected, bool allowNonValueAggregateQuery, bool hasLogicalPartitionKey, bool allowDCount, bool useSystemPrefix, Cosmos.GeospatialType geospatialType, CancellationToken cancellationToken)
public override Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetPartitionedQueryExecutionInfoAsync(
SqlQuerySpec sqlQuerySpec,
ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,
bool hasLogicalPartitionKey,
bool allowDCount,
bool useSystemPrefix,
Cosmos.GeospatialType geospatialType,
CancellationToken cancellationToken)
{
CosmosSerializerCore serializerCore = new CosmosSerializerCore();
using StreamReader streamReader = new StreamReader(serializerCore.ToStreamSqlQuerySpec(sqlQuerySpec, Documents.ResourceType.Document));

Просмотреть файл

@ -376,6 +376,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query.Pipeline
It.IsAny<SqlQuerySpec>(),
It.IsAny<ResourceType>(),
It.IsAny<PartitionKeyDefinition>(),
It.IsAny<Cosmos.VectorEmbeddingPolicy>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
@ -384,7 +385,19 @@ namespace Microsoft.Azure.Cosmos.Tests.Query.Pipeline
It.IsAny<bool>(),
It.IsAny<Cosmos.GeospatialType>(),
It.IsAny<CancellationToken>()))
.Returns((SqlQuerySpec sqlQuerySpec, ResourceType resourceType, PartitionKeyDefinition partitionKeyDefinition, bool requireFormattableOrderByQuery, bool isContinuationExpected, bool allowNonValueAggregateQuery, bool hasLogicalPartitionKey, bool allowDCount, bool useSystemPrefix, Cosmos.GeospatialType geospatialType, CancellationToken cancellationToken) =>
.Returns((
SqlQuerySpec sqlQuerySpec,
ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,
bool hasLogicalPartitionKey,
bool allowDCount,
bool useSystemPrefix,
Cosmos.GeospatialType geospatialType,
CancellationToken cancellationToken) =>
{
CosmosSerializerCore serializerCore = new();
using StreamReader streamReader = new(serializerCore.ToStreamSqlQuerySpec(sqlQuerySpec, Documents.ResourceType.Document));
@ -612,6 +625,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query.Pipeline
TryCatch<PartitionedQueryExecutionInfoInternal> info = QueryPartitionProviderTestInstance.Object.TryGetPartitionedQueryExecutionInfoInternal(
JsonConvert.SerializeObject(new SqlQuerySpec(query)),
partitionKeyDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,

Просмотреть файл

@ -34,6 +34,7 @@
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryPlan = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: sqlQuerySpec,
partitionKeyDefinition: PartitionKeyDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,
@ -50,6 +51,7 @@
tryGetQueryPlan = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: sqlQuerySpec,
partitionKeyDefinition: PartitionKeyDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,

Просмотреть файл

@ -1342,8 +1342,8 @@
PartitionKeyDefinition pkDefinitions = CreateHashPartitionKey("/key");
return new List<QueryPlanBaselineTestInput>
{
new QueryPlanBaselineTestInput($"{variation.Description} Geography", pkDefinitions, new SqlQuerySpec(variation.Query)) { GeospatialType = Cosmos.GeospatialType.Geography },
new QueryPlanBaselineTestInput($"{variation.Description} Geometry", pkDefinitions, new SqlQuerySpec(variation.Query)) { GeospatialType = Cosmos.GeospatialType.Geometry }
new QueryPlanBaselineTestInput($"{variation.Description} Geography", pkDefinitions, vectorEmbeddingPolicy: null, new SqlQuerySpec(variation.Query), Cosmos.GeospatialType.Geography),
new QueryPlanBaselineTestInput($"{variation.Description} Geometry", pkDefinitions, vectorEmbeddingPolicy : null, new SqlQuerySpec(variation.Query), Cosmos.GeospatialType.Geometry)
};
})
.ToList();
@ -1351,6 +1351,61 @@
this.ExecuteTestSuite(testVariations);
}
/// <summary>
/// Query plan baseline test for vector search: the same VectorDistance query is
/// planned once per distance function configured in the container's vector
/// embedding policy (Euclidean, Cosine, DotProduct).
/// </summary>
[TestMethod]
[Owner("ndeshpan")]
public void VectorSearch()
{
    // Order matters: results are compared against a recorded baseline in this order.
    // NOTE(review): the recorded baseline in this change shows Ascending sort order for
    // Euclidean and Descending for Cosine / Dot Product - confirm when regenerating it.
    List<QueryPlanBaselineTestInput> vectorSearchInputs = new List<QueryPlanBaselineTestInput>();
    vectorSearchInputs.Add(MakeVectorTest("Euclidean Distance", Cosmos.DistanceFunction.Euclidean));
    vectorSearchInputs.Add(MakeVectorTest("Cosine Similarity", Cosmos.DistanceFunction.Cosine));
    vectorSearchInputs.Add(MakeVectorTest("Dot Product", Cosmos.DistanceFunction.DotProduct));

    this.ExecuteTestSuite(vectorSearchInputs);
}
/// <summary>
/// Builds a single vector search baseline input: a TOP 10 query that projects and orders by
/// VectorDistance over <c>c.embedding</c>, run against a container description whose vector
/// embedding policy uses the supplied distance function.
/// </summary>
/// <param name="description">Description recorded for the test case.</param>
/// <param name="distanceFunction">Distance function assigned to the <c>/embedding</c> path in the policy.</param>
/// <returns>The baseline test input (hash partition key on <c>/PartitionKey</c>, Geography geospatial type).</returns>
private static QueryPlanBaselineTestInput MakeVectorTest(string description, Cosmos.DistanceFunction distanceFunction)
{
    // Single embedding definition; Dimensions matches the 8 values in VectorEmbedding.
    Cosmos.Embedding embedding = new Cosmos.Embedding
    {
        Path = "/embedding",
        DataType = Cosmos.VectorDataType.Float32,
        Dimensions = 8,
        DistanceFunction = distanceFunction
    };
    Cosmos.VectorEmbeddingPolicy embeddingPolicy = new Cosmos.VectorEmbeddingPolicy(
        new Collection<Cosmos.Embedding> { embedding });

    // The continuation lines belong to the verbatim string literal; do not re-indent them,
    // the query text is echoed into the recorded baseline.
    string vectorQuery = @"SELECT TOP 10 c.title AS Title, VectorDistance(c.embedding, @vectorEmbedding, true) AS SimilarityScore
FROM c
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)";

    // The query vector is bound as a parameter rather than inlined in the query text.
    SqlParameter vectorParameter = new SqlParameter("@vectorEmbedding", VectorEmbedding);
    SqlQuerySpec querySpec = new SqlQuerySpec(
        vectorQuery,
        new SqlParameterCollection(new SqlParameter[] { vectorParameter }));

    PartitionKeyDefinition hashPartitionKey = CreateHashPartitionKey("/PartitionKey");

    return new QueryPlanBaselineTestInput(
        description,
        hashPartitionKey,
        embeddingPolicy,
        querySpec,
        Cosmos.GeospatialType.Geography);
}
// Query vector bound to the @vectorEmbedding parameter by MakeVectorTest.
// Holds 8 components, the same count as the Dimensions value set in that helper.
private static readonly double[] VectorEmbedding =
{
    0.0039695268496870995,
    0.027338456362485886,
    -0.005676387343555689,
    -0.013547309674322605,
    -0.002445543883368373,
    0.01579204574227333,
    -0.016796082258224487,
    -0.012471556663513184,
};
private static PartitionKeyDefinition CreateHashPartitionKey(
params string[] partitionKeys) => new PartitionKeyDefinition()
{
@ -1440,6 +1495,7 @@
TryCatch<PartitionedQueryExecutionInfoInternal> info = QueryPartitionProviderTestInstance.Object.TryGetPartitionedQueryExecutionInfoInternal(
JsonConvert.SerializeObject(input.SqlQuerySpec),
input.PartitionKeyDefinition,
input.VectorEmbeddingPolicy,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,
@ -1459,20 +1515,38 @@
public sealed class QueryPlanBaselineTestInput : BaselineTestInput
{
internal PartitionKeyDefinition PartitionKeyDefinition { get; set; }
internal SqlQuerySpec SqlQuerySpec { get; set; }
internal Cosmos.GeospatialType? GeospatialType { get; set; }
internal PartitionKeyDefinition PartitionKeyDefinition { get; }
internal Cosmos.VectorEmbeddingPolicy VectorEmbeddingPolicy { get; }
internal SqlQuerySpec SqlQuerySpec { get; }
internal Cosmos.GeospatialType? GeospatialType { get; }
internal QueryPlanBaselineTestInput(
string description,
PartitionKeyDefinition partitionKeyDefinition,
SqlQuerySpec sqlQuerySpec)
: base(description)
: this(description, partitionKeyDefinition, vectorEmbeddingPolicy: null, sqlQuerySpec, geospatialType: null)
{
this.PartitionKeyDefinition = partitionKeyDefinition;
this.SqlQuerySpec = sqlQuerySpec;
}
internal QueryPlanBaselineTestInput(
string description,
PartitionKeyDefinition partitionKeyDefinition,
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
SqlQuerySpec sqlQuerySpec,
Cosmos.GeospatialType? geospatialType)
: base(description)
{
this.PartitionKeyDefinition = partitionKeyDefinition;
this.VectorEmbeddingPolicy = vectorEmbeddingPolicy;
this.SqlQuerySpec = sqlQuerySpec;
this.GeospatialType = geospatialType;
}
public override void SerializeAsXml(XmlWriter xmlWriter)
{
xmlWriter.WriteElementString("Description", this.Description);

Просмотреть файл

@ -32,6 +32,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
It.IsAny<SqlQuerySpec>(),
It.IsAny<ResourceType>(),
It.IsAny<Documents.PartitionKeyDefinition>(),
It.IsAny<Cosmos.VectorEmbeddingPolicy>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
@ -46,6 +47,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
new SqlQuerySpec("selectttttt * from c"),
ResourceType.Document,
new Documents.PartitionKeyDefinition() { Paths = new Collection<string>() { "/id" } },
vectorEmbeddingPolicy:null,
hasLogicalPartitionKey: false,
geospatialType: Cosmos.GeospatialType.Geography,
useSystemPrefix: false,
@ -67,6 +69,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
It.IsAny<SqlQuerySpec>(),
It.IsAny<ResourceType>(),
It.IsAny<Documents.PartitionKeyDefinition>(),
It.IsAny<Cosmos.VectorEmbeddingPolicy>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
@ -82,6 +85,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
new SqlQuerySpec("selectttttt * from c"),
ResourceType.Document,
new Documents.PartitionKeyDefinition() { Paths = new Collection<string>() { "/id" } },
vectorEmbeddingPolicy: null,
hasLogicalPartitionKey: false,
geospatialType: Cosmos.GeospatialType.Geography,
useSystemPrefix: false,
@ -101,6 +105,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
It.IsAny<SqlQuerySpec>(),
It.IsAny<ResourceType>(),
It.IsAny<Documents.PartitionKeyDefinition>(),
It.IsAny<Cosmos.VectorEmbeddingPolicy>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
@ -115,6 +120,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
new SqlQuerySpec("Super secret query that triggers bug"),
ResourceType.Document,
new Documents.PartitionKeyDefinition() { Paths = new Collection<string>() { "/id" } },
vectorEmbeddingPolicy: null,
hasLogicalPartitionKey: false,
geospatialType: Cosmos.GeospatialType.Geography,
useSystemPrefix: false,

Просмотреть файл

@ -172,6 +172,7 @@
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryPlan = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: querySpecJsonString,
partitionKeyDefinition: pkDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: true,
allowNonValueAggregateQuery: true,
@ -326,6 +327,7 @@
true)
},
SubpartitionTests.CreatePartitionKeyDefinition(),
vectorEmbeddingPolicy: null,
Cosmos.GeospatialType.Geometry));
}
@ -349,14 +351,26 @@
throw new NotImplementedException();
}
public override async Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetPartitionedQueryExecutionInfoAsync(SqlQuerySpec sqlQuerySpec, ResourceType resourceType, PartitionKeyDefinition partitionKeyDefinition, bool requireFormattableOrderByQuery, bool isContinuationExpected, bool allowNonValueAggregateQuery, bool hasLogicalPartitionKey, bool allowDCount, bool useSystemPrefix, Cosmos.GeospatialType geospatialType, CancellationToken cancellationToken)
public override Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetPartitionedQueryExecutionInfoAsync(
SqlQuerySpec sqlQuerySpec,
ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,
bool hasLogicalPartitionKey,
bool allowDCount,
bool useSystemPrefix,
Cosmos.GeospatialType geospatialType,
CancellationToken cancellationToken)
{
CosmosSerializerCore serializerCore = new();
using StreamReader streamReader = new(serializerCore.ToStreamSqlQuerySpec(sqlQuerySpec, Documents.ResourceType.Document));
string sqlQuerySpecJsonString = streamReader.ReadToEnd();
(PartitionedQueryExecutionInfo partitionedQueryExecutionInfo, QueryPartitionProvider queryPartitionProvider) = OptimisticDirectExecutionQueryBaselineTests.GetPartitionedQueryExecutionInfoAndPartitionProvider(sqlQuerySpecJsonString, partitionKeyDefinition);
return TryCatch<PartitionedQueryExecutionInfo>.FromResult(partitionedQueryExecutionInfo);
return Task.FromResult(TryCatch<PartitionedQueryExecutionInfo>.FromResult(partitionedQueryExecutionInfo));
}
}
}

Просмотреть файл

@ -713,6 +713,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Routing
QueryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: JsonConvert.SerializeObject(new SqlQuerySpec(queryText)),
partitionKeyDefinition: partitionKeyDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: true,
allowNonValueAggregateQuery: false,
@ -863,6 +864,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Routing
allowNonValueAggregates: true,
useSystemPrefix: false,
partitionKeyDefinition: new PartitionKeyDefinition { Paths = new Collection<string> { testcase.PartitionKey }, Kind = PartitionKind.Hash },
vectorEmbeddingPolicy: null,
queryPartitionProvider: QueryPartitionProviderTestInstance.Object,
clientApiVersion: testcase.ClientApiVersion,
geospatialType: Cosmos.GeospatialType.Geography,

Просмотреть файл

@ -770,6 +770,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Tracing
TryCatch<PartitionedQueryExecutionInfoInternal> info = QueryPartitionProviderTestInstance.Object.TryGetPartitionedQueryExecutionInfoInternal(
Newtonsoft.Json.JsonConvert.SerializeObject(new SqlQuerySpec(query)),
partitionKeyDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,