Query: Fixes plumbing VectorEmbeddingPolicy to ServiceInterop to choose correct default distance function (#4538)

* Plumb the collection VectorEmbeddingPolicy to ServiceInterop

* Add query plan baseline tests for vector search

* Correct typo in the query for baseline test

* Fix build errors

* fix runtime issue in mock setup due to the extra argument for vector embedding policy
This commit is contained in:
neildsh 2024-06-11 06:46:12 -07:00 коммит произвёл GitHub
Родитель 5994b1608b
Коммит 8c8d3e955c
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
24 изменённых файлов: 414 добавлений и 72 удалений

Просмотреть файл

@ -609,6 +609,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.ExecutionContext
inputParameters.SqlQuerySpec,
cosmosQueryContext.ResourceTypeEnum,
partitionKeyDefinition,
containerQueryProperties.VectorEmbeddingPolicy,
inputParameters.PartitionKey != null,
containerQueryProperties.GeospatialType,
cosmosQueryContext.UseSystemPrefix,

Просмотреть файл

@ -13,12 +13,14 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryClient
public ContainerQueryProperties(
string resourceId,
IReadOnlyList<Range<string>> effectivePartitionKeyRanges,
PartitionKeyDefinition partitionKeyDefinition,
PartitionKeyDefinition partitionKeyDefinition,
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
Cosmos.GeospatialType geospatialType)
{
this.ResourceId = resourceId;
this.EffectiveRangesForPartitionKey = effectivePartitionKeyRanges;
this.PartitionKeyDefinition = partitionKeyDefinition;
this.PartitionKeyDefinition = partitionKeyDefinition;
this.VectorEmbeddingPolicy = vectorEmbeddingPolicy;
this.GeospatialType = geospatialType;
}
@ -27,7 +29,11 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryClient
//A PartitionKey has one range when it is a full PartitionKey value.
//It can span many it is a prefix PartitionKey for a sub-partitioned container.
public IReadOnlyList<Range<string>> EffectiveRangesForPartitionKey { get; }
public PartitionKeyDefinition PartitionKeyDefinition { get; }
public PartitionKeyDefinition PartitionKeyDefinition { get; }
public Cosmos.VectorEmbeddingPolicy VectorEmbeddingPolicy { get; }
public Cosmos.GeospatialType GeospatialType { get; }
}
}

Просмотреть файл

@ -40,7 +40,8 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryClient
public abstract Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetPartitionedQueryExecutionInfoAsync(
SqlQuerySpec sqlQuerySpec,
Documents.ResourceType resourceType,
Documents.PartitionKeyDefinition partitionKeyDefinition,
Documents.PartitionKeyDefinition partitionKeyDefinition,
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,

Просмотреть файл

@ -120,6 +120,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
public TryCatch<PartitionedQueryExecutionInfo> TryGetPartitionedQueryExecutionInfo(
string querySpecJsonString,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,
@ -130,7 +131,8 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
{
TryCatch<PartitionedQueryExecutionInfoInternal> tryGetInternalQueryInfo = this.TryGetPartitionedQueryExecutionInfoInternal(
querySpecJsonString: querySpecJsonString,
partitionKeyDefinition: partitionKeyDefinition,
partitionKeyDefinition: partitionKeyDefinition,
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
requireFormattableOrderByQuery: requireFormattableOrderByQuery,
isContinuationExpected: isContinuationExpected,
allowNonValueAggregateQuery: allowNonValueAggregateQuery,
@ -179,7 +181,8 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
internal TryCatch<PartitionedQueryExecutionInfoInternal> TryGetPartitionedQueryExecutionInfoInternal(
string querySpecJsonString,
PartitionKeyDefinition partitionKeyDefinition,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,
@ -222,8 +225,12 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
Span<byte> buffer = stackalloc byte[QueryPartitionProvider.InitialBufferSize];
uint errorCode;
uint serializedQueryExecutionInfoResultLength;
uint serializedQueryExecutionInfoResultLength;
string vectorEmbeddingPolicyString = vectorEmbeddingPolicy != null ?
JsonConvert.SerializeObject(vectorEmbeddingPolicy) :
null;
unsafe
{
ServiceInteropWrapper.PartitionKeyRangesApiOptions partitionKeyRangesApiOptions =
@ -241,13 +248,15 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
fixed (byte* bytePtr = buffer)
{
errorCode = ServiceInteropWrapper.GetPartitionKeyRangesFromQuery3(
errorCode = ServiceInteropWrapper.GetPartitionKeyRangesFromQuery4(
this.serviceProvider,
querySpecJsonString,
partitionKeyRangesApiOptions,
allParts,
partsLengths,
(uint)partitionKeyDefinition.Paths.Count,
(uint)partitionKeyDefinition.Paths.Count,
vectorEmbeddingPolicyString,
vectorEmbeddingPolicyString?.Length ?? 0,
new IntPtr(bytePtr),
(uint)buffer.Length,
out serializedQueryExecutionInfoResultLength);

Просмотреть файл

@ -25,7 +25,8 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
public async Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetQueryPlanAsync(
SqlQuerySpec sqlQuerySpec,
Documents.ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
QueryFeatures supportedQueryFeatures,
bool hasLogicalPartitionKey,
bool useSystemPrefix,
@ -47,7 +48,8 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryInfo = await this.TryGetQueryInfoAsync(
sqlQuerySpec,
resourceType,
partitionKeyDefinition,
partitionKeyDefinition,
vectorEmbeddingPolicy,
hasLogicalPartitionKey,
useSystemPrefix,
geospatialType,
@ -75,7 +77,8 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
QueryFeatures supportedQueryFeatures,
SqlQuerySpec sqlQuerySpec,
Documents.ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool hasLogicalPartitionKey,
bool useSystemPrefix,
GeospatialType geospatialType,
@ -96,7 +99,8 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryInfo = await this.TryGetQueryInfoAsync(
sqlQuerySpec,
resourceType,
partitionKeyDefinition,
partitionKeyDefinition,
vectorEmbeddingPolicy,
hasLogicalPartitionKey,
useSystemPrefix,
geospatialType,
@ -115,7 +119,8 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
private Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetQueryInfoAsync(
SqlQuerySpec sqlQuerySpec,
Documents.ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool hasLogicalPartitionKey,
bool useSystemPrefix,
Cosmos.GeospatialType geospatialType,
@ -126,7 +131,8 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
return this.queryClient.TryGetPartitionedQueryExecutionInfoAsync(
sqlQuerySpec: sqlQuerySpec,
resourceType: resourceType,
partitionKeyDefinition: partitionKeyDefinition,
partitionKeyDefinition: partitionKeyDefinition,
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,

Просмотреть файл

@ -50,7 +50,8 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
CosmosQueryClient queryClient,
SqlQuerySpec sqlQuerySpec,
Documents.ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool hasLogicalPartitionKey,
GeospatialType geospatialType,
bool useSystemPrefix,
@ -81,7 +82,8 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryPlan = await queryPlanHandler.TryGetQueryPlanAsync(
sqlQuerySpec,
resourceType,
partitionKeyDefinition,
partitionKeyDefinition,
vectorEmbeddingPolicy,
QueryPlanRetriever.SupportedQueryFeatures,
hasLogicalPartitionKey,
useSystemPrefix,

Просмотреть файл

@ -316,7 +316,8 @@ namespace Microsoft.Azure.Cosmos.Query
allowDCount: false,
allowNonValueAggregates: false,
useSystemPrefix: false,
partitionKeyDefinition: partitionKeyDefinition,
partitionKeyDefinition: partitionKeyDefinition,
vectorEmbeddingPolicy: collection.VectorEmbeddingPolicy,
queryPartitionProvider: queryPartitionProvider,
clientApiVersion: version,
geospatialType: collection.GeospatialConfig.GeospatialType,

Просмотреть файл

@ -164,7 +164,8 @@ namespace Microsoft.Azure.Cosmos.Query
public Guid CorrelatedActivityId { get; }
public async Task<PartitionedQueryExecutionInfo> GetPartitionedQueryExecutionInfoAsync(
PartitionKeyDefinition partitionKeyDefinition,
PartitionKeyDefinition partitionKeyDefinition,
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,
@ -179,7 +180,8 @@ namespace Microsoft.Azure.Cosmos.Query
QueryPartitionProvider queryPartitionProvider = await this.Client.GetQueryPartitionProviderAsync();
TryCatch<PartitionedQueryExecutionInfo> tryGetPartitionedQueryExecutionInfo = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: JsonConvert.SerializeObject(this.QuerySpec),
partitionKeyDefinition: partitionKeyDefinition,
partitionKeyDefinition: partitionKeyDefinition,
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
requireFormattableOrderByQuery: requireFormattableOrderByQuery,
isContinuationExpected: isContinuationExpected,
allowNonValueAggregateQuery: allowNonValueAggregateQuery,

Просмотреть файл

@ -98,7 +98,8 @@ namespace Microsoft.Azure.Cosmos.Query
//if collection is deleted/created with same name.
//need to make it not rely on information from collection cache.
PartitionedQueryExecutionInfo partitionedQueryExecutionInfo = await queryExecutionContext.GetPartitionedQueryExecutionInfoAsync(
partitionKeyDefinition: collection.PartitionKey,
partitionKeyDefinition: collection.PartitionKey,
vectorEmbeddingPolicy: collection.VectorEmbeddingPolicy,
requireFormattableOrderByQuery: true,
isContinuationExpected: isContinuationExpected,
allowNonValueAggregateQuery: true,

Просмотреть файл

@ -85,14 +85,16 @@ namespace Microsoft.Azure.Cosmos
return new ContainerQueryProperties(
containerProperties.ResourceId,
effectivePartitionKeyRange,
containerProperties.PartitionKey,
containerProperties.PartitionKey,
containerProperties.VectorEmbeddingPolicy,
containerProperties.GeospatialConfig.GeospatialType);
}
public override async Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetPartitionedQueryExecutionInfoAsync(
SqlQuerySpec sqlQuerySpec,
ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,
@ -116,7 +118,8 @@ namespace Microsoft.Azure.Cosmos
return (await this.documentClient.QueryPartitionProvider).TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: queryString,
partitionKeyDefinition: partitionKeyDefinition,
partitionKeyDefinition: partitionKeyDefinition,
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
requireFormattableOrderByQuery: requireFormattableOrderByQuery,
isContinuationExpected: isContinuationExpected,
allowNonValueAggregateQuery: allowNonValueAggregateQuery,

Просмотреть файл

@ -35,6 +35,7 @@ namespace Microsoft.Azure.Cosmos.Routing
bool allowNonValueAggregates,
bool useSystemPrefix,
PartitionKeyDefinition partitionKeyDefinition,
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
QueryPartitionProvider queryPartitionProvider,
string clientApiVersion,
Cosmos.GeospatialType geospatialType,
@ -57,7 +58,8 @@ namespace Microsoft.Azure.Cosmos.Routing
TryCatch<PartitionedQueryExecutionInfo> tryGetPartitionQueryExecutionInfo = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: querySpecJsonString,
partitionKeyDefinition: partitionKeyDefinition,
partitionKeyDefinition: partitionKeyDefinition,
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
requireFormattableOrderByQuery: VersionUtility.IsLaterThan(clientApiVersion, HttpConstants.VersionDates.v2016_11_14),
isContinuationExpected: isContinuationExpected,
allowNonValueAggregateQuery: allowNonValueAggregates,

Просмотреть файл

@ -1723,9 +1723,10 @@ namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests
ContainerQueryProperties containerQueryProperties = new ContainerQueryProperties(
containerResponse.Resource.ResourceId,
null,
effectivePartitionKeyRanges: null,
//new List<Documents.Routing.Range<string>> { new Documents.Routing.Range<string>("AA", "AA", true, true) },
containerResponse.Resource.PartitionKey,
containerResponse.Resource.PartitionKey,
vectorEmbeddingPolicy: null,
containerResponse.Resource.GeospatialConfig.GeospatialType);
// There should only be one range since the EPK option is set.

Просмотреть файл

@ -54,8 +54,9 @@ namespace Microsoft.Azure.Cosmos.EmulatorTests.Query
ContainerQueryProperties containerQueryProperties = new ContainerQueryProperties(
containerResponse.Resource.ResourceId,
null,
containerResponse.Resource.PartitionKey,
effectivePartitionKeyRanges: null,
containerResponse.Resource.PartitionKey,
vectorEmbeddingPolicy: null,
containerResponse.Resource.GeospatialConfig.GeospatialType);
IReadOnlyList<FeedRange> feedTokens = await container.GetFeedRangesAsync();

Просмотреть файл

@ -120,7 +120,8 @@ namespace Microsoft.Azure.Cosmos.Query
{
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryPlan = QueryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: JsonConvert.SerializeObject(sqlQuerySpec),
partitionKeyDefinition: PartitionKeyDefinition,
partitionKeyDefinition: PartitionKeyDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,

Просмотреть файл

@ -0,0 +1,176 @@
<Results>
<Result>
<Input>
<Description>Euclidean Distance</Description>
<Query>SELECT TOP 10 c.title AS Title, VectorDistance(c.embedding, @vectorEmbedding, true) AS SimilarityScore
FROM c
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)</Query>
<PartitionKeys>
<Key>/PartitionKey</Key>
</PartitionKeys>
<PartitionKeyType>Hash</PartitionKeyType>
<GeospatialType>Geography</GeospatialType>
<QueryParameters><![CDATA[[
{
"name": "@vectorEmbedding",
"value": [
0.0039695268496870995,
0.027338456362485886,
-0.005676387343555689,
-0.013547309674322605,
-0.002445543883368373,
0.01579204574227333,
-0.016796082258224487,
-0.012471556663513184
]
}
]]]></QueryParameters>
</Input>
<Output>
<PartitionedQueryExecutionInfoInternal>
<QueryInfo>
<DistinctType>None</DistinctType>
<Top>10</Top>
<Offset />
<Limit />
<GroupByExpressions />
<OrderBy>
<SortOrder>Ascending</SortOrder>
</OrderBy>
<OrderByExpressions>
<OrderByExpression>VectorDistance(c.embedding, @vectorEmbedding, true)</OrderByExpression>
</OrderByExpressions>
<Aggregates />
<GroupByAliasToAggregateType />
<GroupByAliases />
<HasSelectValue>False</HasSelectValue>
</QueryInfo>
<QueryRanges>
<Range>
<Range>[[],"Infinity")</Range>
</Range>
</QueryRanges>
<RewrittenQuery><![CDATA[SELECT TOP 10 c._rid, [{"item": VectorDistance(c.embedding, @vectorEmbedding, true)}] AS orderByItems, {"Title": c.title, "SimilarityScore": VectorDistance(c.embedding, @vectorEmbedding, true)} AS payload
FROM c
WHERE ({documentdb-formattableorderbyquery-filter})
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)]]></RewrittenQuery>
</PartitionedQueryExecutionInfoInternal>
</Output>
</Result>
<Result>
<Input>
<Description>Cosine Similarity</Description>
<Query>SELECT TOP 10 c.title AS Title, VectorDistance(c.embedding, @vectorEmbedding, true) AS SimilarityScore
FROM c
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)</Query>
<PartitionKeys>
<Key>/PartitionKey</Key>
</PartitionKeys>
<PartitionKeyType>Hash</PartitionKeyType>
<GeospatialType>Geography</GeospatialType>
<QueryParameters><![CDATA[[
{
"name": "@vectorEmbedding",
"value": [
0.0039695268496870995,
0.027338456362485886,
-0.005676387343555689,
-0.013547309674322605,
-0.002445543883368373,
0.01579204574227333,
-0.016796082258224487,
-0.012471556663513184
]
}
]]]></QueryParameters>
</Input>
<Output>
<PartitionedQueryExecutionInfoInternal>
<QueryInfo>
<DistinctType>None</DistinctType>
<Top>10</Top>
<Offset />
<Limit />
<GroupByExpressions />
<OrderBy>
<SortOrder>Descending</SortOrder>
</OrderBy>
<OrderByExpressions>
<OrderByExpression>VectorDistance(c.embedding, @vectorEmbedding, true)</OrderByExpression>
</OrderByExpressions>
<Aggregates />
<GroupByAliasToAggregateType />
<GroupByAliases />
<HasSelectValue>False</HasSelectValue>
</QueryInfo>
<QueryRanges>
<Range>
<Range>[[],"Infinity")</Range>
</Range>
</QueryRanges>
<RewrittenQuery><![CDATA[SELECT TOP 10 c._rid, [{"item": VectorDistance(c.embedding, @vectorEmbedding, true)}] AS orderByItems, {"Title": c.title, "SimilarityScore": VectorDistance(c.embedding, @vectorEmbedding, true)} AS payload
FROM c
WHERE ({documentdb-formattableorderbyquery-filter})
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)]]></RewrittenQuery>
</PartitionedQueryExecutionInfoInternal>
</Output>
</Result>
<Result>
<Input>
<Description>Dot Product</Description>
<Query>SELECT TOP 10 c.title AS Title, VectorDistance(c.embedding, @vectorEmbedding, true) AS SimilarityScore
FROM c
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)</Query>
<PartitionKeys>
<Key>/PartitionKey</Key>
</PartitionKeys>
<PartitionKeyType>Hash</PartitionKeyType>
<GeospatialType>Geography</GeospatialType>
<QueryParameters><![CDATA[[
{
"name": "@vectorEmbedding",
"value": [
0.0039695268496870995,
0.027338456362485886,
-0.005676387343555689,
-0.013547309674322605,
-0.002445543883368373,
0.01579204574227333,
-0.016796082258224487,
-0.012471556663513184
]
}
]]]></QueryParameters>
</Input>
<Output>
<PartitionedQueryExecutionInfoInternal>
<QueryInfo>
<DistinctType>None</DistinctType>
<Top>10</Top>
<Offset />
<Limit />
<GroupByExpressions />
<OrderBy>
<SortOrder>Descending</SortOrder>
</OrderBy>
<OrderByExpressions>
<OrderByExpression>VectorDistance(c.embedding, @vectorEmbedding, true)</OrderByExpression>
</OrderByExpressions>
<Aggregates />
<GroupByAliasToAggregateType />
<GroupByAliases />
<HasSelectValue>False</HasSelectValue>
</QueryInfo>
<QueryRanges>
<Range>
<Range>[[],"Infinity")</Range>
</Range>
</QueryRanges>
<RewrittenQuery><![CDATA[SELECT TOP 10 c._rid, [{"item": VectorDistance(c.embedding, @vectorEmbedding, true)}] AS orderByItems, {"Title": c.title, "SimilarityScore": VectorDistance(c.embedding, @vectorEmbedding, true)} AS payload
FROM c
WHERE ({documentdb-formattableorderbyquery-filter})
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)]]></RewrittenQuery>
</PartitionedQueryExecutionInfoInternal>
</Output>
</Result>
</Results>

Просмотреть файл

@ -260,6 +260,9 @@
<None Update="BaselineTest\TestBaseline\QueryPlanBaselineTests.Top.xml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="BaselineTest\TestBaseline\QueryPlanBaselineTests.VectorSearch.xml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="BaselineTest\TestBaseline\OptimisticDirectExecutionQueryBaselineTests.PositiveOptimisticDirectExecutionOutput.xml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>

Просмотреть файл

@ -30,7 +30,6 @@
using Microsoft.Azure.Documents;
using Microsoft.Azure.Documents.Routing;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Moq;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
@ -143,7 +142,7 @@
clientDisableOde: clientForceDisableODEFromBackend);
Assert.Fail("Expected exception. Received none.");
}
catch(InvalidOperationException ex)
catch (InvalidOperationException ex)
{
Assert.IsTrue(ex.ToString().Contains("Execution of this query cannot resume using Optimistic Direct Execution continuation token due to partition split. Please restart the query without the continuation token."));
}
@ -604,8 +603,8 @@
public void TestTextDistributionPlanParsingFromStream()
{
string textPath = "../../../Query/DistributionPlans/Text";
string[] filePaths = Directory.GetFiles(textPath);
string[] filePaths = Directory.GetFiles(textPath);
foreach (string filePath in filePaths)
{
string testResponse = File.ReadAllText(filePath);
@ -820,15 +819,16 @@
QueryPartitionProvider queryPartitionProvider = CreateCustomQueryPartitionProvider("clientDisableOptimisticDirectExecution", clientDisableOde.ToString().ToLower());
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryPlan = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: querySpecJsonString,
partitionKeyDefinition: pkDefinition,
partitionKeyDefinition: pkDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: true,
allowNonValueAggregateQuery: true,
hasLogicalPartitionKey: false,
allowDCount: true,
useSystemPrefix: false,
geospatialType: Cosmos.GeospatialType.Geography);
geospatialType: Cosmos.GeospatialType.Geography);
PartitionedQueryExecutionInfo partitionedQueryExecutionInfo = tryGetQueryPlan.Succeeded ? tryGetQueryPlan.Result : throw tryGetQueryPlan.Exception;
return Tuple.Create(partitionedQueryExecutionInfo, queryPartitionProvider);
}
@ -1025,8 +1025,8 @@
isNonStreamingOrderByQueryFeatureDisabled: false,
testInjections: queryRequestOptions.TestSettings);
List<PartitionKeyRange> targetPkRanges = new ();
foreach(FeedRangeEpk feedRangeEpk in containerRanges)
List<PartitionKeyRange> targetPkRanges = new();
foreach (FeedRangeEpk feedRangeEpk in containerRanges)
{
targetPkRanges.Add(new PartitionKeyRange
{
@ -1266,7 +1266,7 @@
private readonly IReadOnlyList<PartitionKeyRange> targetPartitionKeyRanges;
public TestCosmosQueryClient(QueryPartitionProvider queryPartitionProvider, IEnumerable<PartitionKeyRange> targetPartitionKeyRanges)
{
{
this.queryPartitionProvider = queryPartitionProvider;
this.targetPartitionKeyRanges = targetPartitionKeyRanges.ToList();
}
@ -1303,14 +1303,15 @@
return Task.FromResult(new ContainerQueryProperties(
"test",
new List<Range<string>>
{
{
new Range<string>(
PartitionKeyInternal.MinimumInclusiveEffectivePartitionKey,
PartitionKeyInternal.MaximumExclusiveEffectivePartitionKey,
true,
true)
},
new PartitionKeyDefinition(),
new PartitionKeyDefinition(),
vectorEmbeddingPolicy: null,
Cosmos.GeospatialType.Geometry));
}
@ -1334,12 +1335,24 @@
throw new NotImplementedException();
}
public override Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetPartitionedQueryExecutionInfoAsync(SqlQuerySpec sqlQuerySpec, ResourceType resourceType, PartitionKeyDefinition partitionKeyDefinition, bool requireFormattableOrderByQuery, bool isContinuationExpected, bool allowNonValueAggregateQuery, bool hasLogicalPartitionKey, bool allowDCount, bool useSystemPrefix, Cosmos.GeospatialType geospatialType, CancellationToken cancellationToken)
public override Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetPartitionedQueryExecutionInfoAsync(
SqlQuerySpec sqlQuerySpec,
ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,
bool hasLogicalPartitionKey,
bool allowDCount,
bool useSystemPrefix,
Cosmos.GeospatialType geospatialType,
CancellationToken cancellationToken)
{
CosmosSerializerCore serializerCore = new CosmosSerializerCore();
using StreamReader streamReader = new StreamReader(serializerCore.ToStreamSqlQuerySpec(sqlQuerySpec, Documents.ResourceType.Document));
string sqlQuerySpecJsonString = streamReader.ReadToEnd();
string sqlQuerySpecJsonString = streamReader.ReadToEnd();
(PartitionedQueryExecutionInfo partitionedQueryExecutionInfo, QueryPartitionProvider queryPartitionProvider) = OptimisticDirectExecutionQueryBaselineTests.GetPartitionedQueryExecutionInfoAndPartitionProvider(sqlQuerySpecJsonString, partitionKeyDefinition);
return Task.FromResult(TryCatch<PartitionedQueryExecutionInfo>.FromResult(partitionedQueryExecutionInfo));
}

Просмотреть файл

@ -375,7 +375,8 @@ namespace Microsoft.Azure.Cosmos.Tests.Query.Pipeline
mockClient.Setup(x => x.TryGetPartitionedQueryExecutionInfoAsync(
It.IsAny<SqlQuerySpec>(),
It.IsAny<ResourceType>(),
It.IsAny<PartitionKeyDefinition>(),
It.IsAny<PartitionKeyDefinition>(),
It.IsAny<Cosmos.VectorEmbeddingPolicy>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
@ -384,7 +385,19 @@ namespace Microsoft.Azure.Cosmos.Tests.Query.Pipeline
It.IsAny<bool>(),
It.IsAny<Cosmos.GeospatialType>(),
It.IsAny<CancellationToken>()))
.Returns((SqlQuerySpec sqlQuerySpec, ResourceType resourceType, PartitionKeyDefinition partitionKeyDefinition, bool requireFormattableOrderByQuery, bool isContinuationExpected, bool allowNonValueAggregateQuery, bool hasLogicalPartitionKey, bool allowDCount, bool useSystemPrefix, Cosmos.GeospatialType geospatialType, CancellationToken cancellationToken) =>
.Returns((
SqlQuerySpec sqlQuerySpec,
ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,
bool hasLogicalPartitionKey,
bool allowDCount,
bool useSystemPrefix,
Cosmos.GeospatialType geospatialType,
CancellationToken cancellationToken) =>
{
CosmosSerializerCore serializerCore = new();
using StreamReader streamReader = new(serializerCore.ToStreamSqlQuerySpec(sqlQuerySpec, Documents.ResourceType.Document));
@ -611,7 +624,8 @@ namespace Microsoft.Azure.Cosmos.Tests.Query.Pipeline
{
TryCatch<PartitionedQueryExecutionInfoInternal> info = QueryPartitionProviderTestInstance.Object.TryGetPartitionedQueryExecutionInfoInternal(
JsonConvert.SerializeObject(new SqlQuerySpec(query)),
partitionKeyDefinition,
partitionKeyDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,

Просмотреть файл

@ -33,7 +33,8 @@
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryPlan = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: sqlQuerySpec,
partitionKeyDefinition: PartitionKeyDefinition,
partitionKeyDefinition: PartitionKeyDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,
@ -49,7 +50,8 @@
tryGetQueryPlan = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: sqlQuerySpec,
partitionKeyDefinition: PartitionKeyDefinition,
partitionKeyDefinition: PartitionKeyDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,

Просмотреть файл

@ -1342,14 +1342,69 @@
PartitionKeyDefinition pkDefinitions = CreateHashPartitionKey("/key");
return new List<QueryPlanBaselineTestInput>
{
new QueryPlanBaselineTestInput($"{variation.Description} Geography", pkDefinitions, new SqlQuerySpec(variation.Query)) { GeospatialType = Cosmos.GeospatialType.Geography },
new QueryPlanBaselineTestInput($"{variation.Description} Geometry", pkDefinitions, new SqlQuerySpec(variation.Query)) { GeospatialType = Cosmos.GeospatialType.Geometry }
new QueryPlanBaselineTestInput($"{variation.Description} Geography", pkDefinitions, vectorEmbeddingPolicy: null, new SqlQuerySpec(variation.Query), Cosmos.GeospatialType.Geography),
new QueryPlanBaselineTestInput($"{variation.Description} Geometry", pkDefinitions, vectorEmbeddingPolicy : null, new SqlQuerySpec(variation.Query), Cosmos.GeospatialType.Geometry)
};
})
.ToList();
this.ExecuteTestSuite(testVariations);
}
}
/// <summary>
/// Query plan baseline for a VectorDistance ORDER BY query, run once per
/// distance function (Euclidean, Cosine, DotProduct) configured in the
/// container's vector embedding policy.
/// </summary>
[TestMethod]
[Owner("ndeshpan")]
public void VectorSearch()
{
    // The query text is the same for every input; only the policy's distance function varies.
    // NOTE(review): the checked-in VectorSearch baseline appears to expect an Ascending sort
    // for Euclidean and Descending for Cosine / Dot Product - confirm when regenerating it.
    List<QueryPlanBaselineTestInput> vectorSearchInputs = new List<QueryPlanBaselineTestInput>();
    vectorSearchInputs.Add(MakeVectorTest("Euclidean Distance", Cosmos.DistanceFunction.Euclidean));
    vectorSearchInputs.Add(MakeVectorTest("Cosine Similarity", Cosmos.DistanceFunction.Cosine));
    vectorSearchInputs.Add(MakeVectorTest("Dot Product", Cosmos.DistanceFunction.DotProduct));

    this.ExecuteTestSuite(vectorSearchInputs);
}
/// <summary>
/// Builds one vector-search baseline input: a hash-partitioned container on /PartitionKey whose
/// embedding policy declares a single 8-dimensional Float32 embedding at /embedding, queried with
/// a parameterized TOP 10 ... ORDER BY VectorDistance statement.
/// </summary>
/// <param name="description">Description passed through to the test input.</param>
/// <param name="distanceFunction">Distance function assigned to the /embedding path in the policy.</param>
/// <returns>The assembled test input, using the Geography geospatial type.</returns>
private static QueryPlanBaselineTestInput MakeVectorTest(string description, Cosmos.DistanceFunction distanceFunction)
{
    PartitionKeyDefinition hashPartitionKey = CreateHashPartitionKey("/PartitionKey");

    // Single embedding definition; the distance function is the only part that differs between inputs.
    Cosmos.Embedding embedding = new Cosmos.Embedding
    {
        Path = "/embedding",
        DataType = Cosmos.VectorDataType.Float32,
        Dimensions = 8,
        DistanceFunction = distanceFunction
    };
    Collection<Cosmos.Embedding> embeddings = new Collection<Cosmos.Embedding> { embedding };
    Cosmos.VectorEmbeddingPolicy embeddingPolicy = new Cosmos.VectorEmbeddingPolicy(embeddings);

    // The same VectorDistance expression is both projected and used as the sort key.
    string vectorQuery = @"SELECT TOP 10 c.title AS Title, VectorDistance(c.embedding, @vectorEmbedding, true) AS SimilarityScore
FROM c
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)";

    SqlParameter[] queryParameters = new SqlParameter[] { new SqlParameter("@vectorEmbedding", VectorEmbedding) };
    SqlQuerySpec querySpec = new SqlQuerySpec(vectorQuery, new SqlParameterCollection(queryParameters));

    return new QueryPlanBaselineTestInput(
        description: description,
        partitionKeyDefinition: hashPartitionKey,
        vectorEmbeddingPolicy: embeddingPolicy,
        sqlQuerySpec: querySpec,
        geospatialType: Cosmos.GeospatialType.Geography);
}
// Value bound to the @vectorEmbedding query parameter by MakeVectorTest.
// Eight components, matching the Dimensions = 8 embedding declared there.
private static readonly double[] VectorEmbedding =
{
    0.0039695268496870995,
    0.027338456362485886,
    -0.005676387343555689,
    -0.013547309674322605,
    -0.002445543883368373,
    0.01579204574227333,
    -0.016796082258224487,
    -0.012471556663513184,
};
private static PartitionKeyDefinition CreateHashPartitionKey(
params string[] partitionKeys) => new PartitionKeyDefinition()
@ -1439,7 +1494,8 @@
{
TryCatch<PartitionedQueryExecutionInfoInternal> info = QueryPartitionProviderTestInstance.Object.TryGetPartitionedQueryExecutionInfoInternal(
JsonConvert.SerializeObject(input.SqlQuerySpec),
input.PartitionKeyDefinition,
input.PartitionKeyDefinition,
input.VectorEmbeddingPolicy,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,
@ -1459,18 +1515,36 @@
public sealed class QueryPlanBaselineTestInput : BaselineTestInput
{
internal PartitionKeyDefinition PartitionKeyDefinition { get; set; }
internal SqlQuerySpec SqlQuerySpec { get; set; }
internal Cosmos.GeospatialType? GeospatialType { get; set; }
internal PartitionKeyDefinition PartitionKeyDefinition { get; }
internal Cosmos.VectorEmbeddingPolicy VectorEmbeddingPolicy { get; }
internal SqlQuerySpec SqlQuerySpec { get; }
internal Cosmos.GeospatialType? GeospatialType { get; }
internal QueryPlanBaselineTestInput(
string description,
PartitionKeyDefinition partitionKeyDefinition,
SqlQuerySpec sqlQuerySpec)
: base(description)
: this(description, partitionKeyDefinition, vectorEmbeddingPolicy: null, sqlQuerySpec, geospatialType: null)
{
this.PartitionKeyDefinition = partitionKeyDefinition;
this.SqlQuerySpec = sqlQuerySpec;
}
/// <summary>
/// Creates a baseline test input that carries every setting of the query plan request:
/// partition key definition, vector embedding policy, query spec and geospatial type.
/// </summary>
/// <param name="description">Description forwarded to the base test input.</param>
/// <param name="partitionKeyDefinition">Partition key definition stored on the input.</param>
/// <param name="vectorEmbeddingPolicy">Vector embedding policy stored on the input; callers pass null when the scenario has none.</param>
/// <param name="sqlQuerySpec">Query text and parameters stored on the input.</param>
/// <param name="geospatialType">Geospatial type stored on the input; nullable.</param>
internal QueryPlanBaselineTestInput(
    string description,
    PartitionKeyDefinition partitionKeyDefinition,
    Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
    SqlQuerySpec sqlQuerySpec,
    Cosmos.GeospatialType? geospatialType)
    : base(description)
{
    // The query itself.
    this.SqlQuerySpec = sqlQuerySpec;

    // Container-level settings supplied alongside the query.
    this.PartitionKeyDefinition = partitionKeyDefinition;
    this.GeospatialType = geospatialType;
    this.VectorEmbeddingPolicy = vectorEmbeddingPolicy;
}
public override void SerializeAsXml(XmlWriter xmlWriter)

Просмотреть файл

@ -31,7 +31,8 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
queryClient.Setup(c => c.TryGetPartitionedQueryExecutionInfoAsync(
It.IsAny<SqlQuerySpec>(),
It.IsAny<ResourceType>(),
It.IsAny<Documents.PartitionKeyDefinition>(),
It.IsAny<Documents.PartitionKeyDefinition>(),
It.IsAny<Cosmos.VectorEmbeddingPolicy>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
@ -45,7 +46,8 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
queryClient.Object,
new SqlQuerySpec("selectttttt * from c"),
ResourceType.Document,
new Documents.PartitionKeyDefinition() { Paths = new Collection<string>() { "/id" } },
new Documents.PartitionKeyDefinition() { Paths = new Collection<string>() { "/id" } },
vectorEmbeddingPolicy:null,
hasLogicalPartitionKey: false,
geospatialType: Cosmos.GeospatialType.Geography,
useSystemPrefix: false,
@ -66,7 +68,8 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
queryClient.Setup(c => c.TryGetPartitionedQueryExecutionInfoAsync(
It.IsAny<SqlQuerySpec>(),
It.IsAny<ResourceType>(),
It.IsAny<Documents.PartitionKeyDefinition>(),
It.IsAny<Documents.PartitionKeyDefinition>(),
It.IsAny<Cosmos.VectorEmbeddingPolicy>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
@ -81,7 +84,8 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
queryClient.Object,
new SqlQuerySpec("selectttttt * from c"),
ResourceType.Document,
new Documents.PartitionKeyDefinition() { Paths = new Collection<string>() { "/id" } },
new Documents.PartitionKeyDefinition() { Paths = new Collection<string>() { "/id" } },
vectorEmbeddingPolicy: null,
hasLogicalPartitionKey: false,
geospatialType: Cosmos.GeospatialType.Geography,
useSystemPrefix: false,
@ -100,7 +104,8 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
queryClient.Setup(c => c.TryGetPartitionedQueryExecutionInfoAsync(
It.IsAny<SqlQuerySpec>(),
It.IsAny<ResourceType>(),
It.IsAny<Documents.PartitionKeyDefinition>(),
It.IsAny<Documents.PartitionKeyDefinition>(),
It.IsAny<Cosmos.VectorEmbeddingPolicy>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
It.IsAny<bool>(),
@ -115,7 +120,8 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
new SqlQuerySpec("Super secret query that triggers bug"),
ResourceType.Document,
new Documents.PartitionKeyDefinition() { Paths = new Collection<string>() { "/id" } },
hasLogicalPartitionKey: false,
vectorEmbeddingPolicy: null,
hasLogicalPartitionKey: false,
geospatialType: Cosmos.GeospatialType.Geography,
useSystemPrefix: false,
NoOpTrace.Singleton));

Просмотреть файл

@ -171,7 +171,8 @@
QueryPartitionProvider queryPartitionProvider = CreateCustomQueryPartitionProvider();
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryPlan = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: querySpecJsonString,
partitionKeyDefinition: pkDefinition,
partitionKeyDefinition: pkDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: true,
allowNonValueAggregateQuery: true,
@ -325,7 +326,8 @@
true,
true)
},
SubpartitionTests.CreatePartitionKeyDefinition(),
SubpartitionTests.CreatePartitionKeyDefinition(),
vectorEmbeddingPolicy: null,
Cosmos.GeospatialType.Geometry));
}
@ -349,14 +351,26 @@
throw new NotImplementedException();
}
public override async Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetPartitionedQueryExecutionInfoAsync(SqlQuerySpec sqlQuerySpec, ResourceType resourceType, PartitionKeyDefinition partitionKeyDefinition, bool requireFormattableOrderByQuery, bool isContinuationExpected, bool allowNonValueAggregateQuery, bool hasLogicalPartitionKey, bool allowDCount, bool useSystemPrefix, Cosmos.GeospatialType geospatialType, CancellationToken cancellationToken)
// Test override that builds a query plan synchronously and returns it as an already-completed task.
//
// The query spec is serialized to a stream with CosmosSerializerCore, read back as a JSON string, and
// handed together with partitionKeyDefinition to
// OptimisticDirectExecutionQueryBaselineTests.GetPartitionedQueryExecutionInfoAndPartitionProvider.
//
// Only sqlQuerySpec and partitionKeyDefinition are read in this body. resourceType is not consulted
// (Documents.ResourceType.Document is passed to the serializer unconditionally), and
// vectorEmbeddingPolicy, the boolean flags, geospatialType and cancellationToken are not forwarded.
public override Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetPartitionedQueryExecutionInfoAsync(
SqlQuerySpec sqlQuerySpec,
ResourceType resourceType,
PartitionKeyDefinition partitionKeyDefinition,
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
bool requireFormattableOrderByQuery,
bool isContinuationExpected,
bool allowNonValueAggregateQuery,
bool hasLogicalPartitionKey,
bool allowDCount,
bool useSystemPrefix,
Cosmos.GeospatialType geospatialType,
CancellationToken cancellationToken)
{
CosmosSerializerCore serializerCore = new();
// The using declaration disposes the reader when the method scope ends.
using StreamReader streamReader = new(serializerCore.ToStreamSqlQuerySpec(sqlQuerySpec, Documents.ResourceType.Document));
string sqlQuerySpecJsonString = streamReader.ReadToEnd();
// The helper returns the plan together with a QueryPartitionProvider; the provider is not used afterwards.
(PartitionedQueryExecutionInfo partitionedQueryExecutionInfo, QueryPartitionProvider queryPartitionProvider) = OptimisticDirectExecutionQueryBaselineTests.GetPartitionedQueryExecutionInfoAndPartitionProvider(sqlQuerySpecJsonString, partitionKeyDefinition);
// NOTE(review): two return statements appear back to back in this listing. The first returns the
// TryCatch directly and the second wraps it in Task.FromResult. With the non-async Task<...> return
// type declared above, the wrapped form is presumably the one that belongs - confirm against the file.
return TryCatch<PartitionedQueryExecutionInfo>.FromResult(partitionedQueryExecutionInfo);
return Task.FromResult(TryCatch<PartitionedQueryExecutionInfo>.FromResult(partitionedQueryExecutionInfo));
}
}
}

Просмотреть файл

@ -712,7 +712,8 @@ namespace Microsoft.Azure.Cosmos.Tests.Routing
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryPlan =
QueryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
querySpecJsonString: JsonConvert.SerializeObject(new SqlQuerySpec(queryText)),
partitionKeyDefinition: partitionKeyDefinition,
partitionKeyDefinition: partitionKeyDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: true,
allowNonValueAggregateQuery: false,
@ -862,7 +863,8 @@ namespace Microsoft.Azure.Cosmos.Tests.Routing
allowDCount: false,
allowNonValueAggregates: true,
useSystemPrefix: false,
partitionKeyDefinition: new PartitionKeyDefinition { Paths = new Collection<string> { testcase.PartitionKey }, Kind = PartitionKind.Hash },
partitionKeyDefinition: new PartitionKeyDefinition { Paths = new Collection<string> { testcase.PartitionKey }, Kind = PartitionKind.Hash },
vectorEmbeddingPolicy: null,
queryPartitionProvider: QueryPartitionProviderTestInstance.Object,
clientApiVersion: testcase.ClientApiVersion,
geospatialType: Cosmos.GeospatialType.Geography,

Просмотреть файл

@ -769,7 +769,8 @@ namespace Microsoft.Azure.Cosmos.Tests.Tracing
{
TryCatch<PartitionedQueryExecutionInfoInternal> info = QueryPartitionProviderTestInstance.Object.TryGetPartitionedQueryExecutionInfoInternal(
Newtonsoft.Json.JsonConvert.SerializeObject(new SqlQuerySpec(query)),
partitionKeyDefinition,
partitionKeyDefinition,
vectorEmbeddingPolicy: null,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,