Query: Fixes plumbing VectorEmbeddingPolicy to ServiceInterop to choose correct default distance function (#4538)
* Plumb the collection VectorEmbeddingPolicy to ServiceInterop
* Add query plan baseline tests for vector search
* Correct typo in the query for baseline test
* Fix build errors
* Fix runtime issue in mock setup due to the extra argument for vector embedding policy
This commit is contained in:
Parent
5994b1608b
Commit
8c8d3e955c
|
@ -609,6 +609,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.ExecutionContext
|
|||
inputParameters.SqlQuerySpec,
|
||||
cosmosQueryContext.ResourceTypeEnum,
|
||||
partitionKeyDefinition,
|
||||
containerQueryProperties.VectorEmbeddingPolicy,
|
||||
inputParameters.PartitionKey != null,
|
||||
containerQueryProperties.GeospatialType,
|
||||
cosmosQueryContext.UseSystemPrefix,
|
||||
|
|
|
@ -14,11 +14,13 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryClient
|
|||
string resourceId,
|
||||
IReadOnlyList<Range<string>> effectivePartitionKeyRanges,
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
Cosmos.GeospatialType geospatialType)
|
||||
{
|
||||
this.ResourceId = resourceId;
|
||||
this.EffectiveRangesForPartitionKey = effectivePartitionKeyRanges;
|
||||
this.PartitionKeyDefinition = partitionKeyDefinition;
|
||||
this.VectorEmbeddingPolicy = vectorEmbeddingPolicy;
|
||||
this.GeospatialType = geospatialType;
|
||||
}
|
||||
|
||||
|
@ -27,7 +29,11 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryClient
|
|||
//A PartitionKey has one range when it is a full PartitionKey value.
|
||||
//It can span many ranges when it is a prefix PartitionKey for a sub-partitioned container.
|
||||
public IReadOnlyList<Range<string>> EffectiveRangesForPartitionKey { get; }
|
||||
|
||||
public PartitionKeyDefinition PartitionKeyDefinition { get; }
|
||||
|
||||
public Cosmos.VectorEmbeddingPolicy VectorEmbeddingPolicy { get; }
|
||||
|
||||
public Cosmos.GeospatialType GeospatialType { get; }
|
||||
}
|
||||
}
|
|
@ -41,6 +41,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryClient
|
|||
SqlQuerySpec sqlQuerySpec,
|
||||
Documents.ResourceType resourceType,
|
||||
Documents.PartitionKeyDefinition partitionKeyDefinition,
|
||||
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
bool requireFormattableOrderByQuery,
|
||||
bool isContinuationExpected,
|
||||
bool allowNonValueAggregateQuery,
|
||||
|
|
|
@ -120,6 +120,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
|
|||
public TryCatch<PartitionedQueryExecutionInfo> TryGetPartitionedQueryExecutionInfo(
|
||||
string querySpecJsonString,
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
bool requireFormattableOrderByQuery,
|
||||
bool isContinuationExpected,
|
||||
bool allowNonValueAggregateQuery,
|
||||
|
@ -131,6 +132,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
|
|||
TryCatch<PartitionedQueryExecutionInfoInternal> tryGetInternalQueryInfo = this.TryGetPartitionedQueryExecutionInfoInternal(
|
||||
querySpecJsonString: querySpecJsonString,
|
||||
partitionKeyDefinition: partitionKeyDefinition,
|
||||
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
|
||||
requireFormattableOrderByQuery: requireFormattableOrderByQuery,
|
||||
isContinuationExpected: isContinuationExpected,
|
||||
allowNonValueAggregateQuery: allowNonValueAggregateQuery,
|
||||
|
@ -180,6 +182,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
|
|||
internal TryCatch<PartitionedQueryExecutionInfoInternal> TryGetPartitionedQueryExecutionInfoInternal(
|
||||
string querySpecJsonString,
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
bool requireFormattableOrderByQuery,
|
||||
bool isContinuationExpected,
|
||||
bool allowNonValueAggregateQuery,
|
||||
|
@ -224,6 +227,10 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
|
|||
uint errorCode;
|
||||
uint serializedQueryExecutionInfoResultLength;
|
||||
|
||||
string vectorEmbeddingPolicyString = vectorEmbeddingPolicy != null ?
|
||||
JsonConvert.SerializeObject(vectorEmbeddingPolicy) :
|
||||
null;
|
||||
|
||||
unsafe
|
||||
{
|
||||
ServiceInteropWrapper.PartitionKeyRangesApiOptions partitionKeyRangesApiOptions =
|
||||
|
@ -241,13 +248,15 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
|
|||
|
||||
fixed (byte* bytePtr = buffer)
|
||||
{
|
||||
errorCode = ServiceInteropWrapper.GetPartitionKeyRangesFromQuery3(
|
||||
errorCode = ServiceInteropWrapper.GetPartitionKeyRangesFromQuery4(
|
||||
this.serviceProvider,
|
||||
querySpecJsonString,
|
||||
partitionKeyRangesApiOptions,
|
||||
allParts,
|
||||
partsLengths,
|
||||
(uint)partitionKeyDefinition.Paths.Count,
|
||||
vectorEmbeddingPolicyString,
|
||||
vectorEmbeddingPolicyString?.Length ?? 0,
|
||||
new IntPtr(bytePtr),
|
||||
(uint)buffer.Length,
|
||||
out serializedQueryExecutionInfoResultLength);
|
||||
|
|
|
@ -26,6 +26,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
|
|||
SqlQuerySpec sqlQuerySpec,
|
||||
Documents.ResourceType resourceType,
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
QueryFeatures supportedQueryFeatures,
|
||||
bool hasLogicalPartitionKey,
|
||||
bool useSystemPrefix,
|
||||
|
@ -48,6 +49,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
|
|||
sqlQuerySpec,
|
||||
resourceType,
|
||||
partitionKeyDefinition,
|
||||
vectorEmbeddingPolicy,
|
||||
hasLogicalPartitionKey,
|
||||
useSystemPrefix,
|
||||
geospatialType,
|
||||
|
@ -76,6 +78,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
|
|||
SqlQuerySpec sqlQuerySpec,
|
||||
Documents.ResourceType resourceType,
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
bool hasLogicalPartitionKey,
|
||||
bool useSystemPrefix,
|
||||
GeospatialType geospatialType,
|
||||
|
@ -97,6 +100,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
|
|||
sqlQuerySpec,
|
||||
resourceType,
|
||||
partitionKeyDefinition,
|
||||
vectorEmbeddingPolicy,
|
||||
hasLogicalPartitionKey,
|
||||
useSystemPrefix,
|
||||
geospatialType,
|
||||
|
@ -116,6 +120,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
|
|||
SqlQuerySpec sqlQuerySpec,
|
||||
Documents.ResourceType resourceType,
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
bool hasLogicalPartitionKey,
|
||||
bool useSystemPrefix,
|
||||
Cosmos.GeospatialType geospatialType,
|
||||
|
@ -127,6 +132,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
|
|||
sqlQuerySpec: sqlQuerySpec,
|
||||
resourceType: resourceType,
|
||||
partitionKeyDefinition: partitionKeyDefinition,
|
||||
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
|
||||
requireFormattableOrderByQuery: true,
|
||||
isContinuationExpected: false,
|
||||
allowNonValueAggregateQuery: true,
|
||||
|
|
|
@ -51,6 +51,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
|
|||
SqlQuerySpec sqlQuerySpec,
|
||||
Documents.ResourceType resourceType,
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
bool hasLogicalPartitionKey,
|
||||
GeospatialType geospatialType,
|
||||
bool useSystemPrefix,
|
||||
|
@ -82,6 +83,7 @@ namespace Microsoft.Azure.Cosmos.Query.Core.QueryPlan
|
|||
sqlQuerySpec,
|
||||
resourceType,
|
||||
partitionKeyDefinition,
|
||||
vectorEmbeddingPolicy,
|
||||
QueryPlanRetriever.SupportedQueryFeatures,
|
||||
hasLogicalPartitionKey,
|
||||
useSystemPrefix,
|
||||
|
|
|
@ -317,6 +317,7 @@ namespace Microsoft.Azure.Cosmos.Query
|
|||
allowNonValueAggregates: false,
|
||||
useSystemPrefix: false,
|
||||
partitionKeyDefinition: partitionKeyDefinition,
|
||||
vectorEmbeddingPolicy: collection.VectorEmbeddingPolicy,
|
||||
queryPartitionProvider: queryPartitionProvider,
|
||||
clientApiVersion: version,
|
||||
geospatialType: collection.GeospatialConfig.GeospatialType,
|
||||
|
|
|
@ -165,6 +165,7 @@ namespace Microsoft.Azure.Cosmos.Query
|
|||
|
||||
public async Task<PartitionedQueryExecutionInfo> GetPartitionedQueryExecutionInfoAsync(
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
bool requireFormattableOrderByQuery,
|
||||
bool isContinuationExpected,
|
||||
bool allowNonValueAggregateQuery,
|
||||
|
@ -180,6 +181,7 @@ namespace Microsoft.Azure.Cosmos.Query
|
|||
TryCatch<PartitionedQueryExecutionInfo> tryGetPartitionedQueryExecutionInfo = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
|
||||
querySpecJsonString: JsonConvert.SerializeObject(this.QuerySpec),
|
||||
partitionKeyDefinition: partitionKeyDefinition,
|
||||
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
|
||||
requireFormattableOrderByQuery: requireFormattableOrderByQuery,
|
||||
isContinuationExpected: isContinuationExpected,
|
||||
allowNonValueAggregateQuery: allowNonValueAggregateQuery,
|
||||
|
|
|
@ -99,6 +99,7 @@ namespace Microsoft.Azure.Cosmos.Query
|
|||
//need to make it not rely on information from collection cache.
|
||||
PartitionedQueryExecutionInfo partitionedQueryExecutionInfo = await queryExecutionContext.GetPartitionedQueryExecutionInfoAsync(
|
||||
partitionKeyDefinition: collection.PartitionKey,
|
||||
vectorEmbeddingPolicy: collection.VectorEmbeddingPolicy,
|
||||
requireFormattableOrderByQuery: true,
|
||||
isContinuationExpected: isContinuationExpected,
|
||||
allowNonValueAggregateQuery: true,
|
||||
|
|
|
@ -86,6 +86,7 @@ namespace Microsoft.Azure.Cosmos
|
|||
containerProperties.ResourceId,
|
||||
effectivePartitionKeyRange,
|
||||
containerProperties.PartitionKey,
|
||||
containerProperties.VectorEmbeddingPolicy,
|
||||
containerProperties.GeospatialConfig.GeospatialType);
|
||||
}
|
||||
|
||||
|
@ -93,6 +94,7 @@ namespace Microsoft.Azure.Cosmos
|
|||
SqlQuerySpec sqlQuerySpec,
|
||||
ResourceType resourceType,
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
bool requireFormattableOrderByQuery,
|
||||
bool isContinuationExpected,
|
||||
bool allowNonValueAggregateQuery,
|
||||
|
@ -117,6 +119,7 @@ namespace Microsoft.Azure.Cosmos
|
|||
return (await this.documentClient.QueryPartitionProvider).TryGetPartitionedQueryExecutionInfo(
|
||||
querySpecJsonString: queryString,
|
||||
partitionKeyDefinition: partitionKeyDefinition,
|
||||
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
|
||||
requireFormattableOrderByQuery: requireFormattableOrderByQuery,
|
||||
isContinuationExpected: isContinuationExpected,
|
||||
allowNonValueAggregateQuery: allowNonValueAggregateQuery,
|
||||
|
|
|
@ -35,6 +35,7 @@ namespace Microsoft.Azure.Cosmos.Routing
|
|||
bool allowNonValueAggregates,
|
||||
bool useSystemPrefix,
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
QueryPartitionProvider queryPartitionProvider,
|
||||
string clientApiVersion,
|
||||
Cosmos.GeospatialType geospatialType,
|
||||
|
@ -58,6 +59,7 @@ namespace Microsoft.Azure.Cosmos.Routing
|
|||
TryCatch<PartitionedQueryExecutionInfo> tryGetPartitionQueryExecutionInfo = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
|
||||
querySpecJsonString: querySpecJsonString,
|
||||
partitionKeyDefinition: partitionKeyDefinition,
|
||||
vectorEmbeddingPolicy: vectorEmbeddingPolicy,
|
||||
requireFormattableOrderByQuery: VersionUtility.IsLaterThan(clientApiVersion, HttpConstants.VersionDates.v2016_11_14),
|
||||
isContinuationExpected: isContinuationExpected,
|
||||
allowNonValueAggregateQuery: allowNonValueAggregates,
|
||||
|
|
|
@ -1723,9 +1723,10 @@ namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests
|
|||
|
||||
ContainerQueryProperties containerQueryProperties = new ContainerQueryProperties(
|
||||
containerResponse.Resource.ResourceId,
|
||||
null,
|
||||
effectivePartitionKeyRanges: null,
|
||||
//new List<Documents.Routing.Range<string>> { new Documents.Routing.Range<string>("AA", "AA", true, true) },
|
||||
containerResponse.Resource.PartitionKey,
|
||||
vectorEmbeddingPolicy: null,
|
||||
containerResponse.Resource.GeospatialConfig.GeospatialType);
|
||||
|
||||
// There should only be one range since the EPK option is set.
|
||||
|
|
|
@ -54,8 +54,9 @@ namespace Microsoft.Azure.Cosmos.EmulatorTests.Query
|
|||
|
||||
ContainerQueryProperties containerQueryProperties = new ContainerQueryProperties(
|
||||
containerResponse.Resource.ResourceId,
|
||||
null,
|
||||
effectivePartitionKeyRanges: null,
|
||||
containerResponse.Resource.PartitionKey,
|
||||
vectorEmbeddingPolicy: null,
|
||||
containerResponse.Resource.GeospatialConfig.GeospatialType);
|
||||
|
||||
IReadOnlyList<FeedRange> feedTokens = await container.GetFeedRangesAsync();
|
||||
|
|
|
@ -121,6 +121,7 @@ namespace Microsoft.Azure.Cosmos.Query
|
|||
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryPlan = QueryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
|
||||
querySpecJsonString: JsonConvert.SerializeObject(sqlQuerySpec),
|
||||
partitionKeyDefinition: PartitionKeyDefinition,
|
||||
vectorEmbeddingPolicy: null,
|
||||
requireFormattableOrderByQuery: true,
|
||||
isContinuationExpected: false,
|
||||
allowNonValueAggregateQuery: true,
|
||||
|
|
|
@ -0,0 +1,176 @@
|
|||
<Results>
|
||||
<Result>
|
||||
<Input>
|
||||
<Description>Euclidean Distance</Description>
|
||||
<Query>SELECT TOP 10 c.title AS Title, VectorDistance(c.embedding, @vectorEmbedding, true) AS SimilarityScore
|
||||
FROM c
|
||||
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)</Query>
|
||||
<PartitionKeys>
|
||||
<Key>/PartitionKey</Key>
|
||||
</PartitionKeys>
|
||||
<PartitionKeyType>Hash</PartitionKeyType>
|
||||
<GeospatialType>Geography</GeospatialType>
|
||||
<QueryParameters><![CDATA[[
|
||||
{
|
||||
"name": "@vectorEmbedding",
|
||||
"value": [
|
||||
0.0039695268496870995,
|
||||
0.027338456362485886,
|
||||
-0.005676387343555689,
|
||||
-0.013547309674322605,
|
||||
-0.002445543883368373,
|
||||
0.01579204574227333,
|
||||
-0.016796082258224487,
|
||||
-0.012471556663513184
|
||||
]
|
||||
}
|
||||
]]]></QueryParameters>
|
||||
</Input>
|
||||
<Output>
|
||||
<PartitionedQueryExecutionInfoInternal>
|
||||
<QueryInfo>
|
||||
<DistinctType>None</DistinctType>
|
||||
<Top>10</Top>
|
||||
<Offset />
|
||||
<Limit />
|
||||
<GroupByExpressions />
|
||||
<OrderBy>
|
||||
<SortOrder>Ascending</SortOrder>
|
||||
</OrderBy>
|
||||
<OrderByExpressions>
|
||||
<OrderByExpression>VectorDistance(c.embedding, @vectorEmbedding, true)</OrderByExpression>
|
||||
</OrderByExpressions>
|
||||
<Aggregates />
|
||||
<GroupByAliasToAggregateType />
|
||||
<GroupByAliases />
|
||||
<HasSelectValue>False</HasSelectValue>
|
||||
</QueryInfo>
|
||||
<QueryRanges>
|
||||
<Range>
|
||||
<Range>[[],"Infinity")</Range>
|
||||
</Range>
|
||||
</QueryRanges>
|
||||
<RewrittenQuery><![CDATA[SELECT TOP 10 c._rid, [{"item": VectorDistance(c.embedding, @vectorEmbedding, true)}] AS orderByItems, {"Title": c.title, "SimilarityScore": VectorDistance(c.embedding, @vectorEmbedding, true)} AS payload
|
||||
FROM c
|
||||
WHERE ({documentdb-formattableorderbyquery-filter})
|
||||
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)]]></RewrittenQuery>
|
||||
</PartitionedQueryExecutionInfoInternal>
|
||||
</Output>
|
||||
</Result>
|
||||
<Result>
|
||||
<Input>
|
||||
<Description>Cosine Similarity</Description>
|
||||
<Query>SELECT TOP 10 c.title AS Title, VectorDistance(c.embedding, @vectorEmbedding, true) AS SimilarityScore
|
||||
FROM c
|
||||
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)</Query>
|
||||
<PartitionKeys>
|
||||
<Key>/PartitionKey</Key>
|
||||
</PartitionKeys>
|
||||
<PartitionKeyType>Hash</PartitionKeyType>
|
||||
<GeospatialType>Geography</GeospatialType>
|
||||
<QueryParameters><![CDATA[[
|
||||
{
|
||||
"name": "@vectorEmbedding",
|
||||
"value": [
|
||||
0.0039695268496870995,
|
||||
0.027338456362485886,
|
||||
-0.005676387343555689,
|
||||
-0.013547309674322605,
|
||||
-0.002445543883368373,
|
||||
0.01579204574227333,
|
||||
-0.016796082258224487,
|
||||
-0.012471556663513184
|
||||
]
|
||||
}
|
||||
]]]></QueryParameters>
|
||||
</Input>
|
||||
<Output>
|
||||
<PartitionedQueryExecutionInfoInternal>
|
||||
<QueryInfo>
|
||||
<DistinctType>None</DistinctType>
|
||||
<Top>10</Top>
|
||||
<Offset />
|
||||
<Limit />
|
||||
<GroupByExpressions />
|
||||
<OrderBy>
|
||||
<SortOrder>Descending</SortOrder>
|
||||
</OrderBy>
|
||||
<OrderByExpressions>
|
||||
<OrderByExpression>VectorDistance(c.embedding, @vectorEmbedding, true)</OrderByExpression>
|
||||
</OrderByExpressions>
|
||||
<Aggregates />
|
||||
<GroupByAliasToAggregateType />
|
||||
<GroupByAliases />
|
||||
<HasSelectValue>False</HasSelectValue>
|
||||
</QueryInfo>
|
||||
<QueryRanges>
|
||||
<Range>
|
||||
<Range>[[],"Infinity")</Range>
|
||||
</Range>
|
||||
</QueryRanges>
|
||||
<RewrittenQuery><![CDATA[SELECT TOP 10 c._rid, [{"item": VectorDistance(c.embedding, @vectorEmbedding, true)}] AS orderByItems, {"Title": c.title, "SimilarityScore": VectorDistance(c.embedding, @vectorEmbedding, true)} AS payload
|
||||
FROM c
|
||||
WHERE ({documentdb-formattableorderbyquery-filter})
|
||||
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)]]></RewrittenQuery>
|
||||
</PartitionedQueryExecutionInfoInternal>
|
||||
</Output>
|
||||
</Result>
|
||||
<Result>
|
||||
<Input>
|
||||
<Description>Dot Product</Description>
|
||||
<Query>SELECT TOP 10 c.title AS Title, VectorDistance(c.embedding, @vectorEmbedding, true) AS SimilarityScore
|
||||
FROM c
|
||||
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)</Query>
|
||||
<PartitionKeys>
|
||||
<Key>/PartitionKey</Key>
|
||||
</PartitionKeys>
|
||||
<PartitionKeyType>Hash</PartitionKeyType>
|
||||
<GeospatialType>Geography</GeospatialType>
|
||||
<QueryParameters><![CDATA[[
|
||||
{
|
||||
"name": "@vectorEmbedding",
|
||||
"value": [
|
||||
0.0039695268496870995,
|
||||
0.027338456362485886,
|
||||
-0.005676387343555689,
|
||||
-0.013547309674322605,
|
||||
-0.002445543883368373,
|
||||
0.01579204574227333,
|
||||
-0.016796082258224487,
|
||||
-0.012471556663513184
|
||||
]
|
||||
}
|
||||
]]]></QueryParameters>
|
||||
</Input>
|
||||
<Output>
|
||||
<PartitionedQueryExecutionInfoInternal>
|
||||
<QueryInfo>
|
||||
<DistinctType>None</DistinctType>
|
||||
<Top>10</Top>
|
||||
<Offset />
|
||||
<Limit />
|
||||
<GroupByExpressions />
|
||||
<OrderBy>
|
||||
<SortOrder>Descending</SortOrder>
|
||||
</OrderBy>
|
||||
<OrderByExpressions>
|
||||
<OrderByExpression>VectorDistance(c.embedding, @vectorEmbedding, true)</OrderByExpression>
|
||||
</OrderByExpressions>
|
||||
<Aggregates />
|
||||
<GroupByAliasToAggregateType />
|
||||
<GroupByAliases />
|
||||
<HasSelectValue>False</HasSelectValue>
|
||||
</QueryInfo>
|
||||
<QueryRanges>
|
||||
<Range>
|
||||
<Range>[[],"Infinity")</Range>
|
||||
</Range>
|
||||
</QueryRanges>
|
||||
<RewrittenQuery><![CDATA[SELECT TOP 10 c._rid, [{"item": VectorDistance(c.embedding, @vectorEmbedding, true)}] AS orderByItems, {"Title": c.title, "SimilarityScore": VectorDistance(c.embedding, @vectorEmbedding, true)} AS payload
|
||||
FROM c
|
||||
WHERE ({documentdb-formattableorderbyquery-filter})
|
||||
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)]]></RewrittenQuery>
|
||||
</PartitionedQueryExecutionInfoInternal>
|
||||
</Output>
|
||||
</Result>
|
||||
</Results>
|
|
@ -260,6 +260,9 @@
|
|||
<None Update="BaselineTest\TestBaseline\QueryPlanBaselineTests.Top.xml">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="BaselineTest\TestBaseline\QueryPlanBaselineTests.VectorSearch.xml">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="BaselineTest\TestBaseline\OptimisticDirectExecutionQueryBaselineTests.PositiveOptimisticDirectExecutionOutput.xml">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
|
|
|
@ -30,7 +30,6 @@
|
|||
using Microsoft.Azure.Documents;
|
||||
using Microsoft.Azure.Documents.Routing;
|
||||
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||
using Moq;
|
||||
using Newtonsoft.Json;
|
||||
using Newtonsoft.Json.Linq;
|
||||
|
||||
|
@ -821,6 +820,7 @@
|
|||
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryPlan = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
|
||||
querySpecJsonString: querySpecJsonString,
|
||||
partitionKeyDefinition: pkDefinition,
|
||||
vectorEmbeddingPolicy: null,
|
||||
requireFormattableOrderByQuery: true,
|
||||
isContinuationExpected: true,
|
||||
allowNonValueAggregateQuery: true,
|
||||
|
@ -1311,6 +1311,7 @@
|
|||
true)
|
||||
},
|
||||
new PartitionKeyDefinition(),
|
||||
vectorEmbeddingPolicy: null,
|
||||
Cosmos.GeospatialType.Geometry));
|
||||
}
|
||||
|
||||
|
@ -1334,7 +1335,19 @@
|
|||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
public override Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetPartitionedQueryExecutionInfoAsync(SqlQuerySpec sqlQuerySpec, ResourceType resourceType, PartitionKeyDefinition partitionKeyDefinition, bool requireFormattableOrderByQuery, bool isContinuationExpected, bool allowNonValueAggregateQuery, bool hasLogicalPartitionKey, bool allowDCount, bool useSystemPrefix, Cosmos.GeospatialType geospatialType, CancellationToken cancellationToken)
|
||||
public override Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetPartitionedQueryExecutionInfoAsync(
|
||||
SqlQuerySpec sqlQuerySpec,
|
||||
ResourceType resourceType,
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
bool requireFormattableOrderByQuery,
|
||||
bool isContinuationExpected,
|
||||
bool allowNonValueAggregateQuery,
|
||||
bool hasLogicalPartitionKey,
|
||||
bool allowDCount,
|
||||
bool useSystemPrefix,
|
||||
Cosmos.GeospatialType geospatialType,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
CosmosSerializerCore serializerCore = new CosmosSerializerCore();
|
||||
using StreamReader streamReader = new StreamReader(serializerCore.ToStreamSqlQuerySpec(sqlQuerySpec, Documents.ResourceType.Document));
|
||||
|
|
|
@ -376,6 +376,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query.Pipeline
|
|||
It.IsAny<SqlQuerySpec>(),
|
||||
It.IsAny<ResourceType>(),
|
||||
It.IsAny<PartitionKeyDefinition>(),
|
||||
It.IsAny<Cosmos.VectorEmbeddingPolicy>(),
|
||||
It.IsAny<bool>(),
|
||||
It.IsAny<bool>(),
|
||||
It.IsAny<bool>(),
|
||||
|
@ -384,7 +385,19 @@ namespace Microsoft.Azure.Cosmos.Tests.Query.Pipeline
|
|||
It.IsAny<bool>(),
|
||||
It.IsAny<Cosmos.GeospatialType>(),
|
||||
It.IsAny<CancellationToken>()))
|
||||
.Returns((SqlQuerySpec sqlQuerySpec, ResourceType resourceType, PartitionKeyDefinition partitionKeyDefinition, bool requireFormattableOrderByQuery, bool isContinuationExpected, bool allowNonValueAggregateQuery, bool hasLogicalPartitionKey, bool allowDCount, bool useSystemPrefix, Cosmos.GeospatialType geospatialType, CancellationToken cancellationToken) =>
|
||||
.Returns((
|
||||
SqlQuerySpec sqlQuerySpec,
|
||||
ResourceType resourceType,
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
bool requireFormattableOrderByQuery,
|
||||
bool isContinuationExpected,
|
||||
bool allowNonValueAggregateQuery,
|
||||
bool hasLogicalPartitionKey,
|
||||
bool allowDCount,
|
||||
bool useSystemPrefix,
|
||||
Cosmos.GeospatialType geospatialType,
|
||||
CancellationToken cancellationToken) =>
|
||||
{
|
||||
CosmosSerializerCore serializerCore = new();
|
||||
using StreamReader streamReader = new(serializerCore.ToStreamSqlQuerySpec(sqlQuerySpec, Documents.ResourceType.Document));
|
||||
|
@ -612,6 +625,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query.Pipeline
|
|||
TryCatch<PartitionedQueryExecutionInfoInternal> info = QueryPartitionProviderTestInstance.Object.TryGetPartitionedQueryExecutionInfoInternal(
|
||||
JsonConvert.SerializeObject(new SqlQuerySpec(query)),
|
||||
partitionKeyDefinition,
|
||||
vectorEmbeddingPolicy: null,
|
||||
requireFormattableOrderByQuery: true,
|
||||
isContinuationExpected: false,
|
||||
allowNonValueAggregateQuery: true,
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryPlan = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
|
||||
querySpecJsonString: sqlQuerySpec,
|
||||
partitionKeyDefinition: PartitionKeyDefinition,
|
||||
vectorEmbeddingPolicy: null,
|
||||
requireFormattableOrderByQuery: true,
|
||||
isContinuationExpected: false,
|
||||
allowNonValueAggregateQuery: true,
|
||||
|
@ -50,6 +51,7 @@
|
|||
tryGetQueryPlan = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
|
||||
querySpecJsonString: sqlQuerySpec,
|
||||
partitionKeyDefinition: PartitionKeyDefinition,
|
||||
vectorEmbeddingPolicy: null,
|
||||
requireFormattableOrderByQuery: true,
|
||||
isContinuationExpected: false,
|
||||
allowNonValueAggregateQuery: true,
|
||||
|
|
|
@ -1342,8 +1342,8 @@
|
|||
PartitionKeyDefinition pkDefinitions = CreateHashPartitionKey("/key");
|
||||
return new List<QueryPlanBaselineTestInput>
|
||||
{
|
||||
new QueryPlanBaselineTestInput($"{variation.Description} Geography", pkDefinitions, new SqlQuerySpec(variation.Query)) { GeospatialType = Cosmos.GeospatialType.Geography },
|
||||
new QueryPlanBaselineTestInput($"{variation.Description} Geometry", pkDefinitions, new SqlQuerySpec(variation.Query)) { GeospatialType = Cosmos.GeospatialType.Geometry }
|
||||
new QueryPlanBaselineTestInput($"{variation.Description} Geography", pkDefinitions, vectorEmbeddingPolicy: null, new SqlQuerySpec(variation.Query), Cosmos.GeospatialType.Geography),
|
||||
new QueryPlanBaselineTestInput($"{variation.Description} Geometry", pkDefinitions, vectorEmbeddingPolicy : null, new SqlQuerySpec(variation.Query), Cosmos.GeospatialType.Geometry)
|
||||
};
|
||||
})
|
||||
.ToList();
|
||||
|
@ -1351,6 +1351,61 @@
|
|||
this.ExecuteTestSuite(testVariations);
|
||||
}
|
||||
|
||||
/// <summary>
/// Query plan baseline test for vector search: runs the same VectorDistance query
/// once per distance function (Euclidean, Cosine, DotProduct) and hands the inputs
/// to <c>ExecuteTestSuite</c>. Only the distance function in the container's vector
/// embedding policy differs between the cases.
/// </summary>
/// <remarks>
/// NOTE(review): the recorded baseline (presumably QueryPlanBaselineTests.VectorSearch.xml)
/// shows an Ascending sort order for Euclidean and Descending for Cosine and Dot Product.
/// </remarks>
[TestMethod]
|
||||
[Owner("ndeshpan")]
|
||||
public void VectorSearch()
|
||||
{
|
||||
// One input per distance function; the description string labels the case.
List<QueryPlanBaselineTestInput> testCases = new List<QueryPlanBaselineTestInput>
|
||||
{
|
||||
MakeVectorTest("Euclidean Distance", Cosmos.DistanceFunction.Euclidean),
|
||||
MakeVectorTest("Cosine Similarity", Cosmos.DistanceFunction.Cosine),
|
||||
MakeVectorTest("Dot Product", Cosmos.DistanceFunction.DotProduct),
|
||||
};
|
||||
|
||||
this.ExecuteTestSuite(testCases);
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds one query plan baseline input for a TOP 10 / ORDER BY VectorDistance query
/// against a container hash-partitioned on "/PartitionKey" whose vector embedding policy
/// declares a single "/embedding" path using the supplied distance function.
/// </summary>
/// <param name="description">Description passed through to the resulting test input.</param>
/// <param name="distanceFunction">Distance function assigned to the "/embedding" entry of the policy.</param>
/// <returns>
/// A test input carrying the partition key definition, the vector embedding policy,
/// the parameterized query spec, and the Geography geospatial type.
/// </returns>
private static QueryPlanBaselineTestInput MakeVectorTest(string description, Cosmos.DistanceFunction distanceFunction)
|
||||
{
|
||||
PartitionKeyDefinition partitionKeyDefinition = CreateHashPartitionKey("/PartitionKey");
|
||||
|
||||
// Single-embedding policy; only DistanceFunction varies between test cases.
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy = new Cosmos.VectorEmbeddingPolicy(new Collection<Cosmos.Embedding>
|
||||
{
|
||||
new Cosmos.Embedding
|
||||
{
|
||||
Path = "/embedding",
|
||||
DataType = Cosmos.VectorDataType.Float32,
|
||||
// 8 matches the number of elements in the VectorEmbedding array bound to @vectorEmbedding.
Dimensions = 8,
|
||||
DistanceFunction = distanceFunction
|
||||
}
|
||||
});
|
||||
|
||||
// The query both projects and orders by the same VectorDistance expression over c.embedding.
string queryText = @"SELECT TOP 10 c.title AS Title, VectorDistance(c.embedding, @vectorEmbedding, true) AS SimilarityScore
|
||||
FROM c
|
||||
ORDER BY VectorDistance(c.embedding, @vectorEmbedding, true)";
|
||||
|
||||
// Bind the shared VectorEmbedding array as the @vectorEmbedding query parameter.
SqlQuerySpec sqlQuerySpec = new SqlQuerySpec(
|
||||
queryText,
|
||||
new SqlParameterCollection(new SqlParameter[] { new SqlParameter("@vectorEmbedding", VectorEmbedding) }));
|
||||
|
||||
return new QueryPlanBaselineTestInput(
|
||||
description,
|
||||
partitionKeyDefinition,
|
||||
vectorEmbeddingPolicy,
|
||||
sqlQuerySpec,
|
||||
Cosmos.GeospatialType.Geography);
|
||||
}
|
||||
|
||||
/// <summary>
/// Eight-element query vector passed as the value of the <c>@vectorEmbedding</c>
/// parameter by <c>MakeVectorTest</c>; its length matches the Dimensions (8)
/// declared in that method's vector embedding policy.
/// </summary>
private static readonly double[] VectorEmbedding = new double[] {
|
||||
0.0039695268496870995,
|
||||
0.027338456362485886,
|
||||
-0.005676387343555689,
|
||||
-0.013547309674322605,
|
||||
-0.002445543883368373,
|
||||
0.01579204574227333,
|
||||
-0.016796082258224487,
|
||||
-0.012471556663513184 };
|
||||
|
||||
private static PartitionKeyDefinition CreateHashPartitionKey(
|
||||
params string[] partitionKeys) => new PartitionKeyDefinition()
|
||||
{
|
||||
|
@ -1440,6 +1495,7 @@
|
|||
TryCatch<PartitionedQueryExecutionInfoInternal> info = QueryPartitionProviderTestInstance.Object.TryGetPartitionedQueryExecutionInfoInternal(
|
||||
JsonConvert.SerializeObject(input.SqlQuerySpec),
|
||||
input.PartitionKeyDefinition,
|
||||
input.VectorEmbeddingPolicy,
|
||||
requireFormattableOrderByQuery: true,
|
||||
isContinuationExpected: false,
|
||||
allowNonValueAggregateQuery: true,
|
||||
|
@ -1459,20 +1515,38 @@
|
|||
|
||||
public sealed class QueryPlanBaselineTestInput : BaselineTestInput
|
||||
{
|
||||
internal PartitionKeyDefinition PartitionKeyDefinition { get; set; }
|
||||
internal SqlQuerySpec SqlQuerySpec { get; set; }
|
||||
internal Cosmos.GeospatialType? GeospatialType { get; set; }
|
||||
internal PartitionKeyDefinition PartitionKeyDefinition { get; }
|
||||
|
||||
internal Cosmos.VectorEmbeddingPolicy VectorEmbeddingPolicy { get; }
|
||||
|
||||
internal SqlQuerySpec SqlQuerySpec { get; }
|
||||
|
||||
internal Cosmos.GeospatialType? GeospatialType { get; }
|
||||
|
||||
internal QueryPlanBaselineTestInput(
|
||||
string description,
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
SqlQuerySpec sqlQuerySpec)
|
||||
: base(description)
|
||||
: this(description, partitionKeyDefinition, vectorEmbeddingPolicy: null, sqlQuerySpec, geospatialType: null)
|
||||
{
|
||||
this.PartitionKeyDefinition = partitionKeyDefinition;
|
||||
this.SqlQuerySpec = sqlQuerySpec;
|
||||
}
|
||||
|
||||
internal QueryPlanBaselineTestInput(
|
||||
string description,
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
SqlQuerySpec sqlQuerySpec,
|
||||
Cosmos.GeospatialType? geospatialType)
|
||||
: base(description)
|
||||
{
|
||||
this.PartitionKeyDefinition = partitionKeyDefinition;
|
||||
this.VectorEmbeddingPolicy = vectorEmbeddingPolicy;
|
||||
this.SqlQuerySpec = sqlQuerySpec;
|
||||
this.GeospatialType = geospatialType;
|
||||
}
|
||||
|
||||
public override void SerializeAsXml(XmlWriter xmlWriter)
|
||||
{
|
||||
xmlWriter.WriteElementString("Description", this.Description);
|
||||
|
|
|
@ -32,6 +32,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
|
|||
It.IsAny<SqlQuerySpec>(),
|
||||
It.IsAny<ResourceType>(),
|
||||
It.IsAny<Documents.PartitionKeyDefinition>(),
|
||||
It.IsAny<Cosmos.VectorEmbeddingPolicy>(),
|
||||
It.IsAny<bool>(),
|
||||
It.IsAny<bool>(),
|
||||
It.IsAny<bool>(),
|
||||
|
@ -46,6 +47,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
|
|||
new SqlQuerySpec("selectttttt * from c"),
|
||||
ResourceType.Document,
|
||||
new Documents.PartitionKeyDefinition() { Paths = new Collection<string>() { "/id" } },
|
||||
vectorEmbeddingPolicy:null,
|
||||
hasLogicalPartitionKey: false,
|
||||
geospatialType: Cosmos.GeospatialType.Geography,
|
||||
useSystemPrefix: false,
|
||||
|
@ -67,6 +69,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
|
|||
It.IsAny<SqlQuerySpec>(),
|
||||
It.IsAny<ResourceType>(),
|
||||
It.IsAny<Documents.PartitionKeyDefinition>(),
|
||||
It.IsAny<Cosmos.VectorEmbeddingPolicy>(),
|
||||
It.IsAny<bool>(),
|
||||
It.IsAny<bool>(),
|
||||
It.IsAny<bool>(),
|
||||
|
@ -82,6 +85,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
|
|||
new SqlQuerySpec("selectttttt * from c"),
|
||||
ResourceType.Document,
|
||||
new Documents.PartitionKeyDefinition() { Paths = new Collection<string>() { "/id" } },
|
||||
vectorEmbeddingPolicy: null,
|
||||
hasLogicalPartitionKey: false,
|
||||
geospatialType: Cosmos.GeospatialType.Geography,
|
||||
useSystemPrefix: false,
|
||||
|
@ -101,6 +105,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
|
|||
It.IsAny<SqlQuerySpec>(),
|
||||
It.IsAny<ResourceType>(),
|
||||
It.IsAny<Documents.PartitionKeyDefinition>(),
|
||||
It.IsAny<Cosmos.VectorEmbeddingPolicy>(),
|
||||
It.IsAny<bool>(),
|
||||
It.IsAny<bool>(),
|
||||
It.IsAny<bool>(),
|
||||
|
@ -115,6 +120,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query
|
|||
new SqlQuerySpec("Super secret query that triggers bug"),
|
||||
ResourceType.Document,
|
||||
new Documents.PartitionKeyDefinition() { Paths = new Collection<string>() { "/id" } },
|
||||
vectorEmbeddingPolicy: null,
|
||||
hasLogicalPartitionKey: false,
|
||||
geospatialType: Cosmos.GeospatialType.Geography,
|
||||
useSystemPrefix: false,
|
||||
|
|
|
@ -172,6 +172,7 @@
|
|||
TryCatch<PartitionedQueryExecutionInfo> tryGetQueryPlan = queryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
|
||||
querySpecJsonString: querySpecJsonString,
|
||||
partitionKeyDefinition: pkDefinition,
|
||||
vectorEmbeddingPolicy: null,
|
||||
requireFormattableOrderByQuery: true,
|
||||
isContinuationExpected: true,
|
||||
allowNonValueAggregateQuery: true,
|
||||
|
@ -326,6 +327,7 @@
|
|||
true)
|
||||
},
|
||||
SubpartitionTests.CreatePartitionKeyDefinition(),
|
||||
vectorEmbeddingPolicy: null,
|
||||
Cosmos.GeospatialType.Geometry));
|
||||
}
|
||||
|
||||
|
@ -349,14 +351,26 @@
|
|||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
public override async Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetPartitionedQueryExecutionInfoAsync(SqlQuerySpec sqlQuerySpec, ResourceType resourceType, PartitionKeyDefinition partitionKeyDefinition, bool requireFormattableOrderByQuery, bool isContinuationExpected, bool allowNonValueAggregateQuery, bool hasLogicalPartitionKey, bool allowDCount, bool useSystemPrefix, Cosmos.GeospatialType geospatialType, CancellationToken cancellationToken)
|
||||
/// <summary>
/// Test override that produces a query plan by serializing the query spec to JSON and handing it,
/// together with the partition key definition, to
/// OptimisticDirectExecutionQueryBaselineTests.GetPartitionedQueryExecutionInfoAndPartitionProvider.
/// </summary>
/// <remarks>
/// Only <paramref name="sqlQuerySpec"/> and <paramref name="partitionKeyDefinition"/> are read in this body;
/// the remaining parameters (including <paramref name="vectorEmbeddingPolicy"/>) are accepted to satisfy
/// the overridden signature and are not forwarded.
/// </remarks>
/// <returns>A task wrapping a successful TryCatch that holds the computed PartitionedQueryExecutionInfo.</returns>
public override Task<TryCatch<PartitionedQueryExecutionInfo>> TryGetPartitionedQueryExecutionInfoAsync(
|
||||
SqlQuerySpec sqlQuerySpec,
|
||||
ResourceType resourceType,
|
||||
PartitionKeyDefinition partitionKeyDefinition,
|
||||
Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
|
||||
bool requireFormattableOrderByQuery,
|
||||
bool isContinuationExpected,
|
||||
bool allowNonValueAggregateQuery,
|
||||
bool hasLogicalPartitionKey,
|
||||
bool allowDCount,
|
||||
bool useSystemPrefix,
|
||||
Cosmos.GeospatialType geospatialType,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
CosmosSerializerCore serializerCore = new();
|
||||
// The spec is always serialized as Documents.ResourceType.Document; the resourceType argument is not consulted.
using StreamReader streamReader = new(serializerCore.ToStreamSqlQuerySpec(sqlQuerySpec, Documents.ResourceType.Document));
|
||||
string sqlQuerySpecJsonString = streamReader.ReadToEnd();
|
||||
|
||||
// The helper also returns a QueryPartitionProvider; it is deconstructed here but not used afterwards in this body.
(PartitionedQueryExecutionInfo partitionedQueryExecutionInfo, QueryPartitionProvider queryPartitionProvider) = OptimisticDirectExecutionQueryBaselineTests.GetPartitionedQueryExecutionInfoAndPartitionProvider(sqlQuerySpecJsonString, partitionKeyDefinition);
|
||||
// NOTE(review): two return statements follow. They look like the before/after lines of this change shown
// without diff markers; only the Task.FromResult form matches the non-async signature above. Confirm.
return TryCatch<PartitionedQueryExecutionInfo>.FromResult(partitionedQueryExecutionInfo);
|
||||
return Task.FromResult(TryCatch<PartitionedQueryExecutionInfo>.FromResult(partitionedQueryExecutionInfo));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -713,6 +713,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Routing
|
|||
QueryPartitionProvider.TryGetPartitionedQueryExecutionInfo(
|
||||
querySpecJsonString: JsonConvert.SerializeObject(new SqlQuerySpec(queryText)),
|
||||
partitionKeyDefinition: partitionKeyDefinition,
|
||||
vectorEmbeddingPolicy: null,
|
||||
requireFormattableOrderByQuery: true,
|
||||
isContinuationExpected: true,
|
||||
allowNonValueAggregateQuery: false,
|
||||
|
@ -863,6 +864,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Routing
|
|||
allowNonValueAggregates: true,
|
||||
useSystemPrefix: false,
|
||||
partitionKeyDefinition: new PartitionKeyDefinition { Paths = new Collection<string> { testcase.PartitionKey }, Kind = PartitionKind.Hash },
|
||||
vectorEmbeddingPolicy: null,
|
||||
queryPartitionProvider: QueryPartitionProviderTestInstance.Object,
|
||||
clientApiVersion: testcase.ClientApiVersion,
|
||||
geospatialType: Cosmos.GeospatialType.Geography,
|
||||
|
|
|
@ -770,6 +770,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Tracing
|
|||
TryCatch<PartitionedQueryExecutionInfoInternal> info = QueryPartitionProviderTestInstance.Object.TryGetPartitionedQueryExecutionInfoInternal(
|
||||
Newtonsoft.Json.JsonConvert.SerializeObject(new SqlQuerySpec(query)),
|
||||
partitionKeyDefinition,
|
||||
vectorEmbeddingPolicy: null,
|
||||
requireFormattableOrderByQuery: true,
|
||||
isContinuationExpected: false,
|
||||
allowNonValueAggregateQuery: true,
|
||||
|
|
Загрузка…
Ссылка в новой задаче