Benchmark queries for Kusto and Spark SQL

Benchmark queries for Kusto and Spark SQL
This commit is contained in:
Hauke Mallow 2021-12-07 12:25:24 +01:00 коммит произвёл GitHub
Родитель 945cd5280e
Коммит 5034703008
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
38 изменённых файлов: 609 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,4 @@
// Count 'Warning' rows from the first 12 hours of 2014-03-08 whose Message
// contains the term 'enabled'.
Logs
| where Timestamp between (datetime(2014-03-08) .. 12h)
    and Level == 'Warning'
    and Message has 'enabled'
| count

Просмотреть файл

@ -0,0 +1,4 @@
// Count 'Error' rows whose Message contains the term 'safeArrayRankMismatch'.
// No time filter is applied.
Logs
| where Level == 'Error' and Message has 'safeArrayRankMismatch'
| count

Просмотреть файл

@ -0,0 +1,4 @@
// Rows per Component for the 03:00-04:00 hour of 2014-03-08, restricted to
// sources whose name starts with 'im' and messages containing 'response'
// (both string operators are case-insensitive).
Logs
// Day is zero-padded (2014-03-08) like every other literal in this suite.
| where Timestamp between (datetime(2014-03-08 03:00:00) .. 1h)
| where Source startswith 'im' and Message contains 'response'
| summarize Count = count() by Component

Просмотреть файл

@ -0,0 +1,4 @@
// The 1000 most recent rows from the 03:00-04:00 hour of 2014-03-08 in which
// any column contains the term 'Internal'.
Logs
| where Timestamp between (datetime(2014-03-08 03:00:00) .. 1h)
| where * has 'Internal'
// 'desc' is the default sort direction of 'top'; spelled out for clarity.
| top 1000 by Timestamp desc

11
queries/kusto/query05.kql Normal file
Просмотреть файл

@ -0,0 +1,11 @@
// Count rows that belong to any of eight specific client request ids.
Logs
| where ClientRequestId in (
    'd71ab629-ebaf-9380-5fe8-942541387ce5',
    '6bb29a30-ce0d-1288-36f0-27dbd57d66b0',
    '1f82e290-a7c4-ac84-7117-52209b3b9c91',
    'ecc12181-8c5a-4f87-1ca3-712b4a82c4f0',
    'd275a6f0-ba1d-22cf-b06b-6dac508ece4b',
    'f0565381-29db-bf73-ca1b-319e80debe1c',
    '54807a9a-e442-883f-6d8b-186c1c2a1041',
    'f1d10647-fc31-dbc3-9e25-67f68a6fe194'
  )
| count

Просмотреть файл

@ -0,0 +1,8 @@
// Over a 10-day window starting 2014-03-08, bucket rows from four sources into
// a LogType derived from Component and count the rows in each bucket.
Logs
| where Timestamp between (datetime(2014-03-08) .. 10d)
    and Source in ('IMAGINEFIRST0', 'HAVINGCOLUMN182', 'THEREFORESTORE156', 'HOSTNODES207')
| extend LogType = case(
    Component in ('CLOUDREPORTSERVER', 'COMMON1', 'FABRICINTEGRATOR', 'REQUESTPROTECTION', 'DIRECTORYSERVICE', 'REPORTSERVERSERVICETRACE', 'ACONFIGURATION', 'EXPLORESERVICEWATCHDOG', 'COMMUNICATIONRUNTIME'),
        'Security',
    Component in ('REPORTNETWORKING', 'PUSHDATASERVICETRACE', 'HEALTHSERVICE', 'UTILS', 'PROVIDERSCOMMON'),
        'Performance',
    Component in ('WORKERSERVICECONTENT', 'XMLACOMMON', 'INTEGRATIONDATABASE', 'DATABASEMANAGEMENT'),
        'Ingestion',
    // Anything not listed above.
    'Other')
| summarize Count = count() by LogType

Просмотреть файл

@ -0,0 +1,6 @@
// Ten most frequent (exception type, exception message) pairs among 'Error'
// rows from 12:00-13:00 on 2014-03-08 whose Message starts with 'exception'.
Logs
| where Timestamp between (datetime(2014-03-08 12:00) .. 1h)
| where Level == 'Error' and Message startswith 'exception'
// Pull the two fields out of text shaped like 'Exception=<type>;...Message=<text>;...'.
// Output column renamed from the misspelled 'ExeptionType' to 'ExceptionType'.
| parse Message with 'Exception=' ExceptionType ';' * 'Message=' ExceptionMessage ';' *
| summarize Count = count() by ExceptionType, ExceptionMessage
| top 10 by Count

Просмотреть файл

@ -0,0 +1,3 @@
// Row count per Level for the six hours starting 2014-03-08 12:00.
Logs
| where Timestamp between (datetime(2014-03-08 12:00) .. 6h)
| summarize Count = count()
    by Level

Просмотреть файл

@ -0,0 +1,4 @@
// The 50 largest (Level, Component) groups by row count for the six hours
// starting 2014-03-08 12:00.
Logs
| where Timestamp between (datetime(2014-03-08 12:00) .. 6h)
| summarize Count = count()
    by Level, Component
// 'desc' is the default sort direction of 'top'; spelled out for clarity.
| top 50 by Count desc

Просмотреть файл

@ -0,0 +1,5 @@
// Ten largest (Level, Component, Node, ClientRequestId) groups among rows from
// source IMAGINEFIRST0 over three days whose Message contains 'downloaded'.
Logs
| where Timestamp between (datetime(2014-03-08) .. 3d)
| where Source == 'IMAGINEFIRST0'
| where Message has 'downloaded'
// The shuffle hint partitions the aggregation by ClientRequestId.
| summarize hint.shufflekey = ClientRequestId
    Count = count()
    by Level, Component, Node, ClientRequestId
| top 10 by Count desc

11
queries/kusto/query11.kql Normal file
Просмотреть файл

@ -0,0 +1,11 @@
// Step 1: the ten nodes with the most 'Error' rows in the six hours starting
// 2014-03-08 12:00.
let ErrorHeavyNodes =
    Logs
    | where Timestamp between (datetime(2014-03-08 12:00) .. 6h)
        and Level == 'Error'
    | summarize ErrorCount = count() by Node
    | top 10 by ErrorCount desc
    | project Node;
// Step 2: for those nodes, row count per (Level, Node) over the same window.
Logs
| where Timestamp between (datetime(2014-03-08 12:00) .. 6h)
| where Node in (ErrorHeavyNodes)
| summarize count() by Level, Node

Просмотреть файл

@ -0,0 +1,4 @@
// Estimated distinct ClientRequestId count per hourly bin for 'Error' rows in
// the six hours starting 2014-03-08 12:00.
Logs
| where Timestamp between (datetime(2014-03-08 12:00) .. 6h)
    and Level == 'Error'
| summarize dcount(ClientRequestId)
    by bin(Timestamp, 1h)

Просмотреть файл

@ -0,0 +1,7 @@
// Per Source: the largest rowCount and the 50th/90th/95th percentile of
// duration, both read from Properties, for 'IngestionCompletionEvent' rows
// whose Properties mention 'parquet' (six hours starting 2014-03-08 12:00).
Logs
| where Timestamp between (datetime(2014-03-08 12:00) .. 6h)
| where Message startswith 'IngestionCompletionEvent'
| where Source in (
    'IMAGINEFIRST0', 'CLIMBSTEADY83', 'INTERNALFIRST79', 'WORKWITHIN77',
    'ADOPTIONCUSTOMERS81', 'FIVENEARLY85', 'WHATABOUT98', 'PUBLICBRAINCHILD89',
    'WATCHPREVIEW91', 'LATERYEARS87', 'GUTHRIESSCOTT93', 'THISSTORING16')
| where Properties has 'parquet'
| summarize
    MaxRowCount = max(tolong(Properties.rowCount)),
    percentiles(totimespan(Properties.duration), 50, 90, 95)
    by Source

Просмотреть файл

@ -0,0 +1,5 @@
// Ten sources with the highest peak download rate (compressedSize per second
// of downloadDuration) among DOWNLOADER rows from 12:00-13:00 on 2014-03-08.
Logs
| where Timestamp between (datetime(2014-03-08 12:00) .. 1h)
| where Component == "DOWNLOADER"
// Dividing a timespan by 1s yields the duration as a number of seconds.
| summarize DownloadRate = max(todouble(Properties.compressedSize) / (totimespan(Properties.downloadDuration) / 1s))
    by Source
| top 10 by DownloadRate desc

12
queries/kusto/query15.kql Normal file
Просмотреть файл

@ -0,0 +1,12 @@
// 'IngestionCompletionEvent' rows from source IMAGINEFIRST0 over the five days
// starting 2014-03-08 12:00.
let Data =
    Logs
    | where Source == 'IMAGINEFIRST0'
        and Timestamp between (datetime(2014-03-08 12:00) .. 5d)
        and Message startswith 'IngestionCompletionEvent';
// The ten nodes with the highest single cpuTime; ties are broken by Node
// name, descending.
let TopNodesByCPU =
    Data
    | summarize MaxCPU = max(totimespan(Properties.cpuTime)) by Node
    | sort by MaxCPU desc, Node desc
    | take 10
    | project Node;
// Average cpuTime for those nodes, per 5-minute bin.
Data
| where Node in (TopNodesByCPU)
| summarize AverageProcessTimeInSeconds = avg(totimespan(Properties.cpuTime))
    by bin(Timestamp, 5m), Node

10
queries/kusto/query16.kql Normal file
Просмотреть файл

@ -0,0 +1,10 @@
// Hierarchical error breakdown for 'Error' rows mentioning 'ArrayTypeMismatch'
// from twelve named sources, over the three days starting 2014-03-08.
Logs
| where Timestamp between(datetime(2014-03-08) .. 3d)
| where Level == 'Error'
| where Message has 'ArrayTypeMismatch'
| where Source in ('IMAGINEFIRST0', 'CLIMBSTEADY83', 'INTERNALFIRST79', 'WORKWITHIN77', 'ADOPTIONCUSTOMERS81', 'FIVENEARLY85',
'WHATABOUT98', 'PUBLICBRAINCHILD89', 'WATCHPREVIEW91', 'LATERYEARS87', 'GUTHRIESSCOTT93', 'THISSTORING16')
// Level 1: Source, with no count given and a constant max(1) as the ranking value.
| top-nested of Source by max(1),
// Level 2: top 3 nodes per source, ranked by row count.
top-nested 3 of Node by NodeErrors=count(),
// Level 3: top 3 components per node; the remainder is reported as 'Other components'.
top-nested 3 of Component with others = 'Other components' by ComponentErrors=count()
| project Source, Node, Component, NodeErrors, ComponentErrors

12
queries/kusto/query17.kql Normal file
Просмотреть файл

@ -0,0 +1,12 @@
// 'IngestionCompletionEvent' rows from source IMAGINEFIRST0 over the three
// days starting 2014-03-08 12:00.
let Data =
    Logs
    | where Source == 'IMAGINEFIRST0'
        and Timestamp between (datetime(2014-03-08 12:00) .. 3d)
        and Message startswith 'IngestionCompletionEvent';
// The ten nodes with the highest single cpuTime; ties are broken by Node
// name, descending.
let TopNodesByCPU =
    Data
    | summarize MaxCPU = max(totimespan(Properties.cpuTime)) by Node
    | sort by MaxCPU desc, Node desc
    | take 10
    | project Node;
// Average cpuTime for those nodes, per 5-minute bin.
Data
| where Node in (TopNodesByCPU)
| summarize AverageProcessTimeInSeconds = avg(totimespan(Properties.cpuTime))
    by bin(Timestamp, 5m), Node

16
queries/kusto/query18.kql Normal file
Просмотреть файл

@ -0,0 +1,16 @@
// Rows ingested per (Format, Table) for source IMAGINEFIRST0 over the three
// days starting 2014-03-08 12:00, keeping the top 10 tables per format.
let Data =
Logs
| where Timestamp between(datetime(2014-03-08 12:00) .. 3d)
| where Source == 'IMAGINEFIRST0';
Data
// Left side: ingestion command rows; the table name follows 'table=' and ends at the next space.
| where Message startswith '$$IngestionCommand'
| parse Message with '$$IngestionCommand table=' Table ' ' *
// One row per (ClientRequestId, Table) pair.
| distinct hint.shufflekey=ClientRequestId ClientRequestId, Table
// Right side: completion events, matched to the command rows by ClientRequestId.
| join kind=inner hint.shufflekey=ClientRequestId (
Data
| where Message startswith 'IngestionCompletionEvent'
) on ClientRequestId
// Format and row count are read from the Properties column.
| extend Format=tostring(Properties.format), RowCount=tolong(Properties.rowCount)
// Level 1: Format (no count given); level 2: top 10 tables per format by summed RowCount.
| top-nested of Format by max(1),
top-nested 10 of Table by Rows=sum(RowCount)
| project Format, Table, Rows

19
queries/kusto/query19.kql Normal file
Просмотреть файл

@ -0,0 +1,19 @@
// Rows from source IMAGINEFIRST0 over the three days starting 2014-03-08 12:00.
let Data =
    Logs
    | where Timestamp between (datetime(2014-03-08 12:00) .. 3d)
    | where Source in ('IMAGINEFIRST0');
// (request id, path) for every 'Downloading file path:' message.
let Downloading =
    Data
    | where Message startswith 'Downloading file path:'
    | parse Message with * 'path:' path
    | project DownloadClientRequestId = ClientRequestId, path;
// (request id, path) for every 'IngestionCompletionEvent' message.
let IngestionCompletion =
    Data
    | where Message startswith 'IngestionCompletionEvent'
    | parse Message with * 'path:' path
    | project CompleteClientRequestId = ClientRequestId, path;
// Count path matches where download and completion carry different request ids.
Downloading
| join hint.shufflekey = path kind = inner (IngestionCompletion) on path
| where DownloadClientRequestId != CompleteClientRequestId
| count

Просмотреть файл

@ -0,0 +1,7 @@
-- Count 'Warning' rows from the first 12 hours of 2014-03-08 whose Message
-- contains the substring 'enabled' (LIKE is case-sensitive).
select
    count(*)
from logs_tpc
where Timestamp >= '2014-03-08 00:00:00'
  and Timestamp <= '2014-03-08 12:00:00'
  and Level = 'Warning'
  and Message like '%enabled%'

Просмотреть файл

@ -0,0 +1,6 @@
-- Count 'Error' rows whose Message contains 'safearrayrankmismatch',
-- compared case-insensitively by lower-casing the message first.
select
    count(*)
from logs_tpc
where Level = 'Error'
  and lower(Message) like '%safearrayrankmismatch%'

Просмотреть файл

@ -0,0 +1,9 @@
-- Rows per Component for the 03:00-04:00 hour of 2014-03-08, restricted to
-- sources starting with 'IM' and messages containing 'response'.
select
    Component,
    count(*)
from logs_tpc
where Day = '2014-03-08 00:00:00'  -- same calendar day as the Timestamp range below
  and Timestamp between '2014-03-08 03:00:00' and '2014-03-08 04:00:00'
  -- Prefix changed from 'IN%' to 'IM%' to match the Kusto counterpart
  -- (Source startswith 'im'); 'IN%' selects a different set of sources.
  and Source like 'IM%'
  -- Same case-insensitive match as ILIKE, written in the lower()/like form
  -- the other queries in this suite use.
  and lower(Message) like '%response%'
group by Component

Просмотреть файл

@ -0,0 +1,10 @@
-- The 1000 most recent rows from the 03:00-04:00 hour of 2014-03-08 whose
-- Message contains 'internal' (case-insensitive).
-- NOTE(review): the Kusto counterpart searches every column ('* has'); this
-- query only inspects Message.
select
    *
from logs_tpc
where Timestamp between '2014-03-08 03:00:00' and '2014-03-08 04:00:00'
  and lower(Message) like '%internal%'
-- Descending order is required: Kusto 'top N by' sorts descending by default,
-- so an ascending sort would return the oldest 1000 rows instead.
order by Timestamp desc
limit 1000

Просмотреть файл

@ -0,0 +1,15 @@
-- Count rows that belong to any of eight specific client request ids.
select
    count(*)
from logs_tpc
where ClientRequestId in (
    'd71ab629-ebaf-9380-5fe8-942541387ce5',
    '6bb29a30-ce0d-1288-36f0-27dbd57d66b0',
    '1f82e290-a7c4-ac84-7117-52209b3b9c91',
    'ecc12181-8c5a-4f87-1ca3-712b4a82c4f0',
    'd275a6f0-ba1d-22cf-b06b-6dac508ece4b',
    'f0565381-29db-bf73-ca1b-319e80debe1c',
    '54807a9a-e442-883f-6d8b-186c1c2a1041',
    'f1d10647-fc31-dbc3-9e25-67f68a6fe194'
)

Просмотреть файл

@ -0,0 +1,16 @@
-- Bucket rows from four sources into a LogType derived from Component and
-- count the rows per bucket, over the ten days starting 2014-03-08.
select
    case
        when Component in ('CLOUDREPORTSERVER', 'COMMON1', 'FABRICINTEGRATOR', 'REQUESTPROTECTION', 'DIRECTORYSERVICE', 'REPORTSERVERSERVICETRACE', 'ACONFIGURATION', 'EXPLORESERVICEWATCHDOG', 'COMMUNICATIONRUNTIME') then 'Security'
        when Component in ('REPORTNETWORKING', 'PUSHDATASERVICETRACE', 'HEALTHSERVICE', 'UTILS', 'PROVIDERSCOMMON') then 'Performance'
        when Component in ('WORKERSERVICECONTENT', 'XMLACOMMON', 'INTEGRATIONDATABASE', 'DATABASEMANAGEMENT') then 'Ingestion'
        else 'Other'
    end as LogType,
    count(*)
from logs_tpc
where
    -- Upper bound moved from 2014-03-08 10:00 (ten hours) to 2014-03-18 00:00
    -- (ten days) so the window matches the Kusto counterpart ('.. 10d').
    Timestamp between '2014-03-08 00:00:00' and '2014-03-18 00:00:00'
    and Source in ('IMAGINEFIRST0', 'HAVINGCOLUMN182', 'THEREFORESTORE156', 'HOSTNODES207')
group by
    LogType

Просмотреть файл

@ -0,0 +1,18 @@
-- Ten most frequent (exception type, exception message) pairs among 'Error'
-- rows from 12:00-13:00 on 2014-03-08 whose Message starts with 'Exception'.
select
    -- [^;]* stops at the first ';'. The previous greedy (.*) ran on to the
    -- last ';' in the message, pulling later fields into the captured value.
    regexp_extract(Message, 'Exception=([^;]*);') as ExceptionType,
    regexp_extract(Message, 'Message=([^;]*);') as ExceptionMessage,
    count(*) as ExceptionCount
from logs_tpc
where Day = '2014-03-08 00:00:00'  -- same calendar day as the Timestamp range below
  and Timestamp between '2014-03-08 12:00:00' and '2014-03-08 13:00:00'
  and Level = 'Error'
  and Message like 'Exception%'
group by
    ExceptionType,
    ExceptionMessage
order by
    ExceptionCount desc
limit 10

Просмотреть файл

@ -0,0 +1,10 @@
-- Row count per Level for 12:00-18:00 on 2014-03-08.
select
    Level,
    count(*) as Count
from logs_tpc
where Day = '2014-03-08 00:00:00'  -- same calendar day as the Timestamp range below
  and Timestamp >= '2014-03-08 12:00:00'
  and Timestamp <= '2014-03-08 18:00:00'
group by Level

Просмотреть файл

@ -0,0 +1,15 @@
-- q9
-- The 50 largest (Level, Component) groups by row count for 12:00-18:00 on
-- 2014-03-08.
select
    Level,
    Component,
    count(*) as Count
from logs_tpc
where Timestamp >= '2014-03-08 12:00:00'
  and Timestamp <= '2014-03-08 18:00:00'
group by Level, Component
order by Count desc
limit 50

Просмотреть файл

@ -0,0 +1,21 @@
--Q10
-- Ten largest (Component, Level, Node, ClientRequestId) groups among rows from
-- source IMAGINEFIRST0 whose Message contains 'downloaded' (case-insensitive),
-- over the three days starting 2014-03-08 00:00.
select
    Component,
    Level,
    Node,
    ClientRequestId,
    count(*) as Count
from logs_tpc
where
    -- Window start moved from 12:00 to 00:00 so it covers the same three days
    -- as the Kusto counterpart (datetime(2014-03-08) .. 3d).
    Timestamp between '2014-03-08 00:00:00' and '2014-03-11 00:00:00'
    -- Source filter restored; it was commented out, but the Kusto counterpart
    -- filters on it.
    and Source = 'IMAGINEFIRST0'
    -- Single quotes: a double-quoted token is parsed as an identifier when
    -- ANSI double-quoted identifiers are enabled.
    and lower(Message) like '%downloaded%'
group by
    Component,
    Level,
    Node,
    ClientRequestId
order by
    Count desc
limit 10

Просмотреть файл

@ -0,0 +1,27 @@
-- query 11
-- Row count per (Level, Node) for the ten nodes that logged the most 'Error'
-- rows between 12:00 and 18:00 on 2014-03-08.
with top_nodes as (
    -- Ten nodes with the highest 'Error' row count in the window.
    select Node
    from logs_tpc
    where Timestamp >= '2014-03-08 12:00:00'
      and Timestamp <= '2014-03-08 18:00:00'
      and Level = 'Error'
    group by Node
    order by count(*) desc
    limit 10
)
select
    Level,
    Node,
    count(*) as Count
from logs_tpc
where Timestamp >= '2014-03-08 12:00:00'
  and Timestamp <= '2014-03-08 18:00:00'
  and Node in (select Node from top_nodes)
group by Level, Node

Просмотреть файл

@ -0,0 +1,13 @@
--Q12;
-- Distinct ClientRequestId count per hour for 'Error' rows between 12:00 and
-- 18:00 on 2014-03-08, ordered by hour.
select
    date_trunc("Hour", Timestamp) as bin,
    count(distinct ClientRequestId) as dcount
from logs_tpc
where Timestamp >= '2014-03-08 12:00:00'
  and Timestamp <= '2014-03-08 18:00:00'
  and Level = 'Error'
group by bin
order by bin

Просмотреть файл

@ -0,0 +1,15 @@
-- Per Source: the largest rowCount and the 50th/90th/95th percentile of
-- duration for 'ingestioncompletionevent' rows whose Properties mention
-- 'parquet', between 12:00 and 18:00 on 2014-03-08.
with CPUTime as (
    select
        Source,
        -- json_tuple returns both fields as strings.
        json_tuple(Properties, 'rowCount', 'duration') as (rowCount, durationdt),
        -- Parse the duration text as a time of day, then cast it to a number.
        cast((to_timestamp(durationdt, 'HH:mm:ss.SSSSSSS')) as double) as duration
    from logs_tpc
    where Timestamp between '2014-03-08 12:00:00' and '2014-03-08 18:00:00'
      and lower(Message) like 'ingestioncompletionevent%'
      and Source in ('IMAGINEFIRST0', 'CLIMBSTEADY83', 'INTERNALFIRST79', 'WORKWITHIN77', 'ADOPTIONCUSTOMERS81', 'FIVENEARLY85', 'WHATABOUT98', 'PUBLICBRAINCHILD89', 'WATCHPREVIEW91', 'LATERYEARS87', 'GUTHRIESSCOTT93', 'THISSTORING16')
      and lower(Properties) like '%parquet%'
)
select
    Source,
    -- Cast before aggregating: max() over the raw string compares
    -- lexicographically ('9' > '10'). The Kusto counterpart uses tolong().
    max(cast(rowCount as bigint)) as MaxRowCount,
    percentile(duration, 0.50) as p50,
    percentile(duration, 0.90) as p90,
    percentile(duration, 0.95) as p95
from CPUTime
group by Source

Просмотреть файл

@ -0,0 +1,19 @@
-- Ten sources with the highest peak download rate (compressedSize divided by
-- the numeric downloadDuration) among DOWNLOADER rows from 12:00-13:00 on
-- 2014-03-08.
with DownloadRates as (
    select
        Source,
        Properties,
        -- json_tuple returns both fields as strings.
        json_tuple(Properties, 'compressedSize', 'downloadDuration') as (compressedSize, downloadDuration),
        -- Parse the duration text as a time of day, then cast it to a number.
        cast((to_timestamp(downloadDuration, 'HH:mm:ss.SSSSSSS')) as double) as duration
    from logs_tpc
    where timestamp >= '2014-03-08 12:00:00'
      and timestamp <= '2014-03-08 13:00:00'
      and Component = 'DOWNLOADER'
)
select
    Source,
    max(compressedSize / duration) as DownloadRate
from DownloadRates
group by Source
order by DownloadRate desc
limit 10

Просмотреть файл

@ -0,0 +1,45 @@
-- Average cpuTime per (Node, 5-minute bin) for the ten nodes with the highest
-- single cpuTime, over 'ingestioncompletionevent' rows from source
-- IMAGINEFIRST0 in the five days starting 2014-03-08 12:00.
--
-- Changes from the previous version:
--   * removed an unrelated 'count Warning rows' query that had been pasted
--     between the two view definitions;
--   * restored the Source filter, which was commented out but is present in
--     the Kusto counterpart;
--   * top-10 nodes are ordered by MaxCPU first (it was Node first, which
--     picked the ten highest node names instead);
--   * the 5-minute bin uses floor(), matching Kusto bin(); round() shifted
--     rows from the second half of a bin into the next one.

-- Step 1: the rows every later step reads from.
create or replace temporary view Data as (
    select timestamp, node, properties
    from logs_tpc
    where isNotNull(Properties)
      and Source = 'IMAGINEFIRST0'
      and Timestamp between '2014-03-08 12:00:00' and '2014-03-13 12:00:00'
      and lower(Message) like 'ingestioncompletionevent%'
);

-- Step 2: the ten nodes with the highest single cpuTime; ties are broken by
-- Node name, descending.
create or replace temporary view TopNodesByCPU as (
    with CPUTime as (
        select
            Node,
            Properties,
            json_tuple(Properties, 'cpuTime') as (cpuTimeDate),
            -- Parse the cpuTime text as a time of day, then cast it to a number.
            cast((to_timestamp(cpuTimeDate, 'HH:mm:ss.SSSSSSS')) as double) as cpuTime
        from data
    )
    select Node, max(cpuTime) as MaxCPU
    from CPUTime
    group by Node
    order by MaxCPU desc, Node desc
    limit 10
);

-- Step 3: average cpuTime per node and 5-minute bin for those nodes.
with CPUTime as (
    select
        Node,
        -- Epoch seconds floored to a multiple of 300 s gives the bin start.
        cast(floor(cast(timestamp as double) / 300L) * 300 as timestamp) as bin,
        json_tuple(Properties, 'cpuTime') as (cpuTimeDate),
        cast((to_timestamp(cpuTimeDate, 'HH:mm:ss.SSSSSSS')) as double) as cpuTime
    from data
    where isNotNull(Properties)
      and Node in (select node from TopNodesByCPU)
)
select
    Node,
    bin,
    avg(cpuTime) as AvgCPU
from CPUTime
group by Node, bin

Просмотреть файл

@ -0,0 +1,104 @@
--Q16;
-- Spark SQL emulation of the Kusto top-nested query: for twelve named sources,
-- the top 3 nodes per source by 'arraytypemismatch' error count, and for each
-- node its top 3 components, with all remaining components folded into a
-- single 'Other components' row.
--
-- Changes from the previous version:
--   * the time window ends at 2014-03-11 00:00 (three days) instead of
--     2014-03-08 03:00 (three hours), matching the Kusto counterpart ('.. 3d');
--   * the component label is computed before grouping, so the 'Other
--     components' rows are summed into one row per (Source, Node); grouping
--     by the raw Component column left every component in its own row;
--   * ORDER BY clauses inside the CTEs were dropped; only the final ORDER BY
--     determines the output order.
with SourceMax as (
    -- Sources from the fixed list that have at least one matching error row.
    select
        Source,
        count(*) as SourceCount
    from logs_tpc
    where Timestamp between '2014-03-08 00:00:00' and '2014-03-11 00:00:00'
      and Level = 'Error'
      and Source in ('IMAGINEFIRST0', 'CLIMBSTEADY83', 'INTERNALFIRST79', 'WORKWITHIN77', 'ADOPTIONCUSTOMERS81', 'FIVENEARLY85', 'WHATABOUT98', 'PUBLICBRAINCHILD89', 'WATCHPREVIEW91', 'LATERYEARS87', 'GUTHRIESSCOTT93', 'THISSTORING16')
      and lower(Message) like '%arraytypemismatch%'
    group by Source
),
TopNestedNodes as (
    -- Top 3 nodes per source by error count. The source list is not repeated
    -- here; the inner join with SourceMax in the final select applies it.
    select
        *
    from (
        select
            *,
            row_number() over (partition by Source order by NodeErrors desc) as rownum
        from (
            select
                Source,
                Node,
                count(*) as NodeErrors
            from logs_tpc
            where Timestamp between '2014-03-08 00:00:00' and '2014-03-11 00:00:00'
              and Level = 'Error'
              and lower(Message) like '%arraytypemismatch%'
            group by Node, Source
        )
    )
    where rownum <= 3
),
TopNestedComponents as (
    -- Per (Source, Node): the top 3 components keep their name, the rest are
    -- relabelled and summed into one 'Other components' row.
    select
        Source,
        Node,
        ComponentLabel as Component,
        sum(ComponentErrors) as Errors
    from (
        select
            Source,
            Node,
            case
                when rownum <= 3 then Component
                else 'Other components'
            end as ComponentLabel,
            ComponentErrors
        from (
            select
                *,
                row_number() over (partition by Source, Node order by ComponentErrors desc) as rownum
            from (
                select
                    Source,
                    Node,
                    Component,
                    count(*) as ComponentErrors
                from logs_tpc
                where Timestamp between '2014-03-08 00:00:00' and '2014-03-11 00:00:00'
                  and Level = 'Error'
                  and lower(Message) like '%arraytypemismatch%'
                group by Component, Node, Source
            )
        )
    )
    group by Source, Node, ComponentLabel
)
select
    S.Source,
    N.Node,
    N.NodeErrors,
    C.Component,
    C.Errors as ComponentErrors
from TopNestedComponents C
inner join SourceMax S
    on C.Source = S.Source
inner join TopNestedNodes N
    on C.Node = N.Node
    and C.Source = N.Source
order by
    S.Source,
    N.Node,
    C.Component

Просмотреть файл

@ -0,0 +1,37 @@
-- Average cpuTime per (Node, 5-minute bin) for the ten nodes with the highest
-- single cpuTime, over 'ingestioncompletionevent' rows from source
-- IMAGINEFIRST0 in the three days starting 2014-03-08 12:00.
--
-- Changes from the previous version:
--   * the time window ends at 2014-03-11 12:00 (three days) instead of
--     2014-03-08 15:00 (three hours), matching the Kusto counterpart ('.. 3d');
--   * top-10 nodes are ordered by MaxCPU first (it was Node first, which
--     picked the ten highest node names instead);
--   * the 5-minute bin uses floor(), matching Kusto bin(); round() shifted
--     rows from the second half of a bin into the next one.

-- Step 1: the rows every later step reads from.
create or replace temporary view Data as (
    select timestamp, node, properties
    from logs_tpc
    where isNotNull(Properties)
      and Source = 'IMAGINEFIRST0'
      and Timestamp between '2014-03-08 12:00:00' and '2014-03-11 12:00:00'
      and lower(Message) like 'ingestioncompletionevent%'
);

-- Step 2: the ten nodes with the highest single cpuTime; ties are broken by
-- Node name, descending.
create or replace temporary view TopNodesByCPU as (
    with CPUTime as (
        select
            Node,
            Properties,
            json_tuple(Properties, 'cpuTime') as (cpuTimeDate),
            -- Parse the cpuTime text as a time of day, then cast it to a number.
            cast((to_timestamp(cpuTimeDate, 'HH:mm:ss.SSSSSSS')) as double) as cpuTime
        from data
    )
    select Node, max(cpuTime) as MaxCPU
    from CPUTime
    group by Node
    order by MaxCPU desc, Node desc
    limit 10
);

-- Step 3: average cpuTime per node and 5-minute bin for those nodes.
with CPUTime as (
    select
        Node,
        -- Epoch seconds floored to a multiple of 300 s gives the bin start.
        cast(floor(cast(timestamp as double) / 300L) * 300 as timestamp) as bin,
        json_tuple(Properties, 'cpuTime') as (cpuTimeDate),
        cast((to_timestamp(cpuTimeDate, 'HH:mm:ss.SSSSSSS')) as double) as cpuTime
    from data
    where isNotNull(Properties)
      and Node in (select node from TopNodesByCPU)
)
select
    Node,
    bin,
    avg(cpuTime) as AvgCPU
from CPUTime
group by Node, bin

Просмотреть файл

@ -0,0 +1,36 @@
-- Rows ingested per (Format, table) for source IMAGINEFIRST0 over the three
-- days starting 2014-03-08 12:00: ingestion-command rows supply the table
-- name, completion events supply format and row count, joined on
-- ClientRequestId.
--
-- Changes from the previous version:
--   * the filter "... and Source = 'IMAGINEFIRST0' or Source = 'PAPERWHITE113'"
--     parsed as "(time and IMAGINEFIRST0) or PAPERWHITE113", letting every
--     PAPERWHITE113 row bypass the time window; the Kusto counterpart filters
--     on IMAGINEFIRST0 only, so the OR branch was removed;
--   * the time window ends at 2014-03-11 12:00 (three days) instead of
--     2014-03-08 15:00 (three hours), matching the Kusto counterpart ('.. 3d');
--   * the table-name regex stops at the first space ([^ ]*) instead of greedily
--     running to the last space in the message.
-- NOTE(review): the Kusto counterpart keeps only the top 10 tables per format
-- (top-nested 10); this query returns every (Format, table) pair.

-- Step 1: all rows for the source inside the time window.
create or replace temporary view Data as (
    select
        *
    from logs_tpc
    where Timestamp between '2014-03-08 12:00:00' and '2014-03-11 12:00:00'
      and Source = 'IMAGINEFIRST0'
);

-- Step 2: distinct (table name, request id) pairs from ingestion-command rows.
create or replace temporary view Data2 as (
    select distinct
        regexp_extract(Message, 'table=([^ ]*) ') as icTable,
        ClientRequestId
    from Data l
    -- substring(..., 3) skips the first two characters of the message before
    -- the prefix comparison.
    where lower(substring(l.Message, 3)) like 'ingestioncommand%'
);

-- Step 3: format and row count of each completion event, tagged with the
-- table name of the matching ingestion command.
create or replace temporary view Data3 as (
    select
        json_tuple(l.Properties, 'format', 'rowCount') as (Format, rowCount),
        icTable
    from Data l
    inner join Data2 d
        on l.ClientRequestId = d.ClientRequestId
    where lower(l.Message) like 'ingestioncompletionevent%'
);

-- Step 4: total rows per (Format, table).
select
    Format,
    icTable,
    sum(rowCount) as rowCount
from Data3
group by
    Format,
    icTable

Просмотреть файл

@ -0,0 +1,37 @@
--Q19;
-- Count (download, completion) message pairs for source IMAGINEFIRST0 that
-- refer to the same path but carry different client request ids, over the
-- three days starting 2014-03-08 12:00.
with Data as (
    select
        *
    from logs_tpc
    -- Upper bound moved from 2014-03-08 15:00 (three hours) to 2014-03-11 12:00
    -- (three days) so the window matches the Kusto counterpart ('.. 3d').
    where Timestamp between '2014-03-08 12:00:00' and '2014-03-11 12:00:00'
      and Source in ('IMAGINEFIRST0')
),
Downloading as (
    -- (path, request id) for every 'downloading file path:' message.
    select
        regexp_extract(Message, 'path:(.*)') as Path,
        ClientRequestId as DownloadClientRequestId
    from Data
    where lower(Message) like 'downloading file path:%'
),
IngestionCompletion as (
    -- (path, request id) for every 'ingestioncompletionevent' message.
    select
        regexp_extract(Message, 'path:(.*)') as Path,
        ClientRequestId as CompleteClientRequestId
    from Data
    where lower(Message) like 'ingestioncompletionevent%'
)
select
    count(*)
from Downloading d
inner join IngestionCompletion ic
    on d.Path = ic.Path
where DownloadClientRequestId <> CompleteClientRequestId