task(auth): Audit db state around tokens

Because:
- We want to spot check db state
- We want to be able to measure changes in db state

This Commit:
- Adds a script that can track row counts, ages of rows, and orphaned rows
- Can be used to generate raw sql script
- Can be used as a cron job to periodically emit statistics.
This commit is contained in:
dschom 2022-11-08 15:53:20 -08:00
Родитель fd16bbd818
Коммит bce1c5c15e
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: F26AEE99174EE68B
3 изменённых файлов: 792 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,518 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
import program from 'commander';
import { StatsD } from 'hot-shots';
import { setupDatabase } from 'fxa-shared/db';
import pckg from '../package.json';
// Module-level wiring shared by every audit below. Statement order matters:
// statsd is built from the config, and the logger receives the statsd client.
const config = require('../config').getProperties();
// Metrics client; gauges are written to it by emitStats.
const statsd = new StatsD(config.statsd);
// Logger created with the configured level, the 'audit-tokens' name and the statsd client.
const log = require('../lib/log')(config.log.level, 'audit-tokens', statsd);
// Knex handle built from the auth MySQL settings; every audit query runs through it.
const knex = setupDatabase({
...config.database.mysql.auth,
});
//#region Table Definitions
/** Pairs a table name with the column used to relate it to another table. */
interface TargetTable {
  /** Table name, as interpolated directly into the generated SQL. */
  name: string;
  /** Column compared against the other table's key column in the join. */
  keyCol: string;
}
/**
 * Prefixes a table name with its database name.
 *
 * @param name - Unqualified table name.
 * @param db - Database (schema) name; defaults to 'fxa'.
 * @returns The qualified name in the form `db.name`.
 */
const toTable = (name: string, db = 'fxa') => `${db}.${name}`;

/**
 * Qualified names of the tables the audits operate on, grouped by database.
 * Every value is visited by the row count audit, so each key must map to its
 * own table.
 */
const tables = {
  // fxa
  accountCustomers: toTable('accountCustomers'),
  accountResetTokens: toTable('accountResetTokens'),
  accounts: toTable('accounts'),
  devices: toTable('devices'),
  deviceCommands: toTable('deviceCommands'),
  emails: toTable('emails'),
  keyFetchTokens: toTable('keyFetchTokens'),
  linkedAccounts: toTable('linkedAccounts'),
  passwordChangeTokens: toTable('passwordChangeTokens'),
  passwordForgotTokens: toTable('passwordForgotTokens'),
  paypalCustomers: toTable('paypalCustomers'),
  recoveryCodes: toTable('recoveryCodes'),
  // Previously pointed at 'recoveryCodes', which audited that table twice and
  // never looked at recoveryKeys.
  recoveryKeys: toTable('recoveryKeys'),
  securityEvents: toTable('securityEvents'),
  sentEmails: toTable('sentEmails'),
  sessionTokens: toTable('sessionTokens'),
  signinCodes: toTable('signinCodes'),
  totp: toTable('totp'),
  unblockCodes: toTable('unblockCodes'),
  unverifiedTokens: toTable('unverifiedTokens'),
  verificationReminders: toTable('verificationReminders'),

  // fxa_oauth
  clientDevelopers: toTable('clientDevelopers', 'fxa_oauth'),
  clients: toTable('clients', 'fxa_oauth'),
  codes: toTable('codes', 'fxa_oauth'),
  developers: toTable('developers', 'fxa_oauth'),
  refreshTokens: toTable('refreshTokens', 'fxa_oauth'),
  tokens: toTable('tokens', 'fxa_oauth'),

  // fxa_profile
  profile: toTable('profile', 'fxa_profile'),
};
//#endregion
//#region Result Handling
/**
 * Normalises a metric label before it is emitted.
 *
 * - The dot between a database name and a table name becomes an underscore,
 *   so `fxa.devices` forms one label segment (`fxa_devices`).
 * - Runs of consecutive dots collapse to one. Audit names that end in a dot
 *   (for example `fxa.devices.RowCount.`) would otherwise yield labels with
 *   an empty segment such as `RowCount..table_size`.
 *
 * @param label - Raw, dot separated metric label.
 * @returns The cleaned label.
 */
function formatStatLabel(label: string) {
  return label
    .replace(/(fxa|fxa_profile|fxa_oauth)\./g, '$1_')
    .replace(/\.{2,}/g, '.');
}
/**
 * Reduces a raw query result to a single flat object.
 *
 * The rows are read from `result[0]`. When the first row carries both a
 * `group_key` and a `group_value` column, the rows are pivoted into one
 * object keyed by `group_key`. Otherwise the first row is returned as is.
 *
 * The pivot check looks for the columns themselves rather than for truthy
 * values, so a leading row whose key is NULL or whose value is 0 no longer
 * turns pivoting off and discards the remaining rows. A NULL key ends up
 * under the property name 'null'.
 *
 * @param result - Raw result as returned by the query call.
 * @returns The pivoted object, the first row, or undefined when there are no rows.
 */
function formatResult(result: any) {
  const rows = result?.[0];
  const firstRow = rows?.[0];

  const isKeyValueResult =
    firstRow !== null &&
    typeof firstRow === 'object' &&
    'group_key' in firstRow &&
    'group_value' in firstRow;

  if (!isKeyValueResult) {
    return firstRow;
  }

  const pivoted: any = {};
  for (const row of rows) {
    pivoted[row.group_key] = row.group_value;
  }
  return pivoted;
}
/**
 * Sends every numeric field of an audit result to statsd as a gauge.
 *
 * @param name - Audit name, used as the middle part of the metric label.
 * @param result - Flat result object; a falsy result emits nothing and
 *   non-numeric fields are skipped.
 */
export function emitStats(name: string, result: any) {
  if (!result) {
    return;
  }

  for (const [field, value] of Object.entries(result)) {
    if (typeof value !== 'number') {
      continue;
    }
    const label = formatStatLabel(`db-audit.${name}.${field}`);
    statsd.gauge(label, value);
    log.debug('emit-stats', { label });
  }
}
/**
 * Logs an audit result and, in verbose mode, prints the audit name, its
 * query and a result summary to the console as SQL comments.
 *
 * @param name - Audit name.
 * @param query - SQL text of the audit.
 * @param result - Flat result object; a falsy value prints 'No Result'.
 */
export function logResult(name: string, query: string, result: any) {
  log.info('result', { name, result });

  if (!program.verbose) {
    return;
  }

  let summary = '-- RESULT SUMMARY: No Result';
  if (result) {
    let pairs = '';
    for (const [k, v] of Object.entries(result)) {
      pairs += `\n-- ${k}: ${v}`;
    }
    summary = `\n-- RESULT SUMMARY:\n${pairs}`;
  }

  console.log(`\n-- AUDIT: ${name}\n-- QUERY:\n${query}\n${summary}\n\n\n`);
}
//#endregion
//#region Audits
/**
 * Determines how many rows an audit may sample from a table.
 *
 * One large, user supplied cap is used for every table, so the table name is
 * currently ignored. A per-table size derived from a confidence interval and
 * the observed spread could replace this later; for now a large cap is
 * treated as adequate for getting a feel for the data.
 *
 * @param _tableName - Unused.
 * @returns The value of the maxSampleSize option.
 */
function getSampleSize(_tableName: string) {
  const { maxSampleSize } = program;
  return maxSampleSize;
}
/**
 * Returns the row count previously recorded for a table.
 *
 * @param tableName - Qualified table name used as the lookup key.
 * @returns The recorded row count.
 * @throws Error when no row count has been recorded for the table.
 */
function getTableSize(tableName: string) {
  const recorded = rowCounts.get(tableName);
  if (recorded !== undefined) {
    return recorded;
  }
  throw new Error(`Could not locate table, ${tableName}`);
}
/**
 * Produces the SQL LIMIT clause for a table.
 *
 * @param table - Qualified table name.
 * @returns `LIMIT <n>` when a sample size is set, otherwise an empty string.
 */
function buildLimit(table: string) {
  const rowCap = getSampleSize(table);
  return rowCap ? `LIMIT ${rowCap}` : '';
}
/**
 * Adds `table_size` and `sample_size` to a result object. This is useful
 * metadata when graphing the metrics. `sample_size` is the smaller of the
 * table size and the configured sample size.
 *
 * Nothing happens when there is no result object to decorate. `audit()`
 * returns undefined for audits skipped by the grep filter or whose query
 * failed, and an empty string for dry runs; writing properties onto those
 * values used to throw and abort the remaining audits.
 *
 * @param table - Qualified table name the result was computed from.
 * @param result - Result object to mutate in place.
 * @throws Error when a result object is given but no row count is known for the table.
 */
function decorateResultWithTableStats(table: string, result: any) {
  if (result === null || typeof result !== 'object') {
    return;
  }

  const tableSize = getTableSize(table);
  const maxSampleSize = getSampleSize(table);

  result.table_size = tableSize;
  result.sample_size = tableSize < maxSampleSize ? tableSize : maxSampleSize;
}
/**
 * Runs one audit query and returns its formatted result.
 *
 * @param name - Audit name; matched against the grep option.
 * @param raw - SQL text to execute.
 * @returns The formatted result; an empty string for a dry run; undefined
 *   when the audit was excluded by the grep filter or the query failed.
 */
async function audit(name: string, raw: string) {
  // RowCount audits always run, because other audits depend on their output.
  const filter = program.grep ? new RegExp(program.grep) : undefined;
  const skip = !/RowCount/i.test(name) && filter && !filter.test(name);
  if (skip) {
    log.info('audit', { msg: `-- Excluding ${name} due to grep filter.` });
    if (program.verbose) {
      console.log(`-- Excluding ${name} due to grep filter.`);
    }
    return;
  }

  if (program.verbose) {
    console.log('!!! ', name);
  }

  // A dry run only reports the query; nothing is sent to the database.
  if (program.dry) {
    logResult(name, raw, '');
    return '';
  }

  let trx;
  try {
    trx = await knex.transaction({ isolationLevel: 'read uncommitted' });
    const rawResult = await trx.raw(raw);
    await trx.commit();
    return formatResult(rawResult);
  } catch (err) {
    // Close the transaction so a failed query does not leave it open on a
    // pooled connection.
    if (trx && !trx.isCompleted()) {
      try {
        await trx.rollback();
      } catch (rollbackErr) {
        log.error(rollbackErr);
      }
    }

    log.error(err);
    if (program.verbose) {
      console.log(err);
    }
  }
}
/** Row count per qualified table name, filled by the row count audit and read by later audits. */
const rowCounts = new Map<string, number>();
/**
 * Looks up the current row count of a table and records it for later audits.
 *
 * The count is read from INFORMATION_SCHEMA.TABLES (`table_rows`).
 * NOTE(review): MySQL documents this value as an approximation for InnoDB
 * tables, so treat it as an estimate.
 *
 * @param table - Qualified table name in the form `db.table`.
 * @returns The audit result (`{ table_size }`), or whatever `audit()`
 *   returned when no row was produced (dry run, failed query, unknown table).
 */
export async function auditRowCounts(table: string) {
  const [schemaName, tableName] = table.split('.');
  const query = `
SELECT
table_rows AS table_size
FROM INFORMATION_SCHEMA.TABLES
WHERE table_schema = '${schemaName}' and table_name = '${tableName}'
`;
  const name = `${table}.RowCount.`;

  const result = await audit(name, query);
  logResult(name, query, result);
  emitStats(name, result);

  // The result is undefined when the query failed or matched no table, so
  // guard the property access instead of throwing a TypeError.
  const tableSize = result?.table_size;
  if (typeof tableSize === 'number') {
    rowCounts.set(table, tableSize);
  }
  return result;
}
/**
 * Counts sampled rows per year and month of a millisecond timestamp column.
 *
 * @param table - Qualified table name.
 * @param colName - Timestamp column, divided by 1000 before conversion.
 * @param colSort - Column the sample is ordered by and counted on.
 * @returns The audit result keyed by `<year>-<month name>`.
 */
export async function auditAge(
  table: string,
  colName: string,
  colSort: string
) {
  const name = `${table}.AgeAudit.${colName}.`;

  // Select the time column separately only when it differs from the sort column.
  const extraColumn = colSort !== colName ? `, ${colName}` : '';
  const limitClause = buildLimit(table);
  const query = `
SELECT
DATE_FORMAT(FROM_UNIXTIME(${colName} / 1000), "%Y-%M") as group_key,
COUNT(${colSort}) as group_value
FROM
(
SELECT ${colSort} ${extraColumn}
FROM ${table}
ORDER BY ${colSort}
${limitClause}
) as times
GROUP BY group_key
`;

  const result = await audit(name, query);
  logResult(name, query, result);
  emitStats(name, result);
  return result;
}
/**
 * Counts sampled child rows whose key has no matching row in the parent table.
 *
 * @param child - Table to sample and the column holding the parent key.
 * @param parent - Table expected to contain the key, and its key column.
 * @returns The audit result (`total_missing`, `percent_missing`) decorated
 *   with table statistics.
 */
export async function auditOrphanedRows(
  child: TargetTable,
  parent: TargetTable
) {
  const name = `${child.name}.OrphanedRows.On-${parent.name}`;

  // The same limit clause bounds the child sample and closes the inner aggregate.
  const limitClause = buildLimit(child.name);
  const query = `
SELECT
total_missing,
CASE when total > 0 THEN 100 * total_missing / total ELSE 0 END AS percent_missing
FROM (
SELECT
COUNT(*) total,
COUNT(IF(parent.${parent.keyCol} is NULL, 1, NULL)) AS total_missing
FROM
(SELECT ${child.keyCol} FROM ${child.name} ${limitClause}) as child
LEFT JOIN ${parent.name} parent ON child.${child.keyCol} = parent.${parent.keyCol}
${limitClause}
) AS missing;
`;

  const result = await audit(name, query);
  decorateResultWithTableStats(child.name, result);
  logResult(name, query, result);
  emitStats(name, result);
  return result;
}
/**
 * Counts sampled devices that lack a complete parent chain.
 *
 * A device counts as missing its session token when either the session token
 * or that token's account is absent, and as missing its refresh token when
 * either the refresh token or that token's client is absent.
 *
 * @returns The audit result (totals and percentages) decorated with table statistics.
 */
export async function auditOrphanedDeviceRows() {
  const { devices, sessionTokens, accounts, refreshTokens, clients } = tables;
  const name = `${devices}.OrphanedRows.On-Many`;
  const limitClause = buildLimit(devices);

  // Checks both chains explicitly: devices > sessionTokens > accounts and
  // devices > refreshTokens > clients.
  const query = `
SELECT
*,
100 * total_missing_both / total AS percent_missing_both,
100 * total_missing_refresh_token / total AS percent_missing_refresh_token,
100 * total_missing_session_token / total AS percent_missing_session_token
FROM
(
SELECT
COUNT(*) as total,
COUNT( IF (missing_session_token = 1 and missing_refresh_token = 1, 1, NULL)) AS total_missing_both,
COUNT( IF (missing_refresh_token = 1, 1, NULL)) AS total_missing_refresh_token,
COUNT( IF (missing_session_token = 1, 1, NULL)) AS total_missing_session_token
FROM
(
SELECT
d.id,
case
WHEN s.tokenId is NULL OR a.uid is NULL
THEN 1 ELSE 0 END AS missing_session_token,
case
WHEN r.token is NULL OR c.id is NULL
THEN 1 ELSE 0 END AS missing_refresh_token
FROM
(
SELECT id, sessionTokenId, refreshTokenId
FROM ${devices}
${limitClause}
) as d
left join ${sessionTokens} s on s.tokenId = d.sessionTokenId
left join ${accounts} a on s.uid = a.uid
left join ${refreshTokens} r on r.token = d.refreshTokenId
left join ${clients} c on c.id = r.clientId
) AS status
) AS totals
`;

  const result = await audit(name, query);
  decorateResultWithTableStats(devices, result);
  logResult(name, query, result);
  emitStats(name, result);
  return result;
}
/**
 * Runs the audits selected by the current cli arguments.
 *
 * Row counts are always audited. Age audits and orphaned row audits run only
 * when their option is set. Audits run one after another.
 */
async function auditAll() {
  // Row counts are always collected: the queries are fast and the counts are
  // used by the audits that follow.
  for (const table of Object.values(tables)) {
    await auditRowCounts(table);
  }

  // If requested, audit the age distribution of rows.
  // Each entry is [table, time column, sort column].
  if (program.auditAge) {
    const ageTargets: [string, string, string][] = [
      [tables.accountCustomers, 'createdAt', 'uid'],
      [tables.accountResetTokens, 'createdAt', 'tokenId'],
      [tables.accounts, 'createdAt', 'uid'],
      [tables.devices, 'createdAt', 'id'],
      [tables.emails, 'createdAt', 'id'],
      [tables.keyFetchTokens, 'createdAt', 'tokenId'],
      [tables.passwordChangeTokens, 'createdAt', 'tokenId'],
      [tables.passwordForgotTokens, 'createdAt', 'tokenId'],
      [tables.sentEmails, 'sentAt', 'id'],
      [tables.sessionTokens, 'createdAt', 'tokenId'],
      [tables.sessionTokens, 'lastAccessTime', 'tokenId'],
      [tables.totp, 'createdAt', 'uid'],
      [tables.unblockCodes, 'createdAt', 'unblockCodeHash'],
      [tables.unverifiedTokens, 'tokenVerificationCodeExpiresAt', 'tokenId'],
      [tables.verificationReminders, 'createdAt', 'uid'],
      [tables.tokens, 'createdAt', 'token'],
      [tables.tokens, 'expiresAt', 'token'],
      [tables.refreshTokens, 'createdAt', 'token'],
      [tables.refreshTokens, 'lastUsedAt', 'token'],
    ];
    for (const [table, colName, colSort] of ageTargets) {
      await auditAge(table, colName, colSort);
    }
  }

  // If requested, look for potentially orphaned rows. These are rows where an
  // implied parent key is missing.
  if (program.auditOrphanedRows) {
    // Rows orphaned by a missing account. This list follows the order of
    // `tables`; it previously repeated passwordChangeTokens and recoveryCodes,
    // so passwordForgotTokens and recoveryKeys were never checked.
    const accountChildren = [
      tables.accountCustomers,
      tables.accountResetTokens,
      tables.devices,
      tables.emails,
      tables.keyFetchTokens,
      tables.linkedAccounts,
      tables.passwordChangeTokens,
      tables.passwordForgotTokens,
      tables.paypalCustomers,
      tables.recoveryCodes,
      tables.recoveryKeys,
      tables.securityEvents,
      tables.sentEmails,
      tables.sessionTokens,
      tables.signinCodes,
      tables.totp,
      tables.unblockCodes,
      tables.unverifiedTokens,
      tables.verificationReminders,
    ];
    for (const table of accountChildren) {
      await auditOrphanedRows(
        { name: table, keyCol: 'uid' },
        { name: tables.accounts, keyCol: 'uid' }
      );
    }

    // Rows orphaned by a missing oauth client.
    const clientChildren = [
      tables.clientDevelopers,
      tables.tokens,
      tables.codes,
      tables.refreshTokens,
    ];
    for (const table of clientChildren) {
      await auditOrphanedRows(
        { name: table, keyCol: 'clientId' },
        { name: tables.clients, keyCol: 'id' }
      );
    }

    // Rows orphaned by a missing developer.
    const developerChildren = [tables.clientDevelopers];
    for (const table of developerChildren) {
      await auditOrphanedRows(
        { name: table, keyCol: 'developerId' },
        { name: tables.developers, keyCol: 'developerId' }
      );
    }

    await auditOrphanedDeviceRows();
  }
}
//#endregion
/**
 * Main routine. Parses the cli arguments and runs the audits, either once or
 * repeatedly when a loop interval is given.
 *
 * @returns 0 after a successful single pass and 2 when parsing or auditing
 *   throws. In loop mode the returned promise never settles, which keeps the
 *   caller waiting while the interval keeps firing.
 */
export async function run() {
  try {
    // Option values arrive from the command line as strings. Converting them
    // here keeps numeric comparisons, LIMIT clauses and emitted gauges working
    // with real numbers, and rejects input that is not a usable number before
    // it can reach a query.
    const toNumber = (flag: string, wholeOnly: boolean) => (value: string) => {
      const parsed = Number(value);
      const valid =
        Number.isFinite(parsed) &&
        parsed >= 0 &&
        (!wholeOnly || Number.isInteger(parsed));
      if (!valid) {
        throw new Error(`Invalid value for --${flag}: '${value}'`);
      }
      return parsed;
    };

    program
      .version(pckg.version)
      .option(
        '--grep <string>',
        'Regular expression to target a specific audit',
        ''
      )
      .option(
        '--maxSampleSize <number>',
        'The maximum number of rows to sample at anyone time.',
        toNumber('maxSampleSize', true),
        1e5
      )
      .option(
        '--dry',
        'Indicates that the db queries should not be executed. When combined with verbose this can be useful for generating SQL queries.'
      )
      .option(
        '--verbose',
        'Indicates to turn on verbose output. This will output raw queries to console.'
      )
      .option(
        '--auditAge',
        'Toggles auditing of age based metrics on table rows.'
      )
      .option('--auditOrphanedRows', 'Toggles auditing of orphaned rows.')
      .option(
        '--loopInterval <number>',
        'When defined puts the program into a loop that executes every X seconds.',
        toNumber('loopInterval', false),
        0
      )
      .parse(process.argv);

    if (program.loopInterval) {
      // Keep polling stats; useful for local monitoring. The promise is left
      // pending on purpose so the caller does not proceed to shut down.
      return new Promise(() => {
        setInterval(() => {
          // Report a failed pass and keep looping rather than leaving the
          // rejection unhandled.
          auditAll().catch((err) => console.error(err));
        }, program.loopInterval * 1000);
      });
    }

    await auditAll();
  } catch (err) {
    console.error(err);
    return 2;
  }
  return 0;
}
// Main entry point. Runs only when this file is executed directly.
if (require.main === module) {
  process.on('exit', (code) => log.info('exit', { code }));
  run()
    .then((result) => {
      log.info('result', { result });
      // Carry the status returned by run() into the process exit code, so a
      // failed audit is visible to whatever scheduled the script.
      if (typeof result === 'number') {
        process.exitCode = result;
      }
    })
    .then(() => {
      // Make sure statsd closes cleanly so we don't lose any metrics.
      return new Promise((resolve) => {
        statsd.close((err) => {
          if (err) {
            log.warn('statsd', { closed: true, err });
          } else {
            log.info('statsd', { closed: true });
          }
          resolve(true);
        });
      });
    })
    .catch((err) => {
      // Called through the logger so the method keeps its receiver.
      log.error(err);
      process.exitCode = process.exitCode || 1;
    })
    // With no argument, process.exit() uses process.exitCode.
    .finally(() => process.exit());
}

Просмотреть файл

@ -0,0 +1,165 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
'use strict';
const { assert } = require('chai');
const util = require('node:util');
const path = require('path');
const {
auditRowCounts,
auditAge,
auditOrphanedDeviceRows,
auditOrphanedRows,
} = require('../../scripts/audit-tokens');
const mocks = require(`../../test/mocks`);
const config = require('../../config').getProperties();
const log = mocks.mockLog();
const { Account } = require('fxa-shared/db/models/auth/account');
const { clearDb, scaffoldDb, connectToDb } = require('./db-helpers');
// Promise-returning form of child_process.exec, used to run the script as a separate process.
const exec = util.promisify(require('node:child_process').exec);
// Directory two levels above this test file; passed as the working directory for spawned commands.
const cwd = path.resolve(__dirname, '../..');
// Exercises the audit script in two ways: by calling the exported audit
// functions directly, and by spawning the script with different cli options.
describe('scripts/audit-tokens', () => {
  const uid = 'f9916686c226415abd06ae550f073cea';
  const email = 'user1@test.com';
  const createdAt = new Date('2022-10').getTime();
  const lastAccessTime = new Date('2022-11').getTime();

  before(async () => {
    await connectToDb(config, log);
    await clearDb();
    await scaffoldDb(uid, email, createdAt, lastAccessTime);
    // Manually delete the account to simulate an orphaned record situation.
    await Account.knexQuery().del();
  });

  describe('query checks)', () => {
    after(async () => {
      await clearDb();
    });

    it('counts rows', async () => {
      const result = await auditRowCounts('fxa.devices');
      assert.equal(result.table_size, 1);
    });

    it('gets age', async () => {
      const result = await auditAge(
        'fxa.sessionTokens',
        'createdAt',
        'tokenId'
      );
      assert.equal(result['2022-October'], 1);
    });

    it('finds orphan', async () => {
      // The orphan audit reads the row count recorded by the row count audit.
      await auditRowCounts('fxa.sessionTokens');
      const result = await auditOrphanedRows(
        {
          name: 'fxa.sessionTokens',
          keyCol: 'uid',
        },
        {
          name: 'fxa.accounts',
          keyCol: 'uid',
        }
      );
      assert.equal(result.percent_missing, 100);
      assert.equal(result.total_missing, 1);
      assert.equal(result.table_size, 1);
    });

    it('finds orphaned devices', async () => {
      await auditRowCounts('fxa.sessionTokens');
      await auditRowCounts('fxa.devices');
      const result = await auditOrphanedDeviceRows();
      assert.equal(result.total, 1);
      assert.equal(result.table_size, 1);
      assert.equal(result.total_missing_both, 1);
      assert.equal(result.total_missing_refresh_token, 1);
      assert.equal(result.total_missing_session_token, 1);
      assert.equal(result.percent_missing_both, 100);
      assert.equal(result.percent_missing_refresh_token, 100);
      assert.equal(result.percent_missing_session_token, 100);
    });
  });

  describe('cli', () => {
    /**
     * Runs the audit script in a child process.
     * @param args Command line arguments as a single string; defaults to none.
     */
    async function testScript(args = '') {
      // Note that logger output is directed to standard error.
      const { stderr, stdout } = await exec(
        `NODE_ENV=dev node -r esbuild-register scripts/audit-tokens.ts ${args}`,
        {
          cwd,
          shell: '/bin/bash',
        }
      );
      return { stderr, stdout };
    }

    it('applies no args', async () => {
      // Previously called with `{}`, which was interpolated into the command
      // line as "[object Object]" instead of passing no arguments.
      const { stderr, stdout } = await testScript();
      assert.isOk(/RowCount/.test(stderr));
      assert.isNotOk(/AgeAudit/.test(stderr));
      assert.isNotOk(/OrphanedRows/.test(stderr));
      assert.isNotOk(/SELECT/.test(stderr));
      assert.isNotOk(/AgeAudit/.test(stdout));
      assert.isNotOk(/OrphanedRows/.test(stdout));
      assert.isNotOk(/SELECT/.test(stdout));
    });

    it('applies verbose option', async () => {
      const { stdout } = await testScript('--verbose');
      assert.isOk(/-- AUDIT:/.test(stdout));
      assert.isOk(/-- QUERY:/.test(stdout));
      assert.isOk(/-- RESULT SUMMARY:/.test(stdout));
      assert.isOk(/-- table_size/.test(stdout));
      assert.isOk(/SELECT/.test(stdout));
    });

    it('applies dry option', async () => {
      const { stdout } = await testScript('--verbose --dry');
      assert.isOk(/-- AUDIT: /.test(stdout));
      assert.isOk(/-- QUERY:/.test(stdout));
      assert.isOk(/-- RESULT SUMMARY: No Result/.test(stdout));
    });

    it('applies auditAge option', async () => {
      const { stdout } = await testScript('--verbose --auditAge');
      assert.isOk(/-- AUDIT:.*AgeAudit/.test(stdout));
    });

    it('auditOrphanedRows option', async () => {
      const { stdout } = await testScript('--verbose --auditOrphanedRows');
      assert.isOk(/OrphanedRows/.test(stdout));
    });

    it('applies grep option', async () => {
      const { stdout } = await testScript(
        '--verbose --auditAge --grep sessionTokens '
      );
      assert.isOk(
        /-- Excluding fxa.accountCustomers.AgeAudit.createdAt/.test(stdout)
      );
      assert.isOk(
        /-- AUDIT: fxa.sessionTokens.AgeAudit.createdAt/.test(stdout)
      );
    });

    it('limits by sample size', async () => {
      const { stdout } = await testScript(
        '--verbose --auditAge --grep=sessionTokens --maxSampleSize=2 '
      );
      assert.isOk(/LIMIT 2/.test(stdout));
    });
  });
});

Просмотреть файл

@ -0,0 +1,109 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
const {
Device,
Email,
Account,
SessionToken,
SignInCodes,
} = require('fxa-shared/db/models/auth');
const { uuidTransformer } = require('fxa-shared/db/transformers');
const crypto = require('crypto');
/** Returns the hex encoding of `size` zero bytes, i.e. a string of `2 * size` zeros. */
export const toZeroBuff = (size) => {
  const zeroBytes = new Array(size).fill(0);
  return Buffer.from(zeroBytes).toString('hex');
};
/** Generates `size` random bytes, hex encodes them and passes the string through `uuidTransformer.to`. */
export const toRandomBuff = (size) => {
  const randomHex = crypto.randomBytes(size).toString('hex');
  return uuidTransformer.to(randomHex);
};
/** Deletes every row from the emails, accounts, devices, session token and sign-in code models, in that order. */
export async function clearDb() {
  const models = [Email, Account, Device, SessionToken, SignInCodes];
  for (const model of models) {
    await model.knexQuery().del();
  }
}
/**
 * Builds the auth server db object and connects it, with the log level
 * overridden to 'error' for the connection call.
 *
 * @param config Configuration object.
 * @param log Logger handed to the token and db factories.
 * @returns The connected db object.
 */
export async function connectToDb(config, log) {
  const createTokens = require('../../../lib/tokens');
  const Token = createTokens(log, config);

  const random = require('../../../lib/crypto/random');
  const UnblockCode = random.base32(config.signinUnblock.codeLength);

  const createDb = require('../../../lib/db');
  const db = createDb(config, log, Token, UnblockCode);

  const connectConfig = { ...config, log: { level: 'error' } };
  await db.connect(connectConfig);
  return db;
}
/** Builds an unverified account row with zero-filled key material. */
function account(uid, email, createdAt) {
  const zeroed16 = toZeroBuff(16);
  const zeroed32 = toZeroBuff(32);
  return {
    uid,
    createdAt,
    email,
    emailCode: zeroed16,
    normalizedEmail: email,
    emailVerified: false,
    verifierVersion: 1,
    verifyHash: zeroed32,
    authSalt: zeroed32,
    kA: zeroed32,
    wrapWrapKb: zeroed32,
    verifierSetAt: createdAt,
    locale: 'en-US',
  };
}
/** Builds a device row with a random id, linked to the given session token and without a refresh token. */
const device = (uid, sessionTokenId, createdAt) => ({
  id: toRandomBuff(16),
  uid,
  sessionTokenId,
  refreshTokenId: null,
  name: null,
  type: null,
  createdAt,
  pushCallback: null,
  pushPublicKey: null,
  pushAuthKey: null,
  availableCommands: null,
});
/** Builds a session token row with random id and data, a fixed location and empty user agent fields. */
function sessionToken(uid, createdAt, lastAccessTime) {
  const id = toRandomBuff(32);
  const data = toRandomBuff(32);
  const location = {
    city: 'pdx',
    state: 'or',
    stateCode: 'or',
    country: 'usa',
    countryCode: 'usa',
  };
  return {
    id,
    data,
    tokenVerificationId: null,
    uid,
    createdAt,
    lastAccessTime,
    location,
    uaBrowser: '',
    uaBrowserVersion: '',
    uaOS: '',
    uaOSVersion: '',
    uaDeviceType: '',
    uaFormFactor: '',
  };
}
/** Builds a sign-in code row with a random hash and flow id. */
function signInCode(uid, createdAt) {
  const hash = toRandomBuff(32);
  const flowid = toRandomBuff(32);
  return { hash, flowid, uid, createdAt };
}
/**
 * Inserts one account together with a session token, a device bound to that
 * token, and a sign-in code, all sharing the same uid.
 *
 * @param uid Account uid, passed through `uuidTransformer.to` before use.
 * @param email Account email.
 * @param createdAt Creation timestamp applied to every row.
 * @param lastAccessTime Last access timestamp for the session token.
 */
export async function scaffoldDb(uid, email, createdAt, lastAccessTime) {
  const dbUid = uuidTransformer.to(uid);
  const sessionRow = sessionToken(dbUid, createdAt, lastAccessTime);

  const accountRow = account(dbUid, email, createdAt);
  await Account.create(accountRow);

  await SessionToken.create(sessionRow);

  const deviceRow = device(dbUid, sessionRow.id, createdAt);
  await Device.create(deviceRow);

  const codeRow = signInCode(dbUid, createdAt);
  await SignInCodes.knexQuery().insert(codeRow);
}