Merge branch 'main' into mku-SparseLedger

This commit is contained in:
Amaury Chamayou 2024-04-23 19:36:13 +00:00
Родитель eeaa616dfe cd8b8dddcb
Коммит fbf6ab2cc4
23 изменённых файлов: 204 добавлений и 106 удалений

Просмотреть файл

@ -29,6 +29,7 @@ jobs:
name: ado-virtual-ccf-sub # To build CCF quickly
demands:
- WorkFolder -equals /mnt/storage
container: virtual
steps:
- checkout: self
@ -66,7 +67,6 @@ jobs:
- script: |
set -ex
sudo apt-get install -y python3.8-venv
python3.8 -m venv ./scripts/azure_deployment/.env
source ./scripts/azure_deployment/.env/bin/activate
pip install -r ./scripts/azure_deployment/requirements.txt
@ -91,8 +91,11 @@ jobs:
- job: cleanup_aci
displayName: "Cleanup ACI"
container: virtual
pool:
name: ado-virtual-ccf-sub # For access to managed identity
demands:
- WorkFolder -equals /mnt/storage
dependsOn:
- generate_ssh_key
- deploy_primary_aci

Просмотреть файл

@ -7,6 +7,7 @@ jobs:
displayName: ${{ parameters.display_name }}
dependsOn: ${{ parameters.depends_on }}
condition: ${{ parameters.condition }}
container: virtual
pool:
name: ado-virtual-ccf-sub
demands:

Просмотреть файл

@ -22,15 +22,13 @@ steps:
set -o pipefail
cd tla/
mkdir -p traces/consistency
# Passing prefix, following lines require implementing TruncateLedgerAction
# Will become mv ../build/consistency traces/ once every action matches
head -12 ../build/consistency/trace.ndjson > traces/consistency/trace.ndjson
mv ../build/consistency/trace.ndjson traces/consistency/
JSON=traces/consistency/trace.ndjson ./tlc.sh -dump dot,constrained,colorize,actionlabels traces/consistency/trace.dot -dumpTrace tla traces/consistency/trace.trace.tla -dumpTrace json traces/consistency/trace.trace.json consistency/TraceMultiNodeReads.tla
ls traces
displayName: "Run Consistency Trace Validation"
- task: PublishPipelineArtifact@1
inputs:
artifactName: "Trace Validation Output: ${{ parameters.suffix }}"
artifactName: "Trace Validation Output ${{ parameters.suffix }}"
targetPath: tla/traces
condition: or(failed(), canceled())

1
.github/workflows/tlaplus.yml поставляемый
Просмотреть файл

@ -114,6 +114,7 @@ jobs:
tlai-linter:
runs-on: ubuntu-latest
if: github.repository == 'microsoft/CCF'
env:
## https://microsoft.github.io/genaiscript/reference/token/

Просмотреть файл

@ -12,3 +12,6 @@ furo
# docutils 0.17.0 causes "AttributeError: module
# 'docutils.nodes' has no attribute 'meta'" error when building doc
docutils==0.18.*
# Required-by: sphinxcontrib-openapi
# cannot import name 'error_string' from 'docutils.io'
sphinx-mdinclude==0.5.4

Просмотреть файл

@ -7,6 +7,8 @@ import sys
import time
import os
from loguru import logger as LOG
def run(args):
with infra.network.network(
@ -28,6 +30,7 @@ def run(args):
os.makedirs(output_dir, exist_ok=True)
with open("consistency/trace.ndjson", "w") as trace_file:
LOG.info(f"Starting {' '.join(cli)} > {trace_file.name}")
tvc = subprocess.Popen(cli, stdout=trace_file)
# Do some normal transactions
time.sleep(2)
@ -38,6 +41,9 @@ def run(args):
primary.resume()
# Do some more transactions
time.sleep(5)
tvc.poll()
if tvc.returncode is not None:
raise Exception(f"tvc failed with rc {tvc.returncode}")
tvc.send_signal(subprocess.signal.SIGINT)
tvc.wait()

Просмотреть файл

@ -32,25 +32,8 @@ KEY = "0"
VALUE = "value"
class Log:
"""
A simple way to defer logging until the end of transaction cycle,
and to prepend actions if necessary, such as TruncateLedgerAction
"""
def __init__(self):
self.entries = []
def prepend(self, **kwargs):
self.entries.insert(0, kwargs)
def __call__(self, **kwargs):
self.entries.append(kwargs)
def dump(self):
for entry in self.entries:
print(json.dumps(entry))
self.entries = []
def log(**kwargs):
print(json.dumps(kwargs))
def tx_id(string):
@ -58,74 +41,72 @@ def tx_id(string):
return int(view), int(seqno)
def retry(call, urls, **kwargs):
"""
Retry http calls if they time out (process suspended during execution),
or return a non-200/204 code (unable to forward because primary unknown).
Pick a random URL, to avoid getting stuck too long on a suspended node.
"""
response = None
while response is None or response.status_code not in (200, 204):
try:
url = random.choice(urls)
response = call(url, **kwargs)
except httpx.ReadTimeout:
pass
return response
def run(targets, cacert):
transport = httpx.HTTPTransport(retries=10, verify=cacert)
session = httpx.Client(transport=transport)
tx = 0
view = 2
session = httpx.Client(verify=cacert)
tx = -1
key_urls = [f"{target}/records/{KEY}" for target in targets]
while True:
log = Log()
target = random.choice(targets)
tx += 1
# Always start with a write, to avoid having to handle missing values
txtype = random.choice(["Ro", "Rw"]) if tx else "Rw"
if txtype == "Ro":
response = session.get(f"{target}/records/{KEY}")
if response.status_code == 200:
log(action=f"{txtype}TxRequestAction", type=f"{txtype}TxRequest", tx=tx)
assert response.text == VALUE
txid = response.headers["x-ms-ccf-transaction-id"]
log(
action="RoTxResponseAction",
type="RoTxResponse",
tx=tx,
tx_id=tx_id(txid),
)
response = retry(session.get, key_urls)
log(action=f"{txtype}TxRequestAction", type=f"{txtype}TxRequest", tx=tx)
assert response.text == VALUE
txid = response.headers["x-ms-ccf-transaction-id"]
log(
action="RoTxResponseAction",
type="RoTxResponse",
tx=tx,
tx_id=tx_id(txid),
)
elif txtype == "Rw":
response = session.put(f"{target}/records/{KEY}", data=VALUE)
if response.status_code == 204:
log(action=f"{txtype}TxRequestAction", type=f"{txtype}TxRequest", tx=tx)
txid = response.headers["x-ms-ccf-transaction-id"]
log(
action="RwTxExecuteAction",
type="RwTxExecute",
view=tx_id(txid)[0],
tx=tx,
)
log(
action="RwTxResponseAction",
type="RwTxResponse",
tx=tx,
tx_id=tx_id(txid),
)
status = "Pending"
final = False
while not final:
try:
response = session.get(f"{target}/tx?transaction_id={txid}")
if response.status_code == 200:
status = response.json()["status"]
if status in ("Committed", "Invalid"):
log(
action=f"Status{status}ResponseAction",
type="TxStatusReceived",
tx_id=tx_id(txid),
status=f"{status}Status",
)
new_view, _ = tx_id(txid)
if new_view > view:
log.prepend(action="TruncateLedgerAction")
view = new_view
final = True
break
except httpx.ReadTimeout:
pass
# if our target has gone away or does not know who is primary, try another one
target = random.choice(targets)
response = retry(session.put, key_urls, data=VALUE)
log(action=f"{txtype}TxRequestAction", type=f"{txtype}TxRequest", tx=tx)
txid = response.headers["x-ms-ccf-transaction-id"]
log(
action="RwTxExecuteAction",
type="RwTxExecute",
tx_id=tx_id(txid),
tx=tx,
)
log(
action="RwTxResponseAction",
type="RwTxResponse",
tx=tx,
tx_id=tx_id(txid),
)
done = False
while not done:
tx_urls = [f"{target}/tx?transaction_id={txid}" for target in targets]
response = retry(session.get, tx_urls)
status = response.json()["status"]
if status in ("Committed", "Invalid"):
log(
action=f"Status{status}ResponseAction",
type="TxStatusReceived",
tx_id=tx_id(txid),
status=f"{status}Status",
)
done = True
else:
raise ValueError(f"Unknown Tx type: {txtype}")
log.dump()
tx += 1
if __name__ == "__main__":

Просмотреть файл

@ -21,6 +21,10 @@ TxStatuses == {
InvalidStatus
}
\* Although views start at 1 in the consistency spec, this constant allows increasing the first branch
\* that can be appended to, to enable trace validation against the implementation, where view starts at 2
CONSTANT FirstBranch
\* Views start at 1, 0 is used a null value
Views == Nat

Просмотреть файл

@ -1,6 +1,7 @@
SPECIFICATION MCSpecMultiNode
CONSTANTS
FirstBranch = 1
HistoryLimit = 6
ViewLimit = 3

Просмотреть файл

@ -2,6 +2,7 @@ SPECIFICATION MCSpecMultiNodeReads
CONSTANTS
FirstBranch = 1
HistoryLimit = 6
ViewLimit = 1

Просмотреть файл

@ -2,6 +2,7 @@ SPECIFICATION MCSpecMultiNodeReads
CONSTANTS
FirstBranch = 1
HistoryLimit = 7
ViewLimit = 2

Просмотреть файл

@ -1,6 +1,7 @@
SPECIFICATION MCSpecMultiNodeReads
CONSTANTS
FirstBranch = 1
HistoryLimit = 6
ViewLimit = 3

Просмотреть файл

@ -1,6 +1,7 @@
SPECIFICATION MCSpecMultiNodeReadsAlt
CONSTANTS
FirstBranch = 1
HistoryLimit = 11
ViewLimit = 3

Просмотреть файл

@ -2,6 +2,7 @@ SPECIFICATION MCSpecMultiNodeReads
CONSTANTS
FirstBranch = 1
HistoryLimit = 8
ViewLimit = 2

Просмотреть файл

@ -1,6 +1,7 @@
SPECIFICATION MCSpecSingleNode
CONSTANTS
FirstBranch = 1
HistoryLimit = 7
RwTxRequest = RwTxRequest

Просмотреть файл

@ -2,6 +2,7 @@ SPECIFICATION MCSpecSingleNode
CONSTANTS
FirstBranch = 1
HistoryLimit = 3
RwTxRequest = RwTxRequest

Просмотреть файл

@ -1,6 +1,7 @@
SPECIFICATION MCSpecSingleNodeReads
CONSTANTS
FirstBranch = 1
HistoryLimit = 7
RwTxRequest = RwTxRequest

Просмотреть файл

@ -13,7 +13,7 @@ ViewWithAllCommitted ==
\* Simulates leader election by rolling back some number of uncommitted transactions and updating view
TruncateLedgerAction ==
/\ \E view \in ViewWithAllCommitted:
/\ \E i \in (CommitSeqNum + 1)..Len(ledgerBranches[view]) :
/\ \E i \in CommitSeqNum..Len(ledgerBranches[view]) :
/\ ledgerBranches' = Append(ledgerBranches, SubSeq(ledgerBranches[view], 1, i))
/\ UNCHANGED history
@ -25,10 +25,17 @@ StatusInvalidResponseAction ==
/\ \/ /\ CommitSeqNum >= history[i].tx_id[2]
/\ Len(ledgerBranches[Len(ledgerBranches)]) >= history[i].tx_id[2]
/\ ledgerBranches[Len(ledgerBranches)][history[i].tx_id[2]].view # history[i].tx_id[1]
\* or commit hasn't reached seqnum but never will as current seqnum is higher
\* or commit hasn't reached seqnum but never will as current view is higher
\/ /\ CommitSeqNum > 0
/\ CommitSeqNum < history[i].tx_id[2]
/\ ledgerBranches[Len(ledgerBranches)][CommitSeqNum].view > history[i].tx_id[1]
\* or commit hasn't reached there,... but can never reach there
\* note that this effectively allows StatusInvalidResponseAction to "declare" future transactions
\* invalid, and requires a corresponding change in SingleNode::StatusCommittedResponseAction to
\* constrain future commits. This combined change is unnecessary for model checking, but greatly
\* simplifies trace validation.
\/ /\ history[i].tx_id[1] = Len(ledgerBranches)
/\ history[i].tx_id[2] > CommitSeqNum
\* Reply
/\ history' = Append(
history,[

Просмотреть файл

@ -1,6 +1,8 @@
SPECIFICATION SpecMultiNodeReads
CONSTANTS
FirstBranch = 1
RwTxRequest = RwTxRequest
RwTxResponse = RwTxResponse
RoTxRequest = RoTxRequest

Просмотреть файл

@ -18,7 +18,7 @@ LedgerTypeOK ==
\* Each ledger entry is tuple containing a view and tx
\* The ledger entry index is the sequence number
/\ ledgerBranches[view][seqnum].view \in Views
\* /\ ledgerBranches[view][seqnum].tx \in Txs
/\ "tx" \in DOMAIN ledgerBranches[view][seqnum] => ledgerBranches[view][seqnum].tx \in Txs
\* In this abstract version of CCF's consensus layer, each ledger is append-only
LedgersMonoProp ==
@ -32,7 +32,7 @@ TypeOK ==
Init ==
/\ history = <<>>
/\ ledgerBranches = [ x \in {1} |-> <<>>]
/\ ledgerBranches = [ x \in 1..FirstBranch |-> <<>>]
IndexOfLastRequested ==
SelectLastInSeq(history, LAMBDA e : e.type \in {RwTxRequest, RoTxRequest})
@ -60,7 +60,7 @@ RwTxExecuteAction ==
=> history[i].tx /= ledgerBranches[view][seqnum].tx
\* Note that a transaction can be added to any ledger, simulating the fact
\* that it can be picked up by the current leader or any former leader
/\ \E view \in DOMAIN ledgerBranches:
/\ \E view \in FirstBranch..Len(ledgerBranches):
ledgerBranches' = [ledgerBranches EXCEPT ![view] =
@ @@ (Max(DOMAIN ledgerBranches[view] \cup {0})+1) :> [view |-> view, tx |-> history[i].tx]]
/\ UNCHANGED history
@ -78,7 +78,7 @@ RwTxResponseAction ==
/\ j > i
/\ history[j].type = RwTxResponse
/\ history[j].tx = history[i].tx} = {}
/\ \E view \in DOMAIN ledgerBranches:
/\ \E view \in FirstBranch..Len(ledgerBranches):
/\ \E seqnum \in DOMAIN ledgerBranches[view]:
/\ "tx" \in DOMAIN ledgerBranches[view][seqnum]
/\ history[i].tx = ledgerBranches[view][seqnum].tx
@ -94,21 +94,33 @@ RwTxResponseAction ==
\* Note that a request could only be committed if it's in the highest view's ledger
StatusCommittedResponseAction ==
/\ \E i \in DOMAIN history :
/\ history[i].type = RwTxResponse
/\ Max(DOMAIN ledgerBranches[Len(ledgerBranches)]) >= history[i].tx_id[2]
/\ ledgerBranches[Len(ledgerBranches)][history[i].tx_id[2]].view = history[i].tx_id[1]
\* Reply
/\ history' = Append(
history,[
LET view == history[i].tx_id[1]
seqno == history[i].tx_id[2]
IN /\ history[i].type = RwTxResponse
/\ Max(DOMAIN ledgerBranches[Len(ledgerBranches)] \cup {0}) >= seqno
/\ ledgerBranches[Len(ledgerBranches)][history[i].tx_id[2]].view = view
\* There is no future InvalidStatus that's incompatible with this commit
\* This is to accomodate StatusInvalidResponseAction making future commits invalid,
\* and is an unnecessary complication for model checking. It does facilitate trace
\* validation though, by allowing immediate processing of Invalids without
\* needing to wait for the commit history knowledge to catch up.
/\ \lnot \E j \in DOMAIN history:
/\ history[j].type = TxStatusReceived
/\ history[j].status = InvalidStatus
/\ history[j].tx_id[1] = view
/\ history[j].tx_id[2] <= seqno
\* Reply
/\ history' = Append(
history,[
type |-> TxStatusReceived,
tx_id |-> history[i].tx_id,
status |-> CommittedStatus]
)
)
/\ UNCHANGED ledgerBranches
\* Append a transaction to the ledger which does not impact the state we are considering
AppendOtherTxnAction ==
/\ \E view \in DOMAIN ledgerBranches:
/\ \E view \in (DOMAIN ledgerBranches) \ 0..(FirstBranch-1):
ledgerBranches' = [ledgerBranches EXCEPT ![view] = @ @@ (Max(DOMAIN ledgerBranches[view] \cup {0})+1) :> [view |-> view] ]
/\ UNCHANGED history

Просмотреть файл

@ -22,7 +22,7 @@ RoTxResponseAction ==
/\ j > i
/\ history[j].type = RoTxResponse
/\ history[j].tx = history[i].tx} = {}
/\ \E view \in DOMAIN ledgerBranches:
/\ \E view \in (DOMAIN ledgerBranches) \ 1..(FirstBranch-1):
/\ ledgerBranches[view] # <<>>
/\ history' = Append(
history,[

Просмотреть файл

@ -38,6 +38,9 @@ CONSTRAINT
Termination
CONSTANTS
\* View starts at 2 in the implementation
FirstBranch = 2
RwTxRequest = RwTxRequest
RwTxResponse = RwTxResponse
RoTxRequest = RoTxRequest

Просмотреть файл

@ -4,11 +4,12 @@ EXTENDS MultiNodeReads, Json, IOUtils, Sequences, SequencesExt
\* Trace validation has been designed for TLC running in default model-checking
\* mode, i.e., breadth-first search.
\* The property TraceMatched will be violated if TLC runs with more than a single worker.
ASSUME TLCGet("config").mode = "bfs" /\ TLCGet("config").worker = 1
\* TLC register 0 is used to hold the count of line numbers processed, so it can be checked
\* in the post-condition, which can only include constants and not state variables such as l.
ASSUME TLCGet("config").mode = "bfs" /\ TLCGet("config").worker = 1 /\ TLCSet(0, 0)
\* Note the extra /../ necessary to run in the VSCode extension but not a happy CLI default
JsonFile ==
IF "JSON" \in DOMAIN IOEnv THEN IOEnv.JSON ELSE "../../build/consistency/trace.ndjson"
IF "JSON" \in DOMAIN IOEnv THEN IOEnv.JSON ELSE "trace.ndjson"
JsonLog ==
\* Deserialize the System log as a sequence of records from the log file.
@ -38,55 +39,120 @@ ToStatus ==
"CommittedStatus" :> CommittedStatus @@
"InvalidStatus" :> InvalidStatus
\* Beware to only prime e.g. inbox in inbox'[rcv] and *not* also rcv, i.e.,
\* inbox[rcv]'. rcv is defined in terms of TLCGet("level") that correctly
\* handles priming, which causes rcv' to equal rcv of the next log line.
IsEvent(e) ==
\* Equals FALSE if we get past the end of the log, causing model checking to stop.
/\ l \in 1..Len(JsonLog)
/\ logline.action = e
/\ l' = l + 1
/\ TLCSet(0, Max({l', TLCGet(0)}))
IsRwTxRequestAction ==
/\ IsEvent("RwTxRequestAction")
\* Model action
/\ RwTxRequestAction
\* Match message contents
/\ Last(history').type = ToTxType[logline.type]
/\ Last(history').tx = logline.tx
IsRwTxExecuteAction ==
/\ IsEvent("RwTxExecuteAction")
\* Model action
/\ RwTxExecuteAction
\* Match message contents
/\ Last(history').tx = logline.tx
\* RwTxExecuteAction can only take place if a branch exists for the view
\* If there is no branch, BackfillLedgerBranches will create the right amount of branches
/\ Len(ledgerBranches) >= logline.tx_id[1]
\* That branch contains just the right amount of transactions (seqno - 1)
\* If that's not the case, BackfillLedgerBranche will create the right amount of txs
/\ Len(ledgerBranches[logline.tx_id[1]]) = logline.tx_id[2] - 1
IsRwTxResponseAction ==
/\ IsEvent("RwTxResponseAction")
\* Model action
/\ RwTxResponseAction
\* Match message contents
/\ Last(history').type = ToTxType[logline.type]
/\ Last(history').tx = logline.tx
/\ Last(history').tx_id = logline.tx_id
IsStatusCommittedResponseAction ==
/\ IsEvent("StatusCommittedResponseAction")
\* Model action
/\ StatusCommittedResponseAction
\* Match message contents
/\ Last(history').type = ToTxType[logline.type]
/\ Last(history').status = ToStatus[logline.status]
/\ Last(history').tx_id = logline.tx_id
IsRoTxRequestAction ==
/\ IsEvent("RoTxRequestAction")
\* Model action
/\ RoTxRequestAction
\* Match message contents
/\ Last(history').type = ToTxType[logline.type]
/\ Last(history').tx = logline.tx
IsRoTxResponseAction ==
/\ IsEvent("RoTxResponseAction")
\* Model action
/\ RoTxResponseAction
\* Match message contents
/\ Last(history').type = ToTxType[logline.type]
/\ Last(history').tx = logline.tx
\* RoTxResponseAction can only take place if a branch exists for the view
\* If there is no branch, BackfillLedgerBranches will create the right amount of branches
/\ Len(ledgerBranches) >= logline.tx_id[1]
\* That branch contains just the right amount of transactions (seqno - 1)
\* If that's not the case, BackfillLedgerBranche will create the right amount of txs
/\ Len(ledgerBranches[logline.tx_id[1]]) = logline.tx_id[2] - 1
IsStatusInvalidResponseAction ==
/\ IsEvent("StatusInvalidResponseAction")
\* Model action
/\ StatusInvalidResponseAction
\* Match message contents
/\ Last(history').type = ToTxType[logline.type]
/\ Last(history').status = ToStatus[logline.status]
/\ Last(history').tx_id = logline.tx_id
\* Matches an event, but without advancing the log line. Useful to apply changes
\* before an event can be handled, for example backfilling the ledger branch, or
\* creating new ledger branches.
PreEvent(e) ==
/\ l' = l
/\ l \in 1..Len(JsonLog)
/\ logline.action = e
BackfillLedgerBranch ==
/\
\/ PreEvent("RwTxExecuteAction")
\* There is no separate RoTxExecuteAction, but conceptually,
\* we would backfill before it as well. Instead we do before the
\* RoTxResponseAction, which is the earliest possible opportunity.
\/ PreEvent("RoTxResponseAction")
\* Similar to AppendOtherTxnAction, but only append to the specific branch
\* necessary to enable the next transaction to execute.
/\ LET view == logline.tx_id[1]
seqno == logline.tx_id[2]
IN /\ Len(ledgerBranches) >= view
/\ Len(ledgerBranches[view]) < seqno - 1
/\ ledgerBranches' = [ledgerBranches EXCEPT ![view] = Append(@, [view |-> view])]
/\ UNCHANGED history
BackfillLedgerBranches ==
/\
\/ PreEvent("RwTxExecuteAction")
\* There is no separate RoTxExecuteAction, but conceptually,
\* we would backfill before it as well. Instead we do before the
\* RoTxResponseAction, which is the earliest possible opportunity.
\/ PreEvent("RoTxResponseAction")
\* Similar to TruncateLedgerAction, but only advances the view
/\ LET view == logline.tx_id[1]
seqno == logline.tx_id[2]
IN /\ Len(ledgerBranches) < view
/\ ledgerBranches' = Append(ledgerBranches, Last(ledgerBranches))
/\ UNCHANGED history
TraceNext ==
\/ IsRwTxRequestAction
@ -96,6 +162,8 @@ TraceNext ==
\/ IsRoTxRequestAction
\/ IsRoTxResponseAction
\/ IsStatusInvalidResponseAction
\/ BackfillLedgerBranch
\/ BackfillLedgerBranches
TraceSpec ==
TraceInit /\ [][TraceNext]_<<l, vars>>
@ -119,7 +187,7 @@ TraceMatched ==
TraceMatchedNonTrivially ==
\* If, e.g., the FALSE state constraint excludes all states, TraceMatched won't be violated.
TLCGet("stats").diameter >= Len(JsonLog)
TLCGet(0) >= Len(JsonLog)
MNR == INSTANCE MultiNodeReads