зеркало из https://github.com/microsoft/FASTER.git
Merge pull request #78 from Microsoft/gunaprsd-patch-2
Fixed index checkpointing state machine
This commit is contained in:
Коммит
924cc88b75
73
cs/README.md
73
cs/README.md
|
@ -140,8 +140,77 @@ Several example projects are located in [cs/playground](https://github.com/Micro
|
|||
|
||||
## Checkpointing and Recovery
|
||||
|
||||
FASTER supports on-demand checkpoint-based recovery, using a new concept called Concurrent Prefix Recovery (CPR). You can read more about CPR [here](https://www.microsoft.com/en-us/research/uploads/prod/2019/01/cpr-sigmod19.pdf). We briefly describe how to use the checkpoint/recovery functionality below.
|
||||
FASTER supports **checkpoint-based recovery**. Every new checkpoint persists (or makes durable) additional user-operations (Read, Upsert or RMW). FASTER allows client threads to keep track of operations that have persisted and those that have not using a session-based API.
|
||||
|
||||
Recall that each FASTER thread starts a session, associated with a unique Guid. All FASTER thread operations (Read, Upsert, RMW) carry a monotonic sequence number. At any point in time, one may call `Checkpoint` to initiate an asynchronous checkpoint of FASTER. After calling `Checkpoint`, each FASTER thread is (eventually) notified of a sequence number, such that all operations until, and no operations after, that sequence number, are guaranteed to be persisted as part of that checkpoint. During recovery, threads continue their session with the same Guid using `ContinueSession`, and are provided the thread-local sequence number until which that session has been recovered. The new thread may use this information to replay all uncommitted operations since that point.
|
||||
Recall that each FASTER thread starts a session, associated with a unique Guid.
|
||||
All FASTER thread operations (Read, Upsert, RMW) carry a monotonic sequence number.
|
||||
At any point in time, one may call `Checkpoint` to initiate an asynchronous checkpoint of FASTER.
|
||||
After calling `Checkpoint`, each FASTER thread is (eventually) notified of a sequence number, such that all operations until, and no operations after, that sequence number, are guaranteed to be persisted as part of that checkpoint.
|
||||
This sequence number can be used by the FASTER thread to clear any in-memory buffer of operations waiting to be performed.
|
||||
|
||||
During recovery, threads can continue their session with the same Guid using `ContinueSession`. The function returns the thread-local sequence number until which that session has been recovered. The new thread may use this information to replay all uncommitted operations since that point.
|
||||
|
||||
Below, we show a simple recovery example for a single thread.
|
||||
```Csharp
|
||||
/// <summary>
/// Single-threaded walkthrough of checkpointing and recovery with FASTER.
/// <c>Run</c> starts a fresh session that issues RMW operations and takes
/// checkpoints; <c>RecoverAndContinue</c> restores the store from the most
/// recently recorded checkpoint token and resumes the recorded session.
/// </summary>
public class RecoveryExample
{
    static FasterKV<long, long, long, long, Empty, Funcs> fht;
    static IDevice log;

    /// <summary>
    /// Creates the log device and the store and publishes them through the static fields.
    /// </summary>
    static void Initialize()
    {
        // Assign the static fields directly: declaring `var log` / `var fht` here
        // would create locals that shadow the fields and leave them null.
        log = Devices.CreateLogDevice("C:\\Temp\\hlog.log");
        fht = new FasterKV<long, long, long, long, Empty, Funcs>
            (1L << 20, new Funcs(), new LogSettings { LogDevice = log });
    }

    /// <summary>
    /// Entry point for a fresh run: initializes the store and starts a new session.
    /// </summary>
    static void Run()
    {
        Initialize();
        RunSession();
    }

    /// <summary>
    /// Entry point after a failure: initializes the store, recovers it from the
    /// checkpoint token stored in latestCheckpoint.txt, and continues the session.
    /// </summary>
    static void RecoverAndContinue()
    {
        Initialize();
        // Verbatim strings do not process escapes, so single backslashes are used.
        string[] lines = System.IO.File.ReadAllLines(@"C:\Temp\latestCheckpoint.txt");
        Guid checkpointGuid = Guid.Parse(lines[0]);
        fht.Recover(checkpointGuid);
        ContinueSession();
    }

    /* Helper Functions */

    /// <summary>
    /// Starts a new session, records its Guid in session1.txt, and then loops
    /// forever: one RMW per key (each tagged with the next sequence number),
    /// followed by a full checkpoint whose token is written to latestCheckpoint.txt.
    /// </summary>
    static void RunSession()
    {
        Guid guid = fht.StartSession();
        System.IO.File.WriteAllText(@"C:\Temp\session1.txt", guid.ToString());

        long input = 10, seq = 0;
        while (true)
        {
            // Key and sequence number advance together (comma-separated iterators).
            for (long key = 1; key < 1L << 20; key++, seq++)
            {
                fht.RMW(ref key, ref input, Empty.Default, seq);
            }

            // NOTE(review): the accompanying text describes checkpoints as asynchronous;
            // confirm the checkpoint has completed before relying on the recorded token.
            fht.TakeFullCheckpoint(out Guid checkpointGuid);
            System.IO.File.WriteAllText(@"C:\Temp\latestCheckpoint.txt", checkpointGuid.ToString());
        }
    }

    /// <summary>
    /// Resumes the session whose Guid is stored in session1.txt. The value returned
    /// by <c>fht.ContinueSession</c> is used as the sequence number to continue from,
    /// and the first key sweep starts at the key derived from that sequence number.
    /// </summary>
    static void ContinueSession()
    {
        string[] lines = System.IO.File.ReadAllLines(@"C:\Temp\session1.txt");
        Guid sessionGuid = Guid.Parse(lines[0]);

        long seq = fht.ContinueSession(sessionGuid);
        long input = 10;

        // Start the first sweep at the key derived from the recovered sequence number
        // (previously this value was computed and then overwritten by the loop initializer).
        // NOTE(review): confirm this mapping matches how RunSession pairs keys with seq.
        long key = seq % (1L << 20);

        while (true)
        {
            for (; key < 1L << 20; key++, seq++)
            {
                fht.RMW(ref key, ref input, Empty.Default, seq);
            }

            // NOTE(review): the accompanying text describes checkpoints as asynchronous;
            // confirm the checkpoint has completed before relying on the recorded token.
            fht.TakeFullCheckpoint(out Guid checkpointGuid);
            System.IO.File.WriteAllText(@"C:\Temp\latestCheckpoint.txt", checkpointGuid.ToString());

            // Later sweeps cover the full key range again.
            key = 1;
        }
    }
}
|
||||
```
|
||||
|
||||
FASTER supports two notions of checkpointing: Snapshot and Fold-Over. The former is a full snapshot of the in-memory data into a separate snapshot file, whereas the latter is an _incremental_ checkpoint of the changes since the last checkpoint. Fold-Over effectively moves the read-only marker of the hybrid log to the tail, and thus all the data is persisted as part of the same hybrid log (there is no separate snapshot file). All subsequent updates are written to new hybrid log tail locations, which gives Fold-Over its incremental nature. You can find a few basic checkpointing examples [here](https://github.com/Microsoft/FASTER/blob/master/cs/test/SimpleRecoveryTest.cs) and [here](https://github.com/Microsoft/FASTER/tree/master/cs/playground/SumStore). We plan to add more examples and details going forward.
|
||||
|
|
|
@ -631,6 +631,25 @@ namespace FASTER.core
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We have several state machines supported by this function.
|
||||
* Full Checkpoint:
|
||||
* REST -> PREP_INDEX_CHECKPOINT -> PREPARE -> IN_PROGRESS
|
||||
* -> WAIT_PENDING -> WAIT_FLUSH -> PERSISTENCE_CALLBACK -> REST
|
||||
*
|
||||
* Index Checkpoint:
|
||||
* REST -> PREP_INDEX_CHECKPOINT -> INDEX_CHECKPOINT -> REST
|
||||
*
|
||||
* Hybrid Log Checkpoint:
|
||||
* REST -> PREPARE -> IN_PROGRESS -> WAIT_PENDING -> WAIT_FLUSH
|
||||
* -> PERSISTENCE_CALLBACK -> REST
|
||||
*
|
||||
* Grow :
|
||||
* REST -> PREPARE_GROW -> IN_PROGRESS_GROW -> REST
|
||||
*
|
||||
* GC:
|
||||
* REST -> GC -> REST
|
||||
*/
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private SystemState GetNextState(SystemState start, CheckpointType type = CheckpointType.FULL)
|
||||
{
|
||||
|
@ -663,8 +682,16 @@ namespace FASTER.core
|
|||
}
|
||||
break;
|
||||
case Phase.INDEX_CHECKPOINT:
|
||||
switch(type)
|
||||
{
|
||||
case CheckpointType.INDEX_ONLY:
|
||||
nextState.phase = Phase.REST;
|
||||
break;
|
||||
case CheckpointType.FULL:
|
||||
nextState.phase = Phase.PREPARE;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Phase.PREPARE:
|
||||
nextState.phase = Phase.IN_PROGRESS;
|
||||
nextState.version = start.version + 1;
|
||||
|
|
Загрузка…
Ссылка в новой задаче