diff --git a/Makefile b/Makefile index b2ca7c37860a96215e6c35d552e963f5214df1ae..713de3dc3fa1cc245972b709900e844284832228 100644 --- a/Makefile +++ b/Makefile @@ -132,7 +132,6 @@ include store/weakstore/Rules.mk include store/benchmark/Rules.mk include lockserver/Rules.mk include timeserver/Rules.mk -include test-client/Rules.mk include libtapir/Rules.mk ################################################################## # General rules diff --git a/lockserver/client.cc b/lockserver/client.cc index ccce2d61881ef628a6a213af6f3b2faa4eb477f7..45e2266786bf75350569bc8d09f1b424f8971d95 100644 --- a/lockserver/client.cc +++ b/lockserver/client.cc @@ -30,6 +30,17 @@ #include "lockserver/client.h" +namespace { + +void +usage() +{ + printf("Unknown command.. Try again!\n"); + printf("Usage: exit | q | lock <key> | unlock <key>\n"); +} + +} // namespace + int main(int argc, char **argv) { @@ -68,14 +79,18 @@ main(int argc, char **argv) cmd[clen++] = c; cmd[clen] = '\0'; - if (clen == 0) continue; tok = strtok(cmd, " ,.-"); + if (tok == NULL) continue; if (strcasecmp(tok, "exit") == 0 || strcasecmp(tok, "q") == 0) { printf("Exiting..\n"); break; } else if (strcasecmp(tok, "lock") == 0) { tok = strtok(NULL, " ,.-"); + if (tok == NULL) { + usage(); + continue; + } key = string(tok); status = locker.lock(key); @@ -86,11 +101,15 @@ main(int argc, char **argv) } } else if (strcasecmp(tok, "unlock") == 0) { tok = strtok(NULL, " ,.-"); + if (tok == NULL) { + usage(); + continue; + } key = string(tok); locker.unlock(key); printf("Unlock Successful\n"); } else { - printf("Unknown command.. Try again!\n"); + usage(); } fflush(stdout); } diff --git a/proof/IR.tla b/proof/IR.tla new file mode 100644 index 0000000000000000000000000000000000000000..f6d5afd3c770029d91e7143da8e7c9fd009743d0 --- /dev/null +++ b/proof/IR.tla @@ -0,0 +1,526 @@ +--------------------------------- MODULE IR --------------------------------- +(***************************************************************************) +(* This is a TLA+ specification of the Inconsistent Replication algorithm. *) +(* (And a mechanically-checked proof of its correctness using TLAPS) *) +(***************************************************************************) +EXTENDS FiniteSets, Naturals, TLC, TLAPS + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Constants}} ^' *) +(***************************************************************************) + +(***************************************************************************) +(* Constant parameters: *) +(* Replicas: the set of all replicas (Replica IDs) *) +(* Clients: the set of all clients (Client IDs) *) +(* Quorums: the set of all quorums *) +(* SuperQuorums: the set of all super quorums *) +(* Results: the set of all possible result types *) +(* OperationBody: the set of all possible operation bodies *) +(* (with arguments, etc. - can be infinite) *) +(* *) +(* f: maximum number of failures allowed (half of n) *) +(* *) +(* Constants used to bound variables, for model checking (Nat is bounded) *) +(* max_vc: maximum number of View-Changes allowed for each replicas *) +(* max_req: maximum number of op requests performed by clients *) +(***************************************************************************) + +CONSTANTS Replicas, Clients, Quorums, SuperQuorums, Results, OperationBody, + max_vc, max_req, f + +ASSUME IsFiniteSet(Replicas) + +(***************************************************************************) +(* The possible states of a replica and the two types of operations *) +(* currently defined by IR. *) +(***************************************************************************) + +ReplicaState == {"NORMAL", "FAILED", "RECOVERING", "VIEW-CHANGING"} +ClientState == {"NORMAL", "FAILED"} +OperationType == {"Inconsistent", "Consensus"} + +(***************************************************************************) +(* Definition of operation space *) +(***************************************************************************) + +MessageId == Clients \X Nat +Operations == OperationType \X OperationBody + +(***************************************************************************) +(* Message is defined to be the set of all possible messages *) +(***************************************************************************) + +\* TODO: Assumptions +\* Assume unique message ids +\* Assume no more than f replica failures + +Message == + [type: {"REQUEST"}, + id: MessageId, + op: Operations] + \cup + [type: {"REPLY"}, + id: MessageId, + v: Nat, + res: Results, + src: Replicas] + \* v = view num. + \cup + [type: {"START-VIEW-CHANGE"}, + v: Nat, + src: Replicas] + \cup + [type: {"DO-VIEW-CHANGE"}, + r: SUBSET (MessageId \X Operations \X Results + \cup MessageId \X Operations), + v: Nat, + src: Replicas, + dst: Replicas] + \cup + [type: {"START-VIEW"}, + v: Nat, + src: Replicas] + \cup + [type: {"START-VIEW-REPLY"}, + v: Nat, + src: Replicas, + dst: Replicas] + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Variables and State Predicates}} ^' *) +(***************************************************************************) + +(***************************************************************************) +(* Variables: *) +(* 1. State at each replica: *) +(* rState = Denotes current replica state. Either: *) +(* - NORMAL (processing operations) *) +(* - VIEW-CHANGING (participating in recovery) *) +(* rRecord = Unordered set of operations and their results *) +(* rViewNumber = current view number *) +(* 2. State of communication medium: *) +(* sentMsg = sent (but not yet received) messages *) +(* 3. State at client: *) +(* cCurrentOperation = crt operation requested by the client *) +(* cMmessageCounter = the message I must use for *) +(* the next operation *) +(* *) +(***************************************************************************) + +VARIABLES rState, rRecord, rViewNumber, rViewReplies, sentMsg, cCrtOp, + cMsgCounter, cCrtOpReplies, cState, aSuccessful, aFinalized, + gViewChangesNo + +(***************************************************************************) +(* Defining these tuples makes it easier to express which varibles remain *) +(* unchanged *) +(***************************************************************************) + +rVars == <<rState, rRecord, rViewNumber, rViewReplies>>\* Replica variables. +cVars == <<cCrtOp, \* current operation at a client + cCrtOpReplies, \* current operation replies + cMsgCounter, + cState>> \* Client variables. +aVars == <<aSuccessful, aFinalized>> \* Application variables +oVars == <<sentMsg, gViewChangesNo>> \* Other variables. +vars == <<rVars, cVars, oVars>> \* All variables. + +TypeOK == + /\ rState \in [Replicas -> ReplicaState] + /\ rRecord \in [Replicas -> SUBSET (MessageId \X + Operations \X + Results + \cup MessageId \X + Operations)] + /\ rViewNumber \in [Replicas -> Nat] + /\ rViewReplies \in [Replicas -> SUBSET [type: {"do-view-change", + "start-view-reply"}, + viewNumber: Nat, + r: SUBSET (MessageId \X + Operations \X + Results + \cup MessageId \X + Operations), + src: Replicas]] + /\ sentMsg \in SUBSET Message + /\ cCrtOp \in [Clients -> Operations \cup {<<>>}] + /\ cCrtOpReplies \in [Clients -> SUBSET [viewNumber: Nat, + res: Results, + src: Replicas]] + /\ cMsgCounter \in [Clients -> Nat] + /\ cState \in [Clients -> ClientState] + /\ aSuccessful \in SUBSET (MessageId \X + Operations \X + Results + \cup MessageId \X + Operations) + /\ aFinalized \in SUBSET (MessageId \X + Operations \X + Results) + /\ gViewChangesNo \in Nat + +Init == + /\ rState = [r \in Replicas |-> "NORMAL"] + /\ rRecord = [r \in Replicas |-> {}] + /\ rViewNumber = [r \in Replicas |-> 0] + /\ rViewReplies = [r \in Replicas |-> {}] + /\ sentMsg = {} + /\ cCrtOp = [c \in Clients |-> <<>>] + /\ cCrtOpReplies = [c \in Clients |-> {}] + /\ cMsgCounter = [c \in Clients |-> 0] + /\ cState = [c \in Clients |-> "NORMAL"] + /\ aSuccessful = {} + /\ aFinalized = {} + /\ gViewChangesNo = 0 + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Actions}} ^' *) +(***************************************************************************) + +Send(m) == sentMsg' = sentMsg \cup {m} +Receive(m) == sentMsg' = sentMsg \ {m} +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Client Actions}} ^' *) +(***************************************************************************) + +\* Note: CHOOSE does not introduce nondeterminism (the same value is chosen +\* each time) + +\*Client sends a request +ClientRequest(c) == + \E opType \in OperationType: \E opBody \in OperationBody: + /\ cCrtOp[c] = <<>> \*\* the client is not waiting for a result + \*\* of another operation + /\ Send([type |-> "REQUEST", id |-> <<c, cMsgCounter[c]>>, + op |-> <<opType, opBody>> ]) + /\ cCrtOp' = [cCrtOp EXCEPT ![c] = <<opType, opBody>>] + /\ cMsgCounter' = [cMsgCounter EXCEPT ![c] = cMsgCounter[c] + 1] + /\ UNCHANGED <<rVars, aVars, cCrtOpReplies, cState, gViewChangesNo>> + /\ cMsgCounter[c] < max_req \* BOUND the number of requests a client can make + +\*Client received a reply +ClientReceiveReply(c) == + \E msg \in sentMsg: + /\ msg.type = "REPLY" + /\ cCrtOp[c] # <<>> + /\ msg.id = <<c, cMsgCounter[c] - 1>> \* reply to c's request for crt op + \* TODO: if already reply from src, keep the most recent one (biggest view Number) + \* /\ Assert(Cardinality(cCrtOpReplies[c])< 10, "cCrtOpReplies cardinality bound") + /\ cCrtOpReplies' = [cCrtOpReplies EXCEPT ![c] = @ \cup + {[viewNumber |-> msg.v, + res |-> msg.res, + src |-> msg.src]}] + /\ UNCHANGED <<cCrtOp, cMsgCounter, cState, rVars, aVars, oVars>> + +\*An operation is successful at a client and result returned to the application +ClientSuccessfulOp(c) == + /\ cCrtOp[c] /= <<>> + /\ \E Q \in Quorums: + \* a quorum of replies with matching view numbers + /\ \A r \in Q: + /\ \E reply \in cCrtOpReplies[c]: reply.src = r + /\ \A p \in Q: \E rr, pr \in cCrtOpReplies[c]: + /\ rr.src = r + /\ pr.src = p + /\ rr.viewNumber = pr.viewNumber + /\ \/ /\ cCrtOp[c][1] = "Inconsistent" + /\ cCrtOp' = [cCrtOp EXCEPT ![c] = <<>>] + /\ cCrtOpReplies' = [cCrtOpReplies EXCEPT ![c] = {}] + /\ aSuccessful' = aSuccessful \cup + {<<<<c, cMsgCounter[c] - 1>>, + cCrtOp[c]>>} + /\ UNCHANGED <<cMsgCounter, cState>> + \/ /\ cCrtOp[c][1] = "Consensus" + /\ \A r, p \in Q: \E rr, pr \in cCrtOpReplies[c]: + /\ rr.src = r + /\ pr.src = p + /\ rr.res = pr.res + /\ \E reply \in cCrtOpReplies[c]: + /\ reply.src \in Q + /\ aSuccessful' = aSuccessful \cup + {<<<<c, cMsgCounter[c] - 1>>, + cCrtOp[c], + reply.res>>} + /\ UNCHANGED <<cVars>> + /\ UNCHANGED <<rVars, aFinalized, oVars>> + +\*An operation is finalized by a client and result returned to the application +ClientFinalizedOp(c) == + /\ cCrtOp[c] /= <<>> /\ cCrtOp[c][1] = "Consensus" + /\ \E Q \in SuperQuorums: + \* a superquorum of replies with matching view numbers and results + /\ \A r \in Q: + /\ \E reply \in cCrtOpReplies[c]: reply.src = r + /\ \A p \in Q: \E rr, pr \in cCrtOpReplies[c]: + /\ rr.src = r + /\ pr.src = p + /\ rr.viewNumber = pr.viewNumber + /\ rr.res = pr.res + /\ \E reply \in cCrtOpReplies[c]: + /\ reply.src \in Q + /\ aFinalized' = aFinalized \cup + {<<<<c, cMsgCounter[c] - 1>>, + cCrtOp[c], + reply.res >>} + /\ cCrtOp' = [cCrtOp EXCEPT ![c] = <<>>] + /\ cCrtOpReplies' = [cCrtOpReplies EXCEPT ![c] = {}] + /\ UNCHANGED <<rVars, cMsgCounter, cState, aSuccessful, oVars>> + +\*Client fails and looses all data +ClientFail(c) == + /\ cState' = [cState EXCEPT ![c] = "FAILED"] + /\ cMsgCounter' = [cMsgCounter EXCEPT ![c] = 0] + /\ cCrtOp' = [cCrtOp EXCEPT ![c] = <<>>] + /\ cCrtOpReplies' = [cCrtOpReplies EXCEPT ![c] = {}] + /\ UNCHANGED <<rVars, aVars, oVars>> + +\*Client recovers +ClientRecover(c) == FALSE + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Replica Actions}} ^' *) +(***************************************************************************) + +\* Replica sends a reply +ReplicaReply(r) == \* TODO: Might need to check for duplicate state action? + \E op \in Operations, id \in MessageId, result \in Results: + /\ [type |-> "REQUEST", id |-> id, op |-> op] \in sentMsg + /\ ~ \E rec \in rRecord[r]: rec[1] = id + \* not alredy replied for this op + /\ rRecord' = [rRecord EXCEPT ![r] = @ \cup {<<id, op, result>>}] + /\ Assert(rViewNumber[r] < 10, "viewNumber bound") + /\ Send([type |-> "REPLY", + id |-> id, + v |-> rViewNumber[r], + res |-> result, + src |-> r]) + /\ UNCHANGED <<rState, rViewNumber, rViewReplies, cVars, aVars, gViewChangesNo>> + +\*A replica starts the view change procedure +\* supports concurrent view changes (id by src) +ReplicaStartViewChange(r) == + /\ Send([type |-> "START-VIEW-CHANGE", v |-> rViewNumber[r], src |-> r]) + /\ rState' = [rState EXCEPT ![r] = "RECOVERING"] + /\ UNCHANGED <<rViewNumber, rViewReplies, rRecord, cVars, aVars>> + /\ gViewChangesNo < max_vc \* BOUND on number of view changes + /\ gViewChangesNo' = gViewChangesNo + 1 + +\* A replica received a message to start view change +ReplicaReceiveStartViewChange(r) == + /\ \E msg \in sentMsg: + /\ msg.type = "START-VIEW-CHANGE" + /\ LET v_new == + IF msg.v > rViewNumber[r] THEN msg.v + ELSE rViewNumber[r] + IN + /\ ~\E m \in sentMsg: \* not already sent (just to bound the model checker) + /\ m.type = "DO-VIEW-CHANGE" + /\ m.v >= msg.v + /\ m.dst = msg.src + /\ m.src = r + /\ Send([type |-> "DO-VIEW-CHANGE", + v |-> v_new + 1, + r |-> rRecord[r], + src |-> r, + dst |-> msg.src]) + /\ rViewNumber' = [rViewNumber EXCEPT ![r] = v_new + 1] + /\ rState' = [rState EXCEPT ![r] = "VIEW-CHANGING"] + /\ UNCHANGED <<cVars, rRecord, rViewReplies, aVars, gViewChangesNo>> + +\* Replica received DO-VIEW-CHANGE message +ReplicaReceiveDoViewChange(r) == + \*/\ Assert(Cardinality(sentMsg) < 20, "bound on sentMsg") + /\ \E msg \in sentMsg: + /\ msg.type = "DO-VIEW-CHANGE" + /\ msg.dst = r + /\ msg.v > rViewNumber[r] + /\ rViewReplies' = [rViewReplies EXCEPT ![r] = @ \cup + {[type |-> "do-view-change", + viewNumber |-> msg.v, + r |-> msg.r, + src |-> msg.src]}] + /\ UNCHANGED <<cVars, rViewNumber, rRecord, rState, aVars, oVars>> + +RecoverOpsResults(ops) == + \*/\ Assert(Cardinality(ops) < 3, "recovered ops result") + /\ TRUE +RecoverOps(ops) == + \*/\ Assert(Cardinality(ops) < 3, "recovered ops") + /\ TRUE + +\* A replica received enough view change replies to start processing in the new view +ReplicaDecideNewView(r) == + /\ \E Q \in Quorums: + /\ \A rep \in Q: \E reply \in rViewReplies[r]: /\ reply.src = rep + /\ reply.type = "do-view-change" + \* received at least a quorum of replies + /\ LET recoveredConensusOps_a == + \* any consensus operation found in at least a majority of a Quorum + {x \in UNION {y.r: y \in {z \in rViewReplies[r]: z.src \in Q}}: + /\ x[2][1] = "Consensus" + /\ \E P \in SuperQuorums: + \A rep \in Q \cap P: + \E reply \in rViewReplies[r]: + /\ reply.src = rep + /\ x \in reply.r} \* same op, same result + + recoveredConensusOps_b == \* TODO: what result? from the app? + \* the rest of consensus ops found in at least one record (discard the result) + {<<z[1], z[2]>>: + z \in {x \in UNION {y.r: y \in {z \in rViewReplies[r]: z.src \in Q}}: + /\ x[2][1] = "Consensus" + /\ ~ x \in recoveredConensusOps_a}} + + recoveredInconsistentOps_c == + \* any inconsistent operation found in any received record (discard the result) + {<<z[1], z[2]>>: + z \in {x \in UNION {y.r: y \in {z \in rViewReplies[r]: z.src \in Q}}: + x[2][1] = "Inconsistent"}} + IN + /\ RecoverOpsResults(recoveredConensusOps_a) + /\ RecoverOps(recoveredConensusOps_b) + /\ RecoverOps(recoveredInconsistentOps_c) + /\ rRecord' = [rRecord EXCEPT ![r] = @ \cup recoveredConensusOps_a + \cup recoveredConensusOps_b + \cup recoveredInconsistentOps_c] + /\ LET v_new == + \* max view number received + CHOOSE v \in {x.viewNumber: x \in rViewReplies[r]}: + \A y \in rViewReplies[r]: + y.viewNumber <= v + IN + /\ Send([type |-> "START-VIEW", + v |-> v_new, + src |-> r]) + /\ rViewNumber' = [rViewNumber EXCEPT ![r] = v_new] + /\ rViewReplies' = [rViewReplies EXCEPT ![r] = {}] + /\ UNCHANGED <<rState, cVars, aVars, gViewChangesNo>> + +\*A replica receives a start view message +ReplicaReceiveStartView(r) == + /\ \E msg \in sentMsg: + /\ msg.type = "START-VIEW" + /\ msg.v >= rViewNumber[r] + /\ msg.src # r \* don't reply to myself + /\ Send([type |-> "START-VIEW-REPLY", + v |-> msg.v, + src |-> r, + dst |-> msg.src]) + /\ rViewNumber' = [rViewNumber EXCEPT ![r] = msg.v] + /\ rState' = [rState EXCEPT ![r] = "NORMAL"] + /\ UNCHANGED <<rRecord, rViewReplies, cVars, aVars, gViewChangesNo>> + +ReplicaReceiveStartViewReply(r) == + /\ \E msg \in sentMsg: + /\ msg.type = "START-VIEW-REPLY" + /\ msg.dst = r + /\ msg.v > rViewNumber[r] \* receive only if bigger than the last view I was in + /\ rViewReplies' = [rViewReplies EXCEPT ![r] = @ \cup + {[type |-> "start-view-reply", + viewNumber |-> msg.v, + r |-> {}, + src |-> msg.src]}] + /\ UNCHANGED <<rRecord, rState, rViewNumber, cVars, aVars, oVars>> + +ReplicaRecover(r) == \* we received enough START-VIEW-REPLY messages + \E Q \in Quorums: + /\ r \in Q + /\ \A p \in Q: \/ p = r + \/ /\ p # r + /\ \E reply \in rViewReplies[r]: /\ reply.src = p + /\ reply.type = "start-view-reply" + /\ rViewReplies' = [rViewReplies EXCEPT ![r] = {}] + /\ rState' = [rState EXCEPT ![r] = "NORMAL"] + /\ UNCHANGED <<rRecord, rViewNumber, cVars, aVars, oVars>> + +ReplicaResumeViewChange(r) == \* TODO: On timeout + FALSE + +\*A replica fails and looses everything +ReplicaFail(r) == \* TODO: check cardinality + /\ rState' = [rState EXCEPT ![r] = "FAILED"] + /\ rRecord' = [rRecord EXCEPT ![r] = {}] + \*/\ rViewNumber' = [rViewNumber EXCEPT ![r] = 0] \* TODO: check what happens if we loose the view number + /\ rViewReplies' = [rViewReplies EXCEPT ![r] = {}] + /\ UNCHANGED <<rViewNumber, cVars, aVars, oVars>> + /\ Cardinality({re \in Replicas: + \* We assume less than f replicas are allowed to fail + \/ rState[re] = "FAILED" + \/ rState[re] = "RECOVERING"}) < f +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{High-Level Actions}} ^' *) +(***************************************************************************) + +ClientAction(c) == + \/ /\ cState[c] = "NORMAL" + /\ \/ ClientRequest(c) \* some client tries to replicate commit an operation + \/ ClientReceiveReply(c) \* some client receives a reply from a replica + \*\/ ClientFail(c) \* some client fails + \/ ClientSuccessfulOp(c) \* an operation is successful at some client + \/ ClientFinalizedOp(c) \* an operation was finalized at some client + \/ /\ cState[c] = "FAILED" + /\ \/ ClientRecover(c) + +ReplicaAction(r) == + \/ /\ rState[r] = "NORMAL" + /\ \/ ReplicaReply(r) \* some replica sends a reply to a REQUEST msg + \/ ReplicaReceiveStartViewChange(r) + \/ ReplicaReceiveStartView(r) + \/ ReplicaFail(r) \* some replica fails + \/ /\ rState[r] = "FAILED" + /\ \/ ReplicaStartViewChange(r) \* some replica starts to recover + \/ /\ rState[r] = "RECOVERING" \* just to make it clear + /\ \/ ReplicaReceiveDoViewChange(r) + \/ ReplicaDecideNewView(r) + \/ ReplicaReceiveStartViewReply(r) + \/ ReplicaRecover(r) + \/ /\ rState[r] = "VIEW-CHANGING" + /\ \/ ReplicaReceiveStartViewChange(r) + \/ ReplicaReceiveStartView(r) + \/ ReplicaResumeViewChange(r) \* some timeout expired and view change not finished + \/ ReplicaFail(r) + +Next == + \/ \E c \in Clients: ClientAction(c) + \/ \E r \in Replicas: ReplicaAction(r) + +Spec == + TypeOK /\ Init /\ [] [Next]_vars + +FaultTolerance == + /\ \A successfulOp \in aSuccessful, Q \in Quorums: + (\A r \in Q: rState[r] = "NORMAL" \/ rState[r] = "VIEW-CHANGING") + => (\E p \in Q: \E rec \in rRecord[p]: + /\ successfulOp[1] = rec[1] + /\ successfulOp[2] = rec[2]) \* Not necessarily same result + /\ \A finalizedOp \in aFinalized, Q \in Quorums: + (\A r \in Q: rState[r] = "NORMAL" \/ rState[r] = "VIEW-CHANGING") + => (\E P \in SuperQuorums: + \A p \in Q \cap P: + \E rec \in rRecord[p]: + finalizedOp = rec) + +Inv == + /\ TypeOK + /\ FaultTolerance + +THEOREM Spec => []Inv +PROOF +<1>1. Init => Inv + PROOF BY DEF Init, Inv, TypeOK, FaultTolerance, ReplicaState, ClientState +<1>2. Inv /\ [Next]_vars => Inv' + PROOF OBVIOUS +<1>3. QED + PROOF BY <1>1, <1>2, PTL DEF Spec + +============================================================================= +\* Modification History +\* Last modified Sun Jan 25 14:09:05 PST 2015 by aaasz +\* Created Fri Dec 12 17:42:14 PST 2014 by aaasz diff --git a/proof/IR_consensus.tla b/proof/IR_consensus.tla new file mode 100644 index 0000000000000000000000000000000000000000..ac3dd7d5993241fbeb09c40135c6e929223ec339 --- /dev/null +++ b/proof/IR_consensus.tla @@ -0,0 +1,833 @@ +------------------------ MODULE IR_consensus -------------------------------- +(***************************************************************************) +(* This is a TLA+ specification of the Inconsistent Replication algorithm. *) +(* (And a mechanically-checked proof of its correctness using TLAPS) *) +(***************************************************************************) +EXTENDS FiniteSets, Naturals, TLC, TLAPS + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Constants}} ^' *) +(***************************************************************************) + +(***************************************************************************) +(* Constant parameters: *) +(* Replicas: the set of all replicas (Replica IDs) *) +(* Clients: the set of all clients (Client IDs) *) +(* Quorums: the set of all quorums *) +(* SuperQuorums: the set of all super quorums *) +(* Results: the set of all possible result types *) +(* OperationBody: the set of all possible operation bodies *) +(* (with arguments, etc. - can be infinite) *) +(* S: shard id of the shard Replicas constitute *) +(* f: maximum number of failures allowed (half of n) *) +(* *) +(* Constants used to bound variables, for model checking (Nat is bounded) *) +(* max_vc: maximum number of View-Changes allowed for each replicas *) +(* max_req: maximum number of op requests performed by clients *) +(***************************************************************************) + +CONSTANTS Replicas, Clients, Quorums, SuperQuorums, Results, OpBody, + AppClientFail, AppReplicaFail, + SuccessfulInconsistentOp(_,_,_), SuccessfulConsensusOp(_,_,_,_), + Merge(_,_,_), + Sync(_), + ExecInconsistent(_), + ExecConsensus(_), + Decide(_), + f, + S, Shards, \* S = shard id + max_vc, max_req + +ASSUME IsFiniteSet(Replicas) + +ASSUME QuorumAssumption == + /\ Quorums \subseteq SUBSET Replicas + /\ SuperQuorums \subseteq SUBSET Replicas + /\ \A Q1, Q2 \in Quorums: Q1 \cap Q2 # {} + /\ \A Q \in Quorums, R1, R2 \in SuperQuorums: + Q \cap R1 \cap R2 # {} + +ASSUME FailuresAssumption == + \A Q \in Quorums: Cardinality(Q) > f + +(***************************************************************************) +(* The possible states of a replica and the two types of operations *) +(* currently defined by IR. *) +(***************************************************************************) + +ReplicaState == {"NORMAL", "FAILED", "RECOVERING", "VIEW-CHANGING"} +ClientState == {"NORMAL", "FAILED"} +OpType == {"Inconsistent", "Consensus"} +OpStatus == {"TENTATIVE", "FINALIZED"} + +(***************************************************************************) +(* Definition of operation space *) +(***************************************************************************) + +MessageId == [cid: Clients, msgid: Nat] +Operations == [type: OpType, body: OpBody] \cup {<<>>} + +(***************************************************************************) +(* Message is defined to be the set of all possible messages *) +(***************************************************************************) + +\* Assume unique message ids +\* Assume no more than f replica failures +\* We use shard to specify for what shard this message was +\* (we share the variables) +Message == + [type: {"PROPOSE"}, + id: MessageId, + op: Operations, + v: Nat] + \cup [type: {"REPLY"}, \* reply no result + id: MessageId, + v: Nat, + src: Replicas] + \cup + [type: {"REPLY"}, \* reply with result + id: MessageId, + v: Nat, + res: Results, + src: Replicas] + \* v = view num. + \cup + [type: {"START-VIEW-CHANGE"}, + v: Nat, + src: Replicas] + \cup + [type: {"DO-VIEW-CHANGE"}, + r: SUBSET ([msgid: MessageId, + op: Operations, + res: Results, + status: OpStatus] + \cup [msgid: MessageId, + op: Operations, + status: OpStatus]), + v: Nat, + lv: Nat, + src: Replicas, + dst: SUBSET Replicas] + \cup + [type: {"START-VIEW"}, + v: Nat, + r: SUBSET ([msgid: MessageId, + op: Operations, + res: Results, + status: OpStatus] + \cup [msgid: MessageId, + op: Operations, + status: OpStatus]), + src: Replicas] + \cup + [type: {"FINALIZE"}, \* finalize with no result + id: MessageId, + op: Operations, + res: Results, + v: Nat] + \cup + [type: {"FINALIZE"}, \* finalize with result + id: MessageId, + op: Operations, + res: Results, + v: Nat] + \cup + [type: {"CONFIRM"}, + v: Nat, + id: MessageId, + op: Operations, + res: Results, + src: Replicas] + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Variables and State Predicates}} ^' *) +(***************************************************************************) + +(***************************************************************************) +(* \* Variables: *) +(* 1. State at each replica: *) +(* rState = Denotes current replica state. Either: *) +(* - NORMAL (processing operations) *) +(* - VIEW-CHANGING (participating in recovery) *) +(* rRecord = Unordered set of operations and their results *) +(* rViewNumber = current view number *) +(* 2. State of communication medium: *) +(* sentMsg = sent (but not yet received) messages *) +(* 3. State at client: *) +(* cCurrentOperation = crt operation requested by the client *) +(* cMmessageCounter = the message I must use for *) +(* the next operation *) +(* *\ *) +(***************************************************************************) + +VARIABLES rState, rRecord, rCrtView, rLastView, rViewReplies, + rViewOnDisk, + rNonce, sentMsg, cCrtOp, + cCrtOpToFinalize, cMsgCounter, cCrtOpReplies, cCrtOpConfirms, + cState, aSuccessful, arRecord, aVisibility, gViewChangesNo + +(***************************************************************************) +(* Defining these tuples makes it easier to express which varibles remain *) +(* unchanged *) +(***************************************************************************) + +\* Replica variables. +rVars == <<rState, rRecord, rCrtView, rViewOnDisk, rLastView, + rViewReplies, rNonce>> +\* Client variables. +cVars == <<cCrtOp, \* current operation at a client + cCrtOpToFinalize, + cCrtOpReplies, \* current operation replies + cCrtOpConfirms, + cMsgCounter, + cState>> +\* Application variables. +aVars == <<aSuccessful, arRecord, aVisibility>> \* we use them to write invariants +\* Other variables. +oVars == <<sentMsg, gViewChangesNo>> +\* All variables. +vars == <<rVars, cVars, aVars, oVars>> + +TypeOK == + /\ rState[S] \in [Replicas -> ReplicaState] + /\ rRecord[S] \in [Replicas -> SUBSET ([msgid: MessageId, + op: Operations, + res: Results, + status: OpStatus] + \cup [msgid: MessageId, + op: Operations, + status: OpStatus])] + /\ rCrtView[S] \in [Replicas -> Nat] + /\ rViewOnDisk[S] \in [Replicas -> Nat] + /\ rLastView[S] \in [Replicas -> Nat] + /\ rViewReplies[S] \in [Replicas -> SUBSET ([type: {"start-view-change"}, + v: Nat, + src: Replicas] + \cup [type: {"do-view-change"}, + v: Nat, + lv: Nat, + r: SUBSET ([msgid: MessageId, + op: Operations, + res: Results, + status: OpStatus] + \cup [msgid: MessageId, + op: Operations, + status: OpStatus]), + src: Replicas])] + /\ rNonce[S] \in [Replicas -> Nat] + /\ sentMsg[S] \in SUBSET Message + /\ cCrtOp[S] \in [Clients -> Operations] + /\ cCrtOpToFinalize[S] \in [Clients -> Operations] + /\ cCrtOpReplies[S] \in [Clients -> SUBSET ([viewNumber: Nat, + res: Results, + src: Replicas] + \cup [viewNumber: Nat, + src: Replicas])] + /\ cCrtOpConfirms[S] \in [Clients -> SUBSET [viewNumber: Nat, + res: Results, + src: Replicas]] + /\ cMsgCounter[S] \in [Clients -> Nat] + /\ cState \in [Clients -> ClientState] + /\ aSuccessful \in SUBSET ([mid: MessageId, + op: Operations, + res: Results] + \cup [mid: MessageId, + op: Operations]) + /\ aVisibility[S] \in [MessageId -> SUBSET MessageId] + /\ arRecord[S] \in [Replicas -> SUBSET ([msgid: MessageId, + op: Operations, + res: Results, + status: OpStatus] + \cup [msgid: MessageId, + op: Operations, + status: OpStatus])] + /\ gViewChangesNo[S] \in Nat + +Init == + /\ rState = [r \in Replicas |-> "NORMAL"] + /\ rRecord = [r \in Replicas |-> {}] + /\ rCrtView = [r \in Replicas |-> 0] + /\ rViewOnDisk = [r \in Replicas |-> 0] + /\ rLastView = [r \in Replicas |-> 0] + /\ rViewReplies = [r \in Replicas |-> {}] + /\ rNonce = [r \in Replicas |-> 0] + /\ sentMsg = {} + /\ cCrtOp = [c \in Clients |-> <<>>] + /\ cCrtOpToFinalize = [c \in Clients |-> <<>>] + /\ cCrtOpReplies = [c \in Clients |-> {}] + /\ cCrtOpConfirms = [c \in Clients |-> {}] + /\ cMsgCounter = [c \in Clients |-> 0] + /\ cState = [c \in Clients |-> "NORMAL"] + /\ aSuccessful = {} + /\ aVisibility = [o \in MessageId |-> {}] + /\ arRecord = [r \in Replicas |-> {}] + /\ gViewChangesNo = 0 + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Actions}} ^' *) +(***************************************************************************) + +Send(m) == sentMsg' = [sentMsg EXCEPT ![S] = @ \cup {m}] + +AmLeader(r, v) == r = (v % Cardinality(Replicas)) + 1 +IsLeader(r, v) == AmLeader(r, v) +NotIsLeader(r, v) == r /= (v % Cardinality(Replicas)) + 1 +LeaderOf(v) == CHOOSE x \in Replicas: IsLeader(x, v) + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Client Actions}} ^' *) +(***************************************************************************) + +\* Note: CHOOSE does not introduce nondeterminism (the same value is chosen +\* each time) + +\* \* Client sends a request +ClientRequest(c, op) == + /\ cCrtOp[S][c] = <<>> \* the client is not waiting for a result + \* of another operation + /\ cCrtOpToFinalize[S][c] = <<>> \* the client is not waiting + \* to finalize operation + /\ cMsgCounter' = [cMsgCounter EXCEPT ![S][c] = @ + 1] + /\ cCrtOp' = [cCrtOp EXCEPT ![S][c] = op] + /\ Send([type |-> "PROPOSE", + id |-> [cid |-> c, msgid |-> cMsgCounter[S][c] + 1], + op |-> op, + v |-> 0]) + /\ UNCHANGED <<rVars, aVars, cCrtOpReplies, cCrtOpToFinalize, + cCrtOpConfirms,cState, gViewChangesNo>> + /\ cMsgCounter[S][c] < max_req \* BOUND the number of requests a client can make + \* (useful for model checking) + +\* \* Client received a reply +ClientReceiveReply(c) == + \E msg \in sentMsg[S]: + /\ msg.type = "REPLY" + /\ cCrtOp[S][c] /= <<>> + /\ \* reply to c's request for crt op + msg.id = [cid |-> c, msgid |-> cMsgCounter[S][c]] + /\ \/ /\ cCrtOp[S][c].type = "Inconsistent" + /\ cCrtOpReplies' = [cCrtOpReplies EXCEPT ![S][c] = @ \cup + {[viewNumber |-> msg.v, + src |-> msg.src]}] + \/ /\ cCrtOp[S][c].type = "Consensus" + /\ cCrtOpReplies' = [cCrtOpReplies EXCEPT ![S][c] = @ \cup + {[viewNumber |-> msg.v, + res |-> msg.res, + src |-> msg.src]}] + /\ UNCHANGED <<cCrtOp, cCrtOpToFinalize, cCrtOpConfirms, + cMsgCounter, cState, rVars, aVars, oVars>> + +\* \* "Helper" formulas +__matchingViewNumbers(Q, c) == + \* a (super)quorum of replies with matching view numbers + /\ \A r \in Q: + \*/\ \E reply \in cCrtOpReplies[S][c]: reply.src = r + /\ \A p \in Q: \E rr, pr \in cCrtOpReplies[S][c]: + /\ rr.src = r + /\ pr.src = p + /\ rr.viewNumber = pr.viewNumber + +__matchingViewNumbersAndResults(Q, c) == + \* a (super)quorum of replies with matching view numbers + \* and results + /\ \A r \in Q: + \*/\ \E reply \in cCrtOpReplies[S][c]: reply.src = r + /\ \A p \in Q: \E rr, pr \in cCrtOpReplies[S][c]: + /\ rr.src = r + /\ pr.src = p + /\ rr.viewNumber = pr.viewNumber + /\ rr.res = pr.res + +\* \* IR Client received enough responses to decide +\* \* what to do with the operation +ClientSendFinalize(c) == + /\ cCrtOp[S][c] /= <<>> + /\ \/ \E Q \in Quorums: + \* I. The IR Client got a simple quorum of replies + /\ \A r \in Q: + \E reply \in cCrtOpReplies[S][c]: reply.src = r + + /\ \/ /\ cCrtOp[S][c].type = "Inconsistent" + /\ __matchingViewNumbers(Q, c) + /\ aSuccessful' = aSuccessful \cup + {[mid |-> [cid |-> c, + msgid |-> cMsgCounter[S][c]], + op |-> cCrtOp[S][c]]} + /\ SuccessfulInconsistentOp(c, S, cCrtOp[S][c]) + /\ Send([type |-> "FINALIZE", + id |-> [cid |-> c, msgid |-> cMsgCounter[S][c]], + op |-> cCrtOp[S][c], + v |-> 0]) + /\ UNCHANGED <<cCrtOpToFinalize>> + + \/ /\ cCrtOp[S][c].type = "Consensus" + /\ __matchingViewNumbers(Q, c) + /\ LET res == IF __matchingViewNumbersAndResults(Q, c) + THEN + CHOOSE result \in + {res \in Results: + \E reply \in cCrtOpReplies[S][c]: + /\ reply.src \in Q + /\ reply.res = res}: TRUE + ELSE + Decide(cCrtOpReplies[S][c]) + IN + /\ Send([type |-> "FINALIZE", + id |-> [cid |-> c, msgid |-> cMsgCounter[S][c]], + op |-> cCrtOp[S][c], + res |-> res, + v |-> 0]) + /\ cCrtOpToFinalize' = [cCrtOp EXCEPT ![S][c] = cCrtOp[S][c]] + /\ UNCHANGED <<aSuccessful>> + + \/ \E SQ \in SuperQuorums: + \* II. The IR Client got super quorum of responses + /\ \A r \in SQ: + \E reply \in cCrtOpReplies[S][c]: reply.src = r + /\ cCrtOp[S][c].type = "Consensus" \* only care if consensus op + /\ __matchingViewNumbersAndResults(SQ, c) + /\ LET res == CHOOSE result \in + {res \in Results: + \E reply \in cCrtOpReplies[S][c]: + /\ reply.src \in SQ + /\ reply.res = res}: TRUE + IN + /\ Send([type |-> "FINALIZE", + id |-> [cid |-> c, msgid |-> cMsgCounter[S][c]], + op |-> cCrtOp[S][c], + res |-> res, + v |-> 0]) + /\ aSuccessful' = aSuccessful \cup + {[mid |-> [cid |-> c, + msgid |-> cMsgCounter[S][c]], + op |-> cCrtOp[S][c], + res |-> res]} + /\ SuccessfulConsensusOp(c, S, cCrtOp[S][c], res) + /\ UNCHANGED <<cCrtOpToFinalize>> + /\ cCrtOp' = [cCrtOp EXCEPT ![S][c] = <<>>] + /\ cCrtOpReplies' = [cCrtOpReplies EXCEPT ![S][c] = {}] + /\ UNCHANGED <<cMsgCounter, cState, cCrtOpConfirms, rVars, arRecord, aVisibility, gViewChangesNo>> + +\* \* Client received a confirm +ClientReceiveConfirm(c) == + \E msg \in sentMsg[S]: + /\ msg.type = "CONFIRM" + /\ cCrtOpToFinalize[S][c] /= <<>> + /\ msg.id = [cid |-> c, msgid |-> cMsgCounter[S][c]] \* reply to c's request for crt op + /\ cCrtOpConfirms' = [cCrtOpConfirms EXCEPT ![S][c] = @ \cup + {[viewNumber |-> msg.v, + res |-> msg.res, + src |-> msg.src]}] + /\ UNCHANGED <<cCrtOp, cCrtOpReplies, cCrtOpToFinalize, cMsgCounter, + cState, rVars, aVars, oVars>> + + +\* \* An operation is finalized by a client and result returned to the application +ClientFinalizeOp(c) == + /\ cCrtOpToFinalize[S][c] /= <<>> + /\ \E Q \in Quorums: + \* IR client received a quorum of confirms + /\ \A r \in Q: + \E reply \in cCrtOpConfirms[S][c]: reply.src = r + /\ LET + \* take the result in the biggest view number + reply == CHOOSE reply \in cCrtOpConfirms[S][c]: + ~ \E rep \in cCrtOpConfirms[S][c]: + rep.viewNumber > reply.viewNumber + IN + /\ aSuccessful' = aSuccessful \cup + {[mid |-> [cid |-> c, + msgid |-> cMsgCounter[S][c]], + op |-> cCrtOpToFinalize[S][c], + res |-> reply.res]} + /\ SuccessfulConsensusOp(c, S, cCrtOp[S][c], reply.res) \* respond to app + /\ cCrtOpToFinalize' = [cCrtOpToFinalize EXCEPT ![S][c] = <<>>] + /\ cCrtOpConfirms' = [cCrtOpConfirms EXCEPT ![S][c] = {}] + /\ UNCHANGED <<rVars, cCrtOp, cCrtOpReplies, + cMsgCounter, cState, arRecord, aVisibility, oVars>> + +\* \* Client fails and looses all data +ClientFail(c) == + /\ cState' = [cState EXCEPT ![S][c] = "FAILED"] + /\ cMsgCounter' = [cMsgCounter EXCEPT ![S][c] = 0] + /\ cCrtOp' = [cCrtOp EXCEPT ![S][c] = <<>>] + /\ cCrtOpReplies' = [cCrtOpReplies EXCEPT ![S][c] = {}] + /\ AppClientFail + /\ UNCHANGED <<rVars, aVars, oVars>> + +\* \* Client recovers +\* \* Not implemented yet +ClientRecover(c) == FALSE + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Replica Actions}} ^' *) +(***************************************************************************) + +\* \* Replica sends a reply +ReplicaReceiveRequest(r) == + \E msg \in sentMsg[S]: + /\ msg.type = "PROPOSE" + /\ \* not already replied for this op + ~ (\E rec \in rRecord[S][r]: rec.msgid = msg.id) + /\ \/ /\ msg.op.type = "Inconsistent" + /\ Send([type |-> "REPLY", + id |-> msg.id, + v |-> rCrtView[S][r], + src |-> r]) + /\ rRecord' = [rRecord EXCEPT ![S][r] = + @ \cup {[msgid |-> msg.id, + op |-> msg.op, + status |-> "TENTATIVE"]}] + \/ /\ msg.op.type = "Consensus" + /\ LET res == ExecConsensus(msg.op) + IN + /\ Send([type |-> "REPLY", + id |-> msg.id, + v |-> rCrtView[S][r], + res |-> res, + src |-> r]) + /\ rRecord' = [rRecord EXCEPT ![S][r] = + @ \cup {[msgid |-> msg.id, + op |-> msg.op, + res |-> res, + status |-> "TENTATIVE"]}] + /\ UNCHANGED <<rState, rCrtView, rLastView, rViewOnDisk, + rViewReplies, rNonce, cVars, aVars, gViewChangesNo>> + +\* \* Replica receives a message from an IR Client to finalize an op +\* \* For inconsistent oprations the replica just +\* \* executes the operation. +ReplicaReceiveFinalize(r) == + \E msg \in sentMsg[S]: + /\ msg.type = "FINALIZE" + /\ msg.v >= rCrtView[S][r] + /\ LET recs == {rec \in rRecord[S][r]: \* Must be only 1 record + /\ rec.msgid = msg.id + /\ rec.op = msg.op} + IN + \/ /\ msg.op.type = "Inconsistent" + /\ IF + \* Replica knows of this op + recs /= {} + THEN + IF \A rec \in recs: rec.status /= "FINALIZED" + THEN ExecInconsistent(msg.op) + ELSE TRUE + ELSE + \* Replica didn't hear of this op + ExecInconsistent(msg.op) + /\ rRecord' = [rRecord EXCEPT ![S][r] = (@ \ recs) \cup + {[msgid |-> msg.id, + op |-> msg.op, + status |-> "FINALIZED"]}] + /\ UNCHANGED <<sentMsg>> + \/ /\ msg.op.type = "Consensus" + /\ rRecord' = [rRecord EXCEPT ![S][r] = (@ \ recs) \cup + {[msgid |-> msg.id, + op |-> msg.op, + res |-> msg.res, + status |-> "FINALIZED"]}] + /\ Send([type |-> "CONFIRM", + v |-> rCrtView[S][r], + id |-> msg.id, + op |-> msg.op, + res |-> msg.res, + src |-> r]) + /\ UNCHANGED <<rState, rCrtView, rLastView, rViewReplies, rViewOnDisk, + rNonce, cVars, aVars, gViewChangesNo>> + + +\* \* A recovering replica starts the view change procedure +ReplicaSendDoViewChange(r) == + /\ \/ /\ rState[S][r] = "NORMAL" \/ rState[S][r] = "VIEW-CHANGING" + /\ rCrtView' = [rCrtView EXCEPT ![S][r] = @ + 1] + /\ rViewOnDisk' = [rViewOnDisk EXCEPT ![S][r] = rCrtView[S][r] + 1] + /\ rState' = [rState EXCEPT ![S][r] = "VIEW-CHANGING"] + /\ Send([type |-> "DO-VIEW-CHANGE", + v |-> rCrtView[S][r] + 1, + lv |-> rLastView[S][r], + r |-> rRecord[S][r], + src |-> r, + dst |-> Replicas]) + \/ /\ rState[S][r] = "FAILED" + /\ rState' = [rState EXCEPT ![S][r] = "RECOVERING"] + /\ rCrtView' = [rCrtView EXCEPT ![S][r] = rViewOnDisk[S][r]] + /\ Send([type |-> "DO-VIEW-CHANGE", + v |-> rViewOnDisk[S][r], + lv |-> rLastView[S][r], + r |-> rRecord[S][r], + src |-> r, + dst |-> Replicas \ {x \in Replicas: IsLeader(x, rViewOnDisk[S][r])}]) + /\ UNCHANGED <<rViewOnDisk>> + \/ /\ rState[S][r] = "RECOVERING" + /\ rCrtView' = [rCrtView EXCEPT ![S][r] = @ + 1] + /\ Send([type |-> "DO-VIEW-CHANGE", + v |-> rCrtView[S][r] + 1, + lv |-> rLastView[S][r], + r |-> rRecord[S][r], + src |-> r, + dst |-> Replicas \ {x \in Replicas: IsLeader(x, rCrtView[S][r] + 1)}]) + /\ UNCHANGED <<rViewOnDisk, rState>> + /\ UNCHANGED <<cVars, rLastView, rViewReplies, rRecord, + rNonce, aVars>> + /\ gViewChangesNo[S] < max_vc \* BOUND on number of view changes + /\ gViewChangesNo' = [gViewChangesNo EXCEPT ![S] = @ + 1] + +\* \* Replica received DO-VIEW-CHANGE message +ReplicaReceiveDoViewChange(r) == + /\ \E msg \in sentMsg[S]: + /\ msg.type = "DO-VIEW-CHANGE" + /\ r \in msg.dst + /\ \/ /\ rState[S][r] = "NORMAL" + /\ msg.v > rCrtView[S][r] + \/ /\ rState[S][r] = "VIEW-CHANGING" + /\ msg.v >= rCrtView[S][r] + /\ rState' = [rState EXCEPT ![S][r] = "VIEW-CHANGING"] + \* keep only the one with the higher view (v) + /\ \/ /\ IsLeader(r, msg.v) + /\ LET + existingRecord == {x \in rViewReplies[S][r]: + /\ x.type = "do-view-change" + /\ x.src = msg.src} \* should only be one item in set + IN + IF \A x \in existingRecord: x.v < msg.v + THEN rViewReplies' = [rViewReplies EXCEPT ![S][r] = + (@ \ existingRecord) \cup + {[type |-> "do-view-change", + v |-> msg.v, + lv |-> msg.lv, + r |-> msg.r, + src |-> msg.src]}] + ELSE FALSE + \/ UNCHANGED <<rViewReplies>> + /\ rCrtView' = [rCrtView EXCEPT ![S][r] = msg.v] + /\ rViewOnDisk' = [rViewOnDisk EXCEPT ![S][r] = msg.v] + /\ Send([type |-> "DO-VIEW-CHANGE", + v |-> msg.v, + lv |-> rLastView[S][r], + r |-> rRecord[S][r], + src |-> r, + dst |-> Replicas]) + /\ UNCHANGED <<cVars, rLastView, rRecord, rNonce, + aVars, gViewChangesNo>> + +\* Note: Assume one reply for view change per replica in Q +\* (in ReplicaReceiveDoViewChange we keep only the most recent reply) +ReplicaSendStartView(r) == + /\ \E Q \in Quorums: + /\ \A r1 \in Q: + /\ \A r2 \in Q: \E rr, pr \in rViewReplies[S][r]: + /\ rr.type = "do-view-change" + /\ pr.type = "do-view-change" + /\ rr.src = r1 + /\ pr.src = r2 + /\ rr.v = pr.v + /\ rr.v >= rCrtView[S][r] + \* received at a least a quorum of replies + /\ LET + A == + \* set of all do-view-change replies from Q, + {x \in rViewReplies[S][r]: /\ x.src \in Q + /\ x.type = "do-view-change"} + + B == + \* keep only the replies from the maximum view + {x \in A: \A y \in A: y.lv <= x.lv} + + C == + \* set of all records received in replies in B + UNION {x.r: x \in B} + + recoveredConsensusOps_R == + \* any finalized consensus operation (in at least one record, + \* in the maximum latest view) + {[msgid |-> y.msgid, op |-> y.op, res |-> y.res]: y \in + {x \in C: + /\ x.op.type = "Consensus" + /\ x.status = "FINALIZED"}} + + recoveredConsensusOps_d == + \* any consensus operation found in at least a majority of a Quorum + {[msgid |-> y.msgid, op |-> y.op, res |-> y.res]: y \in + {x \in C: + /\ x.op.type = "Consensus" + /\ x.status = "TENTATIVE" + /\ \E P \in SuperQuorums: + \A replica \in Q \cap P: + \E reply \in B: + /\ reply.src = replica + /\ x \in reply.r}} \ recoveredConsensusOps_R + + recoveredConsensusOps_u == + \* the rest of consensus ops found in at least one record + \* (discard the result) + {[msgid |-> z.msgid, op |-> z.op]: z \in + (({[msgid |-> y.msgid, op |-> y.op, res |-> y.res]: y \in + {x \in C: x.op.type = "Consensus"}} + \ recoveredConsensusOps_d) \ recoveredConsensusOps_R)} + + recoveredInconsistentOps_R == + \* any inconsistent operation found in any received record + {[msgid |-> y.msgid, op |-> y.op]: y \in + {x \in C: x.op.type = "Inconsistent"}} + + mergedRecordInconsistent == + {x \in Merge(recoveredConsensusOps_R + \cup recoveredInconsistentOps_R, + recoveredConsensusOps_d, + recoveredConsensusOps_u): x.op.type = "Inconsistent"} + + mergedRecordConsensus == + {x \in Merge(recoveredConsensusOps_R + \cup recoveredInconsistentOps_R, + recoveredConsensusOps_d, + recoveredConsensusOps_u): x.op.type = "Consensus"} + + masterRecord == + {[msgid |-> x.msgid, + op |-> x.op, + status |-> "FINALIZED"]: + x \in mergedRecordInconsistent} + \cup + {[msgid |-> x.msgid, + op |-> x.op, + res |-> x.res, + status |-> "FINALIZED"]: + x \in mergedRecordConsensus} + + v_new == + \* the one decided by quorum Q + CHOOSE v \in {x.v: x \in A}: TRUE + IN + /\ rRecord' = [rRecord EXCEPT ![S][r] = masterRecord] + /\ Sync(masterRecord) + /\ Send([type |-> "START-VIEW", + v |-> v_new, + r |-> masterRecord, + src |-> r]) + /\ rCrtView' = [rCrtView EXCEPT ![S][r] = v_new] + /\ rLastView' = [rLastView EXCEPT ![S][r] = v_new] + \*/\ Assert(Cardinality(masterRecord) = 0, "Should fail - ReplicaSendStartView") + /\ rState' = [rState EXCEPT ![S][r] = "NORMAL"] + /\ rViewReplies' = [rViewReplies EXCEPT ![S][r] = {}] + /\ UNCHANGED <<rNonce, rViewOnDisk, cVars, aVars, gViewChangesNo>> + +\* \* A replica receives a start view message +ReplicaReceiveStartView(r) == + /\ \E msg \in sentMsg[S]: + /\ msg.type = "START-VIEW" + /\ \/ /\ rState[S][r] = "NORMAL" + /\ msg.v > rCrtView[S][r] + \/ /\ \/ rState[S][r] = "VIEW-CHANGING" + \/ rState[S][r] = "RECOVERING" + /\ msg.v >= rCrtView[S][r] + /\ rCrtView' = [rCrtView EXCEPT ![S][r] = msg.v] + /\ rLastView' = [rLastView EXCEPT ![S][r] = msg.v] + /\ rRecord' = [rRecord EXCEPT ![S][r] = msg.r] + /\ Sync(msg.r) + /\ \* Check if the operations received in the master record + \* must be added to the aSuccessful + LET + successfulOps == {x \in msg.r: \E Q \in Quorums: + \A r1 \in Q: + \/ x \in rRecord[S][r1] + \/ x \in arRecord[S][r1] + \/ r1 = r} + IN + aSuccessful' = aSuccessful \cup + {[mid |-> x.msgid, + op |-> x.op, + res |-> x.res]: + x \in + {y \in successfulOps: + y.op.type = "Consensus"}} + \cup + {[mid |-> x.msgid, + op |-> x.op]: + x \in + { y \in successfulOps: + y.op.type = "Inconsistent"}} + /\ rViewOnDisk' = [rViewOnDisk EXCEPT ![S][r] = msg.v + 1] + /\ rState' = [rState EXCEPT ![S][r] = "NORMAL"] + /\ rViewReplies' = [rViewReplies EXCEPT ![S][r] = {}] + /\ UNCHANGED <<rNonce, cVars, arRecord, aVisibility, oVars>> + +\* \* A replica fails and looses everything except the view number +\* \* The view number has been written to disk +ReplicaFail(r) == + /\ rState' = [rState EXCEPT ![S][r] = "FAILED"] + /\ rRecord' = [rRecord EXCEPT ![S][r] = {}] + /\ arRecord' = [arRecord EXCEPT ![S][r] = rRecord[S][r]] + \* save record only for + \* invariant purposes + /\ rCrtView' = [rCrtView EXCEPT ![S][r] = 0] + /\ rLastView' = [rLastView EXCEPT ![S][r] = 0] + /\ rViewReplies' = [rViewReplies EXCEPT ![S][r] = {}] + /\ UNCHANGED <<rViewOnDisk, rNonce, cVars, aSuccessful, aVisibility, oVars>> + /\ \* We assume less than f replicas fail simultaneously + Cardinality({re \in Replicas: + \/ rState[S][re] = "FAILED" + \/ rState[S][re] = "RECOVERING"}) < f + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{High-Level Actions}} ^' *) +(***************************************************************************) + +ClientAction(c) == + \/ /\ cState[c] = "NORMAL" + /\ \* \/ ClientRequest(c) \* some client tries to replicate commit an operation + \/ ClientReceiveReply(c) \* some client receives a reply from a replica + \/ ClientReceiveConfirm(c) \* some client receives a confirm from a replica + \* \/ ClientFail(c) \* some client fails + \/ ClientSendFinalize(c) \* an operation is successful at some client + \/ ClientFinalizeOp(c) \* an operation was finalized at some client + \/ /\ cState[c] = "FAILED" + /\ \/ ClientRecover(c) + +ReplicaAction(r) == + \/ /\ rState[S][r] = "NORMAL" + /\ \/ ReplicaReceiveRequest(r) \* some replica sends a reply to a PROPOSE msg + \/ ReplicaReceiveFinalize(r) + \/ ReplicaSendDoViewChange(r) + \/ ReplicaReceiveDoViewChange(r) + \/ ReplicaReceiveStartView(r) + \/ ReplicaFail(r) \* some replica fails + \/ /\ rState[S][r] = "FAILED" + /\ \/ ReplicaSendDoViewChange(r) \* start view-change protocol + \/ /\ rState[S][r] = "RECOVERING" + /\ \/ ReplicaSendDoViewChange(r)\* re-start view-change protocol (assume a + \* timeout and still no response from the new leader) + \/ ReplicaReceiveStartView(r) + \/ /\ rState[S][r] = "VIEW-CHANGING" + /\ \/ ReplicaSendDoViewChange(r) + \/ ReplicaReceiveDoViewChange(r) + \/ ReplicaSendStartView(r) + \/ ReplicaReceiveStartView(r) + \/ ReplicaFail(r) + +Next == + \/ \E c \in Clients: ClientAction(c) + \/ \E r \in Replicas: ReplicaAction(r) + \*\/ \* Avoid deadlock by termination + \* (\A i \in 1..Cardinality(Replicas): rLastView[S][i] = max_vc) \/ UNCHANGED <<vars>> + +Spec == Init /\ [] [Next]_vars + +FaultTolerance == + /\ \A successfulOp \in aSuccessful, Q \in Quorums: + (\A r \in Q: rState[S][r] = "NORMAL" \/ rState[S][r] = "VIEW-CHANGING") + => (\E p \in Q: \E rec \in rRecord[S][p]: + /\ successfulOp.mid = rec.msgid + /\ successfulOp.op = rec.op) \* Not necessarily same result + + +============================================================================= +\* Modification History +\* Last modified Fri Aug 28 10:58:02 PDT 2015 by aaasz +\* Created Fri Dec 12 17:42:14 PST 2014 by aaasz diff --git a/proof/TAPIR.tla b/proof/TAPIR.tla new file mode 100644 index 0000000000000000000000000000000000000000..3a6b13d1d0ed2be9d1e977d9dd0bdac95c7b6ed8 --- /dev/null +++ b/proof/TAPIR.tla @@ -0,0 +1,272 @@ +------------------------------- MODULE TAPIR ------------------------------- + +(***************************************************************************) +(* This is a TLA+ specification of the TAPIR algorithm. *) +(***************************************************************************) + +EXTENDS FiniteSets, Naturals, TLC, TLAPS + +Max(S) == IF S = {} THEN 0 ELSE CHOOSE i \in S: \A j \in S: j <= i + +(***************************************************************************) +(* TAPIR constants: *) +(* 1. Shards: function from shard id to set of replica ids in the shard *) +(* 2. Transactions: set of all possible transactions *) +(* 3. nr_shards: number of shards *) +(***************************************************************************) +CONSTANTS Shards, Transactions, NrShards +\* Note: assume unique number ids for replicas + +(***************************************************************************) +(* IR constants & variables (description in the IR module) *) +(***************************************************************************) +CONSTANTS Clients, Quorums, SuperQuorums, + max_vc, max_req, f + +VARIABLES rState, rRecord, rCrtView, rLastView, rViewReplies, rNonce, + rViewOnDisk, + sentMsg, cCrtOp, + cCrtOpToFinalize, cMsgCounter, cCrtOpReplies, cCrtOpConfirms, + cState, aSuccessful, arRecord, aVisibility, gViewChangesNo + +irReplicaVars == <<rState, rRecord, rCrtView, rViewOnDisk, rLastView, + rViewReplies, rNonce>> +irClientVars == <<cCrtOp, \* current operation at a client + cCrtOpReplies, \* current operation replies + cMsgCounter, + cState, + cCrtOpToFinalize, + cCrtOpConfirms>> \* Client variables. +irAppVars == <<aSuccessful, arRecord, aVisibility>> \* Application variables. +irOtherVars == <<sentMsg, gViewChangesNo>> \* Other variables. + +IRMessageId == [cid: Clients, msgid: Nat] + +(***************************************************************************) +(* TAPIR Variables/State: *) +(* 1. State at each replica: *) +(* rPrepareTxns = List of txns this replica is prepared *) +(* to commit *) +(* rTxnsLog = Log of committed and aborted txns in ts order *) +(* rStore = Versioned store *) +(* rBkpTable = Table of txns for which this replica *) +(* is the bkp coordinator *) +(* 2. State of communication medium: *) +(* sentMsg = sent (and duplicate) messages *) +(* 3. State at client: *) +(* cCrtTxn = crt txn requested by the client *) +(* *) +(***************************************************************************) + +\* TAPIR variables & data structures +VARIABLES rPreparedTxns, rStore, rTxnsLogAborted, rTxnsLogCommitted, + rClock, cCrtTxn, cClock + +tapirReplicaVars == <<rPreparedTxns, rStore, rTxnsLogAborted, + rTxnsLogCommitted, + rClock>> +tapirClientVars == <<cCrtTxn, cClock>> + +vars == <<irReplicaVars, irClientVars, irAppVars, irOtherVars, + tapirReplicaVars, tapirClientVars>> + +StoreEntry == [vs: Nat, val: Nat] \* vs = version +Store == [key: Nat, + entries: SUBSET StoreEntry, + latestVs: Nat, + latestVal: Nat] + +TransactionTs == [cid: Clients, clock: Nat] \* Timestamp +ReadSet == [key: Nat, val: Nat, vs: Nat] +WriteSet == [key: Nat, val: Nat] +Transaction == [rSet: SUBSET ReadSet, + wSet: SUBSET WriteSet, + shards: SUBSET Nat] + +TypeOK == + /\ rStore \in [UNION {Shards[i]: i \in 1..NrShards} -> SUBSET Store] + /\ rPreparedTxns \in [UNION {Shards[i]: i \in 1..NrShards} -> SUBSET Transaction] + /\ rTxnsLogAborted \in [UNION {Shards[i]: i \in 1..NrShards} -> SUBSET Transaction] + /\ rTxnsLogCommitted \in [UNION {Shards[i]: i \in 1..NrShards} -> SUBSET Transaction] + + +TAPIRResults == {"Prepare-OK", "Retry", "Prepare-Abstain", "Abort"} +TAPIROpType == {"Prepare", "ABORT", "COMMIT"} +TAPIROpBody == [opType : TAPIROpType, txn: Transaction] + +TAPIRClientFail == TRUE \* state we lose at the app level +TAPIRReplicaFail == TRUE \* state we lose at the app level + +\* TAPIR implementation of IR interface +TAPIRExecInconsistent(op) == TRUE +TAPIRExecConsensus(op) == IF op.type = "Consensus" THEN "Prepare-OK" ELSE "Abort" +TAPIRDecide(results) == "Prepare-OK" + +TAPIRMerge(R, d, u) == R \cup d \cup + {[msgid |-> x.msgid, op |-> x.op, res |-> "Prepare-OK"]: x \in u} + +TAPIRSync(records) == TRUE +TAPIRSuccessfulInconsistentOp(c, S, op) == TRUE +TAPIRSuccessfulConsensusOp(c, S, op, res) == TRUE + + +\* Initialize for all shards +InitIR == + /\ rState = [s \in 1..NrShards |-> [r \in Shards[s] |-> "NORMAL"]] + /\ rRecord = [s \in 1..NrShards |-> [r \in Shards[s] |-> {}]] + /\ rCrtView = [s \in 1..NrShards |-> [r \in Shards[s] |-> 0]] + /\ rViewOnDisk = [s \in 1..NrShards |-> [r \in Shards[s] |-> 0]] + /\ rLastView = [s \in 1..NrShards |-> [r \in Shards[s] |-> 0]] + /\ rViewReplies = [s \in 1..NrShards |-> [r \in Shards[s] |-> {}]] + /\ rNonce = [s \in 1..NrShards |-> [r \in Shards[s] |-> 0]] + /\ sentMsg = [s \in 1..NrShards |-> {}] + /\ cCrtOp = [s \in 1..NrShards |-> [c \in Clients |-> <<>>]] + /\ cCrtOpToFinalize = [s \in 1..NrShards |-> [c \in Clients |-> <<>>]] + /\ cMsgCounter = [s \in 1..NrShards |-> [c \in Clients |-> 0]] + /\ cCrtOpReplies = [s \in 1..NrShards |-> [c \in Clients |-> {}]] + /\ cCrtOpConfirms = [s \in 1..NrShards |-> [c \in Clients |-> {}]] + /\ cState = [c \in Clients |-> "NORMAL"] + /\ aSuccessful = {} + /\ arRecord = [s \in 1..NrShards |-> [r \in Shards[s] |-> {}]] + /\ aVisibility = [s \in 1..NrShards |-> [o \in IRMessageId |-> {}]] + /\ gViewChangesNo = [s \in 1..NrShards |-> 0] + + +\* IR instance per shard TODO: modify replica also +IR(s) == INSTANCE IR_consensus WITH AppClientFail <- TAPIRClientFail, + AppReplicaFail <- TAPIRReplicaFail, + OpBody <- TAPIROpBody, + ExecInconsistent <- TAPIRExecInconsistent, + ExecConsensus <- TAPIRExecConsensus, + Merge <- TAPIRMerge, + Sync <- TAPIRSync, + SuccessfulInconsistentOp <- TAPIRSuccessfulInconsistentOp, + SuccessfulConsensusOp <- TAPIRSuccessfulConsensusOp, + Decide <- TAPIRDecide, + Results <- TAPIRResults, + Replicas <- Shards[s], + Quorums <- Quorums[s], + SuperQuorums <- SuperQuorums[s], + S <- s + +\* TAPIR messages +Message == + [type: {"READ"}, + key: Nat, + dst: UNION Shards] + \cup + [type: {"READ-REPLY"}, + key: Nat, + val: Nat, + vs: Nat, \* version + dst: Clients] + \cup + [type: {"READ-VERSION"}, + key: Nat, + vs: Nat, + dst: UNION Shards] + \cup + [type: {"READ-VERSION-REPLY"}, + key: Nat, + vs: Nat, + dst: Clients] + +InitTAPIR == /\ cCrtTxn = [c \in Clients |-> <<>>] + /\ cClock = [c \in Clients |-> 0] + /\ rPreparedTxns = [s \in 1..NrShards |-> [r \in Shards[s] |-> {}]] + /\ rStore = [r \in UNION {Shards[i]: i \in 1..NrShards} |-> {}] + /\ rTxnsLogAborted = [s \in 1..NrShards |-> [r \in Shards[s] |-> {}]] + /\ rTxnsLogCommitted = [s \in 1..NrShards |-> [r \in Shards[s] |-> {}]] + /\ rClock = [s \in 1..NrShards |-> [r \in Shards[s] |-> 0]] + +Init == InitIR /\ InitTAPIR + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Tapir replica actions}} ^' *) +(***************************************************************************) +\*TAPIRReplicaReceiveRead(r) == TRUE + + +\*TAPIRReplicaAction(r) == +\* \/ /\ rState[r] = "NORMAL" +\* /\ \/ TAPIRReplicaReceiveRead(r) + + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Tapir client actions}} ^' *) +(***************************************************************************) + +TAPIRClientExecuteTxn(c) == + \* first, resolve all reads (read from any replica and get the vs) + \* then send prepares in all shard involved by seting the cCrtOp in the + \* respective IR shard instance + \* TODO: for now just simulate this, pick a transaction from + \* transaction pool, get some versions from the replica + \* stores + /\ cCrtTxn[c] = <<>> + /\ \E t \in Transactions: + LET rSet == {rse \in ReadSet: + /\ \E trse \in t.rSet : rse = trse + /\ LET + r == Max({r \in Shards[(rse.key % NrShards) + 1]: + \E se \in rStore[r]: rse.key = se.key}) + IN + /\ r /= 0 + /\ \E se \in rStore[r]: + /\ rse.key = se.key + /\ rse.val = se.latestVal + /\ rse.vs = se.latestVs + } + shards == {s \in 1..NrShards: + \/ \E trse \in t.rSet: s = (trse.key % NrShards) + 1 + \/ \E twse \in t.wSet: s = (twse.key % NrShards) + 1 } + IN + /\ Cardinality(rSet) = Cardinality(t.rSet) \* found all the reads + /\ cCrtTxn' = [cCrtTxn EXCEPT ![c] = [rSet |-> rSet, + wSet |-> t.wSet, + shards |-> shards]] + /\ UNCHANGED <<irReplicaVars, irClientVars, irOtherVars, irAppVars, + tapirReplicaVars, cClock>> + +TAPIRClientPrepareTxn(c) == + /\ cCrtTxn[c] /= <<>> + /\ \E s \in cCrtTxn[c].shards: \* prepare in shard s + \* - ok if already prepared + /\ IR(s)!ClientRequest(c, [type |-> "Consensus", + body |-> [opType |-> "Prepare", + txn |-> cCrtTxn[c]]]) + /\ UNCHANGED <<irReplicaVars, irAppVars, + cCrtOpReplies, + cCrtOpConfirms, + cCrtOpToFinalize, + gViewChangesNo, + cState, tapirClientVars, tapirReplicaVars>> + +TAPIRClientAction(c) == + \/ /\ cState[c] = "NORMAL" + /\ \/ TAPIRClientExecuteTxn(c) \* for now just simulate this + \* (don't send explicit READ messages) + \/ TAPIRClientPrepareTxn(c) + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{High-Level Actions}} ^' *) +(***************************************************************************) + +Next == + \/ \E c \in Clients: TAPIRClientAction(c) + \/ /\ \E s \in 1..NrShards: IR(s)!Next + /\ UNCHANGED <<tapirClientVars, tapirReplicaVars>> + \/ \* Avoid deadlock by termination + ((\A s \in 1..NrShards: + \A i \in 1..Cardinality(Shards[s]): + rLastView[s][i] = max_vc) /\ UNCHANGED <<vars>>) + +Inv == IR(1)!TypeOK /\ IR(1)!FaultTolerance + +============================================================================= +\* Modification History +\* Last modified Mon Aug 31 12:55:38 PDT 2015 by aaasz +\* Created Sat Jan 31 18:31:52 PST 2015 by aaasz diff --git a/proof/Test.tla b/proof/Test.tla new file mode 100644 index 0000000000000000000000000000000000000000..594eef96051df44ea5a7b4abf046c12db1123b3a --- /dev/null +++ b/proof/Test.tla @@ -0,0 +1,64 @@ +--------------------------------- MODULE Test --------------------------------- +(***************************************************************************) +(* This is a TLA+ specification of the Inconsistent Replication algorithm. *) +(* (And a mechanically-checked proof of its correctness using TLAPS) *) +(***************************************************************************) +EXTENDS Naturals, FiniteSets, TLC + +VARIABLES rViewReplies, recoveredOps + +OpType == {"Inconsistent", "Consensus"} +OpStatus == {"TENTATIVE", "FINALIZED"} +Operations == [type: OpType, body: Nat] + +TypeOK == + /\ rViewReplies \in SUBSET ([lv: Nat, + r: SUBSET ([msgid: Nat, + op: Operations, + res: Nat, + status: OpStatus] + \cup [msgid: Nat, + op: Operations, + status: OpStatus]), + src: Nat]) + +A == + rViewReplies + +B == + \* set of all records received in replies in A + UNION {x.r: x \in A} + +test_recoveredConensusOps_R == + \* any finalized consensus operation (in at least one record, in the maximum + \* latest view) + {x \in B: + /\ x.op.type = "Consensus" + /\ x.status = "FINALIZED" + /\ LET most_updated_reply == + CHOOSE reply \in A: + /\ \E rec \in reply.r: /\ rec.msgid = x.msgid + /\ rec.status = "FINALIZED" + /\ \A rep \in A: + IF \E rec \in rep.r: /\ rec.msgid = x.msgid + /\ rec.status = "FINALIZED" + THEN rep.lv <= reply.lv + ELSE TRUE + IN + x \in most_updated_reply.r} + +Init == + /\ rViewReplies = {[lv |-> 1, r |-> {[msgid |-> 1, op |-> [type |-> "Consensus", body |-> 1], res |-> 1, status |-> "FINALIZED"]}, src |-> 1], + [lv |-> 2, r |-> {[msgid |-> 1, op |-> [type |-> "Consensus", body |-> 1], res |-> 2, status |-> "FINALIZED"]}, src |-> 2], + [lv |-> 3, r |-> {[msgid |-> 1, op |-> [type |-> "Consensus", body |-> 1], res |-> 3, status |-> "FINALIZED"]}, src |-> 3]} + /\ recoveredOps = {} + +Next == + /\ recoveredOps' = test_recoveredConensusOps_R + /\ Assert(Cardinality(recoveredOps) = 0, "Should fail") + /\ UNCHANGED <<rViewReplies>> + +============================================================================= +\* Modification History +\* Last modified Fri Apr 24 14:34:42 PDT 2015 by aaasz +\* Created Fri Dec 12 17:42:14 PST 2014 by aaasz diff --git a/proof/VR.tla b/proof/VR.tla new file mode 100644 index 0000000000000000000000000000000000000000..943bcca36518171d366051fe23de2aad63255ff2 --- /dev/null +++ b/proof/VR.tla @@ -0,0 +1,763 @@ +------------------------ MODULE VR ------------------------------------------ +(***************************************************************************) +(* This is a TLA+ specification of the VR algorithm. *) +(***************************************************************************) +EXTENDS FiniteSets, Naturals, TLC, TLAPS + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Constants}} ^' *) +(***************************************************************************) + +(***************************************************************************) +(* Constant parameters: *) +(* Replicas: the set of all replicas (Replica IDs) *) +(* Clients: the set of all clients (Client IDs) *) +(* Quorums: the set of all quorums *) +(* SuperQuorums: the set of all super quorums *) +(* Results: the set of all possible result types *) +(* OperationBody: the set of all possible operation bodies *) +(* (with arguments, etc. - can be infinite) *) +(* S: shard id of the shard Replicas constitute *) +(* f: maximum number of failures allowed (half of n) *) +(* *) +(* Constants used to bound variables, for model checking (Nat is bounded) *) +(* max_vc: maximum number of View-Changes allowed for each replicas *) +(* max_req: maximum number of op requests performed by clients *) +(***************************************************************************) + +CONSTANTS Replicas, Clients, Quorums, Operations, f, max_req, max_vc, max_c + +ASSUME IsFiniteSet(Replicas) + +ASSUME QuorumAssumption == + /\ Quorums \subseteq SUBSET Replicas + /\ \A Q1, Q2 \in Quorums: Q1 \cap Q2 # {} + +(***************************************************************************) +(* The possible states of a replica and the two types of operations *) +(* currently defined by IR. *) +(***************************************************************************) + +ReplicaState == {"NORMAL", "FAILED", "RECOVERING", "VIEW-CHANGING"} +ClientState == {"NORMAL", "FAILED"} + +(***************************************************************************) +(* Definition of operation space *) +(***************************************************************************) + +Operation == [op: Nat, + c: Nat, + s: Nat] \cup {<<>>} + +(***************************************************************************) +(* Message is defined to be the set of all possible messages *) +(***************************************************************************) + +\* TODO: Assumptions +\* Assume unique message ids +\* Assume no more than f replica failures +Message == + [type: {"REQUEST"}, + op: Nat, + c: Clients, + s: Nat] + \cup [type: {"REPLY"}, + v: Nat, + s: Nat, + x: Nat, + dst: Clients] + \cup + [type: {"PREPARE"}, + v: Nat, + m: Operation, + n: Nat, + k: Nat] + \cup + [type: {"PREPARE-OK"}, + v: Nat, + n: Nat, + src: Replicas] + \cup + [type: {"COMMIT"}, + v: Nat, + k: Nat] + \cup + [type: {"START-VIEW-CHANGE"}, + v: Nat, + src: Replicas] + \cup + [type: {"DO-VIEW-CHANGE"}, + v: Nat, + l: [1..max_req -> Operation], + vp: Nat, + n: Nat, + k: Nat, + src: Replicas, + dst: Replicas] + \cup + [type: {"START-VIEW"}, + v: Nat, + l: [1..max_req -> Operation], + n: Nat, + k: Nat, + src: Replicas] + \cup + [type: {"RECOVERY"}, + x: Nat, \* nonce + src: Replicas] + \cup + [type: {"RECOVERY-RESPONSE"}, + v: Nat, + x: Nat, + l: [1..max_req -> Operation], + n: Nat, + k: Nat, + dst: Replicas, + src: Replicas] + \cup + [type: {"STATE-REQUEST"}, + v: Nat, + n: Nat, + src: Replicas] + \cup + [type: {"STATE-RESPONSE"}, + v: Nat, + l: [1..max_req -> Operation], + n: Nat, + k: Nat, + src: Replicas] + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Variables and State Predicates}} ^' *) +(***************************************************************************) + +(***************************************************************************) +(* Variables: *) +(* 1. State at each replica: *) +(* rStatus = Denotes current replica state. Either: *) +(* - NORMAL (processing operations) *) +(* - VIEW-CHANGING (participating in recovery) *) +(* - RECOVERING (recovering replica) *) +(* rLog = operations log and their results *) +(* rViewNr = current view number *) +(* rOpNr = assigned to the most recently received request *) +(* rCommitNr = the OpNr of the most recently commited op *) +(* rClientTable = for each client, the number of its most recent*) +(* request *) +(* 2. State of communication medium: *) +(* sentMsg = sent (but not yet received) messages *) +(* 3. State at client: *) +(* cCrtOp = crt operation requested by the client *) +(* cReqNr = crt request number *) +(* *) +(***************************************************************************) + +VARIABLES rStatus, rLog, rViewNr, rOpNr, rLastView, rReplies, + rClientTable, rCommitNr, rNonce, rCrtOp, + sentMsg, + cCrtOp, cReqNr, cStatus, + aSuccessful, + gViewChangesNo, + gCrashesNo + +(***************************************************************************) +(* Defining these tuples makes it easier to express which varibles remain *) +(* unchanged *) +(***************************************************************************) + +\* Replica variables. +rVars == <<rStatus, rLog, rViewNr, rOpNr, rLastView, + rClientTable, rCommitNr, rReplies, rNonce, rCrtOp>> +\* Client variables. +cVars == <<cCrtOp, \* current operation at a client + cReqNr, + cStatus>> +\* Application variables +aVars == <<aSuccessful>> \* we'll use them to write invariants +\* Other variables. +oVars == <<sentMsg, gViewChangesNo, gCrashesNo>> +\* All variables. +vars == <<rVars, cVars, aVars, oVars>> + +TypeOK == + /\ rStatus \in [Replicas -> ReplicaState] + /\ rLog \in [Replicas -> [1..max_req -> Operation]] + /\ rViewNr \in [Replicas -> Nat] + /\ rOpNr \in [Replicas -> Nat] + /\ rCommitNr \in [Replicas -> Nat] + /\ rLastView \in [Replicas -> Nat] + /\ rReplies \in [Replicas -> SUBSET ([type: {"prepare-ok"}, + v: Nat, + n: Nat, + src: Replicas] + \cup [type: {"start-view-change"}, + v: Nat, + src: Replicas] + \cup [type: {"do-view-change"}, + v: Nat, + l: [1..max_req -> Operation], + vp: Nat, + n: Nat, + k: Nat, + src: Replicas] + \cup [type: {"recovery-response"}, + v: Nat, + x: Nat, + l: [1..max_req -> Operation], + n: Nat, + k: Nat, + src: Replicas])] + /\ rNonce \in [Replicas -> Nat] + /\ rClientTable \in [Replicas -> [Clients -> Nat]] + /\ rCrtOp \in [Replicas -> Operation] + /\ sentMsg \in SUBSET Message + /\ cCrtOp \in [Clients -> Operation] + /\ cReqNr \in [Clients -> Nat] + /\ cStatus \in [Clients -> ClientState] + /\ aSuccessful \in SUBSET Operation + /\ gViewChangesNo \in Nat + /\ gCrashesNo \in Nat + +Init == + /\ rStatus = [r \in Replicas |-> "NORMAL"] + /\ rLog = [r \in Replicas |-> [i \in 1..max_req |-> <<>>]] + /\ rViewNr = [r \in Replicas |-> 0] + /\ rLastView = [r \in Replicas |-> 0] + /\ rOpNr = [r \in Replicas |-> 0] + /\ rCommitNr = [r \in Replicas |-> 0] + /\ rReplies = [r \in Replicas |-> {}] + /\ rNonce = [r \in Replicas |-> 0] + /\ rClientTable = [r \in Replicas |-> [c \in Clients |-> 0]] + /\ rCrtOp = [r \in Replicas |-> <<>>] + /\ sentMsg = {} + /\ cCrtOp = [c \in Clients |-> <<>>] + /\ cReqNr = [c \in Clients |-> 0] + /\ cStatus = [c \in Clients |-> "NORMAL"] + /\ aSuccessful = {} + /\ gViewChangesNo = 0 + /\ gCrashesNo = 0 + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Actions}} ^' *) +(***************************************************************************) + +Send(m) == sentMsg' = sentMsg \cup {m} +AmLeader(r, v) == r = (v % Cardinality(Replicas)) + 1 +IsLeader(r, v) == AmLeader(r, v) +LeaderOf(v) == CHOOSE x \in Replicas: IsLeader(x, v) +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Client Actions}} ^' *) +(***************************************************************************) + +\* Note: CHOOSE does not introduce nondeterminism (the same value is chosen +\* each time) + +\*Client sends a request +ClientRequest(c) == + \E op \in Operations: + /\ cCrtOp[c] = <<>> \* the client is not waiting for a result + \* of another operation + /\ cReqNr' = [cReqNr EXCEPT ![c] = @ + 1] + /\ cCrtOp' = [cCrtOp EXCEPT ![c] = [op |-> op, + c |-> c, + s |-> cReqNr[c] + 1]] + /\ Send([type |-> "REQUEST", + op |-> op, + c |-> c, + s |-> cReqNr[c] + 1]) + /\ UNCHANGED <<cStatus, rVars, aVars, gViewChangesNo, gCrashesNo>> + /\ cReqNr[c] < max_req \* BOUND the number of requests a client can make + +\*Client received a reply +ClientReceiveReply(c) == + \E msg \in sentMsg: + /\ msg.type = "REPLY" + /\ msg.s = cReqNr[c] \* reply to c's request for crt op + /\ cCrtOp[c] /= <<>> + /\ cCrtOp' = [cCrtOp EXCEPT ![c] = <<>>] + /\ aSuccessful' = aSuccessful \cup {cCrtOp[c]} + /\ UNCHANGED <<cReqNr, cStatus, rVars, oVars>> + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{Replica Actions}} ^' *) +(***************************************************************************) + +(***************************************************************************) +(* Normal Operation protocol *) +(***************************************************************************) + +\* Replica sends prepares +ReplicaReceiveRequest(r) == + /\ r = 1 + /\ AmLeader(r, rViewNr[r]) + /\ \A replica \in {3}: /\ rViewNr[replica] = 0 /\ rStatus[replica] = "NORMAL"\* PRUNE send request only after view-change + /\ \E reply \in rReplies[2]: reply.type="do-view-change" /\ reply.src = replica + /\ rCrtOp[r] = <<>> + /\ \E msg \in sentMsg: + /\ msg.type = "REQUEST" + /\ msg.s > rClientTable[r][msg.c] + /\ LET operation == [op |-> msg.op, + c |-> msg.c, + s |-> msg.s] + IN + /\ rCrtOp' = [rCrtOp EXCEPT ![r] = operation] + /\ rLog' = [rLog EXCEPT ![r][rOpNr[r] + 1] = operation] + /\ Send([type |-> "PREPARE", + v |-> rViewNr[r], + m |-> operation, + n |-> rOpNr[r] + 1, + k |-> rCommitNr[r]]) + /\ rClientTable' = [rClientTable EXCEPT ![r][msg.c] = msg.s] + /\ rOpNr' = [rOpNr EXCEPT ![r] = @ + 1] + /\ UNCHANGED <<rStatus, rLastView, rReplies, + rNonce, rViewNr, rCommitNr, cVars, aVars, gViewChangesNo, gCrashesNo>> + +\* Replica receives a prepare request from the leader +ReplicaReceivePrepare(r) == + /\ r \in {1,3} \* PRUNE + /\ \E msg \in sentMsg: + /\ msg.type = "PREPARE" + /\ \/ /\ msg.v > rViewNr[r] \/ msg.n > rOpNr[r] + 1 \* Need state transfer + \* (miss a prepare or in a lower view) + /\ Send([type |-> "STATE-REQUEST", + v |-> rViewNr[r], + n |-> rOpNr[r], + src |-> r]) + /\ UNCHANGED <<rVars, cVars, aVars, gViewChangesNo>> + \/ /\ msg.v = rViewNr[r] /\ \/ msg.n = rOpNr[r] + 1 + \/ msg.n = rOpNr[r] + /\ rLog' = [rLog EXCEPT ![r][msg.n] = msg.m] + /\ rOpNr' = [rOpNr EXCEPT ![r] = msg.n] + /\ rClientTable' = [rClientTable EXCEPT ![r][msg.m.c] = msg.m.s] + /\ Send([type |-> "PREPARE-OK", + v |-> rViewNr[r], + n |-> msg.n, + src |-> r]) + /\ UNCHANGED <<rCrtOp, rStatus, rViewNr, rLastView, + rReplies, rCommitNr, + rNonce, cVars, aVars, gViewChangesNo, gCrashesNo>> + +ReplicaReceiveStateRequest(r) == + \E msg \in sentMsg: + /\ msg.type = "STATE-REQUEST" + /\ msg.v < rViewNr[r] /\ msg.n < rOpNr[r] + /\ Send([type |-> "STATE-RESPONSE", + v |-> rViewNr[r], + l |-> rLog[r], + n |-> rOpNr[r], + k |-> rCommitNr[r], + src |-> r]) + /\ UNCHANGED <<rVars, cVars, aVars, gViewChangesNo, gCrashesNo>> + +ReplicaReceiveStateResponse(r) == + \E msg \in sentMsg: \* TODO: some more cases to check to merge + \* these - for now don't need, just one operation req + /\ msg.type = "STATE-RESPONSE" + /\ \/ msg.v = rViewNr[r] /\ msg.n > rOpNr[r] + \/ msg.v > rViewNr[r] + /\ rViewNr' = [rViewNr EXCEPT ![r] = msg.v] + /\ rLog' = [rLog EXCEPT ![r] = msg.l] + /\ rOpNr' = [rOpNr EXCEPT ![r] = msg.n] + /\ UNCHANGED <<rClientTable, rCrtOp, rCommitNr, + rStatus, rLastView, rReplies, + rNonce, cVars, aVars, oVars>> + +\* Leader receives a prepare ok +ReplicaReceivePrepareOK(r) == + /\ \E msg \in sentMsg: + /\ msg.type = "PREPARE-OK" + /\ msg.v = rViewNr[r] + /\ msg.n = rOpNr[r] + /\ AmLeader(r, msg.v) + /\ rReplies' = [rReplies EXCEPT ![r] = @ \cup + {[type |-> "prepare-ok", + v |-> msg.v, + n |-> msg.n, + src |-> msg.src + ]}] + /\ UNCHANGED <<rClientTable, rCrtOp, rOpNr, rCommitNr, + rStatus, rLastView, rLog, rViewNr, + rNonce, cVars, aVars, oVars>> + + +\* Leader received enough replies +ReplicaSendReply(r) == + /\ AmLeader(r, rViewNr[r]) + /\ \E Q \in Quorums: + /\ \A rep \in Q: \E reply \in rReplies[r]: + /\ reply.src = rep + /\ reply.type = "prepare-ok" + /\ reply.v = rViewNr[r] + /\ reply.n = rOpNr[r] + /\ Send([type |-> "REPLY", + v |-> rViewNr[r], + s |-> rLog[r][rOpNr[r]].s, + x |-> 1, + dst |-> rLog[r][rOpNr[r]].c]) + /\ UNCHANGED <<rVars, cVars, aVars, gViewChangesNo, gCrashesNo>> + +(***************************************************************************) +(* View-change protocol *) +(***************************************************************************) +\* A replica starts a view change procedure +ReplicaSendStartViewChange(r) == + /\ r \in {2, 3} \* PRUNE just replicas 2 and 3 start a view change + /\ rViewNr' = [rViewNr EXCEPT ![r] = @ + 1] + /\ Send([type |-> "START-VIEW-CHANGE", + v |-> rViewNr[r] + 1, + src |-> r]) + /\ rStatus' = [rStatus EXCEPT ![r] = "VIEW-CHANGING"] + /\ UNCHANGED <<rReplies, rNonce, rClientTable, + rCrtOp, rOpNr, rCommitNr, + rLastView, rLog, cVars, aVars, gCrashesNo>> + /\ gViewChangesNo < max_vc \* BOUND on number of view changes + /\ gViewChangesNo' = gViewChangesNo + 1 + +\* A replica received a message to start view change +ReplicaReceiveStartViewChange(r) == + /\ r \in {2, 3} \* PRUNE just these paricipate in a view change + /\ \E r1 \in {2, 3}: + rStatus[r1] = "NORMAL" \* PRUNE + /\ \E msg \in sentMsg: + /\ msg.type = "START-VIEW-CHANGE" + /\ \/ /\ rStatus[r] = "NORMAL" + /\ msg.v > rViewNr[r] + /\ rStatus' = [rStatus EXCEPT ![r] = "VIEW-CHANGING"] + \/ /\ rStatus[r] = "VIEW-CHANGING" + /\ msg.v >= rViewNr[r] + /\ UNCHANGED <<rStatus>> + /\ rReplies' = [rReplies EXCEPT ![r] = @ \cup + {[type |-> "start-view-change", + v |-> msg.v, + src |-> msg.src]}] + /\ rViewNr' = [rViewNr EXCEPT ![r] = msg.v] + /\ Send([type |-> "START-VIEW-CHANGE", + v |-> msg.v, + src |-> r]) + /\ UNCHANGED <<rClientTable, rCrtOp, rOpNr, rCommitNr, + rNonce, rLog, rLastView, cVars, + aVars, gViewChangesNo, gCrashesNo>> + +\* We received enough view change replies to be able to +\* send a do-view-change +ReplicaSendDoViewChange(r) == + /\ r \in {2,3} \* PRUNE just these participate in a view change + /\ \E Q \in Quorums: + /\ r \in Q + /\ \A rep \in Q: rep = r \/ \E reply \in rReplies[r]: + /\ reply.src = rep + /\ reply.type = "start-view-change" + /\ reply.v = rViewNr[r] + /\ Send([type |-> "DO-VIEW-CHANGE", + v |-> rViewNr[r], + l |-> rLog[r], + vp |-> rLastView[r], + n |-> rOpNr[r], + k |-> rCommitNr[r], + src |-> r, + dst |-> LeaderOf(rViewNr[r])]) + /\ UNCHANGED <<rVars, cVars, aVars, gViewChangesNo, gCrashesNo>> + +\* Replica received DO-VIEW-CHANGE message +ReplicaReceiveDoViewChange(r) == + /\ r = 2 \* PRUNE just the new leader switches view + \* /\ \A rr \in {2,3}: rStatus[rr] = "NORMAL" /\ rNonce[rr] > 0 /\ rViewNr[rr] = 0 + /\ \E msg \in sentMsg: + /\ msg.type = "DO-VIEW-CHANGE" + /\ msg.dst = r + /\ \/ /\ rStatus[r] = "NORMAL" + /\ msg.v > rViewNr[r] + \/ /\ rStatus[r] = "VIEW-CHANGING" + /\ msg.v >= rViewNr[r] + /\ rStatus' = [rStatus EXCEPT ![r] = "VIEW-CHANGING"] + \* keep only the one with the higher view (v) + /\ LET + existingRecord == {x \in rReplies[r]: + /\ x.type = "do-view-change" + /\ x.src = msg.src} \* should only be one item in set + IN + IF \A x \in existingRecord: x.v < msg.v + THEN rReplies' = [rReplies EXCEPT ![r] = + (@ \ existingRecord) \cup + {[type |-> "do-view-change", + v |-> msg.v, + l |-> msg.l, + vp |-> msg.vp, + n |-> msg.n, + k |-> msg.k, + src |-> msg.src]}] + ELSE FALSE + /\ UNCHANGED <<cVars, rClientTable, rCrtOp, rOpNr, rCommitNr, + rViewNr, rLastView, rLog, rNonce, aVars, oVars>> + +\* Note: Assume one reply for view change per replica in Q +\* (in ReplicaReceiveDoViewChange we keep only the most recent reply) +ReplicaSendStartView(r) == + /\ \E Q \in Quorums: + /\ r \in Q + /\ \A r1 \in Q: + /\ \A r2 \in Q: \E rr, pr \in rReplies[r]: + /\ rr.type = "do-view-change" + /\ pr.type = "do-view-change" + /\ rr.src = r1 + /\ pr.src = r2 + /\ rr.v = pr.v + \*/\ rr.v > rLastView[r] + /\ AmLeader(r, rr.v) + \* received at a least a quorum of replies + /\ LET + A == + \* set of all do-view-change replies from Q + {x \in rReplies[r]: /\ x.src \in Q + /\ x.type = "do-view-change"} + + B == + \* set of all do-view-change replies in A that have the biggest vp + {x \in A: \A rep \in A: rep.vp <= x.vp} + + replyWithMostCompleteLog == + \* if multiple replies in B, choose the one with the largest op number + CHOOSE x \in B: \A rep \in B: rep.n <= x.n + + ops(c) == + {x \in Operation: + \E i \in 1..max_req: + /\ replyWithMostCompleteLog.l[i] = x + /\ x /= <<>> + /\ x.c = c} + + v_new == + \* the one decided by quorum Q + CHOOSE v \in {x.v: x \in A}: TRUE + IN + /\ rLog' = [rLog EXCEPT ![r] = replyWithMostCompleteLog.l] + /\ Send([type |-> "START-VIEW", + v |-> v_new, + l |-> replyWithMostCompleteLog.l, + n |-> replyWithMostCompleteLog.n, + k |-> replyWithMostCompleteLog.k, + src |-> r]) + /\ rOpNr' = [rOpNr EXCEPT ![r] = replyWithMostCompleteLog.n] + /\ rCommitNr' = [rCommitNr EXCEPT ![r] = replyWithMostCompleteLog.k] + /\ rViewNr' = [rViewNr EXCEPT ![r] = v_new] + /\ rLastView' = [rLastView EXCEPT ![r] = v_new] + \* Update client table based on the latest op commited from each client + /\ rClientTable' = [rClientTable EXCEPT ![r] = [c \in Clients |-> + IF ops(c) /= {} + THEN CHOOSE s \in {op.s: op \in ops(c)}: + \A op \in ops(c): op.s <= s + ELSE 0]] + /\ rCrtOp' = [rCrtOp EXCEPT ![r] = <<>>] + /\ rStatus' = [rStatus EXCEPT ![r] = "NORMAL"] + /\ rReplies' = [rReplies EXCEPT ![r] = {}] + /\ UNCHANGED <<rNonce, cVars, aVars, gViewChangesNo, gCrashesNo>> + +\*A replica receives a start view message +ReplicaReceiveStartView(r) == + /\ aSuccessful /= {} \* PRUNE possible paths + /\ \E msg \in sentMsg: + /\ msg.type = "START-VIEW" + /\ \/ /\ rStatus[r] = "NORMAL" + /\ msg.v > rViewNr[r] + \/ /\ \/ rStatus[r] = "VIEW-CHANGING" + \/ rStatus[r] = "RECOVERING" + /\ msg.v >= rViewNr[r] + /\ rViewNr' = [rViewNr EXCEPT ![r] = msg.v] + /\ rLastView' = [rLastView EXCEPT ![r] = msg.v] + /\ rLog' = [rLog EXCEPT ![r] = msg.l] + /\ LET ops(c) == + {x \in Operation: + \E i \in 1..max_req: + /\ msg.l[i] = x + /\ x /= <<>> + /\ x.c = c} + IN + rClientTable' = [rClientTable EXCEPT ![r] = [c \in Clients |-> + IF ops(c) /= {} + THEN CHOOSE s \in {op.s: op \in ops(c)}: + \A op \in ops(c): op.s <= s + ELSE 0]] + /\ rStatus' = [rStatus EXCEPT ![r] = "NORMAL"] + /\ rReplies' = [rReplies EXCEPT ![r] = {}] + /\ UNCHANGED <<rCrtOp, rOpNr, rCommitNr, + rNonce, cVars, aVars, oVars>> + +(***************************************************************************) +(* Recovery protocol *) +(***************************************************************************) +\*A replica fails and looses everything +ReplicaFail(r) == + /\ r \in {2,3} \* PRUNE just these are allowed to fail + /\ rNonce[r] < 3 \* BOUND + /\ gCrashesNo < max_c \* BOUND + /\ gCrashesNo' = gCrashesNo + 1 + \*/\ \E msg \in sentMsg: msg.type = "START-VIEW-CHANGE" /\ msg.src = r \* PRUNE + /\ rStatus' = [rStatus EXCEPT ![r] = "FAILED"] + /\ rViewNr' = [rViewNr EXCEPT ![r] = 0] + /\ rReplies' = [rReplies EXCEPT ![r] = {}] + /\ rCrtOp' = [rCrtOp EXCEPT ![r] = <<>>] + /\ rCommitNr' = [rCommitNr EXCEPT ![r] = 0] + /\ rOpNr' = [rOpNr EXCEPT ![r] = 0] + /\ rClientTable' = [rClientTable EXCEPT![r] = [c \in Clients |-> 0]] + /\ rLog' = [rLog EXCEPT![r] = [i \in 1..max_req |-> <<>>]] + /\ UNCHANGED <<rLastView, rNonce, cVars, aSuccessful, sentMsg, gViewChangesNo>> + /\ \* We assume less than f replicas fail simultaneously + Cardinality({re \in Replicas: + \/ rStatus[re] = "FAILED" + \/ rStatus[re] = "RECOVERING"}) < f + +\* Recovery protocol +ReplicaRecover(r) == + /\ Send([type |-> "RECOVERY", + x |-> rNonce[r] + 1, + src |-> r]) + /\ rStatus' = [rStatus EXCEPT ![r] = "RECOVERING"] + /\ rNonce' = [rNonce EXCEPT ![r] = @ + 1] + /\ UNCHANGED <<rLog, rViewNr, rReplies, + rClientTable, rCrtOp, + rLastView, rCommitNr, + rOpNr, + cVars, aVars, + gViewChangesNo, gCrashesNo>> + +ReplicaSendRecoveryResponse(r) == + /\ rViewNr[r] = 0 + /\ \E msg \in sentMsg: + /\ msg.type = "RECOVERY" + \* TODO: send nil if not leader, + \* does not really matter for our purposes + \* right now + /\ Send([type |-> "RECOVERY-RESPONSE", + v |-> rViewNr[r], + x |-> msg.x, + l |-> rLog[r], + n |-> rOpNr[r], + k |-> rCommitNr[r], + src |-> r, + dst |-> msg.src]) + /\ UNCHANGED <<rVars, cVars, aVars, gViewChangesNo, gCrashesNo>> + +ReplicaReceiveRecoveryResponse(r) == + \E msg \in sentMsg: + /\ msg.type = "RECOVERY-RESPONSE" + /\ msg.x = rNonce[r] + /\ msg.dst = r + /\ rReplies' = [rReplies EXCEPT ![r] = @ \cup + {[type |-> "recovery-response", + v |-> msg.v, + x |-> msg.x, + l |-> msg.l, + n |-> msg.n, + k |-> msg.k, + src |-> msg.src]}] + /\ UNCHANGED <<rStatus, rLastView, rLog, rViewNr, + rClientTable, rCrtOp, rOpNr, rCommitNr, + rNonce, cVars, aVars, oVars>> + + +ReplicaFinishRecovery(r) == + /\ \E Q \in Quorums: + /\ \A rep \in Q: \E reply \in rReplies[r]: + /\ reply.type = "recovery-response" + /\ reply.src = rep + /\ reply.x = rNonce[r] + \* received at a least a quorum of replies + /\ LET + A == + \* set of all recovery-response replies from Q + {x \in rReplies[r]: /\ x.src \in Q + /\ x.type = "recovery-response" + /\ x.x = rNonce[r]} + + B == + \* set of all recovery-response replies in A from the biggest view + {x \in A: \A rep \in A: rep.v <= x.v} + + leaderReply == + \* reply from leader + IF ~ \E x \in B: IsLeader(x.src, x.v) + THEN <<>> + ELSE CHOOSE x \in B: IsLeader(x.src, x.v) + + IN + /\ leaderReply /= <<>> + /\ Assert(leaderReply.v = 0, "Assert that we recover just in view 0") + /\ rLog' = [rLog EXCEPT ![r] = leaderReply.l] + /\ rOpNr' = [rOpNr EXCEPT ![r] = leaderReply.n] + /\ rViewNr' = [rViewNr EXCEPT ![r] = leaderReply.v] + /\ rLastView' = [rLastView EXCEPT ![r] = leaderReply.v] + \* TODO: rClientTable + /\ rStatus' = [rStatus EXCEPT ![r] = "NORMAL"] + /\ rReplies' = [rReplies EXCEPT ![r] = {}] + /\ UNCHANGED <<rNonce, cVars, aVars, gViewChangesNo, gCrashesNo, sentMsg, rClientTable, + rCrtOp, rCommitNr>> + +----------------------------------------------------------------------------- +(***************************************************************************) +(* `^ \center \large{\textbf{High-Level Actions}} ^' *) +(***************************************************************************) + +ClientAction(c) == + \/ /\ cStatus[c] = "NORMAL" + /\ \/ ClientRequest(c) \* some client tries to replicate commit an operation + \/ ClientReceiveReply(c) \* some client receives a reply from a replica + \*\/ ClientFail(c) \* some client fails + \/ /\ cStatus[c] = "FAILED" + \*/\ \/ ClientRecover(c) + +ReplicaAction(r) == + \/ /\ rStatus[r] = "NORMAL" + /\ \/ ReplicaReceiveRequest(r) + \/ ReplicaSendReply(r) + \/ ReplicaReceivePrepare(r) + \/ ReplicaReceivePrepareOK(r) + \/ ReplicaSendStartViewChange(r) + \/ ReplicaReceiveStartViewChange(r) + \/ ReplicaSendDoViewChange(r) + \/ ReplicaReceiveDoViewChange(r) + \/ ReplicaSendStartView(r) + \/ ReplicaReceiveStartView(r) + \/ ReplicaSendRecoveryResponse(r) + \/ ReplicaReceiveStateRequest(r) + \/ ReplicaReceiveStateResponse(r) + \*\/ ReplicaFail(r) \* some replica fails + \/ /\ rStatus[r] = "FAILED" + /\ \/ ReplicaRecover(r) \* start view-change protocol + \/ /\ rStatus[r] = "RECOVERING" + /\ \/ ReplicaReceiveRecoveryResponse(r) \* Replica received a + \* recovery response + \/ ReplicaFinishRecovery(r) + \/ /\ rStatus[r] = "VIEW-CHANGING" + /\ \/ ReplicaReceiveStartViewChange(r) + \/ ReplicaSendDoViewChange(r) + \/ ReplicaReceiveDoViewChange(r) + \/ ReplicaSendStartView(r) + \/ ReplicaReceiveStartView(r) + \/ ReplicaFail(r) + +Next == + \/ \E c \in Clients: ClientAction(c) + \/ \E r \in Replicas: ReplicaAction(r) + \/ \* Avoid deadlock by termination + (\A i \in 1..Cardinality(Replicas): rLastView[i] = max_vc) /\ UNCHANGED <<vars>> + +Spec == Init /\ [] [Next]_vars + +FaultTolerance == + /\ \A successfulOp \in aSuccessful, Q \in Quorums: + (\A r \in Q: rStatus[r] = "NORMAL" \/ rStatus[r] = "VIEW-CHANGING") + => (\E r \in Q: \E i \in 1..max_req: rLog[r][i] = successfulOp) + +Inv == FaultTolerance + +Inv2 == aSuccessful = {} + +============================================================================= +\* Modification History +\* Last modified Sat Aug 01 13:57:28 PDT 2015 by aaasz +\* Created Fri Dec 12 17:42:14 PST 2014 by aaasz diff --git a/proof/VR_MC_constants b/proof/VR_MC_constants new file mode 100644 index 0000000000000000000000000000000000000000..8fbe813f0b62ec24e224a374b5928a16c6c4969b --- /dev/null +++ b/proof/VR_MC_constants @@ -0,0 +1,8 @@ +Quorums <- {{1,2}, {1,3}, {2,3}} +Clients <- {1} +max_req <- 1 +f <- 1 +max_vc <- 3 +Operations <- {1} +Replicas <- {1,2,3} +max_c <- 3 diff --git a/replication/ir/client.cc b/replication/ir/client.cc index 7825794b19f6455da7e61465780ecbbc1780eaf1..6e38ae72704565af2baed14830aff1440c81caa6 100644 --- a/replication/ir/client.cc +++ b/replication/ir/client.cc @@ -80,10 +80,10 @@ IRClient::InvokeInconsistent(const string &request, }); PendingInconsistentRequest *req = new PendingInconsistentRequest(request, - reqId, - continuation, - timer, - config.QuorumSize()); + reqId, + continuation, + timer, + config.QuorumSize()); pendingReqs[reqId] = req; SendInconsistent(req); } @@ -100,8 +100,8 @@ IRClient::SendInconsistent(const PendingInconsistentRequest *req) req->timer->Reset(); } else { Warning("Could not send inconsistent request to replicas"); - pendingReqs.erase(req->clientReqId); - delete req; + pendingReqs.erase(req->clientReqId); + delete req; } } @@ -313,40 +313,38 @@ IRClient::HandleInconsistentReply(const TransportAddress &remote, return; } - PendingInconsistentRequest *req = static_cast<PendingInconsistentRequest *>(it->second); + PendingInconsistentRequest *req = + static_cast<PendingInconsistentRequest *>(it->second); // Make sure the dynamic cast worked ASSERT(req != NULL); - Debug("Client received reply: %lu %i", reqId, req->inconsistentReplyQuorum.NumRequired()); + Debug("Client received reply: %lu %i", reqId, + req->inconsistentReplyQuorum.NumRequired()); // Record replies viewstamp_t vs = { msg.view(), reqId }; if (req->inconsistentReplyQuorum.AddAndCheckForQuorum(vs, msg.replicaidx(), msg)) { // If all quorum received, then send finalize and return to client - - req->timer->Stop(); - delete req->timer; - req->timer = new Timeout(transport, 500, [this, reqId]() { - ResendConfirmation(reqId, false); - }); - - // asynchronously send the finalize message - proto::FinalizeInconsistentMessage response; - *(response.mutable_opid()) = msg.opid(); - - if (transport->SendMessageToAll(this, response)) { - req->timer->Start(); - } else { - Warning("Could not send finalize message to replicas"); - pendingReqs.erase(it); - delete req; - return; - } - // Return to client if (!req->continuationInvoked) { + req->timer->Stop(); + delete req->timer; + req->timer = new Timeout(transport, 500, [this, reqId]() { + ResendConfirmation(reqId, false); + }); + + // asynchronously send the finalize message + proto::FinalizeInconsistentMessage response; + *(response.mutable_opid()) = msg.opid(); + + if (transport->SendMessageToAll(this, response)) { + req->timer->Start(); + } else { + Warning("Could not send finalize message to replicas"); + } + req->continuation(req->request, ""); - req->continuationInvoked = true; + req->continuationInvoked = true; } } } @@ -427,7 +425,7 @@ IRClient::HandleConfirm(const TransportAddress &remote, uint64_t reqId = msg.opid().clientreqid(); auto it = pendingReqs.find(reqId); if (it == pendingReqs.end()) { - Debug("Received reply when no request was pending"); + // ignore, we weren't waiting for the confirmation return; } @@ -436,12 +434,12 @@ IRClient::HandleConfirm(const TransportAddress &remote, viewstamp_t vs = { msg.view(), reqId }; if (req->confirmQuorum.AddAndCheckForQuorum(vs, msg.replicaidx(), msg)) { req->timer->Stop(); - pendingReqs.erase(it); - if (!req->continuationInvoked) { - // Return to client - PendingConsensusRequest *r2 = static_cast<PendingConsensusRequest *>(req); - r2->continuation(r2->request, r2->decideResult); - } + pendingReqs.erase(it); + if (!req->continuationInvoked) { + // Return to client + PendingConsensusRequest *r2 = static_cast<PendingConsensusRequest *>(req); + r2->continuation(r2->request, r2->decideResult); + } delete req; } } diff --git a/replication/ir/replica.cc b/replication/ir/replica.cc index 411669232b2bb11ae141f490db37877225efa110..0d41208dc7cf85d303025e4eb6fa31803421f650 100644 --- a/replication/ir/replica.cc +++ b/replication/ir/replica.cc @@ -113,7 +113,7 @@ IRReplica::HandleFinalizeInconsistent(const TransportAddress &remote, // Check record for the request RecordEntry *entry = record.Find(opid); - if (entry != NULL) { + if (entry != NULL && entry->state == RECORD_STATE_TENTATIVE) { // Mark entry as finalized record.SetStatus(opid, RECORD_STATE_FINALIZED); diff --git a/replication/vr/replica.cc b/replication/vr/replica.cc index 09ecf24d6f7d44157f6950d3df31b28e23d757fe..67d11d14642b53da2e1d6d74086bcd97301d9060 100644 --- a/replication/vr/replica.cc +++ b/replication/vr/replica.cc @@ -513,7 +513,8 @@ VRReplica::HandleUnloggedRequest(const TransportAddress &remote, Debug("Received unlogged request %s", (char *)msg.req().op().c_str()); ExecuteUnlogged(msg.req(), reply); - + reply.set_clientreqid(msg.req().clientreqid()); + if (!(transport->SendMessage(this, remote, reply))) Warning("Failed to send reply message"); } diff --git a/store/common/backend/kvstore.h b/store/common/backend/kvstore.h index 9bdae9e1f605d68f2e0dbb9f2370a8cfa1c7fe09..225793c80e4b73ec484651f6b37859742a109a9f 100644 --- a/store/common/backend/kvstore.h +++ b/store/common/backend/kvstore.h @@ -33,7 +33,7 @@ #include "lib/assert.h" #include "lib/message.h" - +#include <string> #include <unordered_map> #include <list> diff --git a/store/common/backend/tests/Rules.mk b/store/common/backend/tests/Rules.mk index e53ac75d272b9ecf30fa25939f9687c9289aff35..2219be16c8a9610dc81aa27462186655b7d35e2b 100644 --- a/store/common/backend/tests/Rules.mk +++ b/store/common/backend/tests/Rules.mk @@ -8,14 +8,14 @@ GTEST_SRCS += $(addprefix $(d), \ versionstore-test.cc \ lockserver-test.cc) -$(d)kvstore-test: $(o)kvstore-test.o $(LIB-transport) $(LIB-common) $(LIB-backend) $(GTEST_MAIN) +$(d)kvstore-test: $(o)kvstore-test.o $(LIB-transport) $(LIB-store-common) $(LIB-store-backend) $(GTEST_MAIN) TEST_BINS += $(d)kvstore-test -$(d)versionstore-test: $(o)versionstore-test.o $(LIB-transport) $(LIB-common) $(LIB-backend) $(GTEST_MAIN) +$(d)versionstore-test: $(o)versionstore-test.o $(LIB-transport) $(LIB-store-common) $(LIB-store-backend) $(GTEST_MAIN) TEST_BINS += $(d)versionstore-test -$(d)lockserver-test: $(o)lockserver-test.o $(LIB-transport) $(LIB-common) $(LIB-backend) $(GTEST_MAIN) +$(d)lockserver-test: $(o)lockserver-test.o $(LIB-transport) $(LIB-store-common) $(LIB-store-backend) $(GTEST_MAIN) TEST_BINS += $(d)lockserver-test diff --git a/test-client/Rules.mk b/test-client/Rules.mk deleted file mode 100644 index 55bcdd026b419643c9c3565a40e2b50f0e3cd8e5..0000000000000000000000000000000000000000 --- a/test-client/Rules.mk +++ /dev/null @@ -1,9 +0,0 @@ -d := $(dir $(lastword $(MAKEFILE_LIST))) - -SRCS += $(addprefix $(d), test-client.cc) - -PROTOS += $(addprefix $(d), test-client-proto.proto) - -$(d)test-client: $(LIB-message) $(LIB-tcptransport) $(LIB-udptransport) $(o)test-client-proto.o $(o)test-client.o - -BINS += $(d)test-client diff --git a/test-client/test-client-proto.proto b/test-client/test-client-proto.proto deleted file mode 100644 index 1895637398031dbd07f12291bc59d04ef072b3d3..0000000000000000000000000000000000000000 --- a/test-client/test-client-proto.proto +++ /dev/null @@ -1,5 +0,0 @@ -syntax = "proto2"; - -message TestMessage { - required int32 status = 1; -} diff --git a/test-client/test-client.cc b/test-client/test-client.cc deleted file mode 100644 index 6dfa02ad13c9cfaaeb7852faf81e4be11ac52dbd..0000000000000000000000000000000000000000 --- a/test-client/test-client.cc +++ /dev/null @@ -1,96 +0,0 @@ -#include "lib/message.h" -#include "lib/configuration.h" -#include "lib/udptransport.h" -#include "lib/tcptransport.h" -#include "test-client/test-client-proto.pb.h" - -#include <thread> -#include <fstream> -#include <sstream> - -class Worker : public TransportReceiver -{ -public: - Worker(const string configPath, int id) - : id(id), n(0), transport(0.0, 0.0, 0, false) - { - std::ifstream configStream(configPath, std::ifstream::in); - - fprintf(stderr, "Reading %s %d\n", configPath.c_str()); - - if (configStream.fail()) { - Debug("Unable to read configuration file: %s\n", configPath.c_str()); - } - - config = new transport::Configuration(configStream); - - transport.Register(this, *config, -1); - - transport.Timer(100, [=]() { - Debug("Scheduling SendMessage"); - this->SendMessage(); - }); - } - - ~Worker() {} - - void Run() - { - transport.Run(); - } - - void SendMessage() - { - n++; - if (n > 10) { - transport.Stop(); - return; - } - - Debug("Sending Message %d %d", id, n); - - TestMessage msg; - msg.set_status(n); - - transport.Timer(1000, [=]() { - if (!transport.SendMessageToReplica(this, 0, msg)) { - Debug("Unable to send request"); - } - - this->SendMessage(); - }); - } - - void ReceiveMessage(const TransportAddress &remote, const string &type, const string &data) - { - Debug("Received reply type: %s", type.c_str()); - } - -private: - int id, n; - UDPTransport transport; - //TCPTransport transport; - transport::Configuration *config; -}; - -void test_thread(int id) -{ - Worker *w; - std::stringstream buf; - buf << "test" << id; - - w = new Worker(buf.str(), id); - - w->Run(); -} - -int main() -{ - std::thread t1(test_thread, 1); - std::thread t2(test_thread, 2); - - t1.join(); - t2.join(); - - return 0; -} diff --git a/test-client/test1 b/test-client/test1 deleted file mode 100644 index 76c2c99a234e9878b127dd801fec0b087447b48d..0000000000000000000000000000000000000000 --- a/test-client/test1 +++ /dev/null @@ -1,4 +0,0 @@ -f 1 -replica localhost:51729 -replica localhost:51730 -replica localhost:51731 diff --git a/test-client/test2 b/test-client/test2 deleted file mode 100644 index 76c2c99a234e9878b127dd801fec0b087447b48d..0000000000000000000000000000000000000000 --- a/test-client/test2 +++ /dev/null @@ -1,4 +0,0 @@ -f 1 -replica localhost:51729 -replica localhost:51730 -replica localhost:51731