update

src/shardkv/client.go (new file, 137 lines)
@@ -0,0 +1,137 @@

package shardkv

//
// client code to talk to a sharded key/value service.
//
// the client first talks to the shardctrler to find out
// the assignment of shards (keys) to groups, and then
// talks to the group that holds the key's shard.
//

import "6.824/labrpc"
import "crypto/rand"
import "math/big"
import "6.824/shardctrler"
import "time"

//
// which shard is a key in?
// please use this function,
// and please do not change it.
//
func key2shard(key string) int {
    shard := 0
    if len(key) > 0 {
        shard = int(key[0])
    }
    shard %= shardctrler.NShards
    return shard
}
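
// For example, assuming the usual shardctrler.NShards == 10 (the tests'
// "% 10" arithmetic suggests this value): key2shard("0") == int('0') % 10
// == 48 % 10 == 8, and key2shard("7") == 55 % 10 == 5. Keys that share a
// first byte always map to the same shard.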

func nrand() int64 {
    max := big.NewInt(int64(1) << 62)
    bigx, _ := rand.Int(rand.Reader, max)
    x := bigx.Int64()
    return x
}

type Clerk struct {
    sm       *shardctrler.Clerk
    config   shardctrler.Config
    make_end func(string) *labrpc.ClientEnd
    // You will have to modify this struct.
}
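
// A minimal sketch of the bookkeeping a solution typically adds to Clerk
// (hypothetical names, not part of the handout): a unique client id drawn
// from nrand() plus a per-request sequence number, so servers can discard
// duplicate requests after retries, e.g.
//
//	clientId int64 // chosen with nrand() in MakeClerk
//	seq      int64 // incremented before each Get/Put/Append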

//
// the tester calls MakeClerk.
//
// ctrlers[] is needed to call shardctrler.MakeClerk().
//
// make_end(servername) turns a server name from a
// Config.Groups[gid][i] into a labrpc.ClientEnd on which you can
// send RPCs.
//
func MakeClerk(ctrlers []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *Clerk {
    ck := new(Clerk)
    ck.sm = shardctrler.MakeClerk(ctrlers)
    ck.make_end = make_end
    // You'll have to add code here.
    return ck
}

//
// fetch the current value for a key.
// returns "" if the key does not exist.
// keeps trying forever in the face of all other errors.
// You will have to modify this function.
//
func (ck *Clerk) Get(key string) string {
    args := GetArgs{}
    args.Key = key

    for {
        shard := key2shard(key)
        gid := ck.config.Shards[shard]
        if servers, ok := ck.config.Groups[gid]; ok {
            // try each server for the shard.
            for si := 0; si < len(servers); si++ {
                srv := ck.make_end(servers[si])
                var reply GetReply
                ok := srv.Call("ShardKV.Get", &args, &reply)
                if ok && (reply.Err == OK || reply.Err == ErrNoKey) {
                    return reply.Value
                }
                if ok && (reply.Err == ErrWrongGroup) {
                    break
                }
                // ... not ok, or ErrWrongLeader
            }
        }
        time.Sleep(100 * time.Millisecond)
        // ask the controller for the latest configuration.
        ck.config = ck.sm.Query(-1)
    }

    return ""
}

//
// shared by Put and Append.
// You will have to modify this function.
//
func (ck *Clerk) PutAppend(key string, value string, op string) {
    args := PutAppendArgs{}
    args.Key = key
    args.Value = value
    args.Op = op

    for {
        shard := key2shard(key)
        gid := ck.config.Shards[shard]
        if servers, ok := ck.config.Groups[gid]; ok {
            for si := 0; si < len(servers); si++ {
                srv := ck.make_end(servers[si])
                var reply PutAppendReply
                ok := srv.Call("ShardKV.PutAppend", &args, &reply)
                if ok && reply.Err == OK {
                    return
                }
                if ok && reply.Err == ErrWrongGroup {
                    break
                }
                // ... not ok, or ErrWrongLeader
            }
        }
        time.Sleep(100 * time.Millisecond)
        // ask the controller for the latest configuration.
        ck.config = ck.sm.Query(-1)
    }
}

func (ck *Clerk) Put(key string, value string) {
    ck.PutAppend(key, value, "Put")
}
func (ck *Clerk) Append(key string, value string) {
    ck.PutAppend(key, value, "Append")
}

src/shardkv/common.go (new file, 44 lines)
@@ -0,0 +1,44 @@

package shardkv

//
// Sharded key/value server.
// Lots of replica groups, each running op-at-a-time Raft.
// Shardctrler decides which group serves each shard.
// Shardctrler may change shard assignment from time to time.
//
// You will have to modify these definitions.
//

const (
    OK             = "OK"
    ErrNoKey       = "ErrNoKey"
    ErrWrongGroup  = "ErrWrongGroup"
    ErrWrongLeader = "ErrWrongLeader"
)

type Err string

// Put or Append
type PutAppendArgs struct {
    // You'll have to add definitions here.
    Key   string
    Value string
    Op    string // "Put" or "Append"
    // You'll have to add definitions here.
    // Field names must start with capital letters,
    // otherwise RPC will break.
}
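
// For example, the duplicate-detection scheme sketched in client.go would
// add exported (capitalized) fields here (hypothetical names, not part of
// the handout):
//
//	ClientId int64
//	Seq      int64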

type PutAppendReply struct {
    Err Err
}

type GetArgs struct {
    Key string
    // You'll have to add definitions here.
}

type GetReply struct {
    Err   Err
    Value string
}

src/shardkv/config.go (new file, 382 lines)
@@ -0,0 +1,382 @@

package shardkv

import "6.824/shardctrler"
import "6.824/labrpc"
import "testing"
import "os"

// import "log"
import crand "crypto/rand"
import "math/big"
import "math/rand"
import "encoding/base64"
import "sync"
import "runtime"
import "6.824/raft"
import "strconv"
import "fmt"
import "time"

func randstring(n int) string {
    b := make([]byte, 2*n)
    crand.Read(b)
    s := base64.URLEncoding.EncodeToString(b)
    return s[0:n]
}

func makeSeed() int64 {
    max := big.NewInt(int64(1) << 62)
    bigx, _ := crand.Int(crand.Reader, max)
    x := bigx.Int64()
    return x
}

// Randomize server handles
func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
    sa := make([]*labrpc.ClientEnd, len(kvh))
    copy(sa, kvh)
    for i := range sa {
        j := rand.Intn(i + 1)
        sa[i], sa[j] = sa[j], sa[i]
    }
    return sa
}

type group struct {
    gid       int
    servers   []*ShardKV
    saved     []*raft.Persister
    endnames  [][]string
    mendnames [][]string
}

type config struct {
    mu    sync.Mutex
    t     *testing.T
    net   *labrpc.Network
    start time.Time // time at which make_config() was called

    nctrlers      int
    ctrlerservers []*shardctrler.ShardCtrler
    mck           *shardctrler.Clerk

    ngroups int
    n       int // servers per k/v group
    groups  []*group

    clerks       map[*Clerk][]string
    nextClientId int
    maxraftstate int
}

func (cfg *config) checkTimeout() {
    // enforce a two minute real-time limit on each test
    if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
        cfg.t.Fatal("test took longer than 120 seconds")
    }
}

func (cfg *config) cleanup() {
    for gi := 0; gi < cfg.ngroups; gi++ {
        cfg.ShutdownGroup(gi)
    }
    for i := 0; i < cfg.nctrlers; i++ {
        cfg.ctrlerservers[i].Kill()
    }
    cfg.net.Cleanup()
    cfg.checkTimeout()
}

// check that no server's log is too big.
func (cfg *config) checklogs() {
    for gi := 0; gi < cfg.ngroups; gi++ {
        for i := 0; i < cfg.n; i++ {
            raft := cfg.groups[gi].saved[i].RaftStateSize()
            snap := len(cfg.groups[gi].saved[i].ReadSnapshot())
            if cfg.maxraftstate >= 0 && raft > 8*cfg.maxraftstate {
                cfg.t.Fatalf("persister.RaftStateSize() %v, but maxraftstate %v",
                    raft, cfg.maxraftstate)
            }
            if cfg.maxraftstate < 0 && snap > 0 {
                cfg.t.Fatalf("maxraftstate is -1, but snapshot is non-empty!")
            }
        }
    }
}

// controller server name for labrpc.
func (cfg *config) ctrlername(i int) string {
    return "ctrler" + strconv.Itoa(i)
}

// shard server name for labrpc.
// i'th server of group gid.
func (cfg *config) servername(gid int, i int) string {
    return "server-" + strconv.Itoa(gid) + "-" + strconv.Itoa(i)
}

func (cfg *config) makeClient() *Clerk {
    cfg.mu.Lock()
    defer cfg.mu.Unlock()

    // ClientEnds to talk to controller service.
    ends := make([]*labrpc.ClientEnd, cfg.nctrlers)
    endnames := make([]string, cfg.n)
    for j := 0; j < cfg.nctrlers; j++ {
        endnames[j] = randstring(20)
        ends[j] = cfg.net.MakeEnd(endnames[j])
        cfg.net.Connect(endnames[j], cfg.ctrlername(j))
        cfg.net.Enable(endnames[j], true)
    }

    ck := MakeClerk(ends, func(servername string) *labrpc.ClientEnd {
        name := randstring(20)
        end := cfg.net.MakeEnd(name)
        cfg.net.Connect(name, servername)
        cfg.net.Enable(name, true)
        return end
    })
    cfg.clerks[ck] = endnames
    cfg.nextClientId++
    return ck
}

func (cfg *config) deleteClient(ck *Clerk) {
    cfg.mu.Lock()
    defer cfg.mu.Unlock()

    v := cfg.clerks[ck]
    for i := 0; i < len(v); i++ {
        os.Remove(v[i])
    }
    delete(cfg.clerks, ck)
}

// Shutdown i'th server of gi'th group, by isolating it
func (cfg *config) ShutdownServer(gi int, i int) {
    cfg.mu.Lock()
    defer cfg.mu.Unlock()

    gg := cfg.groups[gi]

    // prevent this server from sending
    for j := 0; j < len(gg.servers); j++ {
        name := gg.endnames[i][j]
        cfg.net.Enable(name, false)
    }
    for j := 0; j < len(gg.mendnames[i]); j++ {
        name := gg.mendnames[i][j]
        cfg.net.Enable(name, false)
    }

    // disable client connections to the server.
    // it's important to do this before creating
    // the new Persister in saved[i], to avoid
    // the possibility of the server returning a
    // positive reply to an Append but persisting
    // the result in the superseded Persister.
    cfg.net.DeleteServer(cfg.servername(gg.gid, i))

    // a fresh persister, in case old instance
    // continues to update the Persister.
    // but copy old persister's content so that we always
    // pass Make() the last persisted state.
    if gg.saved[i] != nil {
        gg.saved[i] = gg.saved[i].Copy()
    }

    kv := gg.servers[i]
    if kv != nil {
        cfg.mu.Unlock()
        kv.Kill()
        cfg.mu.Lock()
        gg.servers[i] = nil
    }
}

func (cfg *config) ShutdownGroup(gi int) {
    for i := 0; i < cfg.n; i++ {
        cfg.ShutdownServer(gi, i)
    }
}

// start i'th server in gi'th group
func (cfg *config) StartServer(gi int, i int) {
    cfg.mu.Lock()

    gg := cfg.groups[gi]

    // a fresh set of outgoing ClientEnd names
    // to talk to other servers in this group.
    gg.endnames[i] = make([]string, cfg.n)
    for j := 0; j < cfg.n; j++ {
        gg.endnames[i][j] = randstring(20)
    }

    // and the connections to other servers in this group.
    ends := make([]*labrpc.ClientEnd, cfg.n)
    for j := 0; j < cfg.n; j++ {
        ends[j] = cfg.net.MakeEnd(gg.endnames[i][j])
        cfg.net.Connect(gg.endnames[i][j], cfg.servername(gg.gid, j))
        cfg.net.Enable(gg.endnames[i][j], true)
    }

    // ends to talk to shardctrler service
    mends := make([]*labrpc.ClientEnd, cfg.nctrlers)
    gg.mendnames[i] = make([]string, cfg.nctrlers)
    for j := 0; j < cfg.nctrlers; j++ {
        gg.mendnames[i][j] = randstring(20)
        mends[j] = cfg.net.MakeEnd(gg.mendnames[i][j])
        cfg.net.Connect(gg.mendnames[i][j], cfg.ctrlername(j))
        cfg.net.Enable(gg.mendnames[i][j], true)
    }

    // a fresh persister, so old instance doesn't overwrite
    // new instance's persisted state.
    // give the fresh persister a copy of the old persister's
    // state, so that the spec is that we pass StartKVServer()
    // the last persisted state.
    if gg.saved[i] != nil {
        gg.saved[i] = gg.saved[i].Copy()
    } else {
        gg.saved[i] = raft.MakePersister()
    }
    cfg.mu.Unlock()

    gg.servers[i] = StartServer(ends, i, gg.saved[i], cfg.maxraftstate,
        gg.gid, mends,
        func(servername string) *labrpc.ClientEnd {
            name := randstring(20)
            end := cfg.net.MakeEnd(name)
            cfg.net.Connect(name, servername)
            cfg.net.Enable(name, true)
            return end
        })

    kvsvc := labrpc.MakeService(gg.servers[i])
    rfsvc := labrpc.MakeService(gg.servers[i].rf)
    srv := labrpc.MakeServer()
    srv.AddService(kvsvc)
    srv.AddService(rfsvc)
    cfg.net.AddServer(cfg.servername(gg.gid, i), srv)
}

func (cfg *config) StartGroup(gi int) {
    for i := 0; i < cfg.n; i++ {
        cfg.StartServer(gi, i)
    }
}

func (cfg *config) StartCtrlerserver(i int) {
    // ClientEnds to talk to other controller replicas.
    ends := make([]*labrpc.ClientEnd, cfg.nctrlers)
    for j := 0; j < cfg.nctrlers; j++ {
        endname := randstring(20)
        ends[j] = cfg.net.MakeEnd(endname)
        cfg.net.Connect(endname, cfg.ctrlername(j))
        cfg.net.Enable(endname, true)
    }

    p := raft.MakePersister()

    cfg.ctrlerservers[i] = shardctrler.StartServer(ends, i, p)

    msvc := labrpc.MakeService(cfg.ctrlerservers[i])
    rfsvc := labrpc.MakeService(cfg.ctrlerservers[i].Raft())
    srv := labrpc.MakeServer()
    srv.AddService(msvc)
    srv.AddService(rfsvc)
    cfg.net.AddServer(cfg.ctrlername(i), srv)
}

func (cfg *config) shardclerk() *shardctrler.Clerk {
    // ClientEnds to talk to ctrler service.
    ends := make([]*labrpc.ClientEnd, cfg.nctrlers)
    for j := 0; j < cfg.nctrlers; j++ {
        name := randstring(20)
        ends[j] = cfg.net.MakeEnd(name)
        cfg.net.Connect(name, cfg.ctrlername(j))
        cfg.net.Enable(name, true)
    }

    return shardctrler.MakeClerk(ends)
}

// tell the shardctrler that a group is joining.
func (cfg *config) join(gi int) {
    cfg.joinm([]int{gi})
}

func (cfg *config) joinm(gis []int) {
    m := make(map[int][]string, len(gis))
    for _, g := range gis {
        gid := cfg.groups[g].gid
        servernames := make([]string, cfg.n)
        for i := 0; i < cfg.n; i++ {
            servernames[i] = cfg.servername(gid, i)
        }
        m[gid] = servernames
    }
    cfg.mck.Join(m)
}
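
// For example, with the harness defaults (gids 100, 101, 102 and cfg.n
// servers per group), cfg.joinm([]int{0, 1}) issues a single Join with
// {100: ["server-100-0", ...], 101: ["server-101-0", ...]}.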

// tell the shardctrler that a group is leaving.
func (cfg *config) leave(gi int) {
    cfg.leavem([]int{gi})
}

func (cfg *config) leavem(gis []int) {
    gids := make([]int, 0, len(gis))
    for _, g := range gis {
        gids = append(gids, cfg.groups[g].gid)
    }
    cfg.mck.Leave(gids)
}

var ncpu_once sync.Once

func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config {
    ncpu_once.Do(func() {
        if runtime.NumCPU() < 2 {
            fmt.Printf("warning: only one CPU, which may conceal locking bugs\n")
        }
        rand.Seed(makeSeed())
    })
    runtime.GOMAXPROCS(4)
    cfg := &config{}
    cfg.t = t
    cfg.maxraftstate = maxraftstate
    cfg.net = labrpc.MakeNetwork()
    cfg.start = time.Now()

    // controller
    cfg.nctrlers = 3
    cfg.ctrlerservers = make([]*shardctrler.ShardCtrler, cfg.nctrlers)
    for i := 0; i < cfg.nctrlers; i++ {
        cfg.StartCtrlerserver(i)
    }
    cfg.mck = cfg.shardclerk()

    cfg.ngroups = 3
    cfg.groups = make([]*group, cfg.ngroups)
    cfg.n = n
    for gi := 0; gi < cfg.ngroups; gi++ {
        gg := &group{}
        cfg.groups[gi] = gg
        gg.gid = 100 + gi
        gg.servers = make([]*ShardKV, cfg.n)
        gg.saved = make([]*raft.Persister, cfg.n)
        gg.endnames = make([][]string, cfg.n)
        gg.mendnames = make([][]string, cfg.nctrlers)
        for i := 0; i < cfg.n; i++ {
            cfg.StartServer(gi, i)
        }
    }

    cfg.clerks = make(map[*Clerk][]string)
    cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid

    cfg.net.Reliable(!unreliable)

    return cfg
}
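
// Typical usage, mirroring the tests in test_test.go: build a harness,
// register a group with the shardctrler, and exercise a Clerk. Illustrative
// only; the test name is made up.
//
//	func TestExampleUsage(t *testing.T) {
//		cfg := make_config(t, 3, false, -1) // 3 servers per group, reliable net, no snapshots
//		defer cfg.cleanup()
//		ck := cfg.makeClient()
//		cfg.join(0) // group 100 now serves every shard
//		ck.Put("a", "x")
//		check(t, ck, "a", "x")
//	}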

src/shardkv/server.go (new file, 101 lines)
@@ -0,0 +1,101 @@

package shardkv

import "6.824/labrpc"
import "6.824/raft"
import "sync"
import "6.824/labgob"

type Op struct {
    // Your definitions here.
    // Field names must start with capital letters,
    // otherwise RPC will break.
}

type ShardKV struct {
    mu           sync.Mutex
    me           int
    rf           *raft.Raft
    applyCh      chan raft.ApplyMsg
    make_end     func(string) *labrpc.ClientEnd
    gid          int
    ctrlers      []*labrpc.ClientEnd
    maxraftstate int // snapshot if log grows this big

    // Your definitions here.
}
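
// A rough sketch of state a solution commonly adds to ShardKV (hypothetical
// names, not part of the handout): a shardctrler clerk, the latest known
// config, the key/value data, and a per-client duplicate table, e.g.
//
//	mck     *shardctrler.Clerk
//	config  shardctrler.Config
//	data    map[string]string // or one map per shard
//	lastSeq map[int64]int64   // highest request number applied per client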

func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) {
    // Your code here.
}

func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) {
    // Your code here.
}

//
// the tester calls Kill() when a ShardKV instance won't
// be needed again. you are not required to do anything
// in Kill(), but it might be convenient to (for example)
// turn off debug output from this instance.
//
func (kv *ShardKV) Kill() {
    kv.rf.Kill()
    // Your code here, if desired.
}
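
// One common (optional) pattern here, sketched under the assumption that a
// dead int32 field is added to ShardKV: Kill() sets it with
// atomic.StoreInt32(&kv.dead, 1), and long-running goroutines poll
//
//	func (kv *ShardKV) killed() bool {
//		return atomic.LoadInt32(&kv.dead) == 1
//	}
//
// so they exit cleanly once the tester shuts this server down.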

//
// servers[] contains the ports of the servers in this group.
//
// me is the index of the current server in servers[].
//
// the k/v server should store snapshots through the underlying Raft
// implementation, which should call persister.SaveStateAndSnapshot() to
// atomically save the Raft state along with the snapshot.
//
// the k/v server should snapshot when Raft's saved state exceeds
// maxraftstate bytes, in order to allow Raft to garbage-collect its
// log. if maxraftstate is -1, you don't need to snapshot.
//
// gid is this group's GID, for interacting with the shardctrler.
//
// pass ctrlers[] to shardctrler.MakeClerk() so you can send
// RPCs to the shardctrler.
//
// make_end(servername) turns a server name from a
// Config.Groups[gid][i] into a labrpc.ClientEnd on which you can
// send RPCs. You'll need this to send RPCs to other groups.
//
// look at client.go for examples of how to use ctrlers[]
// and make_end() to send RPCs to the group owning a specific shard.
//
// StartServer() must return quickly, so it should start goroutines
// for any long-running work.
//
func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, gid int, ctrlers []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *ShardKV {
    // call labgob.Register on structures you want
    // Go's RPC library to marshal/unmarshal.
    labgob.Register(Op{})

    kv := new(ShardKV)
    kv.me = me
    kv.maxraftstate = maxraftstate
    kv.make_end = make_end
    kv.gid = gid
    kv.ctrlers = ctrlers

    // Your initialization code here.

    // Use something like this to talk to the shardctrler:
    // kv.mck = shardctrler.MakeClerk(kv.ctrlers)

    kv.applyCh = make(chan raft.ApplyMsg)
    kv.rf = raft.Make(servers, me, persister, kv.applyCh)

    return kv
}
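
// A minimal sketch of the kind of long-running goroutine StartServer might
// launch (illustrative only; it assumes the hypothetical kv.mck and
// kv.config fields sketched above, an imported "time" package, and the
// usual Num field on shardctrler.Config):
//
//	go func() {
//		for {
//			latest := kv.mck.Query(-1) // newest configuration
//			kv.mu.Lock()
//			if latest.Num > kv.config.Num {
//				kv.config = latest // a real solution migrates shards here
//			}
//			kv.mu.Unlock()
//			time.Sleep(100 * time.Millisecond)
//		}
//	}()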

src/shardkv/test_test.go (new file, 948 lines)
@@ -0,0 +1,948 @@
|
||||
package shardkv
|
||||
|
||||
import "6.824/porcupine"
|
||||
import "6.824/models"
|
||||
import "testing"
|
||||
import "strconv"
|
||||
import "time"
|
||||
import "fmt"
|
||||
import "sync/atomic"
|
||||
import "sync"
|
||||
import "math/rand"
|
||||
import "io/ioutil"
|
||||
|
||||
const linearizabilityCheckTimeout = 1 * time.Second
|
||||
|
||||
func check(t *testing.T, ck *Clerk, key string, value string) {
|
||||
v := ck.Get(key)
|
||||
if v != value {
|
||||
t.Fatalf("Get(%v): expected:\n%v\nreceived:\n%v", key, value, v)
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// test static 2-way sharding, without shard movement.
|
||||
//
|
||||
func TestStaticShards(t *testing.T) {
|
||||
fmt.Printf("Test: static shards ...\n")
|
||||
|
||||
cfg := make_config(t, 3, false, -1)
|
||||
defer cfg.cleanup()
|
||||
|
||||
ck := cfg.makeClient()
|
||||
|
||||
cfg.join(0)
|
||||
cfg.join(1)
|
||||
|
||||
n := 10
|
||||
ka := make([]string, n)
|
||||
va := make([]string, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ka[i] = strconv.Itoa(i) // ensure multiple shards
|
||||
va[i] = randstring(20)
|
||||
ck.Put(ka[i], va[i])
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
// make sure that the data really is sharded by
|
||||
// shutting down one shard and checking that some
|
||||
// Get()s don't succeed.
|
||||
cfg.ShutdownGroup(1)
|
||||
cfg.checklogs() // forbid snapshots
|
||||
|
||||
ch := make(chan string)
|
||||
for xi := 0; xi < n; xi++ {
|
||||
ck1 := cfg.makeClient() // only one call allowed per client
|
||||
go func(i int) {
|
||||
v := ck1.Get(ka[i])
|
||||
if v != va[i] {
|
||||
ch <- fmt.Sprintf("Get(%v): expected:\n%v\nreceived:\n%v", ka[i], va[i], v)
|
||||
} else {
|
||||
ch <- ""
|
||||
}
|
||||
}(xi)
|
||||
}
|
||||
|
||||
// wait a bit, only about half the Gets should succeed.
|
||||
ndone := 0
|
||||
done := false
|
||||
for done == false {
|
||||
select {
|
||||
case err := <-ch:
|
||||
if err != "" {
|
||||
t.Fatal(err)
|
||||
}
|
||||
ndone += 1
|
||||
case <-time.After(time.Second * 2):
|
||||
done = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if ndone != 5 {
|
||||
t.Fatalf("expected 5 completions with one shard dead; got %v\n", ndone)
|
||||
}
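// The expected count is exactly 5 because keys "0".."9" hash (via their
// first byte) to all ten shards, and with two groups joined the shardctrler
// balances the shards 5/5; shutting down group 101 therefore leaves exactly
// half the keys reachable.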
|
||||
|
||||
// bring the crashed shard/group back to life.
|
||||
cfg.StartGroup(1)
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
fmt.Printf(" ... Passed\n")
|
||||
}
|
||||
|
||||
func TestJoinLeave(t *testing.T) {
|
||||
fmt.Printf("Test: join then leave ...\n")
|
||||
|
||||
cfg := make_config(t, 3, false, -1)
|
||||
defer cfg.cleanup()
|
||||
|
||||
ck := cfg.makeClient()
|
||||
|
||||
cfg.join(0)
|
||||
|
||||
n := 10
|
||||
ka := make([]string, n)
|
||||
va := make([]string, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ka[i] = strconv.Itoa(i) // ensure multiple shards
|
||||
va[i] = randstring(5)
|
||||
ck.Put(ka[i], va[i])
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
cfg.join(1)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
x := randstring(5)
|
||||
ck.Append(ka[i], x)
|
||||
va[i] += x
|
||||
}
|
||||
|
||||
cfg.leave(0)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
x := randstring(5)
|
||||
ck.Append(ka[i], x)
|
||||
va[i] += x
|
||||
}
|
||||
|
||||
// allow time for shards to transfer.
|
||||
time.Sleep(1 * time.Second)
|
||||
|
||||
cfg.checklogs()
|
||||
cfg.ShutdownGroup(0)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
fmt.Printf(" ... Passed\n")
|
||||
}
|
||||
|
||||
func TestSnapshot(t *testing.T) {
|
||||
fmt.Printf("Test: snapshots, join, and leave ...\n")
|
||||
|
||||
cfg := make_config(t, 3, false, 1000)
|
||||
defer cfg.cleanup()
|
||||
|
||||
ck := cfg.makeClient()
|
||||
|
||||
cfg.join(0)
|
||||
|
||||
n := 30
|
||||
ka := make([]string, n)
|
||||
va := make([]string, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ka[i] = strconv.Itoa(i) // ensure multiple shards
|
||||
va[i] = randstring(20)
|
||||
ck.Put(ka[i], va[i])
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
cfg.join(1)
|
||||
cfg.join(2)
|
||||
cfg.leave(0)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
x := randstring(20)
|
||||
ck.Append(ka[i], x)
|
||||
va[i] += x
|
||||
}
|
||||
|
||||
cfg.leave(1)
|
||||
cfg.join(0)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
x := randstring(20)
|
||||
ck.Append(ka[i], x)
|
||||
va[i] += x
|
||||
}
|
||||
|
||||
time.Sleep(1 * time.Second)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
time.Sleep(1 * time.Second)
|
||||
|
||||
cfg.checklogs()
|
||||
|
||||
cfg.ShutdownGroup(0)
|
||||
cfg.ShutdownGroup(1)
|
||||
cfg.ShutdownGroup(2)
|
||||
|
||||
cfg.StartGroup(0)
|
||||
cfg.StartGroup(1)
|
||||
cfg.StartGroup(2)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
fmt.Printf(" ... Passed\n")
|
||||
}
|
||||
|
||||
func TestMissChange(t *testing.T) {
|
||||
fmt.Printf("Test: servers miss configuration changes...\n")
|
||||
|
||||
cfg := make_config(t, 3, false, 1000)
|
||||
defer cfg.cleanup()
|
||||
|
||||
ck := cfg.makeClient()
|
||||
|
||||
cfg.join(0)
|
||||
|
||||
n := 10
|
||||
ka := make([]string, n)
|
||||
va := make([]string, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ka[i] = strconv.Itoa(i) // ensure multiple shards
|
||||
va[i] = randstring(20)
|
||||
ck.Put(ka[i], va[i])
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
cfg.join(1)
|
||||
|
||||
cfg.ShutdownServer(0, 0)
|
||||
cfg.ShutdownServer(1, 0)
|
||||
cfg.ShutdownServer(2, 0)
|
||||
|
||||
cfg.join(2)
|
||||
cfg.leave(1)
|
||||
cfg.leave(0)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
x := randstring(20)
|
||||
ck.Append(ka[i], x)
|
||||
va[i] += x
|
||||
}
|
||||
|
||||
cfg.join(1)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
x := randstring(20)
|
||||
ck.Append(ka[i], x)
|
||||
va[i] += x
|
||||
}
|
||||
|
||||
cfg.StartServer(0, 0)
|
||||
cfg.StartServer(1, 0)
|
||||
cfg.StartServer(2, 0)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
x := randstring(20)
|
||||
ck.Append(ka[i], x)
|
||||
va[i] += x
|
||||
}
|
||||
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
cfg.ShutdownServer(0, 1)
|
||||
cfg.ShutdownServer(1, 1)
|
||||
cfg.ShutdownServer(2, 1)
|
||||
|
||||
cfg.join(0)
|
||||
cfg.leave(2)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
x := randstring(20)
|
||||
ck.Append(ka[i], x)
|
||||
va[i] += x
|
||||
}
|
||||
|
||||
cfg.StartServer(0, 1)
|
||||
cfg.StartServer(1, 1)
|
||||
cfg.StartServer(2, 1)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
fmt.Printf(" ... Passed\n")
|
||||
}
|
||||
|
||||
func TestConcurrent1(t *testing.T) {
|
||||
fmt.Printf("Test: concurrent puts and configuration changes...\n")
|
||||
|
||||
cfg := make_config(t, 3, false, 100)
|
||||
defer cfg.cleanup()
|
||||
|
||||
ck := cfg.makeClient()
|
||||
|
||||
cfg.join(0)
|
||||
|
||||
n := 10
|
||||
ka := make([]string, n)
|
||||
va := make([]string, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ka[i] = strconv.Itoa(i) // ensure multiple shards
|
||||
va[i] = randstring(5)
|
||||
ck.Put(ka[i], va[i])
|
||||
}
|
||||
|
||||
var done int32
|
||||
ch := make(chan bool)
|
||||
|
||||
ff := func(i int) {
|
||||
defer func() { ch <- true }()
|
||||
ck1 := cfg.makeClient()
|
||||
for atomic.LoadInt32(&done) == 0 {
|
||||
x := randstring(5)
|
||||
ck1.Append(ka[i], x)
|
||||
va[i] += x
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
go ff(i)
|
||||
}
|
||||
|
||||
time.Sleep(150 * time.Millisecond)
|
||||
cfg.join(1)
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
cfg.join(2)
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
cfg.leave(0)
|
||||
|
||||
cfg.ShutdownGroup(0)
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
cfg.ShutdownGroup(1)
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
cfg.ShutdownGroup(2)
|
||||
|
||||
cfg.leave(2)
|
||||
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
cfg.StartGroup(0)
|
||||
cfg.StartGroup(1)
|
||||
cfg.StartGroup(2)
|
||||
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
cfg.join(0)
|
||||
cfg.leave(1)
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
cfg.join(1)
|
||||
|
||||
time.Sleep(1 * time.Second)
|
||||
|
||||
atomic.StoreInt32(&done, 1)
|
||||
for i := 0; i < n; i++ {
|
||||
<-ch
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
fmt.Printf(" ... Passed\n")
|
||||
}
|
||||
|
||||
//
|
||||
// this tests the various sources from which a re-starting
|
||||
// group might need to fetch shard contents.
|
||||
//
|
||||
func TestConcurrent2(t *testing.T) {
|
||||
fmt.Printf("Test: more concurrent puts and configuration changes...\n")
|
||||
|
||||
cfg := make_config(t, 3, false, -1)
|
||||
defer cfg.cleanup()
|
||||
|
||||
ck := cfg.makeClient()
|
||||
|
||||
cfg.join(1)
|
||||
cfg.join(0)
|
||||
cfg.join(2)
|
||||
|
||||
n := 10
|
||||
ka := make([]string, n)
|
||||
va := make([]string, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ka[i] = strconv.Itoa(i) // ensure multiple shards
|
||||
va[i] = randstring(1)
|
||||
ck.Put(ka[i], va[i])
|
||||
}
|
||||
|
||||
var done int32
|
||||
ch := make(chan bool)
|
||||
|
||||
ff := func(i int, ck1 *Clerk) {
|
||||
defer func() { ch <- true }()
|
||||
for atomic.LoadInt32(&done) == 0 {
|
||||
x := randstring(1)
|
||||
ck1.Append(ka[i], x)
|
||||
va[i] += x
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
ck1 := cfg.makeClient()
|
||||
go ff(i, ck1)
|
||||
}
|
||||
|
||||
cfg.leave(0)
|
||||
cfg.leave(2)
|
||||
time.Sleep(3000 * time.Millisecond)
|
||||
cfg.join(0)
|
||||
cfg.join(2)
|
||||
cfg.leave(1)
|
||||
time.Sleep(3000 * time.Millisecond)
|
||||
cfg.join(1)
|
||||
cfg.leave(0)
|
||||
cfg.leave(2)
|
||||
time.Sleep(3000 * time.Millisecond)
|
||||
|
||||
cfg.ShutdownGroup(1)
|
||||
cfg.ShutdownGroup(2)
|
||||
time.Sleep(1000 * time.Millisecond)
|
||||
cfg.StartGroup(1)
|
||||
cfg.StartGroup(2)
|
||||
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
atomic.StoreInt32(&done, 1)
|
||||
for i := 0; i < n; i++ {
|
||||
<-ch
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
fmt.Printf(" ... Passed\n")
|
||||
}
|
||||
|
||||
func TestUnreliable1(t *testing.T) {
|
||||
fmt.Printf("Test: unreliable 1...\n")
|
||||
|
||||
cfg := make_config(t, 3, true, 100)
|
||||
defer cfg.cleanup()
|
||||
|
||||
ck := cfg.makeClient()
|
||||
|
||||
cfg.join(0)
|
||||
|
||||
n := 10
|
||||
ka := make([]string, n)
|
||||
va := make([]string, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ka[i] = strconv.Itoa(i) // ensure multiple shards
|
||||
va[i] = randstring(5)
|
||||
ck.Put(ka[i], va[i])
|
||||
}
|
||||
|
||||
cfg.join(1)
|
||||
cfg.join(2)
|
||||
cfg.leave(0)
|
||||
|
||||
for ii := 0; ii < n*2; ii++ {
|
||||
i := ii % n
|
||||
check(t, ck, ka[i], va[i])
|
||||
x := randstring(5)
|
||||
ck.Append(ka[i], x)
|
||||
va[i] += x
|
||||
}
|
||||
|
||||
cfg.join(0)
|
||||
cfg.leave(1)
|
||||
|
||||
for ii := 0; ii < n*2; ii++ {
|
||||
i := ii % n
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
fmt.Printf(" ... Passed\n")
|
||||
}
|
||||
|
||||
func TestUnreliable2(t *testing.T) {
|
||||
fmt.Printf("Test: unreliable 2...\n")
|
||||
|
||||
cfg := make_config(t, 3, true, 100)
|
||||
defer cfg.cleanup()
|
||||
|
||||
ck := cfg.makeClient()
|
||||
|
||||
cfg.join(0)
|
||||
|
||||
n := 10
|
||||
ka := make([]string, n)
|
||||
va := make([]string, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ka[i] = strconv.Itoa(i) // ensure multiple shards
|
||||
va[i] = randstring(5)
|
||||
ck.Put(ka[i], va[i])
|
||||
}
|
||||
|
||||
var done int32
|
||||
ch := make(chan bool)
|
||||
|
||||
ff := func(i int) {
|
||||
defer func() { ch <- true }()
|
||||
ck1 := cfg.makeClient()
|
||||
for atomic.LoadInt32(&done) == 0 {
|
||||
x := randstring(5)
|
||||
ck1.Append(ka[i], x)
|
||||
va[i] += x
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
go ff(i)
|
||||
}
|
||||
|
||||
time.Sleep(150 * time.Millisecond)
|
||||
cfg.join(1)
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
cfg.join(2)
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
cfg.leave(0)
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
cfg.leave(1)
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
cfg.join(1)
|
||||
cfg.join(0)
|
||||
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
atomic.StoreInt32(&done, 1)
|
||||
cfg.net.Reliable(true)
|
||||
for i := 0; i < n; i++ {
|
||||
<-ch
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
fmt.Printf(" ... Passed\n")
|
||||
}
|
||||
|
||||
func TestUnreliable3(t *testing.T) {
|
||||
fmt.Printf("Test: unreliable 3...\n")
|
||||
|
||||
cfg := make_config(t, 3, true, 100)
|
||||
defer cfg.cleanup()
|
||||
|
||||
begin := time.Now()
|
||||
var operations []porcupine.Operation
|
||||
var opMu sync.Mutex
|
||||
|
||||
ck := cfg.makeClient()
|
||||
|
||||
cfg.join(0)
|
||||
|
||||
n := 10
|
||||
ka := make([]string, n)
|
||||
va := make([]string, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ka[i] = strconv.Itoa(i) // ensure multiple shards
|
||||
va[i] = randstring(5)
|
||||
start := int64(time.Since(begin))
|
||||
ck.Put(ka[i], va[i])
|
||||
end := int64(time.Since(begin))
|
||||
inp := models.KvInput{Op: 1, Key: ka[i], Value: va[i]}
|
||||
var out models.KvOutput
|
||||
op := porcupine.Operation{Input: inp, Call: start, Output: out, Return: end, ClientId: 0}
|
||||
operations = append(operations, op)
|
||||
}
|
||||
|
||||
var done int32
|
||||
ch := make(chan bool)
|
||||
|
||||
ff := func(i int) {
|
||||
defer func() { ch <- true }()
|
||||
ck1 := cfg.makeClient()
|
||||
for atomic.LoadInt32(&done) == 0 {
|
||||
ki := rand.Int() % n
|
||||
nv := randstring(5)
|
||||
var inp models.KvInput
|
||||
var out models.KvOutput
|
||||
start := int64(time.Since(begin))
|
||||
if (rand.Int() % 1000) < 500 {
|
||||
ck1.Append(ka[ki], nv)
|
||||
inp = models.KvInput{Op: 2, Key: ka[ki], Value: nv}
|
||||
} else if (rand.Int() % 1000) < 100 {
|
||||
ck1.Put(ka[ki], nv)
|
||||
inp = models.KvInput{Op: 1, Key: ka[ki], Value: nv}
|
||||
} else {
|
||||
v := ck1.Get(ka[ki])
|
||||
inp = models.KvInput{Op: 0, Key: ka[ki]}
|
||||
out = models.KvOutput{Value: v}
|
||||
}
|
||||
end := int64(time.Since(begin))
|
||||
op := porcupine.Operation{Input: inp, Call: start, Output: out, Return: end, ClientId: i}
|
||||
opMu.Lock()
|
||||
operations = append(operations, op)
|
||||
opMu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
go ff(i)
|
||||
}
|
||||
|
||||
time.Sleep(150 * time.Millisecond)
|
||||
cfg.join(1)
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
cfg.join(2)
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
cfg.leave(0)
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
cfg.leave(1)
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
cfg.join(1)
|
||||
cfg.join(0)
|
||||
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
atomic.StoreInt32(&done, 1)
|
||||
cfg.net.Reliable(true)
|
||||
for i := 0; i < n; i++ {
|
||||
<-ch
|
||||
}
|
||||
|
||||
res, info := porcupine.CheckOperationsVerbose(models.KvModel, operations, linearizabilityCheckTimeout)
|
||||
if res == porcupine.Illegal {
|
||||
file, err := ioutil.TempFile("", "*.html")
|
||||
if err != nil {
|
||||
fmt.Printf("info: failed to create temp file for visualization")
|
||||
} else {
|
||||
err = porcupine.Visualize(models.KvModel, info, file)
|
||||
if err != nil {
|
||||
fmt.Printf("info: failed to write history visualization to %s\n", file.Name())
|
||||
} else {
|
||||
fmt.Printf("info: wrote history visualization to %s\n", file.Name())
|
||||
}
|
||||
}
|
||||
t.Fatal("history is not linearizable")
|
||||
} else if res == porcupine.Unknown {
|
||||
fmt.Println("info: linearizability check timed out, assuming history is ok")
|
||||
}
|
||||
|
||||
fmt.Printf(" ... Passed\n")
|
||||
}
|
||||
|
||||
//
|
||||
// optional test to see whether servers are deleting
|
||||
// shards for which they are no longer responsible.
|
||||
//
|
||||
func TestChallenge1Delete(t *testing.T) {
|
||||
fmt.Printf("Test: shard deletion (challenge 1) ...\n")
|
||||
|
||||
// "1" means force snapshot after every log entry.
|
||||
cfg := make_config(t, 3, false, 1)
|
||||
defer cfg.cleanup()
|
||||
|
||||
ck := cfg.makeClient()
|
||||
|
||||
cfg.join(0)
|
||||
|
||||
// 30,000 bytes of total values.
|
||||
n := 30
|
||||
ka := make([]string, n)
|
||||
va := make([]string, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ka[i] = strconv.Itoa(i)
|
||||
va[i] = randstring(1000)
|
||||
ck.Put(ka[i], va[i])
|
||||
}
|
||||
for i := 0; i < 3; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
for iters := 0; iters < 2; iters++ {
|
||||
cfg.join(1)
|
||||
cfg.leave(0)
|
||||
cfg.join(2)
|
||||
time.Sleep(3 * time.Second)
|
||||
for i := 0; i < 3; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
cfg.leave(1)
|
||||
cfg.join(0)
|
||||
cfg.leave(2)
|
||||
time.Sleep(3 * time.Second)
|
||||
for i := 0; i < 3; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
}
|
||||
|
||||
cfg.join(1)
|
||||
cfg.join(2)
|
||||
time.Sleep(1 * time.Second)
|
||||
for i := 0; i < 3; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
time.Sleep(1 * time.Second)
|
||||
for i := 0; i < 3; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
time.Sleep(1 * time.Second)
|
||||
for i := 0; i < 3; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
total := 0
|
||||
for gi := 0; gi < cfg.ngroups; gi++ {
|
||||
for i := 0; i < cfg.n; i++ {
|
||||
raft := cfg.groups[gi].saved[i].RaftStateSize()
|
||||
snap := len(cfg.groups[gi].saved[i].ReadSnapshot())
|
||||
total += raft + snap
|
||||
}
|
||||
}
|
||||
|
||||
// 27 keys should be stored once.
|
||||
// 3 keys should also be stored in client dup tables.
|
||||
// everything on 3 replicas.
|
||||
// plus slop.
|
||||
expected := 3 * (((n - 3) * 1000) + 2*3*1000 + 6000)
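// With n = 30 this is 3 * (27000 + 6000 + 6000) = 117000 bytes: 27 values of
// 1000 bytes stored once, the other 3 values counted twice (once in the
// store, once in a duplicate table), plus 6000 bytes of slop, all replicated
// on the group's 3 servers.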
|
||||
if total > expected {
|
||||
t.Fatalf("snapshot + persisted Raft state are too big: %v > %v\n", total, expected)
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
fmt.Printf(" ... Passed\n")
|
||||
}
|
||||
|
||||
func TestChallenge1Concurrent(t *testing.T) {
|
||||
fmt.Printf("Test: concurrent configuration change and restart (challenge 1)...\n")
|
||||
|
||||
cfg := make_config(t, 3, false, 300)
|
||||
defer cfg.cleanup()
|
||||
|
||||
ck := cfg.makeClient()
|
||||
|
||||
cfg.join(0)
|
||||
|
||||
n := 10
|
||||
ka := make([]string, n)
|
||||
va := make([]string, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ka[i] = strconv.Itoa(i)
|
||||
va[i] = randstring(1)
|
||||
ck.Put(ka[i], va[i])
|
||||
}
|
||||
|
||||
var done int32
|
||||
ch := make(chan bool)
|
||||
|
||||
ff := func(i int, ck1 *Clerk) {
|
||||
defer func() { ch <- true }()
|
||||
for atomic.LoadInt32(&done) == 0 {
|
||||
x := randstring(1)
|
||||
ck1.Append(ka[i], x)
|
||||
va[i] += x
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
ck1 := cfg.makeClient()
|
||||
go ff(i, ck1)
|
||||
}
|
||||
|
||||
t0 := time.Now()
|
||||
for time.Since(t0) < 12*time.Second {
|
||||
cfg.join(2)
|
||||
cfg.join(1)
|
||||
time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond)
|
||||
cfg.ShutdownGroup(0)
|
||||
cfg.ShutdownGroup(1)
|
||||
cfg.ShutdownGroup(2)
|
||||
cfg.StartGroup(0)
|
||||
cfg.StartGroup(1)
|
||||
cfg.StartGroup(2)
|
||||
|
||||
time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond)
|
||||
cfg.leave(1)
|
||||
cfg.leave(2)
|
||||
time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond)
|
||||
}
|
||||
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
atomic.StoreInt32(&done, 1)
|
||||
for i := 0; i < n; i++ {
|
||||
<-ch
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
check(t, ck, ka[i], va[i])
|
||||
}
|
||||
|
||||
fmt.Printf(" ... Passed\n")
|
||||
}
|
||||
|
||||
//
|
||||
// optional test to see whether servers can handle
|
||||
// shards that are not affected by a config change
|
||||
// while the config change is underway
|
||||
//
|
||||
func TestChallenge2Unaffected(t *testing.T) {
|
||||
fmt.Printf("Test: unaffected shard access (challenge 2) ...\n")
|
||||
|
||||
cfg := make_config(t, 3, true, 100)
|
||||
defer cfg.cleanup()
|
||||
|
||||
ck := cfg.makeClient()
|
||||
|
||||
// JOIN 100
|
||||
cfg.join(0)
|
||||
|
||||
// Do a bunch of puts to keys in all shards
|
||||
n := 10
|
||||
ka := make([]string, n)
|
||||
va := make([]string, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ka[i] = strconv.Itoa(i) // ensure multiple shards
|
||||
va[i] = "100"
|
||||
ck.Put(ka[i], va[i])
|
||||
}
|
||||
|
||||
// JOIN 101
|
||||
cfg.join(1)
|
||||
|
||||
// QUERY to find shards now owned by 101
|
||||
c := cfg.mck.Query(-1)
|
||||
owned := make(map[int]bool, n)
|
||||
for s, gid := range c.Shards {
|
||||
owned[s] = gid == cfg.groups[1].gid
|
||||
}
|
||||
|
||||
// Wait for migration to new config to complete, and for clients to
|
||||
// start using this updated config. Gets to any key k such that
|
||||
// owned[shard(k)] == true should now be served by group 101.
|
||||
<-time.After(1 * time.Second)
|
||||
for i := 0; i < n; i++ {
|
||||
if owned[i] {
|
||||
va[i] = "101"
|
||||
ck.Put(ka[i], va[i])
|
||||
}
|
||||
}
|
||||
|
||||
// KILL 100
|
||||
cfg.ShutdownGroup(0)
|
||||
|
||||
// LEAVE 100
|
||||
// 101 doesn't get a chance to migrate things previously owned by 100
|
||||
cfg.leave(0)
|
||||
|
||||
// Wait to make sure clients see new config
|
||||
<-time.After(1 * time.Second)
|
||||
|
||||
// And finally: check that gets/puts for 101-owned keys still complete
|
||||
for i := 0; i < n; i++ {
|
||||
shard := int(ka[i][0]) % 10
|
||||
if owned[shard] {
|
||||
check(t, ck, ka[i], va[i])
|
||||
ck.Put(ka[i], va[i]+"-1")
|
||||
check(t, ck, ka[i], va[i]+"-1")
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf(" ... Passed\n")
|
||||
}
|
||||
|
||||
//
|
||||
// optional test to see whether servers can handle operations on shards that
|
||||
// have been received as a part of a config migration when the entire migration
|
||||
// has not yet completed.
|
||||
//
|
||||
func TestChallenge2Partial(t *testing.T) {
|
||||
fmt.Printf("Test: partial migration shard access (challenge 2) ...\n")
|
||||
|
||||
cfg := make_config(t, 3, true, 100)
|
||||
defer cfg.cleanup()
|
||||
|
||||
ck := cfg.makeClient()
|
||||
|
||||
// JOIN 100 + 101 + 102
|
||||
cfg.joinm([]int{0, 1, 2})
|
||||
|
||||
// Give the implementation some time to reconfigure
|
||||
<-time.After(1 * time.Second)
|
||||
|
||||
// Do a bunch of puts to keys in all shards
|
||||
n := 10
|
||||
ka := make([]string, n)
|
||||
va := make([]string, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ka[i] = strconv.Itoa(i) // ensure multiple shards
|
||||
va[i] = "100"
|
||||
ck.Put(ka[i], va[i])
|
||||
}
|
||||
|
||||
// QUERY to find shards owned by 102
|
||||
c := cfg.mck.Query(-1)
|
||||
owned := make(map[int]bool, n)
|
||||
for s, gid := range c.Shards {
|
||||
owned[s] = gid == cfg.groups[2].gid
|
||||
}
|
||||
|
||||
// KILL 100
|
||||
cfg.ShutdownGroup(0)
|
||||
|
||||
// LEAVE 100 + 102
|
||||
// 101 can get old shards from 102, but not from 100. 101 should start
|
||||
// serving shards that used to belong to 102 as soon as possible
|
||||
cfg.leavem([]int{0, 2})
|
||||
|
||||
// Give the implementation some time to start reconfiguration
|
||||
// And to migrate 102 -> 101
|
||||
<-time.After(1 * time.Second)
|
||||
|
||||
// And finally: check that gets/puts for 101-owned keys now complete
|
||||
for i := 0; i < n; i++ {
|
||||
shard := key2shard(ka[i])
|
||||
if owned[shard] {
|
||||
check(t, ck, ka[i], va[i])
|
||||
ck.Put(ka[i], va[i]+"-2")
|
||||
check(t, ck, ka[i], va[i]+"-2")
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf(" ... Passed\n")
|
||||
}