1. 程式人生 > >clique共識機制流程及#17620 bug修復理解

clique共識機制流程及#17620 bug修復理解

1 clique共識機制的特性

clique是基於授權的共識機制(PoA,Proof of Authority)在以太坊中的實現。它是在Ropsten測試網遭受攻擊以後提出的,用來支援以太坊的測試鏈testnet(也可以用來自己搭建聯盟鏈或者私有鏈)。clique機制的特性有:

  • 不需挖礦,由預先指定好的節點輪流出塊
  • 節點管理,可通過選舉將新節點新增或剔除
  • 出塊週期固定

2 clique核心原始碼解讀

使用的版本是最新的go-ethereum 1.8.17。clique的原始碼在go-ethereum/consensus/clique目錄下,包括api.go、clique.go和snapshot.go。api.go中主要是rpc呼叫方法,clique.go中是clique共識演算法的核心實現,snapshot.go中是實現了區塊快照,起二級快取的作用。下面通過閱讀原始碼來分析clique共識機制是如何實現它的特性。

相關結構

// Clique holds the state of the proof-of-authority consensus engine.
type Clique struct {
    config *params.CliqueConfig // Consensus engine configuration parameters (see CliqueConfig)
    db     ethdb.Database       // Database used to store and retrieve snapshot checkpoints
 
    recents    *lru.ARCCache // Snapshots for recent blocks, to speed up snapshot reconstruction
    signatures *lru.ARCCache // Signatures of recent blocks, to speed up mining
 
    proposals map[common.Address]bool // Current list of proposals we are pushing (address -> bool)
 
    signer common.Address // Ethereum address of the signing key
    signFn SignerFn       // Signer function used to authorize hashes
    lock   sync.RWMutex   // Protects the signer fields
}

// CliqueConfig is the consensus engine configuration for proof-of-authority sealing.
type CliqueConfig struct {
    Period uint64 `json:"period"` // Number of seconds between blocks (time elapsed since the parent block)
    Epoch  uint64 `json:"epoch"`  // Epoch length, after which votes are reset and a checkpoint is taken
}

// Snapshot物件是在給定點的一個認證投票的狀態
type Snapshot struct {
    config   *params.CliqueConfig // 配置引數
    sigcache *lru.ARCCache        // 簽名快取,最近的區塊簽名加速恢復。
 
    Number  uint64                      `json:"number"`  // 快照建立的區塊號
    Hash    common.Hash                 `json:"hash"`    // 快照建立的區塊雜湊
    Signers map[common.Address]struct{} `json:"signers"` // 當下認證簽名者的集合
    Recents map[uint64]common.Address   `json:"recents"` // 最近簽名區塊地址的集合
    Votes   []*Vote                     `json:"votes"`   // 按時間順序排列的投票名單。
    Tally   map[common.Address]Tally    `json:"tally"`   // 當前的投票結果,避免重新計算。

clique的Seal方法

// Seal implements the consensus engine's sealing step: it tries to create a
// sealed block using the local signing credentials and, once the block's time
// slot arrives, sends it on the results channel from a background goroutine.
//
// It returns errUnknownBlock for the genesis block, errUnauthorizedSigner when
// the local signer is not in the snapshot's signer set, and any error returned
// by snapshot retrieval or by signFn. It returns nil without sealing when the
// period is 0 and the block has no transactions, or when the local signer has
// signed too recently.
func (c *Clique) Seal(chain consensus.ChainReader, block *types.Block, results chan<- *types.Block, stop <-chan struct{}) error {  
    header := block.Header()  
  
    // Sealing the genesis block is not supported
    number := header.Number.Uint64()  
    if number == 0 {  
        return errUnknownBlock  
    }  
    // For 0-period chains, refuse to seal blocks that carry no transactions
    if c.config.Period == 0 && len(block.Transactions()) == 0 {  
        log.Info("Sealing paused, waiting for transactions")  
        return nil  
    }  
    // Copy the signer fields under the read lock so the lock is not held for
    // the entire sealing procedure
    c.lock.RLock()  
    signer, signFn := c.signer, c.signFn  
    c.lock.RUnlock()  
  
    // Retrieve the snapshot as of the parent block
    snap, err := c.snapshot(chain, number-1, header.ParentHash, nil)  
    if err != nil {  
        return err  
    }  
          
    // Bail out if the local signer is not authorized according to the snapshot
    if _, authorized := snap.Signers[signer]; !authorized {  
        return errUnauthorizedSigner  
    }  
    // If we're amongst the recent signers, wait for the next block
    for seen, recent := range snap.Recents {  
        if recent == signer {  
            // Signer is among recents, only wait if the current block doesn't shift it out
            if limit := uint64(len(snap.Signers)/2 + 1); number < limit || seen > number-limit {  
                log.Info("Signed recently, must wait for others")  
                return nil  
            }  
        }  
    }  
    // All checks passed: the protocol permits us to sign the block, compute
    // how long to wait until the header's timestamp
    delay := time.Unix(header.Time.Int64(), 0).Sub(time.Now()) // nolint: gosimple  
    if header.Difficulty.Cmp(diffNoTurn) == 0 {  
        // It's not our turn explicitly to sign, delay it a bit  
        wiggle := time.Duration(len(snap.Signers)/2+1) * wiggleTime  
        delay += time.Duration(rand.Int63n(int64(wiggle)))  
  
        log.Trace("Out-of-turn signing requested", "wiggle", common.PrettyDuration(wiggle))  
    }  
    // Sign the header and write the signature into the tail of the extra-data
    sighash, err := signFn(accounts.Account{Address: signer}, sigHash(header).Bytes())  
    if err != nil {  
        return err  
    }  
    copy(header.Extra[len(header.Extra)-extraSeal:], sighash)  
    // Wait until sealing is stopped or the delay elapses
    log.Trace("Waiting for slot to sign and propagate", "delay", common.PrettyDuration(delay))  
    go func() {  
        select {  
        case <-stop:  
            return  
        case <-time.After(delay):  
        }  
  
        select {  
        // Send the sealed block to the results channel; if nobody is ready
        // to receive it, only log a warning instead of blocking
        case results <- block.WithSeal(header):  
        default:  
            log.Warn("Sealing result is not read by miner", "sealhash", c.SealHash(header))  
        }  
    }()  
  
    return nil  
}  

seal函式首先檢測當前是否有打包區塊的資格:先通過snapshot函式獲取區塊快照snap,然後利用snap判斷當前節點是否有簽名資格;然後再判斷當前節點是否最近剛簽名過區塊,如果簽名過,則本次不出塊,需要等待其他簽名者出塊之後才能再次簽名(具體需要等待多少個區塊見下面的分析)。在這裡先請記住用來獲取快照的snapshot函式。

// Walk the recent-signer map (seen = block number, recent = signer of that
// block) to decide whether the local signer has to skip this block.
for seen, recent := range snap.Recents {
		if recent == signer {
			// Signer is among recents, only wait if the current block doesn't shift it out.
			// limit is len(Signers)/2+1; the number < limit test runs first, so the
			// unsigned subtraction number-limit is only evaluated when number >= limit.
			if limit := uint64(len(snap.Signers)/2 + 1); number < limit || seen > number-limit {
				log.Info("Signed recently, must wait for others")
				return nil
			}
		}
	}

這一段,如果當前簽名者在最近簽名集合中,且它上次簽名的區塊號seen滿足seen > number-limit(limit = len(snap.Signers)/2+1),則需要等待不能再簽名。也就是說,每連續len(snap.Signers)/2+1個區塊中,同一個簽名者最多隻能簽名一個區塊。這樣保證了出塊機會均等,防止惡意攻擊者連續出塊。

生成新區塊時,礦工會進行延時,對於輪到出塊的高優先順序礦工,出塊時間是:

header.Time = new(big.Int).Add(parent.Time, new(big.Int).SetUint64(c.config.Period))

這個是在clique.go中的Prepare方法中設定的。

對於普通礦工,其出塊時間需要新增一個隨機延時,延時範圍是:

time.Duration(len(snap.Signers)/2+1) * wiggleTime

這裡wiggleTime設定的是500ms。

clique的snapshot方法

// snapshot retrieves the authorization snapshot at a given point in time,
// identified by block number and hash. It looks for an existing snapshot in
// the in-memory cache or on disk, walking backwards through the supplied
// parents (or, when none are left, through headers read from chain) until one
// is found or can be created, and then applies the collected headers on top.
//
// It returns consensus.ErrUnknownAncestor when a required header cannot be
// found or a supplied parent does not match, and any error from storing the
// snapshot or from applying the headers.
func (c *Clique) snapshot(chain consensus.ChainReader, number uint64, hash common.Hash, parents []*types.Header) (*Snapshot, error) {
	// Search for a snapshot in memory or on disk for checkpoints
	var (
		headers []*types.Header
		snap    *Snapshot
	)
	for snap == nil {
		// If an in-memory snapshot was found, use that
		if s, ok := c.recents.Get(hash); ok {
			snap = s.(*Snapshot)
			break
		}
		// If an on-disk checkpoint snapshot can be found, use that
		if number%checkpointInterval == 0 {
			if s, err := loadSnapshot(c.config, c.signatures, c.db, hash); err == nil {
				log.Trace("Loaded voting snapshot from disk", "number", number, "hash", hash)
				snap = s
				break
			}
		}
		// If we're at the genesis block, or at an epoch checkpoint whose parent
		// header is missing, create the snapshot from this block alone
		if number == 0 || (number%c.config.Epoch == 0 && chain.GetHeaderByNumber(number-1) == nil) {
			checkpoint := chain.GetHeaderByNumber(number)
			if checkpoint != nil {
				hash := checkpoint.Hash()

				// The signer addresses are read from the extra-data, between the
				// vanity prefix and the seal suffix
				signers := make([]common.Address, (len(checkpoint.Extra)-extraVanity-extraSeal)/common.AddressLength)
				for i := 0; i < len(signers); i++ {
					copy(signers[i][:], checkpoint.Extra[extraVanity+i*common.AddressLength:])
				}
				snap = newSnapshot(c.config, c.signatures, number, hash, signers)
				if err := snap.store(c.db); err != nil {
					return nil, err
				}
				log.Info("Stored checkpoint snapshot to disk", "number", number, "hash", hash)
				break
			}
		}
		// No snapshot for this header, gather the header and move backward
		var header *types.Header
		if len(parents) > 0 {
			// If we have explicit parents, pick from there (enforced)
			header = parents[len(parents)-1]
			if header.Hash() != hash || header.Number.Uint64() != number {
				return nil, consensus.ErrUnknownAncestor
			}
			parents = parents[:len(parents)-1]
		} else {
			// No explicit parents (or no more left), fetch the header from the chain
			header = chain.GetHeader(hash, number)
			if header == nil {
				return nil, consensus.ErrUnknownAncestor
			}
		}
		headers = append(headers, header)
		number, hash = number-1, header.ParentHash
	}
	// Previous snapshot found; the headers were collected newest-first, so
	// reverse them into ascending order before applying them on top of it
	for i := 0; i < len(headers)/2; i++ {
		headers[i], headers[len(headers)-1-i] = headers[len(headers)-1-i], headers[i]
	}
	snap, err := snap.apply(headers) // Build a new snapshot by applying the headers
	if err != nil {
		return nil, err
	}
	c.recents.Add(snap.Hash, snap) // Cache the resulting snapshot in recents, keyed by its hash

	// If we've generated a new checkpoint snapshot, save it to disk
	if snap.Number%checkpointInterval == 0 && len(headers) > 0 {
		if err = snap.store(c.db); err != nil {
			return nil, err
		}
		log.Trace("Stored voting snapshot to disk", "number", snap.Number, "hash", snap.Hash)
	}
	return snap, err
}

當有新區塊頭到來時,則會使用snap.apply方法為這個區塊頭建立一個snapshot物件。

apply方法


// apply creates a new authorization snapshot by applying the given headers on
// top of the original one. With no headers it returns the receiver itself;
// otherwise it works on the result of s.copy() and returns that.
//
// It returns errInvalidVotingChain if the headers are not contiguous or do not
// directly follow the snapshot, errUnauthorizedSigner or errRecentlySigned if
// a header's signer is not allowed to sign, errInvalidVote if the header nonce
// is neither the authorize nor the drop vote, and any error from ecrecover.
func (s *Snapshot) apply(headers []*types.Header) (*Snapshot, error) {
	// Allow passing in no headers: nothing to apply, return the snapshot as is
	if len(headers) == 0 {
		return s, nil
	}
	// Sanity check that the headers are contiguous and follow the snapshot
	for i := 0; i < len(headers)-1; i++ {
		if headers[i+1].Number.Uint64() != headers[i].Number.Uint64()+1 {
			return nil, errInvalidVotingChain
		}
	}
	if headers[0].Number.Uint64() != s.Number+1 {
		return nil, errInvalidVotingChain
	}
	// Work on a copy of the snapshot
	snap := s.copy()
    
	// Iterate through the headers and update the copy
	for _, header := range headers {
		// Remove any votes on checkpoint blocks
		number := header.Number.Uint64()
		// At an epoch checkpoint, reset the votes and the tally
		if number%s.config.Epoch == 0 {
			snap.Votes = nil
			snap.Tally = make(map[common.Address]Tally)
		}
		// Delete the oldest signer from the recent list to allow it signing again
		if limit := uint64(len(snap.Signers)/2 + 1); number >= limit {
			delete(snap.Recents, number-limit)
		}
		// Recover the signer address from the header
		signer, err := ecrecover(header, s.sigcache)
		if err != nil {
			return nil, err
		}
		// Check that the signer is authorized
		if _, ok := snap.Signers[signer]; !ok {
			return nil, errUnauthorizedSigner
		}
		// Check that the signer has not signed too recently
		for _, recent := range snap.Recents {
			if recent == signer {
				return nil, errRecentlySigned
			}
		}
		snap.Recents[number] = signer

		// Header authorized, discard any previous vote from this signer on the
		// same target address
		for i, vote := range snap.Votes {
			if vote.Signer == signer && vote.Address == header.Coinbase {
				// Uncast the vote from the cached tally
				snap.uncast(vote.Address, vote.Authorize)

				// Uncast the vote from the chronological list
				snap.Votes = append(snap.Votes[:i], snap.Votes[i+1:]...)
				break // only one vote allowed
			}
		}
		// Tally up the new vote; the header nonce selects authorize or drop
		var authorize bool
		switch {
		case bytes.Equal(header.Nonce[:], nonceAuthVote):
			authorize = true
		case bytes.Equal(header.Nonce[:], nonceDropVote):
			authorize = false
		default:
			return nil, errInvalidVote
		}
		// The vote is appended to the chronological list only if cast reports true
		if snap.cast(header.Coinbase, authorize) {
			snap.Votes = append(snap.Votes, &Vote{
				Signer:    signer,
				Block:     number,
				Address:   header.Coinbase,
				Authorize: authorize,
			})
		}
		// If the vote passed (more than half of the signers), update the signer set
		if tally := snap.Tally[header.Coinbase]; tally.Votes > len(snap.Signers)/2 {
			if tally.Authorize {
				snap.Signers[header.Coinbase] = struct{}{}
			} else {
				delete(snap.Signers, header.Coinbase)

				// Signer list shrunk, delete any leftover recent caches
				if limit := uint64(len(snap.Signers)/2 + 1); number >= limit {
					delete(snap.Recents, number-limit)
				}
				// Discard any previous votes the deauthorized signer cast
				for i := 0; i < len(snap.Votes); i++ {
					if snap.Votes[i].Signer == header.Coinbase {
						// Uncast the vote from the cached tally
						snap.uncast(snap.Votes[i].Address, snap.Votes[i].Authorize)

						// Uncast the vote from the chronological list
						snap.Votes = append(snap.Votes[:i], snap.Votes[i+1:]...)

						// Step back so the element shifted into slot i is examined too
						i--
					}
				}
			}
			// Discard any previous votes around the just changed account
			for i := 0; i < len(snap.Votes); i++ {
				if snap.Votes[i].Address == header.Coinbase {
					snap.Votes = append(snap.Votes[:i], snap.Votes[i+1:]...)
					i--
				}
			}
			delete(snap.Tally, header.Coinbase)
		}
	}
	// Advance the snapshot to the last applied header
	snap.Number += uint64(len(headers))
	snap.Hash = headers[len(headers)-1].Hash()

	return snap, nil
}

在這個方法中根據區塊頭,更新snapshot結構的相關成員。比較重要的一個是對簽名者signer的管理,從recents中刪除最老的簽名者,並且將當前區塊的簽名者加入到recent快取中。另一個是對投票的處理。投票是在apply方法中進行處理的。可以看到,在Epoch檢查點,會刪除原有的投票,Epoch是30000,這個也是clique的投票週期。當投票超過一半,投票才能生效。

inturn方法

// inturn reports whether the given signer is the in-turn signer at the given
// block height, i.e. whether its position in the list returned by s.signers()
// equals number modulo the number of signers. A signer that is not in the
// list is never in-turn.
func (s *Snapshot) inturn(number uint64, signer common.Address) bool {
	list := s.signers()

	// Default to one-past-the-end, a position the modulo can never produce.
	position := len(list)
	for idx, candidate := range list {
		if candidate == signer {
			position = idx
			break
		}
	}
	slot := number % uint64(len(list))
	return slot == uint64(position)
}

這個方法判斷在給定的區塊高度上是否輪到指定的簽名者來簽名出塊。就是按照簽名者列表的順序輪流出塊。

CalcDifficulty函式

// CalcDifficulty returns the difficulty that a block sealed by signer on top
// of the given snapshot should have: a fresh copy of diffInTurn when the
// signer is in-turn for block snap.Number+1, and of diffNoTurn otherwise.
func CalcDifficulty(snap *Snapshot, signer common.Address) *big.Int {
	difficulty := diffNoTurn
	if next := snap.Number + 1; snap.inturn(next, signer) {
		difficulty = diffInTurn
	}
	// Return a copy so callers cannot mutate the package-level values.
	return new(big.Int).Set(difficulty)
}

如果輪到節點出塊,它的難度係數就為2,否則設定為1。區塊鏈會選擇難度係數最大的一條鏈為當前鏈。

3 clique的#17620 bug

該bug見於go-ethereum 1.8.14和1.8.15版本,用clique機制建立的私有鏈執行正常,但是使用一個新節點想加入區塊鏈,在同步的時候,我的是在90001時報錯:

########## BAD BLOCK #########
Chain config: {ChainID: 115 Homestead: 1 DAO: <nil> DAOSupport: false EIP150: 2 EIP155: 3 EIP158: 3 Byzantium: 4 Constantinople: <nil> Engine: clique}

Number: 90001
Hash: 0xdcccdcf756f7c9e3fb5c8360bb98b2303c763126db14fb8ac499cb18ee71cd59


Error: unauthorized
##############################

網上有這個問題的討論:

https://ethereum.stackexchange.com/questions/60023/synchronisation-failed-dropping-peer-err-retrieved-hash-chain-is-invalid-me

go-ethereum開發者karalabe關於這個bug的說法:

This is the fix for the Rinkeby consensus split.

When adding the light client checkpoint sync support for Rinkeby (Clique), we needed to relax the requirement that signing/voting snapshots are generated from previous blocks, and rather trust a standalone epoch block in itself, similar to how we trust the genesis (so light nodes can sync from there instead of verifying the entire header chain).

The oversight however was that the genesis block doesn't have previous signers (who can't sign currently), whereas checkpoint blocks do have previous signers. The checkpoint sync extension caused Clique nodes to discard previous signers at epoch blocks, allowing any authorized signer to seal the next block.

This caused signers running on v1.8.14 and v1.8.15 to create an invalid block, sealed by a node that already sealed recently and shouldn't have been allowed to do so, causing a consensus split between new nodes and old nodes.

This PR fixes the issue by making the checkpoint snapshot trust more strict, only ever trusting a snapshot block blindly if it's the genesis or if its parent is missing (i.e. we're starting sync from the middle of the chain, not the genesis). For all other scenarios, we still regenerate the snapshot ourselves along with the recent signer list.

Note, this hotfix does still mean that light clients are susceptible for the same bug - whereby they accept blocks signed by the wrong signers for a couple blocks - following a LES checkpoint, but that's fine because as long as full nodes correctly enforce the good chain, light clients can only ever import a couple bad blocks before the get stuck or switch to the properly validated chain. After len(signers) / 2 blocks after initial startup, light clients become immune tho this "vulnerability" as well.

簡單說就是v1.8.14和v1.8.15兩個版本引入了這個bug,它導致一個最近剛簽過名、按規則還不該再簽名的簽名者去簽名區塊,生成了一個無效區塊。這個無效區塊當時被區塊鏈其它節點驗證通過並寫入了區塊鏈。但是新節點驗證時就會報錯。bug的修復就是在建立snapshot快照時,進行更嚴格的檢查,只有創世區塊或者父區塊缺失時(比如從區塊鏈中間開始同步,而不是從創世區塊)才允許直接信任該區塊來建立快照;其它情況下仍然根據之前的區塊重新生成快照以及最近簽名者列表。升級到1.8.16版本就能解決這個問題。

我們看1.8.15的關於建立快照的程式碼:

clique.go的snapshot方法中的:

func (c *Clique) snapshot(chain consensus.ChainReader, number uint64, hash common.Hash, parents []*types.Header) (*Snapshot, error) {
    ........
    
    // If we're at an checkpoint block, make a snapshot if it's known
		if number%c.config.Epoch == 0 {
			checkpoint := chain.GetHeaderByNumber(number)
			if checkpoint != nil {
				hash := checkpoint.Hash()

				signers := make([]common.Address, (len(checkpoint.Extra)-extraVanity-extraSeal)/common.AddressLength)
				for i := 0; i < len(signers); i++ {
					copy(signers[i][:], checkpoint.Extra[extraVanity+i*common.AddressLength:])
				}
				snap = newSnapshot(c.config, c.signatures, number, hash, signers)
				if err := snap.store(c.db); err != nil {
					return nil, err
				}
				log.Info("Stored checkpoint snapshot to disk", "number", number, "hash", hash)
				break
			}
		}

    ........
}

這個方法只要在Epoch週期檢查點就會重新建立快照,新建立的快照中最近簽名者列表(Recents)是空的,原有的最近簽名記錄被丟棄,這樣導致原來剛簽過名的驗證者也可以馬上繼續簽名。v1.8.17的解決方案是:

// snapshot retrieves the authorization snapshot at a given point in time.
func (c *Clique) snapshot(chain consensus.ChainReader, number uint64, hash common.Hash, parents []*types.Header) (*Snapshot, error) {
    ......
    
    // If we're at an checkpoint block, make a snapshot if it's known
		if number == 0 || (number%c.config.Epoch == 0 && chain.GetHeaderByNumber(number-1) == nil) {
			checkpoint := chain.GetHeaderByNumber(number)
			if checkpoint != nil {
				hash := checkpoint.Hash()

				signers := make([]common.Address, (len(checkpoint.Extra)-extraVanity-extraSeal)/common.AddressLength)
				for i := 0; i < len(signers); i++ {
					copy(signers[i][:], checkpoint.Extra[extraVanity+i*common.AddressLength:])
				}
				snap = newSnapshot(c.config, c.signatures, number, hash, signers)
				if err := snap.store(c.db); err != nil {
					return nil, err
				}
				log.Info("Stored checkpoint snapshot to disk", "number", number, "hash", hash)
				break
			}
		}

    ......
}

只有創世區塊或者在Epoch檢查點時父區塊缺失時才會重新建立快照。