diff options
author | chris erway <chris.erway@algorand.com> | 2022-04-06 16:55:15 -0400 |
---|---|---|
committer | chris erway <chris.erway@algorand.com> | 2022-04-06 16:55:15 -0400 |
commit | 9977349bf53e273d0e36e06a0daf69bd46b2a84f (patch) | |
tree | 0b393a9cd748a775c22e235505e992841759c6c6 | |
parent | 0146fff7322492a8db06d7dc1765e4eae271a884 (diff) |
add counters around dropping pseudonode and voteVerifier tasks, and slow pseudonode responses [agreement-counters]
-rw-r--r-- | agreement/cryptoVerifier.go | 5 | ||||
-rw-r--r-- | agreement/pseudonode.go | 9 |
2 files changed, 14 insertions, 0 deletions
diff --git a/agreement/cryptoVerifier.go b/agreement/cryptoVerifier.go index ff8a6d6aa..cf6c466e5 100644 --- a/agreement/cryptoVerifier.go +++ b/agreement/cryptoVerifier.go @@ -22,8 +22,12 @@ import ( "github.com/algorand/go-algorand/logging" "github.com/algorand/go-algorand/protocol" + "github.com/algorand/go-algorand/util/metrics" ) +var voteVerifierOutFullCounter = metrics.MakeCounter( + metrics.MetricName{Name: "algod_agreement_vote_verifier_responses_dropped", Description: "Number of voteVerifier responses dropped due to full channel"}) + // TODO put these in config const ( voteParallelism = 16 @@ -210,6 +214,7 @@ func (c *poolCryptoVerifier) voteFillWorker(toBundleWait chan<- bundleFuture) { select { case c.votes.out <- asyncVerifyVoteResponse{index: votereq.TaskIndex, err: err, cancelled: true}: default: + voteVerifierOutFullCounter.Inc(nil) c.log.Infof("poolCryptoVerifier.voteFillWorker unable to write failed enqueue response to output channel") } } diff --git a/agreement/pseudonode.go b/agreement/pseudonode.go index f52854d6a..bdaa2f359 100644 --- a/agreement/pseudonode.go +++ b/agreement/pseudonode.go @@ -29,6 +29,7 @@ import ( "github.com/algorand/go-algorand/logging/logspec" "github.com/algorand/go-algorand/logging/telemetryspec" "github.com/algorand/go-algorand/protocol" + "github.com/algorand/go-algorand/util/metrics" ) // TODO put these in config @@ -43,6 +44,9 @@ var errPseudonodeVerifierClosedChannel = errors.New("crypto verifier closed the var errPseudonodeNoVotes = errors.New("no valid participation keys to generate votes for given round") var errPseudonodeNoProposals = errors.New("no valid participation keys to generate proposals for given round") +var pseudonodeBacklogFullByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_dropped_{TAG}", "Number of pseudonode tasks dropped per type") +var pseudonodeResultTimeoutsByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_timeouts_{TAG}", "Number of pseudonode task result 
timeouts per type") + // A pseudonode creates proposals and votes with a KeyManager which holds participation keys. // // It constructs these messages as if they arrived from an external source and were verified. @@ -176,6 +180,7 @@ func (n asyncPseudonode) MakeProposals(ctx context.Context, r round, p period) ( return proposalTask.outputChannel(), nil default: proposalTask.close() + pseudonodeBacklogFullByType.Add("proposal", 1) return nil, fmt.Errorf("unable to make proposal for (%d, %d): %w", r, p, errPseudonodeBacklogFull) } } @@ -193,6 +198,7 @@ func (n asyncPseudonode) MakeVotes(ctx context.Context, r round, p period, s ste return proposalTask.outputChannel(), nil default: proposalTask.close() + pseudonodeBacklogFullByType.Add("vote", 1) return nil, fmt.Errorf("unable to make vote for (%d, %d, %d): %w", r, p, s, errPseudonodeBacklogFull) } } @@ -474,6 +480,7 @@ verifiedVotesLoop: return case <-outputTimeout: // we've been waiting for too long for this vote to be written to the output. + pseudonodeResultTimeoutsByType.Add("vote", 1) t.node.log.Warnf("pseudonode.makeVotes: unable to write vote to output channel for round %d, period %d", t.round, t.period) outputTimeout = nil } @@ -577,6 +584,7 @@ verifiedVotesLoop: return case <-outputTimeout: // we've been waiting for too long for this vote to be written to the output. + pseudonodeResultTimeoutsByType.Add("pvote", 1) t.node.log.Warnf("pseudonode.makeProposals: unable to write proposal vote to output channel for round %d, period %d", t.round, t.period) outputTimeout = nil } @@ -597,6 +605,7 @@ verifiedPayloadsLoop: return case <-outputTimeout: // we've been waiting for too long for this vote to be written to the output. + pseudonodeResultTimeoutsByType.Add("ppayload", 1) t.node.log.Warnf("pseudonode.makeProposals: unable to write proposal payload to output channel for round %d, period %d", t.round, t.period) outputTimeout = nil } |