summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: chris erway <chris.erway@algorand.com> 2022-04-06 16:55:15 -0400
committer: chris erway <chris.erway@algorand.com> 2022-04-06 16:55:15 -0400
commit: 9977349bf53e273d0e36e06a0daf69bd46b2a84f (patch)
tree: 0b393a9cd748a775c22e235505e992841759c6c6
parent: 0146fff7322492a8db06d7dc1765e4eae271a884 (diff)
add counters around dropping pseudonode and voteVerifier tasks, and slow pseudonode responses (branch: agreement-counters)
-rw-r--r-- agreement/cryptoVerifier.go 5
-rw-r--r-- agreement/pseudonode.go 9
2 files changed, 14 insertions, 0 deletions
diff --git a/agreement/cryptoVerifier.go b/agreement/cryptoVerifier.go
index ff8a6d6aa..cf6c466e5 100644
--- a/agreement/cryptoVerifier.go
+++ b/agreement/cryptoVerifier.go
@@ -22,8 +22,12 @@ import (
"github.com/algorand/go-algorand/logging"
"github.com/algorand/go-algorand/protocol"
+ "github.com/algorand/go-algorand/util/metrics"
)
+var voteVerifierOutFullCounter = metrics.MakeCounter(
+ metrics.MetricName{Name: "algod_agreement_vote_verifier_responses_dropped", Description: "Number of voteVerifier responses dropped due to full channel"})
+
// TODO put these in config
const (
voteParallelism = 16
@@ -210,6 +214,7 @@ func (c *poolCryptoVerifier) voteFillWorker(toBundleWait chan<- bundleFuture) {
select {
case c.votes.out <- asyncVerifyVoteResponse{index: votereq.TaskIndex, err: err, cancelled: true}:
default:
+ voteVerifierOutFullCounter.Inc(nil)
c.log.Infof("poolCryptoVerifier.voteFillWorker unable to write failed enqueue response to output channel")
}
}
diff --git a/agreement/pseudonode.go b/agreement/pseudonode.go
index f52854d6a..bdaa2f359 100644
--- a/agreement/pseudonode.go
+++ b/agreement/pseudonode.go
@@ -29,6 +29,7 @@ import (
"github.com/algorand/go-algorand/logging/logspec"
"github.com/algorand/go-algorand/logging/telemetryspec"
"github.com/algorand/go-algorand/protocol"
+ "github.com/algorand/go-algorand/util/metrics"
)
// TODO put these in config
@@ -43,6 +44,9 @@ var errPseudonodeVerifierClosedChannel = errors.New("crypto verifier closed the
var errPseudonodeNoVotes = errors.New("no valid participation keys to generate votes for given round")
var errPseudonodeNoProposals = errors.New("no valid participation keys to generate proposals for given round")
+var pseudonodeBacklogFullByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_dropped_{TAG}", "Number of pseudonode tasks dropped per type")
+var pseudonodeResultTimeoutsByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_timeouts_{TAG}", "Number of pseudonode task result timeouts per type")
+
// A pseudonode creates proposals and votes with a KeyManager which holds participation keys.
//
// It constructs these messages as if they arrived from an external source and were verified.
@@ -176,6 +180,7 @@ func (n asyncPseudonode) MakeProposals(ctx context.Context, r round, p period) (
return proposalTask.outputChannel(), nil
default:
proposalTask.close()
+ pseudonodeBacklogFullByType.Add("proposal", 1)
return nil, fmt.Errorf("unable to make proposal for (%d, %d): %w", r, p, errPseudonodeBacklogFull)
}
}
@@ -193,6 +198,7 @@ func (n asyncPseudonode) MakeVotes(ctx context.Context, r round, p period, s ste
return proposalTask.outputChannel(), nil
default:
proposalTask.close()
+ pseudonodeBacklogFullByType.Add("vote", 1)
return nil, fmt.Errorf("unable to make vote for (%d, %d, %d): %w", r, p, s, errPseudonodeBacklogFull)
}
}
@@ -474,6 +480,7 @@ verifiedVotesLoop:
return
case <-outputTimeout:
// we've been waiting for too long for this vote to be written to the output.
+ pseudonodeResultTimeoutsByType.Add("vote", 1)
t.node.log.Warnf("pseudonode.makeVotes: unable to write vote to output channel for round %d, period %d", t.round, t.period)
outputTimeout = nil
}
@@ -577,6 +584,7 @@ verifiedVotesLoop:
return
case <-outputTimeout:
// we've been waiting for too long for this vote to be written to the output.
+ pseudonodeResultTimeoutsByType.Add("pvote", 1)
t.node.log.Warnf("pseudonode.makeProposals: unable to write proposal vote to output channel for round %d, period %d", t.round, t.period)
outputTimeout = nil
}
@@ -597,6 +605,7 @@ verifiedPayloadsLoop:
return
case <-outputTimeout:
// we've been waiting for too long for this vote to be written to the output.
+ pseudonodeResultTimeoutsByType.Add("ppayload", 1)
t.node.log.Warnf("pseudonode.makeProposals: unable to write proposal payload to output channel for round %d, period %d", t.round, t.period)
outputTimeout = nil
}