From 532d8ce63f5ca524c40ac2b0d14e1039afa9485b Mon Sep 17 00:00:00 2001 From: Joshua Gutow Date: Fri, 18 Aug 2023 14:19:45 -0700 Subject: [PATCH] op-challenger: Add basic metrics & pprof setup This adds metrics & pprof to the op-challenger. I had to add a default config option to the metrics & pprof in order to make the tests work (because it verifies that the result of the CLI parsing is as expected when compared to an config struct which is constructed). This also had to modify how the challenger stores its version information because we include that information in the metrics when we record that it is up. Unlike the proposer & batcher, the challenger has a different initialization flow which does not easily make the version information accessible. --- op-batcher/batcher/batch_submitter.go | 2 +- op-challenger/config/config.go | 16 +++- op-challenger/fault/service.go | 48 +++++++++++- op-challenger/flags/flags.go | 8 ++ op-challenger/metrics/metrics.go | 86 +++++++++++++++++++++ op-challenger/metrics/noop.go | 14 ++++ op-challenger/scripts/alphabet/charlie.sh | 2 + op-challenger/scripts/alphabet/mallory.sh | 2 + op-challenger/version/version.go | 8 ++ op-proposer/proposer/l2_output_submitter.go | 2 +- op-service/metrics/cli.go | 14 +++- op-service/pprof/cli.go | 14 +++- 12 files changed, 204 insertions(+), 12 deletions(-) create mode 100644 op-challenger/metrics/metrics.go create mode 100644 op-challenger/metrics/noop.go diff --git a/op-batcher/batcher/batch_submitter.go b/op-batcher/batcher/batch_submitter.go index 5193c4f2ac38..19ab6c39b210 100644 --- a/op-batcher/batcher/batch_submitter.go +++ b/op-batcher/batcher/batch_submitter.go @@ -68,7 +68,7 @@ func Main(version string, cliCtx *cli.Context) error { l.Info("starting metrics server", "addr", metricsCfg.ListenAddr, "port", metricsCfg.ListenPort) go func() { if err := m.Serve(ctx, metricsCfg.ListenAddr, metricsCfg.ListenPort); err != nil { - l.Error("error starting metrics server", err) + l.Error("error starting metrics server", "err", err) } }() m.StartBalanceMetrics(ctx, l, batchSubmitter.L1Client, batchSubmitter.TxManager.From()) diff --git a/op-challenger/config/config.go b/op-challenger/config/config.go index cdfa648416bd..b0fe832bbc63 100644 --- a/op-challenger/config/config.go +++ b/op-challenger/config/config.go @@ -5,6 +5,8 @@ import ( "fmt" "github.com/ethereum-optimism/optimism/op-node/chaincfg" + opmetrics "github.com/ethereum-optimism/optimism/op-service/metrics" + oppprof "github.com/ethereum-optimism/optimism/op-service/pprof" "github.com/ethereum-optimism/optimism/op-service/txmgr" "github.com/ethereum/go-ethereum/common" ) @@ -105,7 +107,9 @@ type Config struct { CannonL2 string // L2 RPC Url CannonSnapshotFreq uint // Frequency of snapshots to create when executing cannon (in VM instructions) - TxMgrConfig txmgr.CLIConfig + TxMgrConfig txmgr.CLIConfig + MetricsConfig opmetrics.CLIConfig + PprofConfig oppprof.CLIConfig } func NewConfig( @@ -122,7 +126,9 @@ func NewConfig( TraceType: traceType, - TxMgrConfig: txmgr.NewCLIConfig(l1EthRpc), + TxMgrConfig: txmgr.NewCLIConfig(l1EthRpc), + MetricsConfig: opmetrics.DefaultCLIConfig(), + PprofConfig: oppprof.DefaultCLIConfig(), CannonSnapshotFreq: DefaultCannonSnapshotFreq, } @@ -182,5 +188,11 @@ func (c Config) Check() error { if err := c.TxMgrConfig.Check(); err != nil { return err } + if err := c.MetricsConfig.Check(); err != nil { + return err + } + if err := c.PprofConfig.Check(); err != nil { + return err + } return nil } diff --git a/op-challenger/fault/service.go b/op-challenger/fault/service.go index a93de15a4103..5fea8459975d 100644 --- a/op-challenger/fault/service.go +++ b/op-challenger/fault/service.go @@ -10,9 +10,11 @@ import ( "github.com/ethereum-optimism/optimism/op-challenger/fault/alphabet" "github.com/ethereum-optimism/optimism/op-challenger/fault/cannon" "github.com/ethereum-optimism/optimism/op-challenger/fault/types" + "github.com/ethereum-optimism/optimism/op-challenger/metrics" + "github.com/ethereum-optimism/optimism/op-challenger/version" "github.com/ethereum-optimism/optimism/op-service/client" + oppprof "github.com/ethereum-optimism/optimism/op-service/pprof" "github.com/ethereum-optimism/optimism/op-service/txmgr" - "github.com/ethereum-optimism/optimism/op-service/txmgr/metrics" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethclient" "github.com/ethereum/go-ethereum/log" @@ -30,11 +32,13 @@ type service struct { agreeWithProposedOutput bool caller *FaultCaller logger log.Logger + metrics metrics.Metricer } // NewService creates a new Service. func NewService(ctx context.Context, logger log.Logger, cfg *config.Config) (*service, error) { - txMgr, err := txmgr.NewSimpleTxManager("challenger", logger, &metrics.NoopTxMetrics{}, cfg.TxMgrConfig) + m := metrics.NewMetrics() + txMgr, err := txmgr.NewSimpleTxManager("challenger", logger, &m.TxMetrics, cfg.TxMgrConfig) if err != nil { return nil, fmt.Errorf("failed to create the transaction manager: %w", err) } @@ -44,6 +48,27 @@ func NewService(ctx context.Context, logger log.Logger, cfg *config.Config) (*se return nil, fmt.Errorf("failed to dial L1: %w", err) } + pprofConfig := cfg.PprofConfig + if pprofConfig.Enabled { + logger.Info("starting pprof", "addr", pprofConfig.ListenAddr, "port", pprofConfig.ListenPort) + go func() { + if err := oppprof.ListenAndServe(ctx, pprofConfig.ListenAddr, pprofConfig.ListenPort); err != nil { + logger.Error("error starting pprof", "err", err) + } + }() + } + + metricsCfg := cfg.MetricsConfig + if metricsCfg.Enabled { + logger.Info("starting metrics server", "addr", metricsCfg.ListenAddr, "port", metricsCfg.ListenPort) + go func() { + if err := m.Serve(ctx, metricsCfg.ListenAddr, metricsCfg.ListenPort); err != nil { + logger.Error("error starting metrics server", "err", err) + } + }() + m.StartBalanceMetrics(ctx, logger, client, txMgr.From()) + } + contract, err := bindings.NewFaultDisputeGameCaller(cfg.GameAddress, client) if err != nil { return nil, fmt.Errorf("failed to bind the fault dispute game contract: %w", err) @@ -76,11 +101,22 @@ func NewService(ctx context.Context, logger log.Logger, cfg *config.Config) (*se return nil, fmt.Errorf("unsupported trace type: %v", cfg.TraceType) } - return newTypedService(ctx, logger, cfg, loader, gameDepth, client, trace, updater, txMgr) + return newTypedService(ctx, logger, cfg, loader, gameDepth, client, trace, updater, txMgr, m) } // newTypedService creates a new Service from a provided trace provider. -func newTypedService(ctx context.Context, logger log.Logger, cfg *config.Config, loader Loader, gameDepth uint64, client *ethclient.Client, provider types.TraceProvider, updater types.OracleUpdater, txMgr txmgr.TxManager) (*service, error) { +func newTypedService(ctx context.Context, + logger log.Logger, + cfg *config.Config, + loader Loader, + gameDepth uint64, + client *ethclient.Client, + provider types.TraceProvider, + updater types.OracleUpdater, + txMgr txmgr.TxManager, + metrics metrics.Metricer, +) (*service, error) { + if err := ValidateAbsolutePrestate(ctx, provider, loader); err != nil { return nil, fmt.Errorf("failed to validate absolute prestate: %w", err) } @@ -96,11 +132,15 @@ func newTypedService(ctx context.Context, logger log.Logger, cfg *config.Config, return nil, fmt.Errorf("failed to bind the fault contract: %w", err) } + metrics.RecordInfo(version.SimpleWithMeta) + metrics.RecordUp() + return &service{ agent: NewAgent(loader, int(gameDepth), provider, responder, updater, cfg.AgreeWithProposedOutput, gameLogger), agreeWithProposedOutput: cfg.AgreeWithProposedOutput, caller: caller, logger: gameLogger, + metrics: metrics, }, nil } diff --git a/op-challenger/flags/flags.go b/op-challenger/flags/flags.go index 5250b2c3443e..98bc522e3d39 100644 --- a/op-challenger/flags/flags.go +++ b/op-challenger/flags/flags.go @@ -9,6 +9,8 @@ import ( opservice "github.com/ethereum-optimism/optimism/op-service" openum "github.com/ethereum-optimism/optimism/op-service/enum" oplog "github.com/ethereum-optimism/optimism/op-service/log" + opmetrics "github.com/ethereum-optimism/optimism/op-service/metrics" + oppprof "github.com/ethereum-optimism/optimism/op-service/pprof" "github.com/ethereum-optimism/optimism/op-service/txmgr" "github.com/ethereum/go-ethereum/common" @@ -134,6 +136,8 @@ var optionalFlags = []cli.Flag{ func init() { optionalFlags = append(optionalFlags, oplog.CLIFlags(envVarPrefix)...) optionalFlags = append(optionalFlags, txmgr.CLIFlags(envVarPrefix)...) + optionalFlags = append(optionalFlags, opmetrics.CLIFlags(envVarPrefix)...) + optionalFlags = append(optionalFlags, oppprof.CLIFlags(envVarPrefix)...) Flags = append(requiredFlags, optionalFlags...) } @@ -201,6 +205,8 @@ func NewConfigFromCLI(ctx *cli.Context) (*config.Config, error) { } txMgrConfig := txmgr.ReadCLIConfig(ctx) + metricsConfig := opmetrics.ReadCLIConfig(ctx) + pprofConfig := oppprof.ReadCLIConfig(ctx) traceTypeFlag := config.TraceType(strings.ToLower(ctx.String(TraceTypeFlag.Name))) @@ -222,5 +228,7 @@ func NewConfigFromCLI(ctx *cli.Context) (*config.Config, error) { CannonSnapshotFreq: ctx.Uint(CannonSnapshotFreqFlag.Name), AgreeWithProposedOutput: ctx.Bool(AgreeWithProposedOutputFlag.Name), TxMgrConfig: txMgrConfig, + MetricsConfig: metricsConfig, + PprofConfig: pprofConfig, }, nil } diff --git a/op-challenger/metrics/metrics.go b/op-challenger/metrics/metrics.go new file mode 100644 index 000000000000..cefc488eab35 --- /dev/null +++ b/op-challenger/metrics/metrics.go @@ -0,0 +1,86 @@ +package metrics + +import ( + "context" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethclient" + "github.com/ethereum/go-ethereum/log" + "github.com/prometheus/client_golang/prometheus" + + opmetrics "github.com/ethereum-optimism/optimism/op-service/metrics" + txmetrics "github.com/ethereum-optimism/optimism/op-service/txmgr/metrics" +) + +const Namespace = "op_challenger" + +type Metricer interface { + RecordInfo(version string) + RecordUp() + + // Record Tx metrics + txmetrics.TxMetricer +} + +type Metrics struct { + ns string + registry *prometheus.Registry + factory opmetrics.Factory + + txmetrics.TxMetrics + + info prometheus.GaugeVec + up prometheus.Gauge +} + +var _ Metricer = (*Metrics)(nil) + +func NewMetrics() *Metrics { + registry := opmetrics.NewRegistry() + factory := opmetrics.With(registry) + + return &Metrics{ + ns: Namespace, + registry: registry, + factory: factory, + + TxMetrics: txmetrics.MakeTxMetrics(Namespace, factory), + + info: *factory.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "info", + Help: "Pseudo-metric tracking version and config info", + }, []string{ + "version", + }), + up: factory.NewGauge(prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "up", + Help: "1 if the op-challenger has finished starting up", + }), + } +} + +func (m *Metrics) Serve(ctx context.Context, host string, port int) error { + return opmetrics.ListenAndServe(ctx, m.registry, host, port) +} + +func (m *Metrics) StartBalanceMetrics(ctx context.Context, l log.Logger, client *ethclient.Client, account common.Address) { + opmetrics.LaunchBalanceMetrics(ctx, l, m.registry, m.ns, client, account) +} + +// RecordInfo sets a pseudo-metric that contains versioning and +// config info for the op-proposer. +func (m *Metrics) RecordInfo(version string) { + m.info.WithLabelValues(version).Set(1) +} + +// RecordUp sets the up metric to 1. +func (m *Metrics) RecordUp() { + prometheus.MustRegister() + m.up.Set(1) +} + +func (m *Metrics) Document() []opmetrics.DocumentedMetric { + return m.factory.Document() +} diff --git a/op-challenger/metrics/noop.go b/op-challenger/metrics/noop.go new file mode 100644 index 000000000000..052d288092e8 --- /dev/null +++ b/op-challenger/metrics/noop.go @@ -0,0 +1,14 @@ +package metrics + +import ( + txmetrics "github.com/ethereum-optimism/optimism/op-service/txmgr/metrics" +) + +type noopMetrics struct { + txmetrics.NoopTxMetrics +} + +var NoopMetrics Metricer = new(noopMetrics) + +func (*noopMetrics) RecordInfo(version string) {} +func (*noopMetrics) RecordUp() {} diff --git a/op-challenger/scripts/alphabet/charlie.sh b/op-challenger/scripts/alphabet/charlie.sh index 97abaad76e8c..e72cbd2b407f 100755 --- a/op-challenger/scripts/alphabet/charlie.sh +++ b/op-challenger/scripts/alphabet/charlie.sh @@ -27,4 +27,6 @@ $CHALLENGER_DIR/bin/op-challenger \ --game-address $FAULT_GAME_ADDRESS \ --private-key $CHARLIE_KEY \ --num-confirmations 1 \ + --metrics.enabled --metrics.port=7304 \ + --pprof.enabled --pprof.port=6064 \ --agree-with-proposed-output=true diff --git a/op-challenger/scripts/alphabet/mallory.sh b/op-challenger/scripts/alphabet/mallory.sh index 8549b29b8583..fdc71585ed7c 100755 --- a/op-challenger/scripts/alphabet/mallory.sh +++ b/op-challenger/scripts/alphabet/mallory.sh @@ -27,4 +27,6 @@ $CHALLENGER_DIR/bin/op-challenger \ --game-address $FAULT_GAME_ADDRESS \ --private-key $MALLORY_KEY \ --num-confirmations 1 \ + --metrics.enabled --metrics.port=7305 \ + --pprof.enabled --pprof.port=6065 \ --agree-with-proposed-output=false diff --git a/op-challenger/version/version.go b/op-challenger/version/version.go index 71ddcf30fb82..834fc089b19e 100644 --- a/op-challenger/version/version.go +++ b/op-challenger/version/version.go @@ -4,3 +4,11 @@ var ( Version = "v0.1.0" Meta = "dev" ) + +var SimpleWithMeta = func() string { + v := Version + if Meta != "" { + v += "-" + Meta + } + return v +}() diff --git a/op-proposer/proposer/l2_output_submitter.go b/op-proposer/proposer/l2_output_submitter.go index 5fb5d6d5d71e..69fd34e88206 100644 --- a/op-proposer/proposer/l2_output_submitter.go +++ b/op-proposer/proposer/l2_output_submitter.go @@ -85,7 +85,7 @@ func Main(version string, cliCtx *cli.Context) error { l.Info("starting metrics server", "addr", metricsCfg.ListenAddr, "port", metricsCfg.ListenPort) go func() { if err := m.Serve(ctx, metricsCfg.ListenAddr, metricsCfg.ListenPort); err != nil { - l.Error("error starting metrics server", err) + l.Error("error starting metrics server", "err", err) } }() m.StartBalanceMetrics(ctx, l, proposerConfig.L1Client, proposerConfig.TxManager.From()) diff --git a/op-service/metrics/cli.go b/op-service/metrics/cli.go index 27d96cebcc68..ac02e33bf0c6 100644 --- a/op-service/metrics/cli.go +++ b/op-service/metrics/cli.go @@ -13,8 +13,18 @@ const ( EnabledFlagName = "metrics.enabled" ListenAddrFlagName = "metrics.addr" PortFlagName = "metrics.port" + defaultListenAddr = "0.0.0.0" + defaultListenPort = 7300 ) +func DefaultCLIConfig() CLIConfig { + return CLIConfig{ + Enabled: false, + ListenAddr: defaultListenAddr, + ListenPort: defaultListenPort, + } +} + func CLIFlags(envPrefix string) []cli.Flag { return []cli.Flag{ &cli.BoolFlag{ @@ -25,13 +35,13 @@ func CLIFlags(envPrefix string) []cli.Flag { &cli.StringFlag{ Name: ListenAddrFlagName, Usage: "Metrics listening address", - Value: "0.0.0.0", // TODO(CLI-4159): Switch to 127.0.0.1 + Value: defaultListenAddr, // TODO(CLI-4159): Switch to 127.0.0.1 EnvVars: opservice.PrefixEnvVar(envPrefix, "METRICS_ADDR"), }, &cli.IntFlag{ Name: PortFlagName, Usage: "Metrics listening port", - Value: 7300, + Value: defaultListenPort, EnvVars: opservice.PrefixEnvVar(envPrefix, "METRICS_PORT"), }, } diff --git a/op-service/pprof/cli.go b/op-service/pprof/cli.go index 161bc2ae2c86..3a732cd184ee 100644 --- a/op-service/pprof/cli.go +++ b/op-service/pprof/cli.go @@ -12,8 +12,18 @@ const ( EnabledFlagName = "pprof.enabled" ListenAddrFlagName = "pprof.addr" PortFlagName = "pprof.port" + defaultListenAddr = "0.0.0.0" + defaultListenPort = 6060 ) +func DefaultCLIConfig() CLIConfig { + return CLIConfig{ + Enabled: false, + ListenAddr: defaultListenAddr, + ListenPort: defaultListenPort, + } +} + func CLIFlags(envPrefix string) []cli.Flag { return []cli.Flag{ &cli.BoolFlag{ @@ -24,13 +34,13 @@ func CLIFlags(envPrefix string) []cli.Flag { &cli.StringFlag{ Name: ListenAddrFlagName, Usage: "pprof listening address", - Value: "0.0.0.0", // TODO(CLI-4159): Switch to 127.0.0.1 + Value: defaultListenAddr, // TODO(CLI-4159): Switch to 127.0.0.1 EnvVars: opservice.PrefixEnvVar(envPrefix, "PPROF_ADDR"), }, &cli.IntFlag{ Name: PortFlagName, Usage: "pprof listening port", - Value: 6060, + Value: defaultListenPort, EnvVars: opservice.PrefixEnvVar(envPrefix, "PPROF_PORT"), }, }