Skip to content

Commit b1aae87

Browse files
Add cpu-profile interactive command (#1358)
* Add cpu-profile interactive command * better doc markdown Signed-off-by: Tim Vaillancourt <[email protected]> * set block profile after isProfiling=1 Signed-off-by: Tim Vaillancourt <[email protected]> * improve test Signed-off-by: Tim Vaillancourt <[email protected]> * check isCPUProfiling later Signed-off-by: Tim Vaillancourt <[email protected]> * Cleanup Signed-off-by: Tim Vaillancourt <[email protected]> * Fix discrepancy Signed-off-by: Tim Vaillancourt <[email protected]> * move base64 to .applyServerCommand(...) Signed-off-by: Tim Vaillancourt <[email protected]> --------- Signed-off-by: Tim Vaillancourt <[email protected]> Co-authored-by: meiji163 <[email protected]>
1 parent 3aa6912 commit b1aae87

File tree

3 files changed

+138
-0
lines changed

3 files changed

+138
-0
lines changed

doc/interactive-commands.md

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Both interfaces may serve at the same time. Both respond to simple text command,
1717
- `help`: shows a brief list of available commands
1818
- `status`: returns a detailed status summary of migration progress and configuration
1919
- `sup`: returns a brief status summary of migration progress
20+
- `cpu-profile`: returns a base64-encoded [`runtime/pprof`](https://pkg.go.dev/runtime/pprof) CPU profile captured over the given duration (default: `30s`). The comma-separated options `gzip` and/or `block` (blocked profile) may follow the profile duration, e.g. `cpu-profile=10s,gzip`
2021
- `coordinates`: returns recent (though not exactly up to date) binary log coordinates of the inspected server
2122
- `applier`: returns the hostname of the applier
2223
- `inspector`: returns the hostname of the inspector

go/logic/server.go

+69
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,30 @@ package logic
77

88
import (
99
"bufio"
10+
"bytes"
11+
"compress/gzip"
12+
"encoding/base64"
13+
"errors"
1014
"fmt"
1115
"io"
1216
"net"
1317
"os"
18+
"runtime"
19+
"runtime/pprof"
1420
"strconv"
1521
"strings"
1622
"sync/atomic"
23+
"time"
1724

1825
"github.com/github/gh-ost/go/base"
1926
)
2027

28+
var (
29+
// ErrCPUProfilingBadOption is returned by runCPUProfile when an option following the duration is not recognized.
ErrCPUProfilingBadOption = errors.New("unrecognized cpu profiling option")
30+
// ErrCPUProfilingInProgress is returned by runCPUProfile when the Server is already capturing a CPU profile.
ErrCPUProfilingInProgress = errors.New("cpu profiling already in progress")
31+
// defaultCPUProfileDuration is the capture duration runCPUProfile uses when it is given no arguments.
defaultCPUProfileDuration = time.Second * 30
32+
)
33+
2134
// printStatusFunc is the signature of the callback stored in Server.printStatus; it takes a PrintStatusRule and an io.Writer.
type printStatusFunc func(PrintStatusRule, io.Writer)
2235

2336
// Server listens for requests on a socket file or via TCP
@@ -27,6 +40,7 @@ type Server struct {
2740
tcpListener net.Listener
2841
hooksExecutor *HooksExecutor
2942
printStatus printStatusFunc
43+
isCPUProfiling int64
3044
}
3145

3246
func NewServer(migrationContext *base.MigrationContext, hooksExecutor *HooksExecutor, printStatus printStatusFunc) *Server {
@@ -37,6 +51,54 @@ func NewServer(migrationContext *base.MigrationContext, hooksExecutor *HooksExec
3751
}
3852
}
3953

54+
func (this *Server) runCPUProfile(args string) (io.Reader, error) {
55+
duration := defaultCPUProfileDuration
56+
57+
var err error
58+
var blockProfile, useGzip bool
59+
if args != "" {
60+
s := strings.Split(args, ",")
61+
// a duration string must be the 1st field, if any
62+
if duration, err = time.ParseDuration(s[0]); err != nil {
63+
return nil, err
64+
}
65+
for _, arg := range s[1:] {
66+
switch arg {
67+
case "block", "blocked", "blocking":
68+
blockProfile = true
69+
case "gzip":
70+
useGzip = true
71+
default:
72+
return nil, ErrCPUProfilingBadOption
73+
}
74+
}
75+
}
76+
77+
if atomic.LoadInt64(&this.isCPUProfiling) > 0 {
78+
return nil, ErrCPUProfilingInProgress
79+
}
80+
atomic.StoreInt64(&this.isCPUProfiling, 1)
81+
defer atomic.StoreInt64(&this.isCPUProfiling, 0)
82+
83+
var buf bytes.Buffer
84+
var writer io.Writer = &buf
85+
if blockProfile {
86+
runtime.SetBlockProfileRate(1)
87+
defer runtime.SetBlockProfileRate(0)
88+
}
89+
if useGzip {
90+
writer = gzip.NewWriter(writer)
91+
}
92+
if err = pprof.StartCPUProfile(writer); err != nil {
93+
return nil, err
94+
}
95+
96+
time.Sleep(duration)
97+
pprof.StopCPUProfile()
98+
this.migrationContext.Log.Infof("Captured %d byte runtime/pprof CPU profile (gzip=%v)", buf.Len(), useGzip)
99+
return &buf, nil
100+
}
101+
40102
func (this *Server) BindSocketFile() (err error) {
41103
if this.migrationContext.ServeSocketFile == "" {
42104
return nil
@@ -144,6 +206,7 @@ func (this *Server) applyServerCommand(command string, writer *bufio.Writer) (pr
144206
fmt.Fprint(writer, `available commands:
145207
status # Print a detailed status message
146208
sup # Print a short status message
209+
cpu-profile=<options> # Print a base64-encoded runtime/pprof CPU profile using a duration, default: 30s. Comma-separated options 'gzip' and/or 'block' (blocked profile) may follow the profile duration
147210
coordinates # Print the currently inspected coordinates
148211
applier # Print the hostname of the applier
149212
inspector # Print the hostname of the inspector
@@ -169,6 +232,12 @@ help # This message
169232
return ForcePrintStatusOnlyRule, nil
170233
case "info", "status":
171234
return ForcePrintStatusAndHintRule, nil
235+
case "cpu-profile":
236+
cpuProfile, err := this.runCPUProfile(arg)
237+
if err == nil {
238+
fmt.Fprint(base64.NewEncoder(base64.StdEncoding, writer), cpuProfile)
239+
}
240+
return NoPrintStatusRule, err
172241
case "coordinates":
173242
{
174243
if argIsQuestion || arg == "" {

go/logic/server_test.go

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package logic
2+
3+
import (
4+
"testing"
5+
"time"
6+
7+
"github.com/github/gh-ost/go/base"
8+
"github.com/openark/golib/tests"
9+
)
10+
11+
func TestServerRunCPUProfile(t *testing.T) {
12+
t.Parallel()
13+
14+
t.Run("failed already running", func(t *testing.T) {
15+
s := &Server{isCPUProfiling: 1}
16+
profile, err := s.runCPUProfile("15ms")
17+
tests.S(t).ExpectEquals(err, ErrCPUProfilingInProgress)
18+
tests.S(t).ExpectEquals(profile, nil)
19+
})
20+
21+
t.Run("failed bad duration", func(t *testing.T) {
22+
s := &Server{isCPUProfiling: 0}
23+
profile, err := s.runCPUProfile("should-fail")
24+
tests.S(t).ExpectNotNil(err)
25+
tests.S(t).ExpectEquals(profile, nil)
26+
})
27+
28+
t.Run("failed bad option", func(t *testing.T) {
29+
s := &Server{isCPUProfiling: 0}
30+
profile, err := s.runCPUProfile("10ms,badoption")
31+
tests.S(t).ExpectEquals(err, ErrCPUProfilingBadOption)
32+
tests.S(t).ExpectEquals(profile, nil)
33+
})
34+
35+
t.Run("success", func(t *testing.T) {
36+
s := &Server{
37+
isCPUProfiling: 0,
38+
migrationContext: base.NewMigrationContext(),
39+
}
40+
defaultCPUProfileDuration = time.Millisecond * 10
41+
profile, err := s.runCPUProfile("")
42+
tests.S(t).ExpectNil(err)
43+
tests.S(t).ExpectNotEquals(profile, nil)
44+
tests.S(t).ExpectEquals(s.isCPUProfiling, int64(0))
45+
})
46+
47+
t.Run("success with block", func(t *testing.T) {
48+
s := &Server{
49+
isCPUProfiling: 0,
50+
migrationContext: base.NewMigrationContext(),
51+
}
52+
profile, err := s.runCPUProfile("10ms,block")
53+
tests.S(t).ExpectNil(err)
54+
tests.S(t).ExpectNotEquals(profile, nil)
55+
tests.S(t).ExpectEquals(s.isCPUProfiling, int64(0))
56+
})
57+
58+
t.Run("success with block and gzip", func(t *testing.T) {
59+
s := &Server{
60+
isCPUProfiling: 0,
61+
migrationContext: base.NewMigrationContext(),
62+
}
63+
profile, err := s.runCPUProfile("10ms,block,gzip")
64+
tests.S(t).ExpectNil(err)
65+
tests.S(t).ExpectNotEquals(profile, nil)
66+
tests.S(t).ExpectEquals(s.isCPUProfiling, int64(0))
67+
})
68+
}

0 commit comments

Comments
 (0)