Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor the git cat-file --batch and --batch-check implementation to make it easier to maintain and reduce goroutines #33934

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
46 changes: 0 additions & 46 deletions modules/git/batch.go

This file was deleted.

126 changes: 126 additions & 0 deletions modules/git/batch_cat_file.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
"bufio"
"bytes"
"context"
"fmt"
"io"
"os"
"os/exec"
"strings"
"time"

"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/util"
)

// BatchCatFile holds the state of one running `git cat-file --batch` or
// `git cat-file --batch-check` child process: the command itself, the pipes
// connected to it, and the callbacks used to stop it and to report completion
// to the process manager.
type BatchCatFile struct {
	// cmd is the started git process; Close waits on it.
	cmd *exec.Cmd
	// stdin is the write end of the process's standard input; Input writes
	// newline-terminated object names to it.
	stdin io.WriteCloser
	// stdout is the read end of the process's standard output; Reader wraps it.
	stdout io.ReadCloser

	// cancel cancels the context the command was created with.
	cancel context.CancelFunc
	// finished is the completion callback returned by the process manager's
	// AddContext; Close invokes it.
	finished process.FinishedFunc

	// startTime is taken right after the process has been started and is the
	// reference point for Escaped.
	startTime time.Time
}

// NewBatchCatFile starts `git cat-file --batch` (or `git cat-file --batch-check`
// when isCheck is true) with repoPath as its working directory and returns a
// handle to the running process.
//
// With --batch-check git answers each request with object metadata only; with
// --batch it answers with the metadata followed by the object content.
//
// The command is registered with the process manager and runs under a context
// derived from ctx. On success the caller owns the returned handle and must
// call Close on it. On failure nil is returned and everything acquired along
// the way (derived context, process-manager entry, stdin pipe) is released
// before returning.
func NewBatchCatFile(ctx context.Context, repoPath string, isCheck bool) (*BatchCatFile, error) {
	// git cat-file does not fail immediately when it is not run in a valid git
	// directory, so the repository is validated up front.
	if err := ensureValidGitRepository(ctx, repoPath); err != nil {
		return nil, err
	}

	// Keep only the last path segment of the caller's function name for the description.
	callerInfo := util.CallerFuncName(1 /* util */ + 1 /* this */ + 1 /* parent */)
	if pos := strings.LastIndex(callerInfo, "/"); pos >= 0 {
		callerInfo = callerInfo[pos+1:]
	}

	batchArg := util.Iif(isCheck, "--batch-check", "--batch")

	// Abbreviated command line, used only for logging and the process description;
	// the global arguments are collapsed into a placeholder.
	logParts := make([]string, 0, 4)
	logParts = append(logParts, debugQuote(GitExecutable))
	if len(globalCommandArgs) > 0 {
		logParts = append(logParts, "...global...")
	}
	logParts = append(logParts, "cat-file", batchArg)
	cmdLogString := strings.Join(logParts, " ")

	// these logs are for debugging purposes only, so no guarantee of correctness or stability
	desc := fmt.Sprintf("git.Run(by:%s, repo:%s): %s", callerInfo, logArgSanitize(repoPath), cmdLogString)
	log.Debug("git.BatchCatFile: %s", desc)

	ctx, cancel, finished := process.GetManager().AddContext(ctx, desc)

	// From here on the derived context and the process-manager entry exist.
	// If the process does not end up running, nobody will ever call Close,
	// so they have to be released on every error path.
	started := false
	defer func() {
		if !started {
			cancel()
			finished()
		}
	}()

	args := make([]string, 0, len(globalCommandArgs)+2)
	for _, arg := range globalCommandArgs {
		args = append(args, string(arg))
	}
	args = append(args, "cat-file", batchArg)

	cmd := exec.CommandContext(ctx, GitExecutable, args...)
	cmd.Env = append(os.Environ(), CommonGitCmdEnvs()...)
	cmd.Dir = repoPath
	process.SetSysProcAttribute(cmd)

	stdin, err := cmd.StdinPipe()
	if err != nil {
		return nil, err
	}
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		// The command was never started, so close the stdin pipe created
		// above ourselves. Best effort: the original error is what matters.
		_ = stdin.Close()
		return nil, err
	}

	if err := cmd.Start(); err != nil {
		return nil, err
	}
	started = true

	return &BatchCatFile{
		cmd:       cmd,
		startTime: time.Now(),
		stdin:     stdin,
		stdout:    stdout,
		cancel:    cancel,
		finished:  finished,
	}, nil
}

// Input sends the given object names / refs to the git process.
//
// Each ref is terminated with a newline and all of them are handed to the
// process's stdin in one Write call. The error of that Write, if any, is
// returned unchanged.
func (b *BatchCatFile) Input(refs ...string) error {
	var payload bytes.Buffer
	for i := range refs {
		// bytes.Buffer write methods always return a nil error.
		payload.WriteString(refs[i])
		payload.WriteByte('\n')
	}

	if _, err := b.stdin.Write(payload.Bytes()); err != nil {
		return err
	}
	return nil
}

// Reader returns a buffered reader over the git process's stdout.
//
// Every call wraps the same pipe in a brand-new bufio.Reader, so callers
// should obtain the reader once and keep using it: bytes already buffered by
// one reader cannot be read through another.
func (b *BatchCatFile) Reader() *bufio.Reader {
	out := b.stdout
	return bufio.NewReader(out)
}

// Escaped reports how much time has passed since startTime, which the
// constructor records right after starting the git process.
//
// NOTE(review): the name presumably means "elapsed"; it is kept as is because
// it is part of the exported API.
func (b *BatchCatFile) Escaped() time.Duration {
	startedAt := b.startTime
	return time.Since(startedAt)
}

// Cancel invokes the stored cancel function, i.e. it cancels the context the
// git command was created with. It does not wait for the process to exit and
// does not call the process manager's finished callback; Close does that.
func (b *BatchCatFile) Cancel() {
	stop := b.cancel
	stop()
}

// Close shuts the batch process down and returns the result of waiting on it.
//
// In order, it: invokes the finished callback obtained from the process
// manager, closes the process's stdin (the close error is deliberately
// ignored), logs the time elapsed since the process was started, and finally
// waits for the command to exit, returning whatever cmd.Wait reports.
//
// NOTE(review): finished is invoked before the process has been waited on. If
// finished also cancels the command's context, git may be killed before it can
// exit on its own after stdin is closed, and Wait would then report that kill
// as an error even for an otherwise clean session — confirm against the
// process manager's contract. Simply moving finished after Wait is not
// obviously safe either: Wait presumably blocks while git is stuck writing
// output that nobody reads.
func (b *BatchCatFile) Close() error {
b.finished()
// Best effort: a failure to close stdin is not reported to the caller.
_ = b.stdin.Close()
// Logged before Wait, so the duration does not include the time spent waiting for the process to exit.
log.Debug("git.BatchCatFile: %v", b.Escaped())
return b.cmd.Wait()
}
149 changes: 149 additions & 0 deletions modules/git/batch_cat_file_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
"context"
"io"
"path/filepath"
"testing"
"time"

"github.com/stretchr/testify/assert"
)

// Test_GitBatchOperatorsNormal drives one `git cat-file --batch` process
// through several request/response rounds against the repo1_bare fixture:
// a single ref per Input call, several refs in one Input call, and several
// Input calls issued before any reply is read.
func Test_GitBatchOperatorsNormal(t *testing.T) {
	bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
	batch, err := NewBatchCatFile(t.Context(), bareRepo1Path, false)
	// Every later step dereferences the handle, so continuing after a failed
	// start would only turn the failure into a nil-pointer panic.
	if !assert.NoError(t, err) || !assert.NotNil(t, batch) {
		return
	}
	defer batch.Close()

	// One reader for the whole test: Reader builds a new bufio.Reader per call.
	rd := batch.Reader()
	assert.NotNil(t, rd)

	// expectCommit reads one reply header, checks that it announces a commit
	// of wantSize bytes and then skips the object body.
	expectCommit := func(wantSize int64) {
		_, typ, size, err := ReadBatchLine(rd)
		assert.NoError(t, err)
		assert.Equal(t, "commit", typ)
		assert.Equal(t, wantSize, size)

		// this step is very important, otherwise the next read will be wrong
		n, err := rd.Discard(int(size))
		assert.NoError(t, err)
		assert.EqualValues(t, size, n)
	}

	assert.NoError(t, batch.Input("refs/heads/master"))
	expectCommit(1075)

	assert.NoError(t, batch.Input("ce064814f4a0d337b333e646ece456cd39fab612"))
	expectCommit(1075)

	kases := []struct {
		refname string
		size    int64
	}{
		{"refs/heads/master", 1075},
		{"feaf4ba6bc635fec442f46ddd4512416ec43c2c2", 1074},
		{"37991dec2c8e592043f47155ce4808d4580f9123", 239},
	}

	inputs := make([]string, 0, len(kases))
	for _, kase := range kases {
		inputs = append(inputs, kase.refname)
	}

	// input once for all refs; replies must come back in request order
	assert.NoError(t, batch.Input(inputs...))
	for _, kase := range kases {
		expectCommit(kase.size)
	}

	// input once per ref, then read all replies
	for _, input := range inputs {
		assert.NoError(t, batch.Input(input))
	}
	for _, kase := range kases {
		expectCommit(kase.size)
	}
}

// Test_GitBatchOperatorsCancel checks that a reader blocked on the batch
// process's output is released without error once Cancel is called.
func Test_GitBatchOperatorsCancel(t *testing.T) {
	bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
	batch, err := NewBatchCatFile(t.Context(), bareRepo1Path, false)
	// Every later step dereferences the handle, so continuing after a failed
	// start would only turn the failure into a nil-pointer panic.
	if !assert.NoError(t, err) || !assert.NotNil(t, batch) {
		return
	}
	defer batch.Close()

	err = batch.Input("refs/heads/master")
	assert.NoError(t, err)
	rd := batch.Reader()
	assert.NotNil(t, rd)

	_, typ, size, err := ReadBatchLine(rd)
	assert.NoError(t, err)
	assert.Equal(t, "commit", typ)
	assert.Equal(t, int64(1075), size)

	// Cancel from another goroutine while this one is blocked in ReadAll below.
	go func() {
		time.Sleep(time.Second)
		batch.Cancel()
	}()

	// block here to wait for the cancel: ReadAll only returns at end of
	// stream (presumably reached when the cancelled process goes away)
	_, err = io.ReadAll(rd)
	assert.NoError(t, err)
}

// Test_GitBatchOperatorsTimeout checks that a reader blocked on the batch
// process's output is released without error once the context the process
// was started with reaches its one-second timeout.
func Test_GitBatchOperatorsTimeout(t *testing.T) {
	bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")

	ctx, cancel := context.WithTimeout(t.Context(), 1*time.Second)
	defer cancel()

	batch, err := NewBatchCatFile(ctx, bareRepo1Path, false)
	// Every later step dereferences the handle, so continuing after a failed
	// start would only turn the failure into a nil-pointer panic.
	if !assert.NoError(t, err) || !assert.NotNil(t, batch) {
		return
	}
	defer batch.Close()

	err = batch.Input("refs/heads/master")
	assert.NoError(t, err)
	rd := batch.Reader()
	assert.NotNil(t, rd)

	_, typ, size, err := ReadBatchLine(rd)
	assert.NoError(t, err)
	assert.Equal(t, "commit", typ)
	assert.Equal(t, int64(1075), size)

	// block here until timeout: ReadAll only returns at end of stream
	// (presumably reached when the timed-out process goes away)
	_, err = io.ReadAll(rd)
	assert.NoError(t, err)
}
Loading