diff --git a/modules/git/batch.go b/modules/git/batch.go deleted file mode 100644 index 3ec4f1ddccf84..0000000000000 --- a/modules/git/batch.go +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2024 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package git - -import ( - "bufio" - "context" -) - -type Batch struct { - cancel context.CancelFunc - Reader *bufio.Reader - Writer WriteCloserError -} - -func (repo *Repository) NewBatch(ctx context.Context) (*Batch, error) { - // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! - if err := ensureValidGitRepository(ctx, repo.Path); err != nil { - return nil, err - } - - var batch Batch - batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repo.Path) - return &batch, nil -} - -func (repo *Repository) NewBatchCheck(ctx context.Context) (*Batch, error) { - // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! - if err := ensureValidGitRepository(ctx, repo.Path); err != nil { - return nil, err - } - - var check Batch - check.Writer, check.Reader, check.cancel = catFileBatchCheck(ctx, repo.Path) - return &check, nil -} - -func (b *Batch) Close() { - if b.cancel != nil { - b.cancel() - b.Reader = nil - b.Writer = nil - b.cancel = nil - } -} diff --git a/modules/git/batch_cat_file.go b/modules/git/batch_cat_file.go new file mode 100644 index 0000000000000..c701cbb75244c --- /dev/null +++ b/modules/git/batch_cat_file.go @@ -0,0 +1,126 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "bufio" + "bytes" + "context" + "fmt" + "io" + "os" + "os/exec" + "strings" + "time" + + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/process" + "code.gitea.io/gitea/modules/util" +) + +type BatchCatFile struct { + cmd *exec.Cmd + startTime time.Time + stdin io.WriteCloser + stdout io.ReadCloser + cancel context.CancelFunc + finished process.FinishedFunc +} + +// NewBatchCatFile opens git cat-file --batch or --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function +// isCheck is true for --batch-check, false for --batch isCheck will only get metadata, --batch will get metadata and content +func NewBatchCatFile(ctx context.Context, repoPath string, isCheck bool) (*BatchCatFile, error) { + // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! + if err := ensureValidGitRepository(ctx, repoPath); err != nil { + return nil, err + } + + callerInfo := util.CallerFuncName(1 /* util */ + 1 /* this */ + 1 /* parent */) + if pos := strings.LastIndex(callerInfo, "/"); pos >= 0 { + callerInfo = callerInfo[pos+1:] + } + + batchArg := util.Iif(isCheck, "--batch-check", "--batch") + + a := make([]string, 0, 4) + a = append(a, debugQuote(GitExecutable)) + if len(globalCommandArgs) > 0 { + a = append(a, "...global...") + } + a = append(a, "cat-file", batchArg) + cmdLogString := strings.Join(a, " ") + + // these logs are for debugging purposes only, so no guarantee of correctness or stability + desc := fmt.Sprintf("git.Run(by:%s, repo:%s): %s", callerInfo, logArgSanitize(repoPath), cmdLogString) + log.Debug("git.BatchCatFile: %s", desc) + + ctx, cancel, finished := process.GetManager().AddContext(ctx, desc) + + args := make([]string, 0, len(globalCommandArgs)+2) + for _, arg := range globalCommandArgs { + args = append(args, string(arg)) + } + args = append(args, "cat-file", batchArg) + cmd := exec.CommandContext(ctx, GitExecutable, args...) + cmd.Env = append(os.Environ(), CommonGitCmdEnvs()...) + cmd.Dir = repoPath + process.SetSysProcAttribute(cmd) + + stdin, err := cmd.StdinPipe() + if err != nil { + return nil, err + } + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + + if err := cmd.Start(); err != nil { + return nil, err + } + + return &BatchCatFile{ + cmd: cmd, + startTime: time.Now(), + stdin: stdin, + stdout: stdout, + cancel: cancel, + finished: finished, + }, nil +} + +func (b *BatchCatFile) Input(refs ...string) error { + var buf bytes.Buffer + for _, ref := range refs { + if _, err := buf.WriteString(ref + "\n"); err != nil { + return err + } + } + + _, err := b.stdin.Write(buf.Bytes()) + if err != nil { + return err + } + + return nil +} + +func (b *BatchCatFile) Reader() *bufio.Reader { + return bufio.NewReader(b.stdout) +} + +func (b *BatchCatFile) Escaped() time.Duration { + return time.Since(b.startTime) +} + +func (b *BatchCatFile) Cancel() { + b.cancel() +} + +func (b *BatchCatFile) Close() error { + b.finished() + _ = b.stdin.Close() + log.Debug("git.BatchCatFile: %v", b.Escaped()) + return b.cmd.Wait() +} diff --git a/modules/git/batch_cat_file_test.go b/modules/git/batch_cat_file_test.go new file mode 100644 index 0000000000000..f1734e9d56853 --- /dev/null +++ b/modules/git/batch_cat_file_test.go @@ -0,0 +1,149 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "context" + "io" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func Test_GitBatchOperatorsNormal(t *testing.T) { + bareRepo1Path := filepath.Join(testReposDir, "repo1_bare") + batch, err := NewBatchCatFile(t.Context(), bareRepo1Path, false) + assert.NoError(t, err) + assert.NotNil(t, batch) + defer batch.Close() + + err = batch.Input("refs/heads/master") + assert.NoError(t, err) + rd := batch.Reader() + assert.NotNil(t, rd) + + _, typ, size, err := ReadBatchLine(rd) + assert.NoError(t, err) + assert.Equal(t, "commit", typ) + assert.Equal(t, int64(1075), size) + + // this step is very important, otherwise the next read will be wrong + s, err := rd.Discard(int(size)) + assert.NoError(t, err) + assert.EqualValues(t, size, s) + + err = batch.Input("ce064814f4a0d337b333e646ece456cd39fab612") + assert.NoError(t, err) + assert.NotNil(t, rd) + + _, typ, size, err = ReadBatchLine(rd) + assert.NoError(t, err) + assert.Equal(t, "commit", typ) + assert.Equal(t, int64(1075), size) + + s, err = rd.Discard(int(size)) + assert.NoError(t, err) + assert.EqualValues(t, size, s) + + kases := []struct { + refname string + size int64 + }{ + {"refs/heads/master", 1075}, + {"feaf4ba6bc635fec442f46ddd4512416ec43c2c2", 1074}, + {"37991dec2c8e592043f47155ce4808d4580f9123", 239}, + } + + var inputs []string + for _, kase := range kases { + inputs = append(inputs, kase.refname) + } + + // input once for 3 refs + err = batch.Input(inputs...) + assert.NoError(t, err) + assert.NotNil(t, rd) + + for i := 0; i < 3; i++ { + _, typ, size, err = ReadBatchLine(rd) + assert.NoError(t, err) + assert.Equal(t, "commit", typ) + assert.Equal(t, kases[i].size, size) + + s, err := rd.Discard(int(size)) + assert.NoError(t, err) + assert.EqualValues(t, size, s) + } + + // input 3 times + for _, input := range inputs { + err = batch.Input(input) + assert.NoError(t, err) + assert.NotNil(t, rd) + } + + for i := 0; i < 3; i++ { + _, typ, size, err = ReadBatchLine(rd) + assert.NoError(t, err) + assert.Equal(t, "commit", typ) + assert.Equal(t, kases[i].size, size) + + s, err := rd.Discard(int(size)) + assert.NoError(t, err) + assert.EqualValues(t, size, s) + } +} + +func Test_GitBatchOperatorsCancel(t *testing.T) { + bareRepo1Path := filepath.Join(testReposDir, "repo1_bare") + batch, err := NewBatchCatFile(t.Context(), bareRepo1Path, false) + assert.NoError(t, err) + assert.NotNil(t, batch) + defer batch.Close() + + err = batch.Input("refs/heads/master") + assert.NoError(t, err) + rd := batch.Reader() + assert.NotNil(t, rd) + + _, typ, size, err := ReadBatchLine(rd) + assert.NoError(t, err) + assert.Equal(t, "commit", typ) + assert.Equal(t, int64(1075), size) + + go func() { + time.Sleep(time.Second) + batch.Cancel() + }() + // block here to wait cancel + _, err = io.ReadAll(rd) + assert.NoError(t, err) +} + +func Test_GitBatchOperatorsTimeout(t *testing.T) { + bareRepo1Path := filepath.Join(testReposDir, "repo1_bare") + + ctx, cancel := context.WithTimeout(t.Context(), 1*time.Second) + defer cancel() + + batch, err := NewBatchCatFile(ctx, bareRepo1Path, false) + assert.NoError(t, err) + assert.NotNil(t, batch) + defer batch.Close() + + err = batch.Input("refs/heads/master") + assert.NoError(t, err) + rd := batch.Reader() + assert.NotNil(t, rd) + + _, typ, size, err := ReadBatchLine(rd) + assert.NoError(t, err) + assert.Equal(t, "commit", typ) + assert.Equal(t, int64(1075), size) + // block here until timeout + _, err = io.ReadAll(rd) + assert.NoError(t, err) +} diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go index 7bbab76bb821c..3fb10d9cf09a7 100644 --- a/modules/git/batch_reader.go +++ b/modules/git/batch_reader.go @@ -13,9 +13,6 @@ import ( "strings" "code.gitea.io/gitea/modules/log" - - "github.com/djherbis/buffer" - "github.com/djherbis/nio/v3" ) // WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function @@ -40,100 +37,6 @@ func ensureValidGitRepository(ctx context.Context, repoPath string) error { return nil } -// catFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function -func catFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { - batchStdinReader, batchStdinWriter := io.Pipe() - batchStdoutReader, batchStdoutWriter := io.Pipe() - ctx, ctxCancel := context.WithCancel(ctx) - closed := make(chan struct{}) - cancel := func() { - ctxCancel() - _ = batchStdoutReader.Close() - _ = batchStdinWriter.Close() - <-closed - } - - // Ensure cancel is called as soon as the provided context is cancelled - go func() { - <-ctx.Done() - cancel() - }() - - go func() { - stderr := strings.Builder{} - err := NewCommand("cat-file", "--batch-check"). - Run(ctx, &RunOpts{ - Dir: repoPath, - Stdin: batchStdinReader, - Stdout: batchStdoutWriter, - Stderr: &stderr, - - UseContextTimeout: true, - }) - if err != nil { - _ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) - _ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String())) - } else { - _ = batchStdoutWriter.Close() - _ = batchStdinReader.Close() - } - close(closed) - }() - - // For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check - batchReader := bufio.NewReader(batchStdoutReader) - - return batchStdinWriter, batchReader, cancel -} - -// catFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function -func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { - // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. - // so let's create a batch stdin and stdout - batchStdinReader, batchStdinWriter := io.Pipe() - batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024)) - ctx, ctxCancel := context.WithCancel(ctx) - closed := make(chan struct{}) - cancel := func() { - ctxCancel() - _ = batchStdinWriter.Close() - _ = batchStdoutReader.Close() - <-closed - } - - // Ensure cancel is called as soon as the provided context is cancelled - go func() { - <-ctx.Done() - cancel() - }() - - go func() { - stderr := strings.Builder{} - err := NewCommand("cat-file", "--batch"). - Run(ctx, &RunOpts{ - Dir: repoPath, - Stdin: batchStdinReader, - Stdout: batchStdoutWriter, - Stderr: &stderr, - - UseContextTimeout: true, - }) - if err != nil { - _ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) - _ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String())) - } else { - _ = batchStdoutWriter.Close() - _ = batchStdinReader.Close() - } - close(closed) - }() - - // For simplicities sake we'll us a buffered reader to read from the cat-file --batch - batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024) - - return batchStdinWriter, batchReader, cancel -} - // ReadBatchLine reads the header line from cat-file --batch // We expect: SP SP LF // then leaving the rest of the stream " LF" to be read diff --git a/modules/git/blob_nogogit.go b/modules/git/blob_nogogit.go index af3ce376d6a46..f4200c1e5ddce 100644 --- a/modules/git/blob_nogogit.go +++ b/modules/git/blob_nogogit.go @@ -26,13 +26,14 @@ type Blob struct { // DataAsync gets a ReadCloser for the contents of a blob without reading it all. // Calling the Close function on the result will discard all unread output. func (b *Blob) DataAsync() (io.ReadCloser, error) { - wr, rd, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) + batch, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) if err != nil { return nil, err } - _, err = wr.Write([]byte(b.ID.String() + "\n")) - if err != nil { + rd := batch.Reader() + + if err = batch.Input(b.ID.String()); err != nil { cancel() return nil, err } @@ -67,18 +68,17 @@ func (b *Blob) Size() int64 { return b.size } - wr, rd, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx) + batch, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 } defer cancel() - _, err = wr.Write([]byte(b.ID.String() + "\n")) - if err != nil { + if err = batch.Input(b.ID.String()); err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 } - _, _, b.size, err = ReadBatchLine(rd) + _, _, b.size, err = ReadBatchLine(batch.Reader()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 diff --git a/modules/git/command.go b/modules/git/command.go index d85a91804aac8..db4032962a1c7 100644 --- a/modules/git/command.go +++ b/modules/git/command.go @@ -60,15 +60,16 @@ func logArgSanitize(arg string) string { return arg } +var debugQuote = func(s string) string { + if strings.ContainsAny(s, " `'\"\t\r\n") { + return fmt.Sprintf("%q", s) + } + return s +} + func (c *Command) LogString() string { // WARNING: this function is for debugging purposes only. It's much better than old code (which only joins args with space), // It's impossible to make a simple and 100% correct implementation of argument quoting for different platforms here. - debugQuote := func(s string) string { - if strings.ContainsAny(s, " `'\"\t\r\n") { - return fmt.Sprintf("%q", s) - } - return s - } a := make([]string, 0, len(c.args)+1) a = append(a, debugQuote(c.prog)) if c.globalArgsLength > 0 { diff --git a/modules/git/commit_info_nogogit.go b/modules/git/commit_info_nogogit.go index 7a6af0410bbc9..53dad5a08acf3 100644 --- a/modules/git/commit_info_nogogit.go +++ b/modules/git/commit_info_nogogit.go @@ -124,12 +124,14 @@ func GetLastCommitForPaths(ctx context.Context, commit *Commit, treePath string, return nil, err } - batchStdinWriter, batchReader, cancel, err := commit.repo.CatFileBatch(ctx) + batch, cancel, err := commit.repo.CatFileBatch(ctx) if err != nil { return nil, err } defer cancel() + batchReader := batch.Reader() + commitsMap := map[string]*Commit{} commitsMap[commit.ID.String()] = commit @@ -145,8 +147,7 @@ func GetLastCommitForPaths(ctx context.Context, commit *Commit, treePath string, continue } - _, err := batchStdinWriter.Write([]byte(commitID + "\n")) - if err != nil { + if err := batch.Input(commitID); err != nil { return nil, err } _, typ, size, err := ReadBatchLine(batchReader) diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index c5eed737011a6..e5bc1074d269a 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -46,12 +46,13 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // so let's create a batch stdin and stdout - batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() + batchReader := batch.Reader() // We'll use a scanner for the revList because it's simpler than a bufio.Reader scan := bufio.NewScanner(revListReader) trees := [][]byte{} @@ -63,15 +64,10 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err for scan.Scan() { // Get the next commit ID - commitID := scan.Bytes() + commitID := scan.Text() // push the commit to the cat-file --batch process - _, err := batchStdinWriter.Write(commitID) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte{'\n'}) - if err != nil { + if err := batch.Input(commitID); err != nil { return nil, err } @@ -92,14 +88,13 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err if err != nil { return nil, err } - _, err = batchStdinWriter.Write([]byte(id + "\n")) - if err != nil { + if err = batch.Input(id); err != nil { return nil, err } continue case "commit": // Read in the commit to get its tree and in case this is one of the last used commits - curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, size)) + curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(commitID), io.LimitReader(batchReader, size)) if err != nil { return nil, err } @@ -107,7 +102,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err return nil, err } - if _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil { + if err := batch.Input(curCommit.Tree.ID.String()); err != nil { return nil, err } curPath = "" @@ -139,12 +134,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err return nil, err } if len(trees) > 0 { - _, err := batchStdinWriter.Write(trees[len(trees)-1]) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte("\n")) - if err != nil { + if err := batch.Input(string(trees[len(trees)-1])); err != nil { return nil, err } curPath = paths[len(paths)-1] diff --git a/modules/git/repo_base_nogogit.go b/modules/git/repo_base_nogogit.go index 477e3b8742976..a7d31b568d7e4 100644 --- a/modules/git/repo_base_nogogit.go +++ b/modules/git/repo_base_nogogit.go @@ -7,11 +7,10 @@ package git import ( - "bufio" "context" "path/filepath" - "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/util" ) @@ -26,10 +25,10 @@ type Repository struct { gpgSettings *GPGSettings batchInUse bool - batch *Batch + batch *BatchCatFile checkInUse bool - check *Batch + batchCheck *BatchCatFile Ctx context.Context LastCommitCache *LastCommitCache @@ -59,53 +58,59 @@ func OpenRepository(ctx context.Context, repoPath string) (*Repository, error) { } // CatFileBatch obtains a CatFileBatch for this repository -func (repo *Repository) CatFileBatch(ctx context.Context) (WriteCloserError, *bufio.Reader, func(), error) { +func (repo *Repository) CatFileBatch(ctx context.Context) (*BatchCatFile, func(), error) { if repo.batch == nil { var err error - repo.batch, err = repo.NewBatch(ctx) + repo.batch, err = NewBatchCatFile(ctx, repo.Path, false) if err != nil { - return nil, nil, nil, err + return nil, nil, err } } if !repo.batchInUse { repo.batchInUse = true - return repo.batch.Writer, repo.batch.Reader, func() { + return repo.batch, func() { repo.batchInUse = false }, nil } - log.Debug("Opening temporary cat file batch for: %s", repo.Path) - tempBatch, err := repo.NewBatch(ctx) + if !setting.DisableTempCatFileBatchCheck { + setting.PanicInDevOrTesting("Opening temporary cat file batch for: %s", repo.Path) + } + tempBatch, err := NewBatchCatFile(ctx, repo.Path, false) if err != nil { - return nil, nil, nil, err + return nil, nil, err } - return tempBatch.Writer, tempBatch.Reader, tempBatch.Close, nil + return tempBatch, func() { + _ = tempBatch.Close() + }, nil } // CatFileBatchCheck obtains a CatFileBatchCheck for this repository -func (repo *Repository) CatFileBatchCheck(ctx context.Context) (WriteCloserError, *bufio.Reader, func(), error) { - if repo.check == nil { +func (repo *Repository) CatFileBatchCheck(ctx context.Context) (*BatchCatFile, func(), error) { + if repo.batchCheck == nil { var err error - repo.check, err = repo.NewBatchCheck(ctx) + repo.batchCheck, err = NewBatchCatFile(ctx, repo.Path, true) if err != nil { - return nil, nil, nil, err + return nil, nil, err } } if !repo.checkInUse { repo.checkInUse = true - return repo.check.Writer, repo.check.Reader, func() { + return repo.batchCheck, func() { repo.checkInUse = false }, nil } - log.Debug("Opening temporary cat file batch-check for: %s", repo.Path) - tempBatchCheck, err := repo.NewBatchCheck(ctx) + setting.PanicInDevOrTesting("Opening temporary cat file batch with check for: %s", repo.Path) + tempBatch, err := NewBatchCatFile(ctx, repo.Path, true) if err != nil { - return nil, nil, nil, err + return nil, nil, err } - return tempBatchCheck.Writer, tempBatchCheck.Reader, tempBatchCheck.Close, nil + return tempBatch, func() { + _ = tempBatch.Close() + }, nil } func (repo *Repository) Close() error { @@ -117,9 +122,9 @@ func (repo *Repository) Close() error { repo.batch = nil repo.batchInUse = false } - if repo.check != nil { - repo.check.Close() - repo.check = nil + if repo.batchCheck != nil { + repo.batchCheck.Close() + repo.batchCheck = nil repo.checkInUse = false } repo.LastCommitCache = nil diff --git a/modules/git/repo_branch_nogogit.go b/modules/git/repo_branch_nogogit.go index 0d11198523515..9e879939455af 100644 --- a/modules/git/repo_branch_nogogit.go +++ b/modules/git/repo_branch_nogogit.go @@ -22,18 +22,17 @@ func (repo *Repository) IsObjectExist(name string) bool { return false } - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } defer cancel() - _, err = wr.Write([]byte(name + "\n")) - if err != nil { + if err = batch.Input(name); err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } - sha, _, _, err := ReadBatchLine(rd) + sha, _, _, err := ReadBatchLine(batch.Reader()) return err == nil && bytes.HasPrefix(sha, []byte(strings.TrimSpace(name))) } @@ -43,18 +42,17 @@ func (repo *Repository) IsReferenceExist(name string) bool { return false } - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } defer cancel() - _, err = wr.Write([]byte(name + "\n")) - if err != nil { + if err = batch.Input(name); err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } - _, _, _, err = ReadBatchLine(rd) + _, _, _, err = ReadBatchLine(batch.Reader()) return err == nil } diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index 5aa0e9ec0457d..eacad401653f3 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -6,7 +6,6 @@ package git import ( - "bufio" "errors" "io" "strings" @@ -33,16 +32,15 @@ func (repo *Repository) ResolveReference(name string) (string, error) { // GetRefCommitID returns the last commit ID string of given reference (branch or tag). func (repo *Repository) GetRefCommitID(name string) (string, error) { - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { return "", err } defer cancel() - _, err = wr.Write([]byte(name + "\n")) - if err != nil { + if err = batch.Input(name); err != nil { return "", err } - shaBs, _, _, err := ReadBatchLine(rd) + shaBs, _, _, err := ReadBatchLine(batch.Reader()) if IsErrNotExist(err) { return "", ErrNotExist{name, ""} } @@ -73,18 +71,21 @@ func (repo *Repository) IsCommitExist(name string) bool { } func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { - wr, rd, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - _, _ = wr.Write([]byte(id.String() + "\n")) + if err = batch.Input(id.String()); err != nil { + return nil, err + } - return repo.getCommitFromBatchReader(rd, id) + return repo.getCommitFromBatchReader(batch, id) } -func (repo *Repository) getCommitFromBatchReader(rd *bufio.Reader, id ObjectID) (*Commit, error) { +func (repo *Repository) getCommitFromBatchReader(batch *BatchCatFile, id ObjectID) (*Commit, error) { + rd := batch.Reader() _, typ, size, err := ReadBatchLine(rd) if err != nil { if errors.Is(err, io.EOF) || IsErrNotExist(err) { @@ -112,7 +113,11 @@ func (repo *Repository) getCommitFromBatchReader(rd *bufio.Reader, id ObjectID) return nil, err } - commit, err := tag.Commit(repo) + if err = batch.Input(tag.Object.String()); err != nil { + return nil, err + } + + commit, err := repo.getCommitFromBatchReader(batch, tag.Object) if err != nil { return nil, err } @@ -153,16 +158,15 @@ func (repo *Repository) ConvertToGitID(commitID string) (ObjectID, error) { } } - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { return nil, err } defer cancel() - _, err = wr.Write([]byte(commitID + "\n")) - if err != nil { + if err = batch.Input(commitID); err != nil { return nil, err } - sha, _, _, err := ReadBatchLine(rd) + sha, _, _, err := ReadBatchLine(batch.Reader()) if err != nil { if IsErrNotExist(err) { return nil, ErrNotExist{commitID, ""} diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index de7707bd6cd8b..df8c4452297d6 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -20,20 +20,16 @@ import ( func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. // so let's create a batch stdin and stdout - batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - writeID := func(id string) error { - _, err := batchStdinWriter.Write([]byte(id + "\n")) - return err - } - - if err := writeID(commitID); err != nil { + if err := batch.Input(commitID); err != nil { return nil, err } + batchReader := batch.Reader() shaBytes, typ, size, err := ReadBatchLine(batchReader) if typ != "commit" { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) @@ -146,7 +142,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err // If content can not be read or file is too big just do detection by filename if f.Size() <= bigFileSize { - if err := writeID(f.ID.String()); err != nil { + if err := batch.Input(f.ID.String()); err != nil { return nil, err } _, _, size, err := ReadBatchLine(batchReader) diff --git a/modules/git/repo_tag_nogogit.go b/modules/git/repo_tag_nogogit.go index e0a3104249ebc..9fb44c47b9ee6 100644 --- a/modules/git/repo_tag_nogogit.go +++ b/modules/git/repo_tag_nogogit.go @@ -31,18 +31,20 @@ func (repo *Repository) GetTags(skip, limit int) (tags []string, err error) { // GetTagType gets the type of the tag, either commit (simple) or tag (annotated) func (repo *Repository) GetTagType(id ObjectID) (string, error) { - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { return "", err } defer cancel() - _, err = wr.Write([]byte(id.String() + "\n")) - if err != nil { + if err = batch.Input(id.String()); err != nil { return "", err } - _, typ, _, err := ReadBatchLine(rd) - if IsErrNotExist(err) { - return "", ErrNotExist{ID: id.String()} + _, typ, _, err := ReadBatchLine(batch.Reader()) + if err != nil { + if IsErrNotExist(err) { + return "", ErrNotExist{ID: id.String()} + } + return "", err } return typ, nil } @@ -92,15 +94,16 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { } // The tag is an annotated tag with a message. - wr, rd, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - if _, err := wr.Write([]byte(tagID.String() + "\n")); err != nil { + if err := batch.Input(tagID.String()); err != nil { return nil, err } + rd := batch.Reader() _, typ, size, err := ReadBatchLine(rd) if err != nil { if errors.Is(err, io.EOF) || IsErrNotExist(err) { diff --git a/modules/git/repo_tree_nogogit.go b/modules/git/repo_tree_nogogit.go index d74769ccb2122..e40f777a460a7 100644 --- a/modules/git/repo_tree_nogogit.go +++ b/modules/git/repo_tree_nogogit.go @@ -10,13 +10,17 @@ import ( ) func (repo *Repository) getTree(id ObjectID) (*Tree, error) { - wr, rd, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - _, _ = wr.Write([]byte(id.String() + "\n")) + if err = batch.Input(id.String()); err != nil { + return nil, err + } + + rd := batch.Reader() // ignore the SHA _, typ, size, err := ReadBatchLine(rd) diff --git a/modules/git/tree_entry_nogogit.go b/modules/git/tree_entry_nogogit.go index 81fb638d56fbe..6c0edd36ce855 100644 --- a/modules/git/tree_entry_nogogit.go +++ b/modules/git/tree_entry_nogogit.go @@ -36,18 +36,17 @@ func (te *TreeEntry) Size() int64 { return te.size } - wr, rd, cancel, err := te.ptree.repo.CatFileBatchCheck(te.ptree.repo.Ctx) + batch, cancel, err := te.ptree.repo.CatFileBatchCheck(te.ptree.repo.Ctx) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 } defer cancel() - _, err = wr.Write([]byte(te.ID.String() + "\n")) - if err != nil { + if err = batch.Input(te.ID.String()); err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 } - _, _, te.size, err = ReadBatchLine(rd) + _, _, te.size, err = ReadBatchLine(batch.Reader()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index f88788418e27d..ebdd01b7ca837 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -33,13 +33,16 @@ func (t *Tree) ListEntries() (Entries, error) { } if t.repo != nil { - wr, rd, cancel, err := t.repo.CatFileBatch(t.repo.Ctx) + batch, cancel, err := t.repo.CatFileBatch(t.repo.Ctx) if err != nil { return nil, err } defer cancel() - _, _ = wr.Write([]byte(t.ID.String() + "\n")) + if err = batch.Input(t.ID.String()); err != nil { + return nil, err + } + rd := batch.Reader() _, typ, sz, err := ReadBatchLine(rd) if err != nil { return nil, err @@ -49,7 +52,10 @@ func (t *Tree) ListEntries() (Entries, error) { if err != nil && err != io.EOF { return nil, err } - _, _ = wr.Write([]byte(treeID + "\n")) + if err = batch.Input(treeID); err != nil { + return nil, err + } + _, typ, sz, err = ReadBatchLine(rd) if err != nil { return nil, err diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index e1a381f992c82..432ae10f48632 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -4,7 +4,6 @@ package bleve import ( - "bufio" "context" "fmt" "io" @@ -16,7 +15,6 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/indexer" path_filter "code.gitea.io/gitea/modules/indexer/code/bleve/token/path" "code.gitea.io/gitea/modules/indexer/code/internal" @@ -151,7 +149,7 @@ func NewIndexer(indexDir string) *Indexer { } } -func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, commitSha string, +func (b *Indexer) addUpdate(ctx context.Context, batchCatFile *git.BatchCatFile, commitSha string, update internal.FileUpdate, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch, ) error { // Ignore vendored files in code search @@ -177,10 +175,12 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro return b.addDelete(update.Filename, repo, batch) } - if _, err := batchWriter.Write([]byte(update.BlobSha + "\n")); err != nil { + if err := batchCatFile.Input(update.BlobSha); err != nil { return err } + batchReader := batchCatFile.Reader() + _, _, size, err = git.ReadBatchLine(batchReader) if err != nil { return err @@ -217,19 +217,14 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) if len(changes.Updates) > 0 { - r, err := gitrepo.OpenRepository(ctx, repo) - if err != nil { - return err - } - defer r.Close() - gitBatch, err := r.NewBatch(ctx) + gitBatch, err := git.NewBatchCatFile(ctx, repo.RepoPath(), false) if err != nil { return err } - defer gitBatch.Close() for _, update := range changes.Updates { - if err := b.addUpdate(ctx, gitBatch.Writer, gitBatch.Reader, sha, update, repo, batch); err != nil { + if err := b.addUpdate(ctx, gitBatch, sha, update, repo, batch); err != nil { + gitBatch.Close() return err } } diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index be8efad5fd89d..da369b5413af4 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -4,7 +4,6 @@ package elasticsearch import ( - "bufio" "context" "fmt" "io" @@ -15,7 +14,6 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/code/internal" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" @@ -138,7 +136,7 @@ const ( }` ) -func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { +func (b *Indexer) addUpdate(ctx context.Context, batch *git.BatchCatFile, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { return nil, nil @@ -161,9 +159,10 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil } - if _, err := batchWriter.Write([]byte(update.BlobSha + "\n")); err != nil { + if err := batch.Input(update.BlobSha); err != nil { return nil, err } + batchReader := batch.Reader() _, _, size, err = git.ReadBatchLine(batchReader) if err != nil { @@ -209,19 +208,14 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elasti func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { reqs := make([]elastic.BulkableRequest, 0) if len(changes.Updates) > 0 { - r, err := gitrepo.OpenRepository(ctx, repo) - if err != nil { - return err - } - defer r.Close() - batch, err := r.NewBatch(ctx) + batch, err := git.NewBatchCatFile(ctx, repo.RepoPath(), false) if err != nil { return err } defer batch.Close() for _, update := range changes.Updates { - updateReqs, err := b.addUpdate(ctx, batch.Writer, batch.Reader, sha, update, repo) + updateReqs, err := b.addUpdate(ctx, batch, sha, update, repo) if err != nil { return err } diff --git a/modules/setting/git.go b/modules/setting/git.go index 48a4e7f30deb5..12ce9903def06 100644 --- a/modules/setting/git.go +++ b/modules/setting/git.go @@ -11,6 +11,9 @@ import ( "code.gitea.io/gitea/modules/log" ) +// DisableTempCatFileBatchCheck disables the temporary cat-file batch check +var DisableTempCatFileBatchCheck = false + // Git settings var Git = struct { Path string diff --git a/routers/web/repo/view_file.go b/routers/web/repo/view_file.go index 4ce7a8e3a4480..3defd310e674e 100644 --- a/routers/web/repo/view_file.go +++ b/routers/web/repo/view_file.go @@ -33,6 +33,15 @@ import ( func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { ctx.Data["IsViewFile"] = true ctx.Data["HideRepoInfo"] = true + commit, err := ctx.Repo.Commit.GetCommitByPath(ctx.Repo.TreePath) + if err != nil { + ctx.ServerError("GetCommitByPath", err) + return + } + if !loadLatestCommitData(ctx, commit) { + return + } + blob := entry.Blob() buf, dataRc, fInfo, err := getFileReader(ctx, ctx.Repo.Repository.ID, blob) if err != nil { @@ -46,16 +55,6 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { ctx.Data["FileName"] = blob.Name() ctx.Data["RawFileLink"] = ctx.Repo.RepoLink + "/raw/" + ctx.Repo.RefTypeNameSubURL() + "/" + util.PathEscapeSegments(ctx.Repo.TreePath) - commit, err := ctx.Repo.Commit.GetCommitByPath(ctx.Repo.TreePath) - if err != nil { - ctx.ServerError("GetCommitByPath", err) - return - } - - if !loadLatestCommitData(ctx, commit) { - return - } - if ctx.Repo.TreePath == ".editorconfig" { _, editorconfigWarning, editorconfigErr := ctx.Repo.GetEditorconfig(ctx.Repo.Commit) if editorconfigWarning != nil { diff --git a/tests/integration/git_misc_test.go b/tests/integration/git_misc_test.go index bf9e2c02ab38b..f84ee43991c39 100644 --- a/tests/integration/git_misc_test.go +++ b/tests/integration/git_misc_test.go @@ -18,12 +18,18 @@ import ( user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" files_service "code.gitea.io/gitea/services/repository/files" "github.com/stretchr/testify/assert" ) func TestDataAsyncDoubleRead_Issue29101(t *testing.T) { + // in this test, we will have two parallel readers reading the same blob + // So we need to ignore the temporary CatFileBranch checking to make the test pass + defer test.MockVariableValue(&setting.DisableTempCatFileBatchCheck, true)() + onGiteaRun(t, func(t *testing.T, u *url.URL) { user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2}) repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 1})