Store Commit.Hash by pointer (kept in a pool of hashes)

This in itself is not an improvement, because hashes are unique (they are shared
between real commits and rebase todos, but there are so few of those that it
doesn't matter). However, it becomes an improvement once we also store parent
hashes in the same pool; but the real motivation for this change is to also
reuse the hash pointers in Pipe objects later in the branch. This will be a big
win because in a merge-heavy git repo there are many more Pipe instances than
commits.
This commit is contained in:
Stefan Haller 2025-04-19 16:29:36 +02:00
parent 1037371a44
commit e27bc15bbd
15 changed files with 119 additions and 74 deletions

View file

@ -66,6 +66,7 @@ type GetCommitsOptions struct {
// If non-empty, show divergence from this ref (left-right log)
RefToShowDivergenceFrom string
MainBranches *MainBranches
HashPool *utils.StringPool
}
// GetCommits obtains the commits of the current branch
@ -74,7 +75,7 @@ func (self *CommitLoader) GetCommits(opts GetCommitsOptions) ([]*models.Commit,
if opts.IncludeRebaseCommits && opts.FilterPath == "" {
var err error
commits, err = self.MergeRebasingCommits(commits)
commits, err = self.MergeRebasingCommits(opts.HashPool, commits)
if err != nil {
return nil, err
}
@ -89,7 +90,7 @@ func (self *CommitLoader) GetCommits(opts GetCommitsOptions) ([]*models.Commit,
defer wg.Done()
logErr = self.getLogCmd(opts).RunAndProcessLines(func(line string) (bool, error) {
commit := self.extractCommitFromLine(line, opts.RefToShowDivergenceFrom != "")
commit := self.extractCommitFromLine(opts.HashPool, line, opts.RefToShowDivergenceFrom != "")
commits = append(commits, commit)
return false, nil
})
@ -159,7 +160,7 @@ func (self *CommitLoader) GetCommits(opts GetCommitsOptions) ([]*models.Commit,
return commits, nil
}
func (self *CommitLoader) MergeRebasingCommits(commits []*models.Commit) ([]*models.Commit, error) {
func (self *CommitLoader) MergeRebasingCommits(hashPool *utils.StringPool, commits []*models.Commit) ([]*models.Commit, error) {
// chances are we have as many commits as last time so we'll set the capacity to be the old length
result := make([]*models.Commit, 0, len(commits))
for i, commit := range commits {
@ -172,7 +173,7 @@ func (self *CommitLoader) MergeRebasingCommits(commits []*models.Commit) ([]*mod
workingTreeState := self.getWorkingTreeState()
addConflictedRebasingCommit := true
if workingTreeState.CherryPicking || workingTreeState.Reverting {
sequencerCommits, err := self.getHydratedSequencerCommits(workingTreeState)
sequencerCommits, err := self.getHydratedSequencerCommits(hashPool, workingTreeState)
if err != nil {
return nil, err
}
@ -181,7 +182,7 @@ func (self *CommitLoader) MergeRebasingCommits(commits []*models.Commit) ([]*mod
}
if workingTreeState.Rebasing {
rebasingCommits, err := self.getHydratedRebasingCommits(addConflictedRebasingCommit)
rebasingCommits, err := self.getHydratedRebasingCommits(hashPool, addConflictedRebasingCommit)
if err != nil {
return nil, err
}
@ -196,7 +197,7 @@ func (self *CommitLoader) MergeRebasingCommits(commits []*models.Commit) ([]*mod
// then puts them into a commit object
// example input:
// 8ad01fe32fcc20f07bc6693f87aa4977c327f1e1|10 hours ago|Jesse Duffield| (HEAD -> master, tag: v0.15.2)|refresh commits when adding a tag
func (self *CommitLoader) extractCommitFromLine(line string, showDivergence bool) *models.Commit {
func (self *CommitLoader) extractCommitFromLine(hashPool *utils.StringPool, line string, showDivergence bool) *models.Commit {
split := strings.SplitN(line, "\x00", 8)
hash := split[0]
@ -234,7 +235,7 @@ func (self *CommitLoader) extractCommitFromLine(line string, showDivergence bool
parents = strings.Split(parentHashes, " ")
}
return models.NewCommit(models.NewCommitOpts{
return models.NewCommit(hashPool, models.NewCommitOpts{
Hash: hash,
Name: message,
Tags: tags,
@ -247,13 +248,13 @@ func (self *CommitLoader) extractCommitFromLine(line string, showDivergence bool
})
}
func (self *CommitLoader) getHydratedRebasingCommits(addConflictingCommit bool) ([]*models.Commit, error) {
func (self *CommitLoader) getHydratedRebasingCommits(hashPool *utils.StringPool, addConflictingCommit bool) ([]*models.Commit, error) {
todoFileHasShortHashes := self.version.IsOlderThan(2, 25, 2)
return self.getHydratedTodoCommits(self.getRebasingCommits(addConflictingCommit), todoFileHasShortHashes)
return self.getHydratedTodoCommits(hashPool, self.getRebasingCommits(hashPool, addConflictingCommit), todoFileHasShortHashes)
}
func (self *CommitLoader) getHydratedSequencerCommits(workingTreeState models.WorkingTreeState) ([]*models.Commit, error) {
commits := self.getSequencerCommits()
func (self *CommitLoader) getHydratedSequencerCommits(hashPool *utils.StringPool, workingTreeState models.WorkingTreeState) ([]*models.Commit, error) {
commits := self.getSequencerCommits(hashPool)
if len(commits) > 0 {
// If we have any commits in .git/sequencer/todo, then the last one of
// those is the conflicting one.
@ -262,16 +263,16 @@ func (self *CommitLoader) getHydratedSequencerCommits(workingTreeState models.Wo
// For single-commit cherry-picks and reverts, git apparently doesn't
// use the sequencer; in that case, CHERRY_PICK_HEAD or REVERT_HEAD is
// our conflicting commit, so synthesize it here.
conflicedCommit := self.getConflictedSequencerCommit(workingTreeState)
conflicedCommit := self.getConflictedSequencerCommit(hashPool, workingTreeState)
if conflicedCommit != nil {
commits = append(commits, conflicedCommit)
}
}
return self.getHydratedTodoCommits(commits, true)
return self.getHydratedTodoCommits(hashPool, commits, true)
}
func (self *CommitLoader) getHydratedTodoCommits(todoCommits []*models.Commit, todoFileHasShortHashes bool) ([]*models.Commit, error) {
func (self *CommitLoader) getHydratedTodoCommits(hashPool *utils.StringPool, todoCommits []*models.Commit, todoFileHasShortHashes bool) ([]*models.Commit, error) {
if len(todoCommits) == 0 {
return nil, nil
}
@ -292,7 +293,7 @@ func (self *CommitLoader) getHydratedTodoCommits(todoCommits []*models.Commit, t
fullCommits := map[string]*models.Commit{}
err := cmdObj.RunAndProcessLines(func(line string) (bool, error) {
commit := self.extractCommitFromLine(line, false)
commit := self.extractCommitFromLine(hashPool, line, false)
fullCommits[commit.Hash()] = commit
return false, nil
})
@ -331,7 +332,7 @@ func (self *CommitLoader) getHydratedTodoCommits(todoCommits []*models.Commit, t
// git-rebase-todo example:
// pick ac446ae94ee560bdb8d1d057278657b251aaef17 ac446ae
// pick afb893148791a2fbd8091aeb81deba4930c73031 afb8931
func (self *CommitLoader) getRebasingCommits(addConflictingCommit bool) []*models.Commit {
func (self *CommitLoader) getRebasingCommits(hashPool *utils.StringPool, addConflictingCommit bool) []*models.Commit {
bytesContent, err := self.readFile(filepath.Join(self.repoPaths.WorktreeGitDirPath(), "rebase-merge/git-rebase-todo"))
if err != nil {
self.Log.Error(fmt.Sprintf("error occurred reading git-rebase-todo: %s", err.Error()))
@ -350,7 +351,7 @@ func (self *CommitLoader) getRebasingCommits(addConflictingCommit bool) []*model
// See if the current commit couldn't be applied because it conflicted; if
// so, add a fake entry for it
if addConflictingCommit {
if conflictedCommit := self.getConflictedCommit(todos); conflictedCommit != nil {
if conflictedCommit := self.getConflictedCommit(hashPool, todos); conflictedCommit != nil {
commits = append(commits, conflictedCommit)
}
}
@ -364,7 +365,7 @@ func (self *CommitLoader) getRebasingCommits(addConflictingCommit bool) []*model
// Command does not have a commit associated, skip
continue
}
commits = utils.Prepend(commits, models.NewCommit(models.NewCommitOpts{
commits = utils.Prepend(commits, models.NewCommit(hashPool, models.NewCommitOpts{
Hash: t.Commit,
Name: t.Msg,
Status: models.StatusRebasing,
@ -375,7 +376,7 @@ func (self *CommitLoader) getRebasingCommits(addConflictingCommit bool) []*model
return commits
}
func (self *CommitLoader) getConflictedCommit(todos []todo.Todo) *models.Commit {
func (self *CommitLoader) getConflictedCommit(hashPool *utils.StringPool, todos []todo.Todo) *models.Commit {
bytesContent, err := self.readFile(filepath.Join(self.repoPaths.WorktreeGitDirPath(), "rebase-merge/done"))
if err != nil {
self.Log.Error(fmt.Sprintf("error occurred reading rebase-merge/done: %s", err.Error()))
@ -391,10 +392,10 @@ func (self *CommitLoader) getConflictedCommit(todos []todo.Todo) *models.Commit
amendFileExists, _ := self.os.FileExists(filepath.Join(self.repoPaths.WorktreeGitDirPath(), "rebase-merge/amend"))
messageFileExists, _ := self.os.FileExists(filepath.Join(self.repoPaths.WorktreeGitDirPath(), "rebase-merge/message"))
return self.getConflictedCommitImpl(todos, doneTodos, amendFileExists, messageFileExists)
return self.getConflictedCommitImpl(hashPool, todos, doneTodos, amendFileExists, messageFileExists)
}
func (self *CommitLoader) getConflictedCommitImpl(todos []todo.Todo, doneTodos []todo.Todo, amendFileExists bool, messageFileExists bool) *models.Commit {
func (self *CommitLoader) getConflictedCommitImpl(hashPool *utils.StringPool, todos []todo.Todo, doneTodos []todo.Todo, amendFileExists bool, messageFileExists bool) *models.Commit {
// Should never be possible, but just to be safe:
if len(doneTodos) == 0 {
self.Log.Error("no done entries in rebase-merge/done file")
@ -465,14 +466,14 @@ func (self *CommitLoader) getConflictedCommitImpl(todos []todo.Todo, doneTodos [
// Any other todo that has a commit associated with it must have failed with
// a conflict, otherwise we wouldn't have stopped the rebase:
return models.NewCommit(models.NewCommitOpts{
return models.NewCommit(hashPool, models.NewCommitOpts{
Hash: lastTodo.Commit,
Action: lastTodo.Command,
Status: models.StatusConflicted,
})
}
func (self *CommitLoader) getSequencerCommits() []*models.Commit {
func (self *CommitLoader) getSequencerCommits(hashPool *utils.StringPool) []*models.Commit {
bytesContent, err := self.readFile(filepath.Join(self.repoPaths.WorktreeGitDirPath(), "sequencer/todo"))
if err != nil {
self.Log.Error(fmt.Sprintf("error occurred reading sequencer/todo: %s", err.Error()))
@ -493,7 +494,7 @@ func (self *CommitLoader) getSequencerCommits() []*models.Commit {
// Command does not have a commit associated, skip
continue
}
commits = utils.Prepend(commits, models.NewCommit(models.NewCommitOpts{
commits = utils.Prepend(commits, models.NewCommit(hashPool, models.NewCommitOpts{
Hash: t.Commit,
Name: t.Msg,
Status: models.StatusCherryPickingOrReverting,
@ -504,7 +505,7 @@ func (self *CommitLoader) getSequencerCommits() []*models.Commit {
return commits
}
func (self *CommitLoader) getConflictedSequencerCommit(workingTreeState models.WorkingTreeState) *models.Commit {
func (self *CommitLoader) getConflictedSequencerCommit(hashPool *utils.StringPool, workingTreeState models.WorkingTreeState) *models.Commit {
var shaFile string
var action todo.TodoCommand
if workingTreeState.CherryPicking {
@ -526,7 +527,7 @@ func (self *CommitLoader) getConflictedSequencerCommit(workingTreeState models.W
if len(lines) == 0 {
return nil
}
return models.NewCommit(models.NewCommitOpts{
return models.NewCommit(hashPool, models.NewCommitOpts{
Hash: lines[0],
Status: models.StatusConflicted,
Action: action,