diff --git a/modules/git/commit_message.go b/modules/git/commit_message.go index 8fd3601f0d0..0729609d87e 100644 --- a/modules/git/commit_message.go +++ b/modules/git/commit_message.go @@ -70,9 +70,11 @@ func (c *CommitMessage) MessageTrailer() CommitMessageTrailerValues { var commitMessageTrailerSplit = sync.OnceValue(func() *regexp.Regexp { // the sep is either something like "\n---\n" or "\n\n" in the body, or at the start of the body like "---\n" - return regexp.MustCompile(`(?s)^(?P.*?)(?P^|^\n|^-{3,}\n|\n-{3,}\n|\n\n)(?P(?:[A-Za-z0-9][-A-Za-z0-9]*:[^\n]*\n?)*)$`) + return regexp.MustCompile(`(?s)^(?P.*?)(?P^|^\n|^-{3,}\n+|\n-{3,}\n+|\n\n)(?P(?:[A-Za-z0-9][-A-Za-z0-9]*:[^\n]*\n?)*\n*)$`) }) +// CommitMessageSplitTrailer tries to split the message by the trailer separator +// content + sep + trailer will reconstruct the original message func CommitMessageSplitTrailer(s string) (content, sep, trailer string) { s = util.NormalizeStringEOL(s) re := commitMessageTrailerSplit() diff --git a/modules/git/commit_message_test.go b/modules/git/commit_message_test.go index 049f1c03f78..1d13f818033 100644 --- a/modules/git/commit_message_test.go +++ b/modules/git/commit_message_test.go @@ -26,8 +26,10 @@ func TestCommitMessageTrailer(t *testing.T) { {"a", "a", "", ""}, {"a\n\nk", "a\n\nk", "", ""}, {"a\n\nk:v", "a", "\n\n", "k:v"}, + {"a\n\nk:v\n\n", "a", "\n\n", "k:v\n\n"}, {"a\n--\nk:v", "a\n--\nk:v", "", ""}, {"a\n---\nk:v", "a", "\n---\n", "k:v"}, + {"a\n\n---\n\nk:v", "a\n", "\n---\n\n", "k:v"}, {"k: v", "", "", "k: v"}, {"\nk:v", "", "\n", "k:v"}, diff --git a/services/pull/pull.go b/services/pull/pull.go index 4491406b33d..13f8ffa8df6 100644 --- a/services/pull/pull.go +++ b/services/pull/pull.go @@ -4,14 +4,15 @@ package pull import ( + "bytes" "context" "errors" "fmt" "io" - "regexp" "slices" "strings" "time" + "unicode" "unicode/utf8" "gitea.dev/models/db" @@ -767,8 +768,6 @@ func CloseRepoBranchesPulls(ctx context.Context, doer *user_model.User, repo *re return errors.Join(errs...) } -var commitMessageTrailersPattern = regexp.MustCompile(`(?:^|\n\n)(?:[\w-]+[ \t]*:[^\n]+\n*(?:[ \t]+[^\n]+\n*)*)+$`) - // GetSquashMergeCommitMessages returns the commit messages between head and merge base (if there is one) func GetSquashMergeCommitMessages(ctx context.Context, pr *issues_model.PullRequest) string { if err := pr.LoadIssue(ctx); err != nil { @@ -819,54 +818,44 @@ func GetSquashMergeCommitMessages(ctx context.Context, pr *issues_model.PullRequ return "" } - posterSig := pr.Issue.Poster.NewGitSig().String() + mergeMessage := strings.TrimSpace(pr.Issue.Content) // use PR's title and description as squash commit message + if setting.Repository.PullRequest.PopulateSquashCommentWithCommitMessages { + mergeMessage = formatSquashMergeCommitMessages(limitedCommits) // use PR's commit messages as squash commit message + } + coAuthors := collectSquashMergeCommitCoAuthors(ctx, gitRepo, pr, headCommitRef, mergeBaseRef, limit, limitedCommits) + return buildSquashMergeCommitMessages(mergeMessage, coAuthors) +} - uniqueAuthors := make(container.Set[string]) - authors := make([]string, 0, len(limitedCommits)) - stringBuilder := strings.Builder{} - - if !setting.Repository.PullRequest.PopulateSquashCommentWithCommitMessages { - // use PR's title and description as squash commit message - message := strings.TrimSpace(pr.Issue.Content) - stringBuilder.WriteString(message) - if stringBuilder.Len() > 0 { - stringBuilder.WriteRune('\n') - if !commitMessageTrailersPattern.MatchString(message) { - // TODO: this trailer check doesn't work with the separator line added below for the co-authors - stringBuilder.WriteRune('\n') - } - } - } else { - // use PR's commit messages as squash commit message - // commits list is in reverse chronological order - maxMsgSize := setting.Repository.PullRequest.DefaultMergeMessageSize - for _, commit := range slices.Backward(limitedCommits) { - msg := strings.TrimSpace(commit.MessageUTF8()) - if msg == "" { - continue - } - - // This format follows GitHub's squash commit message style, - // even if there are other "* " in the commit message body, they are written as-is. - // Maybe, ideally, we should indent those lines too. - _, _ = fmt.Fprintf(&stringBuilder, "* %s\n\n", msg) - if maxMsgSize > 0 && stringBuilder.Len() >= maxMsgSize { - tmp := stringBuilder.String() - wasValidUtf8 := utf8.ValidString(tmp) - tmp = tmp[:maxMsgSize] + "..." - if wasValidUtf8 { - // If the message was valid UTF-8 before truncation, ensure it remains valid after truncation - // For non-utf8 messages, we can't do much about it, end users should use utf-8 as much as possible - tmp = strings.ToValidUTF8(tmp, "") - } - stringBuilder.Reset() - stringBuilder.WriteString(tmp) - break - } - } +func buildSquashMergeCommitMessages(mergeMessage string, coAuthors []string) string { + if len(coAuthors) == 0 { + return mergeMessage } - // collect co-authors + msgContent, msgSep, msgTrailer := git.CommitMessageSplitTrailer(mergeMessage) + if (msgSep == "" || msgSep == "\n\n") && msgTrailer == "" { + msgContent = strings.TrimRightFunc(msgContent, unicode.IsSpace) + msgSep = "\n\n---------\n\n" + } + var sb strings.Builder + sb.WriteString(msgContent) + sb.WriteString(msgSep) + if msgTrailer = strings.TrimSpace(msgTrailer); msgTrailer != "" { + sb.WriteString(msgTrailer) + sb.WriteRune('\n') + } + for _, author := range coAuthors { + sb.WriteString(git.CoAuthoredByTrailer + ": ") + sb.WriteString(author) + sb.WriteRune('\n') + } + return sb.String() +} + +func collectSquashMergeCommitCoAuthors(ctx context.Context, gitRepo *git.Repository, pr *issues_model.PullRequest, headCommitRef, mergeBaseRef git.RefName, limitFirst int, limitedCommits []*git.Commit) []string { + posterSig := pr.Issue.Poster.NewGitSig().String() + uniqueAuthors := make(container.Set[string]) + authors := make([]string, 0, len(limitedCommits)) + for _, commit := range limitedCommits { authorString := commit.Author.String() if uniqueAuthors.Add(authorString) && authorString != posterSig { @@ -880,14 +869,14 @@ func GetSquashMergeCommitMessages(ctx context.Context, pr *issues_model.PullRequ } // collect the remaining authors - if limit >= 0 && setting.Repository.PullRequest.DefaultMergeMessageAllAuthors { - skip := limit - limit = 30 + if limitFirst >= 0 && setting.Repository.PullRequest.DefaultMergeMessageAllAuthors { + skip := limitFirst + batchLimit := 30 for { - commits, err := gitRepo.CommitsBetween(headCommitRef, mergeBaseRef, limit, skip) + commits, err := gitRepo.CommitsBetween(headCommitRef, mergeBaseRef, batchLimit, skip) if err != nil { log.Error("Unable to get commits between: %s %s Error: %v", pr.HeadBranch, pr.MergeBase, err) - return "" + return authors } if len(commits) == 0 { break @@ -901,22 +890,46 @@ func GetSquashMergeCommitMessages(ctx context.Context, pr *issues_model.PullRequ } } } - skip += limit + skip += batchLimit + } + } + return authors +} + +func formatSquashMergeCommitMessages(commits []*git.Commit) string { + maxMsgSize := setting.Repository.PullRequest.DefaultMergeMessageSize + sb := &bytes.Buffer{} + // commits list is in reverse chronological order + for _, commit := range slices.Backward(commits) { + msg := strings.TrimSpace(commit.MessageUTF8()) + if msg == "" { + continue + } + + // This format follows GitHub's squash commit message style, + // even if there are other "* " in the commit message body, they are written as-is. + // Maybe, ideally, we should indent those lines too. + _, _ = fmt.Fprintf(sb, "* %s\n\n", msg) + if maxMsgSize > 0 && sb.Len() >= maxMsgSize { + break } } - if stringBuilder.Len() > 0 && len(authors) > 0 { - // TODO: this separator line doesn't work with the trailer check (commitMessageTrailersPattern) above - stringBuilder.WriteString("---------\n\n") + buf := bytes.TrimSpace(sb.Bytes()) + if maxMsgSize > 0 && len(buf) > maxMsgSize { + buf = buf[:maxMsgSize] + for { + r, sz := utf8.DecodeLastRune(buf) + if r == utf8.RuneError && sz == 1 { + buf = buf[:len(buf)-1] + continue + } + break + } + buf = append(buf, '.', '.', '.') } - - for _, author := range authors { - stringBuilder.WriteString(git.CoAuthoredByTrailer + ": ") - stringBuilder.WriteString(author) - stringBuilder.WriteRune('\n') - } - - return stringBuilder.String() + buf = append(buf, '\n', '\n') + return util.UnsafeBytesToString(buf) } // GetIssuesAllCommitStatus returns a map of issue ID to a list of all statuses for the most recent commit as well as a map of issue ID to only the commit's latest status diff --git a/services/pull/pull_test.go b/services/pull/pull_test.go index 0648cd383d5..554c9ed5775 100644 --- a/services/pull/pull_test.go +++ b/services/pull/pull_test.go @@ -11,28 +11,33 @@ import ( repo_model "gitea.dev/models/repo" "gitea.dev/models/unit" "gitea.dev/models/unittest" + "gitea.dev/modules/git" "gitea.dev/modules/gitrepo" + "gitea.dev/modules/setting" + "gitea.dev/modules/test" "github.com/stretchr/testify/assert" ) // TODO TestPullRequest_PushToBaseRepo -func TestPullRequest_CommitMessageTrailersPattern(t *testing.T) { - // Not a valid trailer section - assert.False(t, commitMessageTrailersPattern.MatchString("")) - assert.False(t, commitMessageTrailersPattern.MatchString("No trailer.")) - assert.False(t, commitMessageTrailersPattern.MatchString("Signed-off-by: Bob \nNot a trailer due to following text.")) - assert.False(t, commitMessageTrailersPattern.MatchString("Message body not correctly separated from trailer section by empty line.\nSigned-off-by: Bob ")) - // Valid trailer section - assert.True(t, commitMessageTrailersPattern.MatchString("Signed-off-by: Bob ")) - assert.True(t, commitMessageTrailersPattern.MatchString("Signed-off-by: Bob \nOther-Trailer: Value")) - assert.True(t, commitMessageTrailersPattern.MatchString("Message body correctly separated from trailer section by empty line.\n\nSigned-off-by: Bob ")) - assert.True(t, commitMessageTrailersPattern.MatchString("Multiple trailers.\n\nSigned-off-by: Bob \nOther-Trailer: Value")) - assert.True(t, commitMessageTrailersPattern.MatchString("Newline after trailer section.\n\nSigned-off-by: Bob \n")) - assert.True(t, commitMessageTrailersPattern.MatchString("No space after colon is accepted.\n\nSigned-off-by:Bob ")) - assert.True(t, commitMessageTrailersPattern.MatchString("Additional whitespace is accepted.\n\nSigned-off-by \t : \tBob ")) - assert.True(t, commitMessageTrailersPattern.MatchString("Folded value.\n\nFolded-trailer: This is\n a folded\n trailer value\nOther-Trailer: Value")) +func TestPullRequest_FormatSquashMergeCommitMessages(t *testing.T) { + oldest := &git.Commit{CommitMessage: git.CommitMessage{MessageRaw: "commit msg 1"}} + newest := &git.Commit{CommitMessage: git.CommitMessage{MessageRaw: "commit msg 2\n\nCommit description."}} + + defer test.MockVariableValue(&setting.Repository.PullRequest.DefaultMergeMessageSize, 0)() + + assert.Equal(t, "* commit msg 1\n\n* commit msg 2\n\nCommit description.\n\n", formatSquashMergeCommitMessages([]*git.Commit{newest, oldest})) + + utf8Msg := &git.Commit{CommitMessage: git.CommitMessage{MessageRaw: "🌞"}} + setting.Repository.PullRequest.DefaultMergeMessageSize = 3 + assert.Equal(t, "* ...\n\n", formatSquashMergeCommitMessages([]*git.Commit{utf8Msg})) + setting.Repository.PullRequest.DefaultMergeMessageSize = 4 + assert.Equal(t, "* ...\n\n", formatSquashMergeCommitMessages([]*git.Commit{utf8Msg})) + setting.Repository.PullRequest.DefaultMergeMessageSize = 5 + assert.Equal(t, "* ...\n\n", formatSquashMergeCommitMessages([]*git.Commit{utf8Msg})) + setting.Repository.PullRequest.DefaultMergeMessageSize = 6 + assert.Equal(t, "* 🌞\n\n", formatSquashMergeCommitMessages([]*git.Commit{utf8Msg})) } func TestPullRequest_GetDefaultMergeMessage_InternalTracker(t *testing.T) { @@ -88,3 +93,27 @@ func TestPullRequest_GetDefaultMergeMessage_ExternalTracker(t *testing.T) { assert.Equal(t, "Merge pull request 'issue3' (#3) from user2/repo2:branch2 into master", mergeMessage) } + +func TestBuildSquashMergeCommitMessages(t *testing.T) { + cases := []struct { + msg string + coAuthors []string + expected string + }{ + {"title", nil, "title"}, + {"title", []string{"the-user"}, "title\n\n---------\n\nCo-authored-by: the-user\n"}, + {"title\n\n", []string{"the-user"}, "title\n\n---------\n\nCo-authored-by: the-user\n"}, + {"title\n\nKey: val", []string{"the-user"}, "title\n\nKey: val\nCo-authored-by: the-user\n"}, + {"title\n\n----\nKey: val", []string{"the-user"}, "title\n\n----\nKey: val\nCo-authored-by: the-user\n"}, + {"title\n\n----\nKey: val\n\n", []string{"the-user"}, "title\n\n----\nKey: val\nCo-authored-by: the-user\n"}, + + {"title\n\nbody", nil, "title\n\nbody"}, + {"title\n\nbody", []string{"the-user"}, "title\n\nbody\n\n---------\n\nCo-authored-by: the-user\n"}, + {"title\n\nbody\n\nKey: val", []string{"the-user"}, "title\n\nbody\n\nKey: val\nCo-authored-by: the-user\n"}, + {"title\n\nbody\n\n----\nKey: val", []string{"the-user"}, "title\n\nbody\n\n----\nKey: val\nCo-authored-by: the-user\n"}, + } + for _, c := range cases { + msg := buildSquashMergeCommitMessages(c.msg, c.coAuthors) + assert.Equal(t, c.expected, msg, "msg: %s", c.msg) + } +} diff --git a/tests/integration/pull_merge_test.go b/tests/integration/pull_merge_test.go index 8469057620e..ae1f8a34909 100644 --- a/tests/integration/pull_merge_test.go +++ b/tests/integration/pull_merge_test.go @@ -1272,7 +1272,7 @@ Commit description. commitMessage: `loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong message`, }, }, - expectedMessage: `* looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo...`, + expectedMessage: "* looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo...\n\n", }, { name: "Test Co-authored-by",