Multi Branch Commit Aggregation
Generate comprehensive commit history report by aggregating changes from multiple branches with contributor analysis and merge timeline.
Model Ranking
| Model | Run Results | Pass@4 | Pass^4 | Avg Time | Avg Turns | Input Tokens | Output Tokens | Total Tokens |
|---|---|---|---|---|---|---|---|---|
| claude-opus-4-1 | 0/1 | - | - | 597.6s | 18.0 | 1,328,632 | 7,930 | 1,336,562 |
| claude-opus-4-5-high | 0/4 | - | - | 138.3s | 4.8 | 157,040 | 5,456 | 162,496 |
| claude-sonnet-4 | 0/4 | - | - | 214.3s | 15.8 | 1,097,531 | 4,745 | 1,102,276 |
| claude-sonnet-4-5 | 0/4 | - | - | 136.3s | 7.3 | 328,970 | 5,375 | 334,345 |
| claude-sonnet-4-high | 0/4 | - | - | 160.5s | 19.0 | 534,237 | 4,562 | 538,799 |
| claude-sonnet-4-low | 0/4 | - | - | 166.2s | 18.8 | 530,029 | 4,544 | 534,572 |
| deepseek-chat | 0/4 | - | - | 221.4s | 16.0 | 676,383 | 986 | 677,369 |
| deepseek-v3-1-terminus | 0/4 | - | - | 194.6s | 9.5 | 297,876 | 2,531 | 300,407 |
| deepseek-v3-1-terminus-thinking | 0/4 | - | - | 1176.3s | 7.3 | 255,239 | 28,276 | 283,515 |
| deepseek-v3-2-chat | 0/4 | - | - | 346.4s | 23.5 | 536,885 | 7,739 | 544,624 |
| deepseek-v3-2-thinking | 0/4 | - | - | 582.2s | 27.0 | 764,697 | 15,201 | 779,898 |
| gemini-2-5-flash | 0/4 | - | - | 604.3s | 13.0 | 3,596,942 | 34,546 | 3,631,488 |
| gemini-2-5-pro | 0/4 | - | - | 64.2s | 2.0 | 14,062 | 6,031 | 20,093 |
| gemini-3-pro-high | 0/4 | - | - | 272.1s | 10.3 | 364,563 | 16,281 | 380,844 |
| gemini-3-pro-low | 0/4 | - | - | 287.1s | 16.5 | 461,670 | 15,768 | 477,438 |
| glm-4-5 | 0/4 | - | - | 193.7s | 15.8 | 675,400 | 3,743 | 679,143 |
| gpt-4-1 | 0/4 | - | - | 84.0s | 10.5 | 308,486 | 3,007 | 311,493 |
| gpt-4-1-mini | 0/4 | - | - | 144.4s | 8.0 | 244,887 | 3,160 | 248,047 |
| gpt-4-1-nano | 0/4 | - | - | 57.3s | 12.0 | 436,797 | 1,859 | 438,656 |
| gpt-5-high | 0/4 | - | - | 1207.2s | 9.0 | 226,299 | 44,521 | 270,820 |
| gpt-5-low | 0/4 | - | - | 109.8s | 3.3 | 39,237 | 6,235 | 45,472 |
| gpt-5-medium | 0/4 | - | - | 501.0s | 7.8 | 197,555 | 24,419 | 221,974 |
| gpt-5-mini-high | 0/4 | - | - | 565.9s | 21.8 | 723,905 | 46,348 | 770,253 |
| gpt-5-mini-low | 0/4 | - | - | 73.0s | 9.0 | 450,010 | 3,008 | 453,018 |
| gpt-5-mini-medium | 0/4 | - | - | 183.8s | 17.0 | 477,171 | 15,784 | 492,955 |
| gpt-5-nano-high | 0/4 | - | - | 310.7s | 9.5 | 288,108 | 60,458 | 348,566 |
| gpt-5-nano-low | 0/4 | - | - | 56.5s | 6.3 | 103,786 | 4,514 | 108,300 |
| gpt-5-nano-medium | 0/4 | - | - | 200.0s | 12.8 | 336,061 | 34,315 | 370,376 |
| gpt-oss-120b | 0/4 | - | - | 11.1s | 2.5 | 24,624 | 525 | 25,150 |
| grok-4 | 0/4 | - | - | 295.4s | 4.5 | 553,226 | 997 | 558,768 |
| grok-4-fast | 0/4 | - | - | 88.4s | 7.3 | 319,443 | 8,773 | 328,216 |
| grok-code-fast-1 | 0/4 | - | - | 83.6s | 18.5 | 786,064 | 9,677 | 795,740 |
| kimi-k2-0711 | 0/4 | - | - | 278.3s | 7.0 | 425,368 | 2,307 | 427,675 |
| kimi-k2-0905 | 0/4 | - | - | 278.7s | 16.0 | 355,001 | 3,003 | 358,004 |
| o3 | 0/4 | - | - | 636.5s | 14.5 | 960,067 | 11,759 | 971,826 |
| o4-mini | 0/4 | - | - | 657.2s | 13.3 | 882,091 | 21,846 | 903,937 |
| qwen-3-coder-plus | 0/4 | - | - | 250.3s | 18.0 | 1,343,891 | 3,746 | 1,347,637 |
| qwen-3-max | 0/4 | - | - | 166.2s | 28.5 | 965,529 | 3,810 | 969,339 |
Task State
Instruction
I need you to create a comprehensive commit history report by aggregating changes from multiple branches. Here's what you need to do:
Step 1: Create Analysis Branch Create a new branch called 'history-report-2025' from the main branch.
Step 2: Generate Branch Commits Report
In the 'history-report-2025' branch, create a file called BRANCH_COMMITS.json that contains a JSON object with the following structure:
- For each of these branches: ['pr/45-googlefan256-main', 'pr/25-neuralsorcerer-patch-1', 'pr/41-amirhosseinghanipour-fix-race-conditions-and-offline-api']
- List the 3 most recent commits for each branch
- Each commit must include: SHA, GitHub username, commit message, and files changed count
- The JSON structure should be:
{
"pr/45-googlefan256-main": [
{
"sha": "commit_sha",
"author": "github_username",
"message": "commit message",
"files_changed": number
}
],
"pr/25-neuralsorcerer-patch-1": [...],
"pr/41-amirhosseinghanipour-fix-race-conditions-and-offline-api": [...]
}

Step 3: Create Cross-Branch Analysis
Create a file CROSS_BRANCH_ANALYSIS.md that contains:
- A section "## Top Contributors" listing the 3 contributors with the most commits on the main branch, sorted by commit count (format: "github_username: X commits")
- Must include keywords: "contributors"
Step 4: Generate Merge Timeline
Create a file MERGE_TIMELINE.txt that lists the 10 most recent merge commits from the main branch:
- Format:
DATE | MERGE_COMMIT_MESSAGE | COMMIT_SHA
- List in reverse chronological order (newest first)
- Only include actual merge commits (commits that have exactly 2 parent commits)
- Note: While the commit messages reference PR numbers, those PRs no longer exist in the repository
Verify
import sys
import os
import requests
from typing import Dict, Optional, Tuple
import base64
import json
from dotenv import load_dotenv
def _get_github_api(
    endpoint: str, headers: Dict[str, str], org: str
) -> Tuple[bool, Optional[Dict]]:
    """Make a GET request to the GitHub API and return (success, response).

    Args:
        endpoint: Path relative to the repository root, e.g. "branches/main".
        headers: HTTP headers, including the Authorization bearer token.
        org: GitHub organization that owns the "harmony" repository.

    Returns:
        (True, parsed JSON body) on HTTP 200; (False, None) on 404, on any
        other status code (logged to stderr), or on a network error.
    """
    url = f"https://api.github.com/repos/{org}/harmony/{endpoint}"
    try:
        # Bound the request so a hung connection cannot stall verification
        # indefinitely (requests has no default timeout).
        response = requests.get(url, headers=headers, timeout=30)
        if response.status_code == 200:
            return True, response.json()
        elif response.status_code == 404:
            return False, None
        else:
            print(f"API error for {endpoint}: {response.status_code}", file=sys.stderr)
            return False, None
    except Exception as e:
        print(f"Exception for {endpoint}: {e}", file=sys.stderr)
        return False, None
def _check_branch_exists(branch_name: str, headers: Dict[str, str], org: str) -> bool:
    """Return True when the named branch is present in the repository."""
    exists, _payload = _get_github_api(f"branches/{branch_name}", headers, org)
    return exists
def _get_file_content(
    branch: str, file_path: str, headers: Dict[str, str], org: str
) -> Optional[str]:
    """Fetch and decode a file's text from a specific branch, or None on failure."""
    ok, payload = _get_github_api(f"contents/{file_path}?ref={branch}", headers, org)
    if not (ok and payload):
        return None
    try:
        # The contents API returns the file body base64-encoded.
        return base64.b64decode(payload.get("content", "")).decode("utf-8")
    except Exception as e:
        print(f"Content decode error for {file_path}: {e}", file=sys.stderr)
        return None
def _check_branch_commits_json(content: str) -> bool:
"""Verify BRANCH_COMMITS.json has correct structure and expected data."""
expected_data = {
"pr/45-googlefan256-main": [
{
"sha": "9fa3f54cf2a2501c7dcbf554d5fbdd0de619fdda",
"author": "googlefan256",
"message": "Update format.md",
"files_changed": 1,
},
{
"sha": "3efbf742533a375fc148d75513597e139329578b",
"author": "scott-oai",
"message": "Merge pull request #29 from axion66/improve-readme-and-checks",
"files_changed": 1,
},
{
"sha": "9d653a4c7382abc42d115014d195d9354e7ad357",
"author": "scott-oai",
"message": "Merge pull request #30 from Yuan-ManX/harmony-format",
"files_changed": 1,
},
],
"pr/25-neuralsorcerer-patch-1": [
{
"sha": "c505a03e9c9a388a511b6125756097eee523742a",
"author": "neuralsorcerer",
"message": "fix: `meta_sep` token and add to registry",
"files_changed": 1,
},
{
"sha": "c044bf33f7e835ca6a723ccc97848de25dba5164",
"author": "neuralsorcerer",
"message": "fix: `meta_sep` token in `encoding.rs`",
"files_changed": 1,
},
{
"sha": "b255cbeb6274adbea774f26fd9590922ce8874ed",
"author": "scott-oai",
"message": "Merge pull request #18 from openai/dev/scl/better-ci",
"files_changed": 6,
},
],
"pr/41-amirhosseinghanipour-fix-race-conditions-and-offline-api": [
{
"sha": "1dca6392934bf4e3c403b2ecc2104e8ff3f67f45",
"author": "amirhosseinghanipour",
"message": "fix race conditions and add offline tokenizer loading api",
"files_changed": 8,
},
{
"sha": "9528c7b4a00a3307fd9685fc1328aee11c3d9c90",
"author": "scott-oai",
"message": "version bump",
"files_changed": 2,
},
{
"sha": "82b3afb9eb043343f322c937262cc50405e892c3",
"author": "scott-oai",
"message": "Merge pull request #26 from jordan-wu-97/jordan/fix-function-call-atomic-bool",
"files_changed": 6,
},
],
}
try:
data = json.loads(content)
# Check if all required branches are present
for branch in expected_data.keys():
if branch not in data:
print(
f"Missing branch {branch} in BRANCH_COMMITS.json", file=sys.stderr
)
return False
# Verify the exact content matches expected data
for branch, expected_commits in expected_data.items():
actual_commits = data.get(branch, [])
if len(actual_commits) != 3:
print(
f"Branch {branch} should have exactly 3 commits, found {len(actual_commits)}",
file=sys.stderr,
)
return False
for i, expected_commit in enumerate(expected_commits):
if i >= len(actual_commits):
print(
f"Missing commit {i + 1} for branch {branch}", file=sys.stderr
)
return False
actual_commit = actual_commits[i]
for field in ["sha", "author", "files_changed"]:
if actual_commit.get(field) != expected_commit.get(field):
print(
f"Mismatch in {field} for commit {i + 1} in branch {branch}",
file=sys.stderr,
)
print(
f"Expected: {expected_commit.get(field)}, Got: {actual_commit.get(field)}",
file=sys.stderr,
)
return False
# For message field, use substring matching to be more flexible
expected_message = expected_commit.get("message", "")
actual_message = actual_commit.get("message", "")
if expected_message not in actual_message:
print(
f"Mismatch in message for commit {i + 1} in branch {branch}",
file=sys.stderr,
)
print(
f"Expected: {expected_message}, Got: {actual_message}",
file=sys.stderr,
)
return False
return True
except json.JSONDecodeError as e:
print(f"Invalid JSON in BRANCH_COMMITS.json: {e}", file=sys.stderr)
return False
except Exception as e:
print(f"Error checking BRANCH_COMMITS.json: {e}", file=sys.stderr)
return False
def _check_cross_branch_analysis(content: str) -> bool:
"""Verify CROSS_BRANCH_ANALYSIS.md contains required sections and data."""
# Check for required section header
if "## Top Contributors" not in content:
print(
"Missing section '## Top Contributors' in CROSS_BRANCH_ANALYSIS.md",
file=sys.stderr,
)
return False
# Check for required keyword
if "contributors" not in content.lower():
print(
"Missing keyword 'contributors' in CROSS_BRANCH_ANALYSIS.md",
file=sys.stderr,
)
return False
# Verify the top 3 contributors with correct counts from main branch (order matters)
expected_contributors = [
"scott-oai: 35 commits",
"egorsmkv: 4 commits",
"axion66: 2 commits",
]
for contributor in expected_contributors:
if contributor not in content:
print(
f"Missing or incorrect contributor entry: {contributor}",
file=sys.stderr,
)
return False
return True
def _check_merge_timeline(content: str) -> bool:
"""Verify MERGE_TIMELINE.txt has correct format and expected merge commits."""
expected_timeline = [
"2025-08-06 | Merge pull request #29 from axion66/improve-readme-and-checks | 3efbf742533a375fc148d75513597e139329578b",
"2025-08-06 | Merge pull request #30 from Yuan-ManX/harmony-format | 9d653a4c7382abc42d115014d195d9354e7ad357",
"2025-08-06 | Merge pull request #28 from dkqjrm/fix-typo-format-md | 161e5fe2a57c63e9f8353c4c5b8faa3c3854bb5f",
"2025-08-05 | Merge pull request #26 from jordan-wu-97/jordan/fix-function-call-atomic-bool | 82b3afb9eb043343f322c937262cc50405e892c3",
"2025-08-05 | Merge pull request #18 from openai/dev/scl/better-ci | b255cbeb6274adbea774f26fd9590922ce8874ed",
"2025-08-05 | Merge pull request #21 from Tialo/main | 058ef3257c24fb099aac7960c10ce51c8e55d9fe",
"2025-08-05 | Merge branch 'main' into dev/scl/better-ci | 6375a15ea1b0a486cbb1468964cf8f5800ff5a5c",
"2025-08-05 | Merge pull request #8 from RustedBytes/main | f6179119ca894eda4124c86d408c01fdbf5281f0",
"2025-08-05 | Merge branch 'main' into main | eb86106b6980790b94f5702dc510483c66027277",
"2025-08-05 | Merge pull request #17 from openai/dev/scl/add-docs-to-cargo | 64bca4cf327ebeafa0bbd0345650d86e2d02142f",
]
# Verify each expected timeline entry exists in the content
for i, expected_line in enumerate(expected_timeline):
if expected_line not in content:
print(f"Missing expected timeline entry {i + 1} in MERGE_TIMELINE.txt", file=sys.stderr)
print(f"Expected: {expected_line}", file=sys.stderr)
return False
return True
def verify_task() -> bool:
    """Verify the multi-branch commit aggregation task."""
    # Credentials and target org come from the MCP env file / environment.
    load_dotenv(".mcp_env")
    github_token = os.environ.get("MCP_GITHUB_TOKEN")
    if not github_token:
        print("Error: MCP_GITHUB_TOKEN environment variable not set", file=sys.stderr)
        return False
    github_org = os.environ.get("GITHUB_EVAL_ORG")
    if not github_org:
        print("Error: GITHUB_EVAL_ORG environment variable not set", file=sys.stderr)
        return False
    headers = {
        "Authorization": f"Bearer {github_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    report_branch = "history-report-2025"
    # 1. The analysis branch itself must exist.
    if not _check_branch_exists(report_branch, headers, github_org):
        print("Branch 'history-report-2025' does not exist", file=sys.stderr)
        return False
    print("✓ Branch 'history-report-2025' exists")
    # 2-4. Each required artifact: (file name, content checker, success message).
    checks = (
        (
            "BRANCH_COMMITS.json",
            _check_branch_commits_json,
            "✓ BRANCH_COMMITS.json has correct structure and data",
        ),
        (
            "CROSS_BRANCH_ANALYSIS.md",
            _check_cross_branch_analysis,
            "✓ CROSS_BRANCH_ANALYSIS.md contains required sections and data",
        ),
        (
            "MERGE_TIMELINE.txt",
            _check_merge_timeline,
            "✓ MERGE_TIMELINE.txt has correct format and data",
        ),
    )
    for file_name, checker, success_message in checks:
        file_text = _get_file_content(report_branch, file_name, headers, github_org)
        if not file_text:
            print(
                f"File '{file_name}' not found in 'history-report-2025' branch",
                file=sys.stderr,
            )
            return False
        if not checker(file_text):
            return False
        print(success_message)
    print("\nAll verification checks passed! ✅")
    return True
if __name__ == "__main__":
    # Exit with 0 on success, 1 on any failed check, so CI can gate on it.
    sys.exit(0 if verify_task() else 1)