Claude Collaboration Analysis
Analyze Claude AI collaboration patterns in commit history and create a comprehensive report of co-authored commits and top collaborators.
Model Ranking
| Model | Run Results | Pass@4 | Pass^4 | Avg Time | Avg Turns | Input Tokens | Output Tokens | Total Tokens |
|---|---|---|---|---|---|---|---|---|
| gpt-5-high | 4/4 | - | - | 948.2s | 6.5 | 299,907 | 32,759 | 332,667 |
| gpt-5-medium | 3/4 | - | - | 441.5s | 6.3 | 314,523 | 21,878 | 336,400 |
| gpt-5-mini-high | 2/4 | - | - | 284.3s | 5.3 | 228,481 | 27,682 | 256,163 |
| claude-opus-4-1 | 0/1 | - | - | 59.6s | 2.0 | 214,092 | 271 | 214,363 |
| claude-opus-4-5-high | 0/4 | - | - | 64.0s | 4.3 | 203,119 | 2,277 | 205,397 |
| claude-sonnet-4 | 0/4 | - | - | 63.8s | 2.5 | 315,284 | 360 | 315,644 |
| claude-sonnet-4-5 | 0/4 | - | - | 80.6s | 5.5 | 347,667 | 2,424 | 350,091 |
| claude-sonnet-4-high | 0/4 | - | - | 76.2s | 5.8 | 336,269 | 2,484 | 338,754 |
| claude-sonnet-4-low | 0/4 | - | - | 75.5s | 5.3 | 303,705 | 2,190 | 305,895 |
| deepseek-chat | 0/4 | - | - | 41.0s | 3.3 | 62,292 | 192 | 62,484 |
| deepseek-v3-1-terminus | 0/4 | - | - | 87.5s | 3.3 | 171,017 | 698 | 171,714 |
| deepseek-v3-1-terminus-thinking | 0/4 | - | - | 885.8s | 4.8 | 270,135 | 22,542 | 292,676 |
| deepseek-v3-2-chat | 0/4 | - | - | 152.2s | 6.0 | 277,531 | 3,668 | 281,199 |
| deepseek-v3-2-thinking | 0/4 | - | - | 334.5s | 7.8 | 397,125 | 8,573 | 405,698 |
| gemini-2-5-flash | 0/4 | - | - | 222.9s | 3.0 | 832,493 | 1,509 | 834,002 |
| gemini-2-5-pro | 0/4 | - | - | 30.8s | 1.3 | 6,653 | 2,733 | 9,386 |
| gemini-3-pro-high | 0/4 | - | - | 102.5s | 3.5 | 154,899 | 4,787 | 159,686 |
| gemini-3-pro-low | 0/4 | - | - | 417.3s | 5.5 | 372,543 | 20,833 | 393,376 |
| glm-4-5 | 0/4 | - | - | 52.5s | 2.5 | 136,356 | 1,616 | 137,972 |
| gpt-4-1 | 0/4 | - | - | 22.5s | 3.0 | 57,304 | 794 | 58,098 |
| gpt-4-1-mini | 0/4 | - | - | 20.9s | 2.5 | 72,553 | 341 | 72,894 |
| gpt-4-1-nano | 0/4 | - | - | 27.3s | 5.0 | 146,351 | 687 | 147,037 |
| gpt-5-low | 0/4 | - | - | 196.6s | 3.5 | 313,959 | 7,549 | 321,508 |
| gpt-5-mini-low | 0/4 | - | - | 29.7s | 2.8 | 270,330 | 846 | 271,176 |
| gpt-5-mini-medium | 0/4 | - | - | 92.0s | 5.0 | 206,391 | 9,633 | 216,024 |
| gpt-5-nano-high | 0/4 | - | - | 376.8s | 12.0 | 870,667 | 70,103 | 940,770 |
| gpt-5-nano-low | 0/4 | - | - | 40.0s | 4.8 | 168,522 | 3,023 | 171,545 |
| gpt-5-nano-medium | 0/4 | - | - | 92.5s | 4.8 | 194,803 | 16,268 | 211,071 |
| gpt-oss-120b | 0/4 | - | - | 45.8s | 3.3 | 108,466 | 3,790 | 112,257 |
| grok-4 | 0/4 | - | - | 247.7s | 5.5 | 670,319 | 351 | 674,496 |
| grok-4-fast | 0/4 | - | - | 34.2s | 5.0 | 131,595 | 2,505 | 134,100 |
| grok-code-fast-1 | 0/4 | - | - | 33.9s | 5.3 | 213,385 | 3,805 | 217,190 |
| kimi-k2-0711 | 0/4 | - | - | 148.4s | 4.0 | 139,980 | 589 | 140,569 |
| kimi-k2-0905 | 0/4 | - | - | 98.5s | 4.0 | 160,382 | 974 | 161,356 |
| o3 | 0/4 | - | - | 43.8s | 2.8 | 233,384 | 1,083 | 234,467 |
| o4-mini | 0/4 | - | - | 199.3s | 2.8 | 270,266 | 4,225 | 274,491 |
| qwen-3-coder-plus | 0/4 | - | - | 426.7s | 8.3 | 1,828,067 | 1,072 | 1,829,139 |
| qwen-3-max | 0/4 | - | - | 42.6s | 4.3 | 150,786 | 275 | 151,060 |
Task State
Instruction
I need you to analyze the collaboration patterns between human developers and Claude (the AI assistant) in the repository by examining all available commit history, then create a comprehensive analysis report and submit it as a new file to the repository.
Step 1: Commit History Analysis
Analyze ALL commits in the repository to identify:
- Claude Co-Authored Commits: Find all commits that were co-authored by Claude (look for "Co-Authored-By: Claude <noreply@anthropic.com>" in commit messages)
- Top Claude Collaborators: Identify the top 3 human developers who most frequently collaborated with Claude
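A minimal sketch of Step 1, assuming the same setup the verifier below relies on (a .mcp_env file providing MCP_GITHUB_TOKEN and GITHUB_EVAL_ORG, and a repository named claude-code); the regex accepts the trailer with or without angle brackets around the address, and none of these names are prescribed by the task text itself:
import os
import re
from collections import Counter
import requests
from dotenv import load_dotenv

load_dotenv(".mcp_env")
ORG = os.environ["GITHUB_EVAL_ORG"]
HEADERS = {
    "Authorization": f"Bearer {os.environ['MCP_GITHUB_TOKEN']}",
    "Accept": "application/vnd.github.v3+json",
}
# Match the Claude co-author trailer with or without angle brackets around the address.
CLAUDE_TRAILER = re.compile(
    r"Co-Authored-By:\s*Claude\s*<?noreply@anthropic\.com>?", re.IGNORECASE
)

def iter_commits(org: str, repo: str = "claude-code"):
    """Yield every commit on the default branch, following pagination."""
    page = 1
    while True:
        resp = requests.get(
            f"https://api.github.com/repos/{org}/{repo}/commits",
            headers=HEADERS,
            params={"per_page": 100, "page": page},
        )
        resp.raise_for_status()
        batch = resp.json()
        if not batch:
            return
        yield from batch
        page += 1

commits = list(iter_commits(ORG))
claude_commits = [c for c in commits if CLAUDE_TRAILER.search(c["commit"]["message"])]
# Count collaborations per human author; the GitHub login (when GitHub can resolve it)
# sits on the top-level "author" object of each list-commits entry.
collaborators = Counter(
    (c["commit"]["author"]["name"], (c.get("author") or {}).get("login", ""))
    for c in claude_commits
)
print(len(commits), len(claude_commits), collaborators.most_common(3))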
Step 2: Create Collaboration Analysis Report
Create a file called CLAUDE_COLLABORATION_ANALYSIS.md in the repository root with:
- A "# Claude AI Collaboration Analysis" title
- A "## Summary Statistics" section with these exact format requirements:
  - "Total commits analyzed: [NUMBER]"
  - "Number of Claude co-authored commits found: [NUMBER]"
  - "Percentage of commits with Claude collaboration: [NUMBER]%"
  - "Number of unique human collaborators who worked with Claude: [NUMBER]"
- A "## Top Claude Collaborators" section with this exact table format:
  | Developer | GitHub Username | Claude Collaborations |
  |-----------|----------------|----------------------|
  Include the top 3 developers by number of Claude collaborations.
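Continuing the Step 1 sketch, the report body can be assembled around the exact strings required above; commits, claude_commits, and collaborators are the values computed there, and the two-decimal percentage is a formatting choice that stays within the verifier's ±0.1% tolerance:
top3 = collaborators.most_common(3)
percentage = 100 * len(claude_commits) / len(commits)
report_lines = [
    "# Claude AI Collaboration Analysis",
    "",
    "## Summary Statistics",
    f"Total commits analyzed: {len(commits)}",
    f"Number of Claude co-authored commits found: {len(claude_commits)}",
    f"Percentage of commits with Claude collaboration: {percentage:.2f}%",
    f"Number of unique human collaborators who worked with Claude: {len(collaborators)}",
    "",
    "## Top Claude Collaborators",
    "| Developer | GitHub Username | Claude Collaborations |",
    "|-----------|----------------|----------------------|",
]
for (name, login), count in top3:
    # Fall back to the commit author name if no GitHub login could be resolved.
    report_lines.append(f"| {name} | {login or name} | {count} |")
report = "\n".join(report_lines) + "\n"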
Step 3: Commit Analysis to Repository
Commit the CLAUDE_COLLABORATION_ANALYSIS.md file to the main branch with:
- Commit message: "Add Claude AI collaboration analysis report"
- Ensure all statistics are accurate based on actual commit data
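One way to sketch Step 3 without a local clone is the GitHub Contents API (PUT /repos/{owner}/{repo}/contents/{path}), which creates the file and its commit in a single call; report, ORG, and HEADERS come from the sketches above, and the claude-code repository name is again an assumption:
import base64
import requests

resp = requests.put(
    f"https://api.github.com/repos/{ORG}/claude-code/contents/CLAUDE_COLLABORATION_ANALYSIS.md",
    headers=HEADERS,
    json={
        "message": "Add Claude AI collaboration analysis report",
        # The Contents API requires the new file body to be base64-encoded.
        "content": base64.b64encode(report.encode("utf-8")).decode("ascii"),
        "branch": "main",
    },
)
resp.raise_for_status()
print("Created", resp.json()["content"]["path"], "in commit", resp.json()["commit"]["sha"])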
Verify
import sys
import os
import requests
from typing import Dict, List, Optional, Tuple
import base64
import re
from dotenv import load_dotenv
def _get_github_api(
endpoint: str, headers: Dict[str, str], org: str, repo: str = "claude-code"
) -> Tuple[bool, Optional[Dict]]:
"""Make a GET request to GitHub API and return (success, response)."""
url = f"https://api.github.com/repos/{org}/{repo}/{endpoint}"
try:
response = requests.get(url, headers=headers)
if response.status_code == 200:
return True, response.json()
elif response.status_code == 404:
return False, None
else:
print(f"API error for {endpoint}: {response.status_code}", file=sys.stderr)
return False, None
except Exception as e:
print(f"Exception for {endpoint}: {e}", file=sys.stderr)
return False, None
def _get_file_content(
file_path: str,
headers: Dict[str, str],
org: str,
repo: str = "claude-code",
ref: str = "main",
) -> Optional[str]:
"""Get the content of a file from the repository."""
success, result = _get_github_api(
f"contents/{file_path}?ref={ref}", headers, org, repo
)
if not success or not result:
return None
try:
content = base64.b64decode(result.get("content", "")).decode("utf-8")
return content
except Exception as e:
print(f"Content decode error for {file_path}: {e}", file=sys.stderr)
return None
def _parse_summary_statistics(content: str) -> Dict:
"""Parse the summary statistics section from the report."""
stats = {}
lines = content.split("\n")
in_summary = False
for line in lines:
if "## Summary Statistics" in line:
in_summary = True
continue
if in_summary:
if "##" in line and "Summary Statistics" not in line:
break
# Parse statistics lines
if "Total commits analyzed" in line:
match = re.search(r"(\d+)", line)
if match:
stats["total_analyzed"] = int(match.group(1))
elif "Number of Claude co-authored commits" in line:
match = re.search(r"(\d+)", line)
if match:
stats["claude_commits"] = int(match.group(1))
elif "Percentage of commits with Claude collaboration" in line:
match = re.search(r"([\d.]+)%", line)
if match:
stats["percentage"] = float(match.group(1))
elif "Number of unique human collaborators" in line:
match = re.search(r"(\d+)", line)
if match:
stats["unique_collaborators"] = int(match.group(1))
return stats
def _parse_collaborators_table(content: str) -> List[Dict]:
"""Parse the top collaborators table from the report."""
collaborators = []
lines = content.split("\n")
in_table = False
for line in lines:
if "| Developer | GitHub Username | Claude Collaborations |" in line:
in_table = True
continue
if in_table and line.startswith("|---"):
continue
if in_table and line.startswith("|"):
parts = [p.strip() for p in line.split("|")]
if len(parts) >= 4: # Should have 3 columns plus empty parts
developer = parts[1].strip()
username = parts[2].strip()
collaborations = parts[3].strip()
if developer and username and collaborations:
try:
collaborators.append(
{
"developer": developer,
"username": username,
"collaborations": int(collaborations),
}
)
except ValueError:
pass
if in_table and line and not line.startswith("|") and "##" in line:
break
return collaborators
def verify_task() -> bool:
"""Verify the Claude collaboration analysis task."""
# Load environment variables from .mcp_env
load_dotenv(".mcp_env")
# Get GitHub token and org
github_token = os.environ.get("MCP_GITHUB_TOKEN")
github_org = os.environ.get("GITHUB_EVAL_ORG")
if not github_token:
print("Error: MCP_GITHUB_TOKEN environment variable not set", file=sys.stderr)
return False
if not github_org:
print("Error: GITHUB_EVAL_ORG environment variable not set", file=sys.stderr)
return False
headers = {
"Authorization": f"Bearer {github_token}",
"Accept": "application/vnd.github.v3+json",
}
# Pre-computed expected values based on repository analysis
# These are the correct answers the agent should find
EXPECTED_TOP_COLLABORATORS = [
{
"username": "bcherny",
"min_collaborations": 14,
}, # Boris Cherny has many Claude collaborations
{"username": "ashwin-ant", "min_collaborations": 5}, # Ashwin Bhat has some
{"username": "ant-kurt", "min_collaborations": 3}, # Kurt Carpenter has several
]
# Expected exact values for summary statistics
EXPECTED_STATS = {
"total_analyzed": 158,
"claude_commits": 25,
"percentage": 15.82,
"unique_collaborators": 6,
}
print("Verifying Claude collaboration analysis task...")
# 1. Check if CLAUDE_COLLABORATION_ANALYSIS.md exists in main branch
print("1. Checking if CLAUDE_COLLABORATION_ANALYSIS.md exists...")
content = _get_file_content("CLAUDE_COLLABORATION_ANALYSIS.md", headers, github_org)
if not content:
print(
"Error: CLAUDE_COLLABORATION_ANALYSIS.md not found in main branch",
file=sys.stderr,
)
return False
print("✓ CLAUDE_COLLABORATION_ANALYSIS.md found")
# 2. Check required sections exist
print("2. Checking required sections...")
required_sections = [
"# Claude AI Collaboration Analysis",
"## Summary Statistics",
"## Top Claude Collaborators",
]
for section in required_sections:
if section not in content:
print(f"Error: Missing required section '{section}'", file=sys.stderr)
return False
print("✓ All required sections present")
# 3. Parse and validate summary statistics
print("3. Validating summary statistics...")
stats = _parse_summary_statistics(content)
if "total_analyzed" not in stats:
print("Error: Total commits analyzed not found", file=sys.stderr)
return False
# Check exact values against expected statistics
if stats.get("total_analyzed") != EXPECTED_STATS["total_analyzed"]:
print(
f"Error: Total analyzed should be {EXPECTED_STATS['total_analyzed']}, found {stats.get('total_analyzed')}",
file=sys.stderr,
)
return False
if stats.get("claude_commits") != EXPECTED_STATS["claude_commits"]:
print(
f"Error: Claude commits should be {EXPECTED_STATS['claude_commits']}, found {stats.get('claude_commits')}",
file=sys.stderr,
)
return False
# Allow 0.1% tolerance for percentage
expected_percentage = EXPECTED_STATS["percentage"]
actual_percentage = stats.get("percentage", 0)
if abs(actual_percentage - expected_percentage) > 0.1:
print(
f"Error: Percentage should be around {expected_percentage}% (±0.1%), found {actual_percentage}%",
file=sys.stderr,
)
return False
if stats.get("unique_collaborators") != EXPECTED_STATS["unique_collaborators"]:
print(
f"Error: Unique collaborators should be {EXPECTED_STATS['unique_collaborators']}, found {stats.get('unique_collaborators')}",
file=sys.stderr,
)
return False
print("✓ Summary statistics validated")
# 4. Validate top collaborators table
print("4. Validating top collaborators...")
collaborators = _parse_collaborators_table(content)
if len(collaborators) < 3:
print(
f"Error: Expected 3 top collaborators, found {len(collaborators)}",
file=sys.stderr,
)
return False
# Check that expected top collaborators are present
found_usernames = [c["username"] for c in collaborators]
# The top 3 should include at least 2 of our expected collaborators
expected_found = 0
for expected in EXPECTED_TOP_COLLABORATORS:
if expected["username"] in found_usernames[:3]:
expected_found += 1
# Also check they have reasonable collaboration counts
for collab in collaborators:
if collab["username"] == expected["username"]:
if collab["collaborations"] < expected["min_collaborations"]:
print(
f"Error: {expected['username']} should have at least {expected['min_collaborations']} collaborations, found {collab['collaborations']}",
file=sys.stderr,
)
return False
if expected_found < 2:
print(
f"Error: Expected to find at least 2 of the known top collaborators in top 3, found {expected_found}",
file=sys.stderr,
)
print(
f"Expected to see at least 2 of: {[e['username'] for e in EXPECTED_TOP_COLLABORATORS]}",
file=sys.stderr,
)
print(f"Found: {found_usernames[:3]}", file=sys.stderr)
return False
print("✓ Top collaborators validated")
# 5. Check commit message verification
print("5. Verifying commit message...")
success, latest_commits = _get_github_api(
"commits?per_page=10", headers, github_org
)
if not success:
print("Error: Failed to fetch recent commits", file=sys.stderr)
return False
# Look for commit with expected message
expected_commit_message = "Add Claude AI collaboration analysis report"
commit_found = False
for commit in latest_commits:
if commit["commit"]["message"].startswith(expected_commit_message):
commit_found = True
break
if not commit_found:
print(
f"Error: Expected commit message '{expected_commit_message}' not found in recent commits",
file=sys.stderr,
)
return False
print("✓ Commit message verified")
    # 6. Final summary (unique collaborator count was already validated in step 3)
print("6. Final validation complete...")
print("✓ All statistics match expected values")
print("\n✅ All verification checks passed!")
print("Claude collaboration analysis completed successfully:")
print(" - File: CLAUDE_COLLABORATION_ANALYSIS.md created in main branch")
print(f" - Commits analyzed: {stats.get('total_analyzed', 'N/A')}")
print(f" - Claude collaborations found: {stats.get('claude_commits', 'N/A')}")
print(f" - Top collaborators identified: {len(collaborators)}")
print(" - All statistics verified")
print(" - Commit message verified")
return True
if __name__ == "__main__":
success = verify_task()
sys.exit(0 if success else 1)