Find Rag Commit
L3
GitHub / Build Your Own X
Identify the specific commit SHA that added the RAG for Document Search entry to the repository.
Created by Xiangyan Liu
2025-08-15
Repository Analysis
Model Ranking
Click on the dots to view the trajectory of each task run
| Model | Run Results | Pass@4 | Pass^4 | Avg Time | Avg Turns | Input Tokens | Output Tokens | Total Tokens |
|---|---|---|---|---|---|---|---|---|
claude-opus-4-5-high | 4 /4 | 114.5s | 11.5 | 988,254 | 2,135 | 990,388 | ||
claude-sonnet-4-high | 4 /4 | 253.8s | 26.5 | 2,175,395 | 4,159 | 2,179,554 | ||
claude-sonnet-4-low | 4 /4 | 230.2s | 24.8 | 1,663,257 | 4,098 | 1,667,355 | ||
gemini-3-pro-high | 4 /4 | 228.5s | 14.0 | 557,314 | 3,177 | 560,491 | ||
gpt-5-high | 4 /4 | 1183.9s | 25.8 | 1,290,063 | 30,094 | 1,320,157 | ||
claude-sonnet-4-5 | 3 /4 | 130.7s | 17.5 | 1,847,817 | 2,478 | 1,850,294 | ||
deepseek-v3-2-chat | 3 /4 | 339.9s | 28.8 | 1,479,022 | 4,543 | 1,483,565 | ||
deepseek-v3-2-thinking | 3 /4 | 390.4s | 26.3 | 1,262,969 | 7,570 | 1,270,539 | ||
gemini-3-pro-low | 3 /4 | 484.0s | 17.5 | 1,013,371 | 4,965 | 1,018,336 | ||
gpt-5-medium | 3 /4 | 346.4s | 19.8 | 982,660 | 13,944 | 996,604 | ||
gpt-5-low | 2 /4 | 317.8s | 13.3 | 1,572,736 | 7,369 | 1,580,104 | ||
deepseek-v3-1-terminus | 1 /4 | 358.8s | 15.3 | 718,310 | 1,121 | 719,430 | ||
deepseek-v3-1-terminus-thinking | 1 /4 | 1114.8s | 19.0 | 792,916 | 24,156 | 817,072 | ||
glm-4-5 | 1 /4 | 97.1s | 14.8 | 498,908 | 2,059 | 500,967 | ||
gpt-4-1-mini | 1 /4 | 161.7s | 24.0 | 723,655 | 1,019 | 724,674 | ||
gpt-5-mini-medium | 1 /4 | 121.0s | 18.5 | 616,556 | 5,976 | 622,532 | ||
kimi-k2-0905 | 1 /4 | 378.3s | 34.3 | 1,436,495 | 2,235 | 1,438,730 | ||
o4-mini | 1 /4 | 387.1s | 17.8 | 1,546,860 | 6,150 | 1,553,011 | ||
qwen-3-coder-plus | 1 /4 | 790.3s | 40.8 | 7,149,095 | 3,911 | 7,153,006 | ||
claude-opus-4-1 | 0 /1 | - | - | 71.3s | 5.0 | 100,338 | 599 | 100,937 |
claude-sonnet-4 | 0 /4 | 58.6s | 4.3 | 92,351 | 532 | 92,883 | ||
deepseek-chat | 0 /4 | 131.3s | 7.3 | 293,268 | 475 | 293,743 | ||
gemini-2-5-flash | 0 /4 | 53.6s | 6.5 | 544,693 | 6,543 | 551,236 | ||
gemini-2-5-pro | 0 /4 | 56.1s | 2.3 | 22,084 | 4,844 | 26,928 | ||
gpt-4-1 | 0 /4 | 101.9s | 21.0 | 1,837,507 | 909 | 1,838,416 | ||
gpt-4-1-nano | 0 /4 | 38.8s | 13.0 | 308,036 | 599 | 308,634 | ||
gpt-5-mini-high | 0 /4 | 145.3s | 17.0 | 604,844 | 8,511 | 613,354 | ||
gpt-5-mini-low | 0 /4 | 64.8s | 9.8 | 708,819 | 916 | 709,735 | ||
gpt-5-nano-high | 0 /4 | 412.1s | 36.5 | 4,271,073 | 40,220 | 4,311,293 | ||
gpt-5-nano-low | 0 /4 | 54.4s | 5.5 | 462,774 | 1,523 | 464,298 | ||
gpt-5-nano-medium | 0 /4 | 321.1s | 28.0 | 3,268,285 | 28,773 | 3,297,058 | ||
gpt-oss-120b | 0 /4 | 15.1s | 4.8 | 62,306 | 571 | 62,877 | ||
grok-4 | 0 /4 | 413.7s | 23.0 | 1,551,784 | 1,293 | 1,559,826 | ||
grok-4-fast | 0 /4 | 344.8s | 19.5 | 676,100 | 30,018 | 706,118 | ||
grok-code-fast-1 | 0 /4 | 791.6s | 25.3 | 981,144 | 8,885 | 990,028 | ||
kimi-k2-0711 | 0 /4 | 159.3s | 6.5 | 120,563 | 385 | 120,948 | ||
o3 | 0 /4 | 259.1s | 13.8 | 1,374,052 | 2,535 | 1,376,587 | ||
qwen-3-max | 0 /4 | 106.5s | 27.5 | 918,622 | 1,880 | 920,502 |
Task State
Instruction
Find out the specific commit SHA of adding an entry about "RAG for Document Search". After finding this information, create an ANSWER.md file in the repository with the content being the commit SHA (e.g., 023dfa35694db2709057488ad338afdbc89fb226).
Hint: It should be in an "AI model" section I think.
Verify
Python
import sys
import os
import requests
from typing import Dict, Optional, Tuple
import base64
from dotenv import load_dotenv
def _get_github_api(
    endpoint: str,
    headers: Dict[str, str],
    org: str,
    repo: str = "build-your-own-x",
    timeout: float = 30.0,
) -> Tuple[bool, Optional[Dict]]:
    """Make a GET request to the GitHub API and return (success, response).

    Args:
        endpoint: Path below ``/repos/{org}/{repo}/``; may carry a query string.
        headers: HTTP headers sent with the request.
        org: Owner (organization or user) of the repository.
        repo: Repository name.
        timeout: Seconds to wait on the request before giving up.

    Returns:
        ``(True, parsed_json)`` on HTTP 200, otherwise ``(False, None)``.
        A 404 is returned silently; any other non-200 status, a transport
        failure, or an undecodable body is reported on stderr first.
    """
    url = f"https://api.github.com/repos/{org}/{repo}/{endpoint}"

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang the whole verification run.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        print(f"Exception for {endpoint}: {e}", file=sys.stderr)
        return False, None

    if response.status_code == 404:
        # "Not found" is an expected outcome; callers report it themselves.
        return False, None
    if response.status_code != 200:
        print(f"API error for {endpoint}: {response.status_code}", file=sys.stderr)
        return False, None

    try:
        return True, response.json()
    except ValueError as e:
        # A 200 whose body is not valid JSON is treated like any other failure.
        print(f"Exception for {endpoint}: {e}", file=sys.stderr)
        return False, None
def _get_file_content(
    file_path: str,
    headers: Dict[str, str],
    org: str,
    repo: str = "build-your-own-x",
    ref: str = "master",
) -> Optional[str]:
    """Fetch a repository file via the contents endpoint and decode it as text.

    Returns the UTF-8 decoded file body, or ``None`` when the API lookup does
    not succeed, yields an empty result, or the payload cannot be decoded
    (the decode failure is reported on stderr).
    """
    endpoint = f"contents/{file_path}?ref={ref}"
    ok, payload = _get_github_api(endpoint, headers, org, repo)
    if not (ok and payload):
        return None

    try:
        # The payload's "content" field is read as base64 text.
        encoded = payload.get("content", "")
        raw_bytes = base64.b64decode(encoded)
        return raw_bytes.decode("utf-8")
    except Exception as e:
        print(f"Content decode error for {file_path}: {e}", file=sys.stderr)
        return None
def verify_task() -> bool:
    """Verify the find RAG commit SHA task.

    Reads ``MCP_GITHUB_TOKEN`` and ``GITHUB_EVAL_ORG`` (after loading
    ``.mcp_env``), then checks in order that:

    1. ``ANSWER.md`` can be fetched from the repository,
    2. its stripped content equals the expected commit SHA,
    3. that commit can be fetched from the repository.

    Returns:
        ``True`` only when every check passes. On the first failing check an
        error is printed to stderr and ``False`` is returned.
    """
    # Load environment variables from .mcp_env
    load_dotenv(".mcp_env")

    # Get GitHub token and org
    github_token = os.environ.get("MCP_GITHUB_TOKEN")
    github_org = os.environ.get("GITHUB_EVAL_ORG")
    if not github_token:
        print("Error: MCP_GITHUB_TOKEN environment variable not set", file=sys.stderr)
        return False
    if not github_org:
        print("Error: GITHUB_EVAL_ORG environment variable not set", file=sys.stderr)
        return False

    headers = {
        "Authorization": f"Bearer {github_token}",
        "Accept": "application/vnd.github.v3+json",
    }

    print("Verifying RAG commit SHA task...")

    # Expected commit SHA for RAG for Document Search
    expected_sha = "048cd3b3de70e4b429057891576ea394a50cdf48"

    # 1. Check if ANSWER.md exists in the repository
    print("1. Checking if ANSWER.md exists...")
    content = _get_file_content("ANSWER.md", headers, github_org)
    # Compare against None rather than truthiness: an ANSWER.md that exists
    # but is empty should be reported as a wrong SHA below, not as missing.
    if content is None:
        print("Error: ANSWER.md not found in repository", file=sys.stderr)
        return False
    print("✓ ANSWER.md found")

    # 2. Check the content matches expected SHA
    print("2. Checking commit SHA...")
    content = content.strip()
    if content != expected_sha:
        print(f"Error: Incorrect commit SHA. Expected {expected_sha}, got: {content}", file=sys.stderr)
        return False
    print("✓ Commit SHA is correct")

    # 3. Verify the commit exists
    print("3. Verifying the commit exists...")
    success, commit_data = _get_github_api(f"commits/{content}", headers, github_org)
    if not success or not commit_data:
        print(f"Error: Commit {content} not found in repository", file=sys.stderr)
        return False
    print(f"✓ Commit {content} exists")

    print("\n✅ All verification checks passed!")
    print("Task completed successfully:")
    print(f" - ANSWER.md created with correct commit SHA: {content}")
    print(" - Commit exists in the repository")
    print(f" - Commit message: {commit_data.get('commit', {}).get('message', '')}")
    return True
if __name__ == "__main__":
    # Script entry point: exit status 0 when verification passes, 1 otherwise.
    exit_code = 0 if verify_task() else 1
    sys.exit(exit_code)