Find Rag Commit

L3
ModelContextProtocol · GitHub · Build Your Own X

Identify the specific commit SHA that added the RAG for Document Search entry to the repository.

Created by Xiangyan Liu
2025-08-15
Repository Analysis

Model Ranking

Click on the dots to view the trajectory of each task run
Model
Run Results
Pass@4
Pass^4
Avg Time
Avg Turns
Input Tokens
Output Tokens
Total Tokens
Claude
claude-opus-4-5-high
4
/4
114.5s
11.5
988,254
2,135
990,388
Claude
claude-sonnet-4-high
4
/4
253.8s
26.5
2,175,395
4,159
2,179,554
Claude
claude-sonnet-4-low
4
/4
230.2s
24.8
1,663,257
4,098
1,667,355
Gemini
gemini-3-pro-high
4
/4
228.5s
14.0
557,314
3,177
560,491
OpenAI
gpt-5-high
4
/4
1183.9s
25.8
1,290,063
30,094
1,320,157
Claude
claude-sonnet-4-5
3
/4
130.7s
17.5
1,847,817
2,478
1,850,294
DeepSeek
deepseek-v3-2-chat
3
/4
339.9s
28.8
1,479,022
4,543
1,483,565
DeepSeek
deepseek-v3-2-thinking
3
/4
390.4s
26.3
1,262,969
7,570
1,270,539
Gemini
gemini-3-pro-low
3
/4
484.0s
17.5
1,013,371
4,965
1,018,336
OpenAI
gpt-5-medium
3
/4
346.4s
19.8
982,660
13,944
996,604
OpenAI
gpt-5-low
2
/4
317.8s
13.3
1,572,736
7,369
1,580,104
DeepSeek
deepseek-v3-1-terminus
1
/4
358.8s
15.3
718,310
1,121
719,430
DeepSeek
deepseek-v3-1-terminus-thinking
1
/4
1114.8s
19.0
792,916
24,156
817,072
Z.ai
glm-4-5
1
/4
97.1s
14.8
498,908
2,059
500,967
OpenAI
gpt-4-1-mini
1
/4
161.7s
24.0
723,655
1,019
724,674
OpenAI
gpt-5-mini-medium
1
/4
121.0s
18.5
616,556
5,976
622,532
MoonshotAI
kimi-k2-0905
1
/4
378.3s
34.3
1,436,495
2,235
1,438,730
OpenAI
o4-mini
1
/4
387.1s
17.8
1,546,860
6,150
1,553,011
Qwen
qwen-3-coder-plus
1
/4
790.3s
40.8
7,149,095
3,911
7,153,006
Claude
claude-opus-4-1
0
/1
--
71.3s
5.0
100,338
599
100,937
Claude
claude-sonnet-4
0
/4
58.6s
4.3
92,351
532
92,883
DeepSeek
deepseek-chat
0
/4
131.3s
7.3
293,268
475
293,743
Gemini
gemini-2-5-flash
0
/4
53.6s
6.5
544,693
6,543
551,236
Gemini
gemini-2-5-pro
0
/4
56.1s
2.3
22,084
4,844
26,928
OpenAI
gpt-4-1
0
/4
101.9s
21.0
1,837,507
909
1,838,416
OpenAI
gpt-4-1-nano
0
/4
38.8s
13.0
308,036
599
308,634
OpenAI
gpt-5-mini-high
0
/4
145.3s
17.0
604,844
8,511
613,354
OpenAI
gpt-5-mini-low
0
/4
64.8s
9.8
708,819
916
709,735
OpenAI
gpt-5-nano-high
0
/4
412.1s
36.5
4,271,073
40,220
4,311,293
OpenAI
gpt-5-nano-low
0
/4
54.4s
5.5
462,774
1,523
464,298
OpenAI
gpt-5-nano-medium
0
/4
321.1s
28.0
3,268,285
28,773
3,297,058
OpenAI
gpt-oss-120b
0
/4
15.1s
4.8
62,306
571
62,877
Grok
grok-4
0
/4
413.7s
23.0
1,551,784
1,293
1,559,826
Grok
grok-4-fast
0
/4
344.8s
19.5
676,100
30,018
706,118
Grok
grok-code-fast-1
0
/4
791.6s
25.3
981,144
8,885
990,028
MoonshotAI
kimi-k2-0711
0
/4
159.3s
6.5
120,563
385
120,948
OpenAI
o3
0
/4
259.1s
13.8
1,374,052
2,535
1,376,587
Qwen
qwen-3-max
0
/4
106.5s
27.5
918,622
1,880
920,502

Task State


Instruction

Find the SHA of the specific commit that added the entry for "RAG for Document Search". After finding it, create an ANSWER.md file in the repository whose content is that commit SHA (e.g., 023dfa35694db2709057488ad338afdbc89fb226).

Hint: It should be in an "AI model" section I think.



Verify

*.py
Python
import sys
import os
import requests
from typing import Dict, Optional, Tuple
import base64
from dotenv import load_dotenv


def _get_github_api(
    endpoint: str,
    headers: Dict[str, str],
    org: str,
    repo: str = "build-your-own-x",
    timeout: float = 30.0,
) -> Tuple[bool, Optional[Dict]]:
    """Make a GET request to the GitHub API and return (success, response).

    Args:
        endpoint: Path appended after ``/repos/{org}/{repo}/`` in the URL.
        headers: HTTP headers sent with the request.
        org: Owner segment of the repository URL.
        repo: Repository name segment of the URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``(True, parsed_json)`` when the response status is 200.
        ``(False, None)`` for a 404, for any other status code (reported on
        stderr), or when the request or JSON parsing raises (reported on
        stderr).
    """
    url = f"https://api.github.com/repos/{org}/{repo}/{endpoint}"
    try:
        # Without a timeout, requests waits indefinitely on a stalled
        # connection and the verifier would never finish.
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return True, response.json()
        if response.status_code != 404:
            # A 404 is an expected "does not exist" answer and stays silent;
            # every other status is reported.
            print(f"API error for {endpoint}: {response.status_code}", file=sys.stderr)
        return False, None
    except Exception as e:
        # Best-effort boundary: network failures, timeouts and bad JSON all
        # collapse to a failed lookup instead of crashing the verifier.
        print(f"Exception for {endpoint}: {e}", file=sys.stderr)
        return False, None


def _get_file_content(
    file_path: str,
    headers: Dict[str, str],
    org: str,
    repo: str = "build-your-own-x",
    ref: str = "master",
) -> Optional[str]:
    """Fetch a file through the repository ``contents`` endpoint as text.

    Args:
        file_path: Path of the file, placed after ``contents/`` in the URL.
        headers: HTTP headers forwarded to the API helper.
        org: Owner segment of the repository URL.
        repo: Repository name segment of the URL.
        ref: Value sent as the ``ref`` query parameter.

    Returns:
        The ``content`` field of the response, base64-decoded and then
        decoded as UTF-8. ``None`` when the API call fails, returns an empty
        payload, or the decoding raises (reported on stderr).
    """
    endpoint = f"contents/{file_path}?ref={ref}"
    ok, payload = _get_github_api(endpoint, headers, org, repo)
    if not (ok and payload):
        return None

    try:
        encoded = payload.get("content", "")
        raw_bytes = base64.b64decode(encoded)
        return raw_bytes.decode("utf-8")
    except Exception as e:
        print(f"Content decode error for {file_path}: {e}", file=sys.stderr)
        return None


def verify_task() -> bool:
    """Check that ANSWER.md in the evaluation repository holds the expected SHA.

    Loads ``.mcp_env``, reads ``MCP_GITHUB_TOKEN`` and ``GITHUB_EVAL_ORG``
    from the environment, then runs three checks in order:

    1. ANSWER.md can be fetched and is non-empty.
    2. Its whitespace-stripped content equals the expected commit SHA.
    3. The commit with that SHA can be fetched from the repository.

    Returns:
        True when all checks pass. False as soon as a required environment
        variable is missing or a check fails; the reason is printed to stderr.
    """
    # Bring the settings stored in .mcp_env into the process environment.
    load_dotenv(".mcp_env")

    token = os.environ.get("MCP_GITHUB_TOKEN")
    org = os.environ.get("GITHUB_EVAL_ORG")

    if not token:
        print("Error: MCP_GITHUB_TOKEN environment variable not set", file=sys.stderr)
        return False

    if not org:
        print("Error: GITHUB_EVAL_ORG environment variable not set", file=sys.stderr)
        return False

    request_headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github.v3+json",
    }

    print("Verifying RAG commit SHA task...")

    # SHA the answer file is compared against.
    expected_sha = "048cd3b3de70e4b429057891576ea394a50cdf48"

    # Check 1: the answer file must be retrievable and non-empty.
    print("1. Checking if ANSWER.md exists...")
    answer_text = _get_file_content("ANSWER.md", request_headers, org)
    if not answer_text:
        print("Error: ANSWER.md not found in repository", file=sys.stderr)
        return False
    print("✓ ANSWER.md found")

    # Check 2: exact match after trimming surrounding whitespace.
    print("2. Checking commit SHA...")
    submitted_sha = answer_text.strip()
    if submitted_sha != expected_sha:
        print(
            f"Error: Incorrect commit SHA. Expected {expected_sha}, got: {submitted_sha}",
            file=sys.stderr,
        )
        return False
    print("✓ Commit SHA is correct")

    # Check 3: the commit must be retrievable from the repository.
    print("3. Verifying the commit exists...")
    found, commit_info = _get_github_api(
        f"commits/{submitted_sha}", request_headers, org
    )
    if not (found and commit_info):
        print(f"Error: Commit {submitted_sha} not found in repository", file=sys.stderr)
        return False
    print(f"✓ Commit {submitted_sha} exists")

    print("\n✅ All verification checks passed!")
    print("Task completed successfully:")
    print(f"  - ANSWER.md created with correct commit SHA: {submitted_sha}")
    print("  - Commit exists in the repository")
    commit_meta = commit_info.get("commit", {})
    print(f"  - Commit message: {commit_meta.get('message', '')}")

    return True


if __name__ == "__main__":
    # Map the verification outcome to the process exit status:
    # 0 when every check passed, 1 otherwise.
    exit_code = 0 if verify_task() else 1
    sys.exit(exit_code)