Find Legacy Name

L3
ModelContextProtocol · GitHub · Missing Semester

Find the old name and domain of The Missing Semester course from commit history and document the findings.

Created by Zijian Wu
2025-08-15
Repository Analysis

Model Ranking

Click on the dots to view the trajectory of each task run
Model
Run Results
Pass@4
Pass^4
Avg Time
Avg Turns
Input Tokens
Output Tokens
Total Tokens
Claude
claude-sonnet-4-5
4
/4
115.5s
14.5
1,457,736
2,207
1,459,943
Claude
claude-sonnet-4-high
4
/4
164.8s
15.8
1,240,077
2,454
1,242,531
Claude
claude-sonnet-4-low
4
/4
216.2s
15.0
1,510,415
2,391
1,512,806
Gemini
gemini-3-pro-high
4
/4
93.7s
10.8
860,640
2,654
863,294
Gemini
gemini-3-pro-low
4
/4
140.7s
15.5
1,101,046
3,950
1,104,996
OpenAI
gpt-5-medium
3
/4
351.0s
19.5
1,803,533
9,988
1,813,521
Claude
claude-opus-4-5-high
2
/4
91.6s
9.0
988,790
1,261
990,051
DeepSeek
deepseek-v3-2-chat
2
/4
199.8s
17.5
1,154,005
2,280
1,156,285
OpenAI
gpt-5-low
2
/4
229.3s
12.3
1,366,041
6,262
1,372,304
MoonshotAI
kimi-k2-0905
2
/4
416.4s
19.5
1,097,187
1,392
1,098,579
Claude
claude-sonnet-4
1
/4
150.2s
8.0
1,224,933
1,082
1,226,016
DeepSeek
deepseek-v3-2-thinking
1
/4
270.6s
20.5
1,233,496
4,695
1,238,191
Gemini
gemini-2-5-pro
1
/4
56.4s
4.3
120,464
3,500
123,964
OpenAI
gpt-5-high
1
/4
725.5s
25.5
2,529,980
19,669
2,549,649
Qwen
qwen-3-coder-plus
1
/4
667.7s
19.5
3,417,088
1,593
3,418,681
Claude
claude-opus-4-1
0
/1
--
58.6s
2.0
192,528
290
192,818
DeepSeek
deepseek-chat
0
/4
49.1s
2.5
91,394
170
91,564
DeepSeek
deepseek-v3-1-terminus
0
/4
241.8s
5.3
417,427
386
417,813
DeepSeek
deepseek-v3-1-terminus-thinking
0
/4
406.7s
6.8
438,387
5,614
444,000
Gemini
gemini-2-5-flash
0
/4
32.1s
3.0
350,220
3,823
354,043
Z.ai
glm-4-5
0
/4
228.6s
6.8
331,434
849
332,283
OpenAI
gpt-4-1
0
/4
89.6s
17.3
1,167,818
811
1,168,629
OpenAI
gpt-4-1-mini
0
/4
118.6s
24.3
1,596,995
965
1,597,960
OpenAI
gpt-4-1-nano
0
/4
24.3s
7.0
164,432
469
164,902
OpenAI
gpt-5-mini-high
0
/4
332.9s
19.5
1,394,341
21,336
1,415,677
OpenAI
gpt-5-mini-low
0
/4
51.7s
8.5
466,197
848
467,045
OpenAI
gpt-5-mini-medium
0
/4
106.8s
14.8
827,300
4,212
831,511
OpenAI
gpt-5-nano-high
0
/4
381.3s
28.3
3,558,835
37,056
3,595,891
OpenAI
gpt-5-nano-low
0
/4
28.1s
4.0
127,457
1,587
129,044
OpenAI
gpt-5-nano-medium
0
/4
221.5s
8.5
325,147
43,842
368,989
OpenAI
gpt-oss-120b
0
/4
9.9s
2.5
33,669
281
33,950
Grok
grok-4
0
/4
303.0s
16.3
1,738,899
1,950
1,745,465
Grok
grok-4-fast
0
/4
101.3s
11.8
434,842
7,614
442,457
Grok
grok-code-fast-1
0
/4
94.0s
24.3
1,216,539
6,639
1,223,178
MoonshotAI
kimi-k2-0711
0
/4
178.5s
4.5
382,765
341
383,106
OpenAI
o3
0
/4
195.4s
10.8
1,316,562
2,716
1,319,278
OpenAI
o4-mini
0
/4
239.9s
11.5
1,135,262
3,744
1,139,006
Qwen
qwen-3-max
0
/4
512.9s
79.5
4,302,588
4,425
4,307,013

Task State


Instruction

I remember that a long time ago, The Missing Semester of Your CS Education had a different name and domain. There should be some related commit history. Please find the old name and domain and create an ANSWER.md file with them, formatted as:

[title](url)

Then push the file to the master branch.



Verify

*.py
Python
import sys
import os
import requests
import base64
from typing import Dict, Optional, Tuple
from dotenv import load_dotenv


def _get_github_api(
    endpoint: str, headers: Dict[str, str], org: str, repo: str = "missing-semester"
) -> Tuple[bool, Optional[Dict]]:
    """Make a GET request to GitHub API and return (success, response)."""
    url = f"https://api.github.com/repos/{org}/{repo}/{endpoint}"
    
    try:
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            return True, response.json()
        elif response.status_code == 404:
            return False, None
        else:
            print(f"API error for {endpoint}: {response.status_code}", file=sys.stderr)
            return False, None
    except Exception as e:
        print(f"Exception for {endpoint}: {e}", file=sys.stderr)
        return False, None


def _get_file_content(
    file_path: str,
    headers: Dict[str, str],
    org: str,
    repo: str = "missing-semester",
    ref: str = "master",
) -> Optional[str]:
    """Fetch a file through the contents endpoint and return it as text.

    The ``content`` field of the API response is base64-decoded and then
    decoded as UTF-8. Returns ``None`` when the API call is unsuccessful,
    when the response is empty, or when decoding fails (the decode failure
    is reported on stderr).
    """
    endpoint = f"contents/{file_path}?ref={ref}"
    ok, payload = _get_github_api(endpoint, headers, org, repo)
    if not (ok and payload):
        return None

    try:
        encoded = payload.get("content", "")
        raw_bytes = base64.b64decode(encoded)
        return raw_bytes.decode("utf-8")
    except Exception as err:
        print(f"Content decode error for {file_path}: {err}", file=sys.stderr)
        return None


def verify() -> bool:
    """
    Programmatically verify that the legacy name finding task was completed correctly.
    Checks for ANSWER.md file in master branch with the correct content.

    Configuration is loaded from the ``.mcp_env`` file; the
    ``MCP_GITHUB_TOKEN`` and ``GITHUB_EVAL_ORG`` environment variables must
    both be set.

    Returns:
        True when ANSWER.md on the master branch, after stripping surrounding
        whitespace, equals the expected answer exactly; False otherwise
        (the reason is printed to stderr).
    """
    # Expected answer content
    EXPECTED_CONTENT = "[Hacker Tools](https://hacker-tools.github.io)"

    # Load environment variables from .mcp_env
    load_dotenv(".mcp_env")

    # Get GitHub token and org
    github_token = os.environ.get("MCP_GITHUB_TOKEN")
    github_org = os.environ.get("GITHUB_EVAL_ORG")

    if not github_token:
        print("Error: MCP_GITHUB_TOKEN environment variable not set", file=sys.stderr)
        return False

    if not github_org:
        print("Error: GITHUB_EVAL_ORG environment variable not set", file=sys.stderr)
        return False

    headers = {
        "Authorization": f"Bearer {github_token}",
        "Accept": "application/vnd.github.v3+json",
    }

    # Run verification checks
    print("Verifying legacy name finding task completion...")

    # 1. Check that ANSWER.md exists in master branch
    print("1. Checking ANSWER.md exists in master branch...")
    answer_content = _get_file_content(
        "ANSWER.md", headers, github_org, "missing-semester", "master"
    )

    # Compare against None rather than truthiness: an existing but empty
    # file yields "" and must be reported as a content mismatch below, not
    # as a missing file.
    if answer_content is None:
        print("Error: ANSWER.md not found in master branch", file=sys.stderr)
        return False

    print("✓ ANSWER.md found in master branch")

    # 2. Check that the content matches expected answer
    print("2. Verifying ANSWER.md content...")
    # Surrounding whitespace (e.g. a trailing newline) is not significant.
    answer_content = answer_content.strip()

    if answer_content != EXPECTED_CONTENT:
        print("Error: ANSWER.md content does not match expected answer", file=sys.stderr)
        print(f"Expected: {EXPECTED_CONTENT}", file=sys.stderr)
        print(f"Found: {answer_content}", file=sys.stderr)
        return False

    print("✓ ANSWER.md contains correct legacy name and URL")

    print("\n✅ All verification checks passed!")
    print("Legacy name finding task completed successfully:")
    print("  - ANSWER.md created in master branch")
    print(f"  - Content: {EXPECTED_CONTENT}")

    return True


if __name__ == "__main__":
    # Map the boolean verdict onto the process exit status: 0 = pass, 1 = fail.
    exit_code = 0 if verify() else 1
    sys.exit(exit_code)