Find Legacy Name
L3
GitHub · Missing Semester
Find the old name and domain of The Missing Semester course from commit history and document the findings.
Created by Zijian Wu
2025-08-15
Repository Analysis
Model Ranking
Click on the dots to view the trajectory of each task run
Model | Run Results | Pass@4 | Pass^4 | Avg Time | Avg Turns | Input Tokens | Output Tokens | Total Tokens |
---|---|---|---|---|---|---|---|---|
gpt-5 | 2/4 | | | 229.3s | 12.3 | 1,366,041 | 6,262 | 1,372,304 |
claude-4-sonnet | 1/4 | | | 150.2s | 8.0 | 1,224,933 | 1,082 | 1,226,016 |
gemini-2-5-pro | 1/4 | | | 50.8s | 3.5 | 42,614 | 3,249 | 45,862 |
claude-4-1-opus | 0/1 | - | - | 58.6s | 2.0 | 192,528 | 290 | 192,818 |
deepseek-chat | 0/4 | | | 49.1s | 2.5 | 91,394 | 170 | 91,564 |
grok-4 | 0/4 | | | 30.2s | - | - | - | - |
k2 | 0/4 | | | 178.5s | 4.5 | 382,765 | 341 | 383,106 |
o3 | 0/4 | | | 195.4s | 10.8 | 1,316,562 | 2,716 | 1,319,278 |
qwen-3-coder | 0/4 | | | 563.9s | 15.8 | 2,886,625 | 1,327 | 2,887,952 |
Task State
Instruction
Verify
Python
import sys
import os
import requests
import base64
from typing import Dict, Optional, Tuple
from dotenv import load_dotenv
def _get_github_api(
    endpoint: str,
    headers: Dict[str, str],
    org: str,
    repo: str = "missing-semester",
    timeout: float = 30.0,
) -> Tuple[bool, Optional[Dict]]:
    """Make a GET request to the GitHub API and return ``(success, response)``.

    Args:
        endpoint: Path appended after ``/repos/{org}/{repo}/``; it may carry
            its own query string.
        headers: HTTP headers sent with the request.
        org: Owner segment of the repository URL.
        repo: Repository name segment of the repository URL.
        timeout: Seconds to wait on the server before giving up. ``requests``
            applies no timeout unless one is passed, so omitting it would let
            a stalled connection block the caller forever.

    Returns:
        ``(True, parsed_json)`` when the server answers with HTTP 200,
        ``(False, None)`` in every other case. A 404 is returned silently;
        any other status code and any exception are reported on stderr.
    """
    url = f"https://api.github.com/repos/{org}/{repo}/{endpoint}"
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return True, response.json()
        # 404 is an expected "does not exist" answer, so it is not logged.
        if response.status_code != 404:
            print(f"API error for {endpoint}: {response.status_code}", file=sys.stderr)
        return False, None
    except Exception as e:
        # Deliberately broad: callers rely on this helper never raising, and
        # connection errors, timeouts and malformed JSON all end up here.
        print(f"Exception for {endpoint}: {e}", file=sys.stderr)
        return False, None
def _get_file_content(
    file_path: str,
    headers: Dict[str, str],
    org: str,
    repo: str = "missing-semester",
    ref: str = "master",
) -> Optional[str]:
    """Get the decoded text content of a file from the repository.

    Args:
        file_path: Path of the file inside the repository.
        headers: HTTP headers forwarded to the GitHub API request.
        org: Owner segment of the repository URL.
        repo: Repository name segment of the repository URL.
        ref: Branch, tag or commit passed as the ``ref`` query parameter.

    Returns:
        The file content decoded from base64 and then UTF-8, or ``None`` when
        the request fails, the response is empty or is not a single-file
        object, or the content cannot be decoded.
    """
    success, result = _get_github_api(
        f"contents/{file_path}?ref={ref}", headers, org, repo
    )
    if not success or not result:
        return None
    # The contents endpoint can answer with a JSON array (e.g. when the path
    # is a directory); calling ``.get`` on that would raise AttributeError.
    if not isinstance(result, dict):
        print(
            f"Content decode error for {file_path}: response is not a file object",
            file=sys.stderr,
        )
        return None
    try:
        return base64.b64decode(result.get("content", "")).decode("utf-8")
    except (ValueError, TypeError) as e:
        # ValueError covers malformed base64 (binascii.Error) and invalid
        # UTF-8 (UnicodeDecodeError); TypeError covers a non-string payload.
        print(f"Content decode error for {file_path}: {e}", file=sys.stderr)
        return None
def verify() -> bool:
    """
    Check that the legacy name finding task left the expected ANSWER.md on master.

    Loads ``.mcp_env``, reads ``MCP_GITHUB_TOKEN`` and ``GITHUB_EVAL_ORG`` from
    the environment, fetches ``ANSWER.md`` from the ``master`` branch of the
    ``missing-semester`` repository under that org, and compares its
    whitespace-stripped text with the expected answer.

    Returns True only when the file is present, non-empty and matches exactly;
    every failure path prints its reason to stderr and returns False.
    """
    expected_answer = "[Hacker Tools](https://hacker-tools.github.io)"

    def report(*lines: str) -> None:
        # All failure diagnostics go to stderr, one line per message.
        for line in lines:
            print(line, file=sys.stderr)

    # Credentials and target org come from .mcp_env / the process environment.
    load_dotenv(".mcp_env")
    token = os.environ.get("MCP_GITHUB_TOKEN")
    org_name = os.environ.get("GITHUB_EVAL_ORG")
    required = (("MCP_GITHUB_TOKEN", token), ("GITHUB_EVAL_ORG", org_name))
    for var_name, value in required:
        if not value:
            report(f"Error: {var_name} environment variable not set")
            return False

    request_headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github.v3+json",
    }

    print("Verifying legacy name finding task completion...")

    # Step 1: the answer file must exist (and be non-empty) on master.
    print("1. Checking ANSWER.md exists in master branch...")
    fetched = _get_file_content(
        "ANSWER.md", request_headers, org_name, repo="missing-semester", ref="master"
    )
    if not fetched:
        report("Error: ANSWER.md not found in master branch")
        return False
    print("✓ ANSWER.md found in master branch")

    # Step 2: its text, ignoring surrounding whitespace, must match exactly.
    print("2. Verifying ANSWER.md content...")
    found = fetched.strip()
    if found != expected_answer:
        report(
            "Error: ANSWER.md content does not match expected answer",
            f"Expected: {expected_answer}",
            f"Found: {found}",
        )
        return False
    print("✓ ANSWER.md contains correct legacy name and URL")

    summary = (
        "\n✅ All verification checks passed!",
        "Legacy name finding task completed successfully:",
        " - ANSWER.md created in master branch",
        f" - Content: {expected_answer}",
    )
    for line in summary:
        print(line)
    return True
if __name__ == "__main__":
    # Exit status 0 reports a passed verification, 1 a failed one.
    sys.exit(0 if verify() else 1)