Find Salient File

L3
ModelContextProtocolGithubMissing Semester

Identify the most frequently modified file in the past 100 commits, excluding GitHub Actions related files, and create an ANSWER.md with the file name.

Created by Zijian Wu
2025-08-15
Commit AnalysisFile TrackingGit History

Model Ranking

Click on the dots to view the trajectory of each task run
Model
Run Results
Pass@4
Pass^4
Avg Time
Avg Turns
Input Tokens
Output Tokens
Total Tokens
Claude
claude-4-1-opus
0
/1
--
321.6s
9.0
1,509,742
1,380
1,511,122
Claude
claude-4-sonnet
0
/4
241.8s
10.5
1,802,531
1,841
1,804,372
DeepSeek
deepseek-chat
0
/4
29.5s
2.5
41,997
146
42,142
Gemini
gemini-2-5-pro
0
/4
36.2s
1.8
10,691
2,095
12,786
OpenAI
gpt-5
0
/4
220.7s
3.3
298,352
5,190
303,542
Grok
grok-4
0
/4
29.8s
-
-
-
-
MoonshotAI
k2
0
/4
119.1s
2.0
142,122
176
142,298
OpenAI
o3
0
/4
69.8s
3.8
338,368
965
339,333
Qwen
qwen-3-coder
0
/4
360.1s
11.0
1,862,964
1,094
1,864,057

Task State


Instruction



Verify

*.py
Python
import sys
import os
import requests
import base64
from typing import Dict, Optional, Tuple
from dotenv import load_dotenv


def _get_github_api(
    endpoint: str, headers: Dict[str, str], org: str, repo: str = "missing-semester"
) -> Tuple[bool, Optional[Dict]]:
    """Make a GET request to GitHub API and return (success, response)."""
    url = f"https://api.github.com/repos/{org}/{repo}/{endpoint}"
    
    try:
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            return True, response.json()
        elif response.status_code == 404:
            return False, None
        else:
            print(f"API error for {endpoint}: {response.status_code}", file=sys.stderr)
            return False, None
    except Exception as e:
        print(f"Exception for {endpoint}: {e}", file=sys.stderr)
        return False, None


def _get_file_content(
    file_path: str,
    headers: Dict[str, str],
    org: str,
    repo: str = "missing-semester",
    ref: str = "master",
) -> Optional[str]:
    """Get the content of a file from the repository."""
    success, result = _get_github_api(
        f"contents/{file_path}?ref={ref}", headers, org, repo
    )
    if not success or not result:
        return None

    try:
        content = base64.b64decode(result.get("content", "")).decode("utf-8")
        return content
    except Exception as e:
        print(f"Content decode error for {file_path}: {e}", file=sys.stderr)
        return None


def verify() -> bool:
    """
    Programmatically verify that the most frequently modified file was identified correctly.
    Checks for ANSWER.md file in master branch with the correct content.
    """
    # Expected answer content (excluding GitHub Actions files)
    EXPECTED_CONTENT = "index.md"
    
    # Load environment variables from .mcp_env
    load_dotenv(".mcp_env")

    # Get GitHub token and org
    github_token = os.environ.get("MCP_GITHUB_TOKEN")
    github_org = os.environ.get("GITHUB_EVAL_ORG")

    if not github_token:
        print("Error: MCP_GITHUB_TOKEN environment variable not set", file=sys.stderr)
        return False

    if not github_org:
        print("Error: GITHUB_EVAL_ORG environment variable not set", file=sys.stderr)
        return False

    headers = {
        "Authorization": f"Bearer {github_token}",
        "Accept": "application/vnd.github.v3+json",
    }

    # Run verification checks
    print("Verifying salient file identification task completion...")

    # 1. Check that ANSWER.md exists in master branch
    print("1. Checking ANSWER.md exists in master branch...")
    answer_content = _get_file_content("ANSWER.md", headers, github_org, "missing-semester", "master")
    
    if not answer_content:
        print("Error: ANSWER.md not found in master branch", file=sys.stderr)
        return False

    print("✅ ANSWER.md found in master branch")

    # 2. Check that the content matches expected answer
    print("2. Verifying ANSWER.md content...")
    answer_content = answer_content.strip()
    
    if answer_content != EXPECTED_CONTENT:
        print(f"Error: ANSWER.md content does not match expected answer", file=sys.stderr)
        print(f"Expected: {EXPECTED_CONTENT}", file=sys.stderr)
        print(f"Found: {answer_content}", file=sys.stderr)
        return False

    print("✅ ANSWER.md contains correct filename")

    print("\n✅ All verification checks passed!")
    print("Salient file identification task completed successfully:")
    print(f"  - ANSWER.md created in master branch")
    print(f"  - Content: {EXPECTED_CONTENT}")

    return True


if __name__ == "__main__":
    success = verify()
    sys.exit(0 if success else 1)