Find Commit Date
L3
GitHub · Build Your Own X
Find when Voxel Engine entries were first created by Daniel Stefanovic and document the date.
Created by Xiangyan Liu
2025-08-15
Repository Analysis
Model Ranking
Click on the dots to view the trajectory of each task run
| Model | Run Results | Pass@4 | Pass^4 | Avg Time | Avg Turns | Input Tokens | Output Tokens | Total Tokens |
|---|---|---|---|---|---|---|---|---|
| claude-opus-4-5-high | 4 /4 | | | 76.3s | 8.3 | 658,898 | 1,443 | 660,341 |
| gemini-3-pro-high | 4 /4 | | | 252.1s | 14.8 | 1,192,044 | 4,336 | 1,196,379 |
| gpt-5-high | 4 /4 | | | 1145.4s | 21.3 | 1,912,874 | 28,796 | 1,941,669 |
| gemini-3-pro-low | 3 /4 | | | 183.4s | 18.0 | 1,692,479 | 4,540 | 1,697,019 |
| gpt-5-medium | 3 /4 | | | 494.2s | 21.3 | 1,386,968 | 20,594 | 1,407,562 |
| claude-sonnet-4-high | 2 /4 | | | 154.7s | 16.5 | 1,227,221 | 2,692 | 1,229,913 |
| claude-sonnet-4-5 | 1 /4 | | | 95.6s | 12.5 | 1,207,771 | 1,913 | 1,209,685 |
| claude-sonnet-4-low | 1 /4 | | | 178.9s | 20.8 | 1,520,693 | 3,230 | 1,523,923 |
| grok-4-fast | 1 /4 | | | 931.7s | 21.0 | 3,272,500 | 65,867 | 3,338,367 |
| claude-opus-4-1 | 0 /1 | - | - | 101.4s | 5.0 | 258,338 | 680 | 259,018 |
| claude-sonnet-4 | 0 /4 | | | 60.4s | 3.8 | 89,580 | 564 | 90,144 |
| deepseek-chat | 0 /4 | | | 66.1s | 3.8 | 125,972 | 291 | 126,263 |
| deepseek-v3-1-terminus | 0 /4 | | | 232.9s | 14.5 | 636,599 | 1,078 | 637,676 |
| deepseek-v3-1-terminus-thinking | 0 /4 | | | 688.9s | 16.3 | 623,980 | 15,196 | 639,176 |
| deepseek-v3-2-chat | 0 /4 | | | 337.0s | 25.5 | 1,514,986 | 4,917 | 1,519,903 |
| deepseek-v3-2-thinking | 0 /4 | | | 397.4s | 25.5 | 992,686 | 7,586 | 1,000,273 |
| gemini-2-5-flash | 0 /4 | | | 43.5s | 4.0 | 112,599 | 5,616 | 118,215 |
| gemini-2-5-pro | 0 /4 | | | 80.0s | 3.0 | 25,923 | 7,236 | 33,158 |
| glm-4-5 | 0 /4 | | | 151.6s | 18.3 | 861,359 | 2,081 | 863,439 |
| gpt-4-1 | 0 /4 | | | 26.3s | 6.5 | 118,600 | 285 | 118,885 |
| gpt-4-1-mini | 0 /4 | | | 49.7s | 11.0 | 268,051 | 418 | 268,469 |
| gpt-4-1-nano | 0 /4 | | | 31.6s | 9.8 | 180,456 | 516 | 180,972 |
| gpt-5-low | 0 /4 | | | 369.7s | 13.8 | 1,741,221 | 7,872 | 1,749,093 |
| gpt-5-mini-high | 0 /4 | | | 344.3s | 20.8 | 1,483,001 | 16,616 | 1,499,617 |
| gpt-5-mini-low | 0 /4 | | | 86.9s | 17.0 | 598,955 | 1,846 | 600,801 |
| gpt-5-mini-medium | 0 /4 | | | 172.8s | 16.8 | 1,161,931 | 4,625 | 1,166,555 |
| gpt-5-nano-high | 0 /4 | | | 386.6s | 27.8 | 2,619,346 | 44,425 | 2,663,771 |
| gpt-5-nano-low | 0 /4 | | | 63.5s | 13.3 | 394,889 | 1,914 | 396,802 |
| gpt-5-nano-medium | 0 /4 | | | 318.9s | 34.5 | 2,272,700 | 31,630 | 2,304,330 |
| gpt-oss-120b | 0 /4 | | | 11.2s | 2.5 | 55,135 | 283 | 55,418 |
| grok-4 | 0 /4 | | | 320.4s | 16.0 | 1,593,199 | 843 | 1,599,468 |
| grok-code-fast-1 | 0 /4 | | | 105.0s | 17.5 | 759,315 | 8,321 | 767,635 |
| kimi-k2-0711 | 0 /4 | | | 101.5s | 3.0 | 52,754 | 314 | 53,067 |
| kimi-k2-0905 | 0 /4 | | | 314.8s | 23.3 | 1,193,005 | 1,579 | 1,194,584 |
| o3 | 0 /4 | | | 90.3s | 5.0 | 508,663 | 2,132 | 510,795 |
| o4-mini | 0 /4 | | | 574.9s | 26.5 | 1,305,673 | 14,708 | 1,320,381 |
| qwen-3-coder-plus | 0 /4 | | | 227.0s | 31.8 | 2,462,162 | 2,540 | 2,464,702 |
| qwen-3-max | 0 /4 | | | 321.5s | 75.0 | 3,550,762 | 5,210 | 3,555,971 |
Task State
Instruction
Find out when the entries in the Voxel Engine section were first created by Daniel Stefanovic. After finding this information, create an ANSWER.md file in the repository with the content being the date in [YYYY]-[MM]-[DD] format (e.g., 2000-06-02).
Verify
Python
import sys
import os
import requests
from typing import Dict, Optional, Tuple
import base64
from dotenv import load_dotenv
def _get_github_api(
    endpoint: str,
    headers: Dict[str, str],
    org: str,
    repo: str = "build-your-own-x",
    timeout: float = 30.0,
) -> Tuple[bool, Optional[Dict]]:
    """Make a GET request to the GitHub API and return (success, response).

    Args:
        endpoint: Path (and optional query string) appended after
            ``/repos/{org}/{repo}/``.
        headers: HTTP headers sent with the request.
        org: Owner segment of the repository URL.
        repo: Repository name segment of the URL.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the verifier indefinitely.

    Returns:
        ``(True, parsed_json)`` on HTTP 200, ``(False, None)`` otherwise.
        A 404 is returned silently; any other status code, and any
        exception raised while requesting or decoding, is reported on
        stderr first.
    """
    url = f"https://api.github.com/repos/{org}/{repo}/{endpoint}"
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return True, response.json()
        # 404 simply means "does not exist" and is not worth logging;
        # every other non-200 status is unexpected.
        if response.status_code != 404:
            print(f"API error for {endpoint}: {response.status_code}", file=sys.stderr)
        return False, None
    except Exception as e:
        # Boundary handler: network failures, timeouts and malformed JSON
        # all become a logged failure instead of crashing the verifier.
        print(f"Exception for {endpoint}: {e}", file=sys.stderr)
        return False, None
def _get_file_content(
    file_path: str,
    headers: Dict[str, str],
    org: str,
    repo: str = "build-your-own-x",
    ref: str = "master",
) -> Optional[str]:
    """Fetch a file from the repository and return its text.

    Requests ``contents/{file_path}?ref={ref}`` through ``_get_github_api``
    and base64-decodes the ``content`` field of the response as UTF-8.

    Returns:
        The decoded text, or None when the request fails, the response is
        empty, or the payload cannot be decoded (reported on stderr).
    """
    endpoint = f"contents/{file_path}?ref={ref}"
    ok, payload = _get_github_api(endpoint, headers, org, repo)
    if not (ok and payload):
        return None

    try:
        encoded = payload.get("content", "")
        raw_bytes = base64.b64decode(encoded)
        return raw_bytes.decode("utf-8")
    except Exception as e:
        print(f"Content decode error for {file_path}: {e}", file=sys.stderr)
        return None
def verify_task() -> bool:
    """Verify the "find commit date" task for the Voxel Engine entries.

    Loads ``.mcp_env``, reads ``MCP_GITHUB_TOKEN`` and ``GITHUB_EVAL_ORG``
    from the environment, then checks in order that:

    1. ``ANSWER.md`` can be fetched from the repository,
    2. its stripped content has the form ``YYYY-MM-DD``,
    3. that content equals the expected date ``2018-07-07``,
    4. ``README.md`` can be fetched and contains the text "Voxel Engine".

    Progress is printed to stdout and errors to stderr.

    Returns:
        True when every check passes; False as soon as one fails.
    """
    # Function-scope import: ``re`` is not among the module-level imports.
    import re

    # Load environment variables from .mcp_env
    load_dotenv(".mcp_env")

    # Get GitHub token and org
    github_token = os.environ.get("MCP_GITHUB_TOKEN")
    github_org = os.environ.get("GITHUB_EVAL_ORG")
    if not github_token:
        print("Error: MCP_GITHUB_TOKEN environment variable not set", file=sys.stderr)
        return False
    if not github_org:
        print("Error: GITHUB_EVAL_ORG environment variable not set", file=sys.stderr)
        return False

    headers = {
        "Authorization": f"Bearer {github_token}",
        "Accept": "application/vnd.github.v3+json",
    }

    print("Verifying Voxel Engine commit date task...")

    # 1. Check if ANSWER.md exists in the repository
    print("1. Checking if ANSWER.md exists...")
    content = _get_file_content("ANSWER.md", headers, github_org)
    # Compare against None rather than truthiness: an existing but empty
    # ANSWER.md is a format error (reported in step 2), not a missing file.
    if content is None:
        print("Error: ANSWER.md not found in repository", file=sys.stderr)
        return False
    print("✓ ANSWER.md found")

    # 2. Check the content format
    print("2. Checking content format...")
    content = content.strip()
    # The expected date when Daniel Stefanovic added Voxel Engine entries,
    # hard-coded by the task author.
    expected_date = "2018-07-07"
    # The whole stripped content must be a YYYY-MM-DD date.
    if not re.fullmatch(r"\d{4}-\d{2}-\d{2}", content):
        print(f"Error: Invalid date format. Expected YYYY-MM-DD, got: {content}", file=sys.stderr)
        return False
    print("✓ Date format is correct")

    # 3. Verify the date is correct
    print("3. Verifying the date...")
    if content != expected_date:
        print(f"Error: Incorrect date. Expected {expected_date}, got: {content}", file=sys.stderr)
        return False
    print(f"✓ Date is correct: {content}")

    # 4. Verify README.md contains Voxel Engine section
    print("4. Checking if README.md contains Voxel Engine section...")
    readme_content = _get_file_content("README.md", headers, github_org)
    if readme_content is None:
        print("Error: README.md not found in repository", file=sys.stderr)
        return False
    if "Voxel Engine" not in readme_content:
        print("Error: Voxel Engine section not found in README.md", file=sys.stderr)
        return False

    # Missing individual entries only produce a warning; they do not fail
    # the verification.
    voxel_entries = [
        "Let's Make a Voxel Engine",
        "Java Voxel Engine Tutorial",
    ]
    for entry in voxel_entries:
        if entry not in readme_content:
            print(f"Warning: Voxel Engine entry '{entry}' not found in README.md", file=sys.stderr)
    print("✓ Voxel Engine section found in README.md")

    print("\n✅ All verification checks passed!")
    print("Task completed successfully:")
    print(f" - ANSWER.md created with date: {content}")
    print(" - Date format is correct (YYYY-MM-DD)")
    print(" - Date matches expected creation date for Voxel Engine entries by Daniel Stefanovic")
    print(" - Voxel Engine section exists in README.md")
    return True
if __name__ == "__main__":
    # Propagate the verification outcome as the process exit status:
    # 0 when verification passed, 1 when it failed.
    success = verify_task()
    sys.exit(int(not success))