Swap Tasks

L3
ModelContextProtocol / Notion / Team Projects

Find the person responsible for the most and fewest tasks, then swap their assigned tasks.

Created by Xiangyan Liu
2025-08-12
Data Aggregation · Automated Migration · Conditional Filtering

Model Ranking

Click on the dots to view the trajectory of each task run
Model
Run Results
Pass@4
Pass^4
Avg Time
Avg Turns
Input Tokens
Output Tokens
Total Tokens
OpenAI
gpt-5-high
4
/4
518.1s
7.8
126,202
28,782
154,984
OpenAI
gpt-5-low
4
/4
214.8s
6.0
89,320
14,111
103,431
OpenAI
gpt-5-medium
4
/4
287.3s
6.0
126,596
13,969
140,564
Claude
claude-sonnet-4-low
3
/4
168.9s
19.3
933,370
4,071
937,441
OpenAI
gpt-5-mini-medium
3
/4
144.9s
8.5
556,175
11,151
567,326
Claude
claude-sonnet-4-high
2
/4
305.3s
21.3
3,465,262
4,405
3,469,668
OpenAI
gpt-5-mini-high
2
/4
475.2s
8.8
376,633
41,471
418,104
OpenAI
gpt-5-mini-low
2
/4
45.6s
4.5
53,975
2,467
56,442
Qwen
qwen-3-coder-plus
2
/4
78.2s
19.3
648,073
2,822
650,895
Claude
claude-opus-4-1
1
/1
--
313.7s
18.0
524,794
3,329
528,123
Claude
claude-sonnet-4
1
/4
244.1s
20.3
908,981
4,201
913,181
DeepSeek
deepseek-chat
1
/4
255.2s
19.5
596,529
2,398
598,927
Gemini
gemini-2-5-flash
1
/4
62.3s
4.0
65,691
10,144
75,835
Z.ai
glm-4-5
1
/4
220.5s
21.5
537,995
5,611
543,606
MoonshotAI
kimi-k2-0711
1
/4
103.0s
10.5
229,160
2,121
231,280
Gemini
gemini-2-5-pro
0
/4
58.8s
2.5
27,046
4,807
31,853
OpenAI
gpt-4-1
0
/4
12.6s
4.0
27,828
209
28,037
OpenAI
gpt-4-1-mini
0
/4
38.9s
8.5
124,311
939
125,250
OpenAI
gpt-4-1-nano
0
/4
20.5s
6.0
125,926
352
126,278
OpenAI
gpt-5-nano-high
0
/4
343.8s
10.5
800,031
61,780
861,812
OpenAI
gpt-5-nano-low
0
/4
37.6s
5.0
28,799
5,407
34,206
OpenAI
gpt-5-nano-medium
0
/4
50.5s
4.8
48,313
9,170
57,483
OpenAI
gpt-oss-120b
0
/4
19.5s
5.5
39,305
942
40,247
Grok
grok-4
0
/4
354.8s
16.3
489,954
12,077
502,031
Grok
grok-code-fast-1
0
/4
248.5s
22.3
679,133
7,687
688,064
MoonshotAI
kimi-k2-0905
0
/4
262.3s
17.8
550,005
3,018
553,023
OpenAI
o3
0
/4
162.2s
7.5
111,540
7,121
118,661
OpenAI
o4-mini
0
/4
664.1s
13.8
252,768
35,290
288,058
Qwen
qwen-3-max
0
/4
60.6s
6.3
134,011
339
134,350

Task State

Notion Workspace
This task is executed based on this Notion workspace
This workspace is cloned from the official Notion template marketplace. View Original Template

Instruction

Go to the Team Projects page, find the person responsible for the most tasks and the person responsible for the fewest tasks, then swap their assigned tasks.



Verify

*.py
Python
import sys
from notion_client import Client
from tasks.utils import notion_utils

def _task_title(task: dict) -> str:
    """
    Returns the plain-text title of a task page, or "" when the title is empty.

    All rich-text fragments of the "Name" property are concatenated, so a title
    that Notion stores in several fragments still compares equal to its full text.
    """
    fragments = task["properties"]["Name"]["title"]
    return "".join(
        fragment.get("plain_text") or (fragment.get("text") or {}).get("content", "")
        for fragment in fragments
    )


def _query_all_tasks(notion: Client, database_id: str) -> list:
    """
    Returns every row of the database, following the has_more / next_cursor
    pagination fields of the query response instead of stopping at one page.
    """
    rows = []
    cursor = None
    while True:
        query_args = {"database_id": database_id, "page_size": 100}
        # Only send start_cursor once there is one; the first request has none.
        if cursor:
            query_args["start_cursor"] = cursor
        response = notion.databases.query(**query_args)
        rows.extend(response.get("results") or [])
        cursor = response.get("next_cursor")
        if not response.get("has_more") or not cursor:
            return rows


def verify(notion: Client, main_id: str = None) -> bool:
    """
    Verifies that the task assignees have been swapped correctly.

    Args:
        notion: Notion client used for every lookup and query.
        main_id: Optional ID of the Team Projects page. When omitted, the page
            is located by searching for the title "Team Projects".

    Returns:
        True when every check passes, False otherwise. Failure reasons are
        written to stderr; progress and the summary are written to stdout.

    Checks:
    1. "Develop a plan for promotion" and "Evaluate different third-party services"
       both have an assignee and those assignees differ.
    2. The two hard-coded people (originally 3 and 10 tasks) now hold 10 and 3
       tasks respectively. Only the first assignee of each task is counted.
    3. The per-person task lists add up to the number of assigned tasks.
    """
    # Step 1: Find the Team Projects page
    if main_id:
        found_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)
        if not found_id or object_type != 'page':
            print("Error: Team Projects page not found.", file=sys.stderr)
            return False
    else:
        # Try to find the page by searching
        found_id = notion_utils.find_page(notion, "Team Projects")
        if not found_id:
            print("Error: Team Projects page not found.", file=sys.stderr)
            return False

    # Get all blocks from the page to find database references
    all_blocks = notion_utils.get_all_blocks_recursively(notion, found_id)

    # The Tasks database is the first child_database block whose title contains "Tasks"
    tasks_db_id = next(
        (
            block["id"]
            for block in all_blocks
            if block
            and block.get("type") == "child_database"
            and "Tasks" in block.get("child_database", {}).get("title", "")
        ),
        None,
    )

    if not tasks_db_id:
        print("Error: Tasks database not found.", file=sys.stderr)
        return False

    print("\n📋 Starting verification...")

    # Step 2: Query all tasks to analyze assignees
    try:
        tasks = _query_all_tasks(notion, tasks_db_id)
    except Exception as e:
        print(f"Error querying Tasks database: {e}", file=sys.stderr)
        return False

    if not tasks:
        print("Error: No tasks found in Tasks database.", file=sys.stderr)
        return False

    # Step 3: Check specific tasks have swapped assignees
    develop_plan_task = None
    evaluate_services_task = None

    for task in tasks:
        task_name = _task_title(task)
        if task_name == "Develop a plan for promotion":
            develop_plan_task = task
        elif task_name == "Evaluate different third-party services":
            evaluate_services_task = task

    if develop_plan_task is None or evaluate_services_task is None:
        print("Error: Could not find both required tasks.", file=sys.stderr)
        return False

    # Get assignees for these tasks
    develop_plan_assignees = develop_plan_task["properties"]["Assigned"]["people"]
    evaluate_services_assignees = evaluate_services_task["properties"]["Assigned"]["people"]

    if not develop_plan_assignees or not evaluate_services_assignees:
        print("Error: Tasks don't have assignees.", file=sys.stderr)
        return False

    develop_plan_assignee_id = develop_plan_assignees[0]["id"]
    evaluate_services_assignee_id = evaluate_services_assignees[0]["id"]

    # These should be different (swapped)
    if develop_plan_assignee_id == evaluate_services_assignee_id:
        print("Error: Tasks should have different assignees after swap.", file=sys.stderr)
        return False

    # Step 4: Group task names by their first assignee
    task_counts = {}
    unassigned_count = 0

    for task in tasks:
        assignees = task["properties"]["Assigned"]["people"]
        if assignees:
            task_counts.setdefault(assignees[0]["id"], []).append(_task_title(task))
        else:
            unassigned_count += 1

    # Sort by task count (ascending)
    sorted_assignees = sorted(task_counts.items(), key=lambda item: len(item[1]))

    if len(sorted_assignees) < 2:
        print("Error: Need at least 2 people with tasks to verify swap.", file=sys.stderr)
        return False

    # Get person with least and most tasks
    _, least_tasks = sorted_assignees[0]
    _, most_tasks = sorted_assignees[-1]

    # Step 5: Verify the swap pattern

    # Original distribution (before swap):
    # - 5ac96c02-49a4-4320-8de6-b663ba83126b had 3 tasks (least)
    # - ac7a3bd0-c111-4464-8f45-8a857a1abc8a had 10 tasks (most)
    # After a complete swap the two counts are exchanged.
    original_least_id = "5ac96c02-49a4-4320-8de6-b663ba83126b"
    original_most_id = "ac7a3bd0-c111-4464-8f45-8a857a1abc8a"
    expected_for_original_least = 10
    expected_for_original_most = 3

    # A person missing from task_counts has zero tasks.
    person1_tasks = len(task_counts.get(original_least_id, []))
    person2_tasks = len(task_counts.get(original_most_id, []))
    swap_completed = (
        person1_tasks == expected_for_original_least
        and person2_tasks == expected_for_original_most
    )

    # Step 6: Summary
    print("\n📊 Task Distribution:")
    print(f"  • Total tasks: {len(tasks)}")
    print(f"  • Assigned tasks: {len(tasks) - unassigned_count}")
    print(f"  • Unassigned tasks: {unassigned_count}")
    print(f"  • People with tasks: {len(task_counts)}")
    print("\n  Task counts by person:")
    for assignee_id, assignee_tasks in sorted_assignees:
        print(f"    - {assignee_id[:8]}...: {len(assignee_tasks)} tasks")

    # Step 7: Final verification
    print("\n🔍 Verification Results:")

    # Informational only: a small spread does not fail the verification.
    spread = len(most_tasks) - len(least_tasks)
    if spread < 5:
        print(f"Warning: Difference between most and least is only {spread} tasks", file=sys.stderr)

    verification_passed = True

    # Check 1: Specific tasks have been swapped.
    # Equal assignees already returned False in Step 3, so this always holds here.
    print("  ✓ Specific tasks have been swapped")

    # Check 2: Task distribution shows a complete swap
    if swap_completed:
        print("  ✓ Complete task swap verified (3↔10 tasks)")
    else:
        # Show actual distribution for debugging
        print("  ✗ Swap incomplete! Expected 5ac96c02→10 tasks, ac7a3bd0→3 tasks", file=sys.stderr)
        print(f"    Actual: 5ac96c02→{person1_tasks} tasks, ac7a3bd0→{person2_tasks} tasks", file=sys.stderr)
        verification_passed = False

    # Check 3: Total task count is preserved (internal consistency of the grouping)
    total_assigned_tasks = sum(len(names) for names in task_counts.values())
    expected_total = len(tasks) - unassigned_count

    if total_assigned_tasks == expected_total:
        print(f"  ✓ Total task count preserved ({total_assigned_tasks} assigned)")
    else:
        print(f"  ✗ Task count mismatch: {total_assigned_tasks} vs {expected_total} expected", file=sys.stderr)
        verification_passed = False

    if verification_passed:
        print("\n✅ All verification checks passed!")
        return True

    print("\n❌ Verification failed", file=sys.stderr)
    return False

def main():
    """
    Runs verify() with the client from notion_utils and exits the process:
    status 0 when verification passes, status 1 when it fails.
    The first command-line argument, if present, is passed as the page ID.
    """
    client = notion_utils.get_notion_client()

    page_id = None
    if len(sys.argv) > 1:
        page_id = sys.argv[1]

    passed = verify(client, page_id)
    sys.exit(0 if passed else 1)

# Run the verification only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()