Goals Restructure
Restructure the Current Goals section on the Company In A Box page by adding a new goal heading and converting all goal headings to toggles with content inside.
Model Ranking
| Model | Run Results | Pass@4 | Pass^4 | Avg Time | Avg Turns | Input Tokens | Output Tokens | Total Tokens |
|---|---|---|---|---|---|---|---|---|
| gpt-4-1 | 2/4 | | | 43.8s | 7.5 | 81,341 | 1,110 | 82,451 |
| gpt-5-mini-high | 2/4 | | | 513.0s | 31.3 | 818,431 | 49,247 | 867,678 |
| claude-sonnet-4 | 1/4 | | | 216.1s | 25.0 | 629,432 | 4,348 | 633,780 |
| gpt-5-mini-medium | 1/4 | | | 167.5s | 22.0 | 540,321 | 11,612 | 551,934 |
| kimi-k2-0711 | 1/4 | | | 280.4s | 42.0 | 1,476,597 | 4,383 | 1,480,980 |
| o3 | 1/4 | | | 206.5s | 21.0 | 297,910 | 10,105 | 308,014 |
| qwen-3-coder-plus | 1/4 | | | 72.8s | 19.3 | 392,063 | 1,764 | 393,827 |
| claude-opus-4-1 | 0/1 | - | - | 286.6s | 19.0 | 374,244 | 3,049 | 377,293 |
| claude-sonnet-4-high | 0/4 | | | 147.4s | 22.0 | 490,087 | 3,637 | 493,724 |
| claude-sonnet-4-low | 0/4 | | | 174.9s | 25.0 | 762,566 | 3,942 | 766,508 |
| deepseek-chat | 0/4 | | | 238.4s | 21.8 | 461,901 | 1,796 | 463,697 |
| gemini-2-5-flash | 0/4 | | | 78.0s | 4.8 | 104,300 | 13,229 | 117,529 |
| gemini-2-5-pro | 0/4 | | | 112.5s | 5.5 | 115,479 | 9,572 | 125,052 |
| glm-4-5 | 0/4 | | | 159.7s | 20.3 | 393,702 | 3,757 | 397,459 |
| gpt-4-1-mini | 0/4 | | | 42.3s | 10.3 | 226,074 | 848 | 226,922 |
| gpt-4-1-nano | 0/4 | | | 26.4s | 5.8 | 44,641 | 1,692 | 46,333 |
| gpt-5-high | 0/4 | | | 1689.3s | 21.5 | 455,031 | 65,539 | 520,570 |
| gpt-5-low | 0/4 | | | 506.5s | 15.0 | 233,402 | 22,285 | 255,687 |
| gpt-5-medium | 0/4 | | | 527.5s | 13.0 | 223,450 | 29,734 | 253,183 |
| gpt-5-mini-low | 0/4 | | | 24.1s | 4.3 | 24,641 | 1,324 | 25,965 |
| gpt-5-nano-high | 0/4 | | | 265.9s | 4.8 | 43,127 | 55,840 | 98,968 |
| gpt-5-nano-low | 0/4 | | | 19.4s | 1.8 | 7,000 | 2,781 | 9,781 |
| gpt-5-nano-medium | 0/4 | | | 93.8s | 4.0 | 21,401 | 19,076 | 40,477 |
| gpt-oss-120b | 0/4 | | | 27.9s | 5.8 | 52,615 | 1,788 | 54,402 |
| grok-4 | 0/4 | | | 355.3s | 15.5 | 356,396 | 13,726 | 370,122 |
| grok-code-fast-1 | 0/4 | | | 264.6s | 21.8 | 560,299 | 5,919 | 571,064 |
| kimi-k2-0905 | 0/4 | | | 453.9s | 47.8 | 1,477,402 | 3,964 | 1,481,366 |
| o4-mini | 0/4 | | | 257.6s | 11.3 | 158,910 | 17,010 | 175,919 |
| qwen-3-max | 0/4 | | | 95.0s | 20.3 | 395,058 | 1,558 | 396,616 |
Task State
Instruction
Please restructure the Current Goals section on my Company In A Box page as follows:
- Add a new goal heading — create a new `heading_3` block titled: 🔄 Digital Transformation Initiative
- Convert all four goal headings to toggles — the three existing goals plus the new one:
  - ⚙️ Expand Operations to LATAM
  - 🛠️ Push for Enterprise
  - 🩶 Boost Employee Engagement
  - 🔄 Digital Transformation Initiative
- Move descriptions inside the toggles — every paragraph or list that originally sat directly under a goal heading should become a child block of that heading after it is made toggleable.
- Preserve content & order — apart from the changes above, do not modify the text, formatting, or order of existing goal descriptions.
The end result should be a clean Current Goals section containing four toggleable goal headings, each with its corresponding details tucked inside.
Verify
import sys
from collections import deque
from typing import List, Optional

from notion_client import Client

from tasks.utils import notion_utils
# Expected text of the goal heading the task asks to add (emoji included).
# verify() searches for it as a case-insensitive substring of each toggle's text.
NEW_GOAL_HEADING = "🔄 Digital Transformation Initiative"
# Title of the section heading to look for. verify() matches it as a
# case-insensitive substring of a heading's text and then inspects the blocks
# that follow that heading under the same parent.
GOALS_SECTION_TITLE = "Current Goals"
def _plain(block) -> str:
    """Return the block's text as produced by ``notion_utils.get_block_plain_text``."""
    text = notion_utils.get_block_plain_text(block)
    return text
# Some Notion rich-text strings may include non-breaking spaces (\xa0) after emoji.
# Normalize them to plain spaces so text matching is robust.
def _normalize_string(s: str) -> str:
return s.replace("\xa0", " ")
def _is_heading(block) -> bool:
return block.get("type") in ["heading_1", "heading_2", "heading_3"]
def _is_toggle(block) -> bool:
"""Determine whether a block is a toggle (standard toggle block or toggle-able heading)."""
btype = block.get("type")
# In our scenario, goal blocks are headings (usually heading_3) marked as toggleable.
if btype in ["heading_1", "heading_2", "heading_3"]:
heading_data = block.get(btype, {})
return heading_data.get("is_toggleable", False)
# Some Notion pages may contain classic toggle blocks (type == "toggle"). They are
# not expected in this task, but keeping this check allows broader compatibility.
return btype == "toggle"
def _get_children(notion: Client, block_id: str) -> List[dict]:
"""Retrieve **direct** children of a block (no pagination handling needed for small test pages)."""
try:
return notion.blocks.children.list(block_id=block_id).get("results", [])
except Exception:
return []
# Block types accepted as a goal's description inside its toggle.
_DESCRIPTION_BLOCK_TYPES = frozenset(
    {
        "paragraph",
        "bulleted_list_item",
        "numbered_list_item",
        "to_do",
        "callout",
        "quote",
    }
)


def _resolve_page_id(notion: Client, main_id: Optional[str]) -> Optional[str]:
    """Return the page ID for ``main_id``, falling back to a search by page title."""
    if main_id:
        found_id, obj_type = notion_utils.find_page_or_database_by_id(notion, main_id)
        if found_id and obj_type == "page":
            return found_id
    return notion_utils.find_page(notion, "Company In A Box") or None


def _find_goals_section(notion: Client, page_id: str) -> Optional[List[dict]]:
    """Return the blocks following the "Current Goals" heading, or None if it is absent.

    The page is searched breadth-first. The section is taken to be every
    sibling that comes after the heading under the same parent.
    """
    wanted_title = GOALS_SECTION_TITLE.lower()
    pending = deque([page_id])
    while pending:
        parent_id = pending.popleft()
        siblings = _get_children(notion, parent_id)
        for position, block in enumerate(siblings):
            if _is_heading(block) and wanted_title in _normalize_string(_plain(block)).lower():
                return siblings[position + 1 :]
        # Heading not at this level: descend into blocks that have children.
        pending.extend(b["id"] for b in siblings if b.get("has_children"))
    return None


def verify(notion: Client, main_id: Optional[str] = None) -> bool:
    """Verifies that the Company in a Box page has been updated per the task requirements.

    Checks, in order: the page can be found; a "Current Goals" heading exists
    and has blocks after it; exactly four of those blocks are toggles; one of
    the toggles carries the new goal heading text; every toggle contains at
    least one description-type child; and no plain (non-toggle) ``heading_3``
    remains in the section.

    Args:
        notion: Notion client used for all lookups.
        main_id: Optional ID of the page to verify. When it is missing or does
            not resolve to a page, the page is looked up by title instead.

    Returns:
        True when every check passes. On the first failing check an error is
        printed to stderr and False is returned.
    """
    # 1. Locate the main page
    page_id = _resolve_page_id(notion, main_id)
    if not page_id:
        print("Error: Could not find the 'Company in a Box' page.", file=sys.stderr)
        return False

    # 2. Locate the "Current Goals" heading and the sibling blocks after it
    goals_section_blocks = _find_goals_section(notion, page_id)
    if goals_section_blocks is None:
        print(
            "Error: Could not find the 'Current Goals' heading anywhere in the page.",
            file=sys.stderr,
        )
        return False
    if not goals_section_blocks:
        print("Error: 'Current Goals' section appears to be empty.", file=sys.stderr)
        return False

    # 3. Identify toggle blocks that represent goals
    toggle_blocks = [b for b in goals_section_blocks if _is_toggle(b)]
    if len(toggle_blocks) != 4:
        print(
            f"Error: Expected 4 toggle blocks for goals, found {len(toggle_blocks)}.",
            file=sys.stderr,
        )
        return False

    # 4. Ensure the new goal heading exists among the toggles
    wanted_goal = _normalize_string(NEW_GOAL_HEADING).lower()
    if not any(
        wanted_goal in _normalize_string(_plain(tb)).lower() for tb in toggle_blocks
    ):
        print(
            f"Error: Did not find a toggle block with heading '{NEW_GOAL_HEADING}'.",
            file=sys.stderr,
        )
        return False

    # 5. Validate that each toggle has at least one child paragraph/description
    for tb in toggle_blocks:
        if not tb.get("has_children", False):
            print(
                f"Error: Toggle '{_normalize_string(_plain(tb))}' has no child blocks (description not moved).",
                file=sys.stderr,
            )
            return False
        children = _get_children(notion, tb["id"])
        # Ensure there is at least one content child (paragraph, list item, etc.)
        if not any(c.get("type") in _DESCRIPTION_BLOCK_TYPES for c in children):
            print(
                f"Error: Toggle '{_normalize_string(_plain(tb))}' seems to lack any description/content inside it.",
                file=sys.stderr,
            )
            return False

    # 6. Confirm that there are **no** residual heading_3 blocks (non-toggle) for the goals
    non_toggle_headings = [
        b
        for b in goals_section_blocks
        if b.get("type") == "heading_3" and not _is_toggle(b)
    ]
    if non_toggle_headings:
        titles = [_normalize_string(_plain(b)) for b in non_toggle_headings]
        print(
            f"Error: Found heading_3 blocks that were not converted to toggles: {titles}.",
            file=sys.stderr,
        )
        return False

    print(
        "Success: Verified goal restructuring with new toggle blocks and descriptions."
    )
    return True
def main():
    """Command-line entry point: exit with status 0 if verification passes, else 1."""
    client = notion_utils.get_notion_client()
    # The first command-line argument, when given, is passed to verify() as main_id.
    page_arg = sys.argv[1] if len(sys.argv) > 1 else None
    sys.exit(0 if verify(client, page_arg) else 1)


if __name__ == "__main__":
    main()