Customer Analysis Fix

L3
ModelContextProtocolPostgresDVD Rental

Debug and fix customer behavior analysis query producing incorrect rental counts and spending calculations.

Created by Lingxiao Du
2025-08-20
Performance OptimizationData Integrity Enforcement

Model Ranking

Click on the dots to view the trajectory of each task run
Model
Run Results
Pass@4
Pass^4
Avg Time
Avg Turns
Input Tokens
Output Tokens
Total Tokens
OpenAI
gpt-5
3
/4
156.9s
5.0
17,590
10,982
28,572
Claude
claude-4-1-opus
1
/1
--
225.2s
11.0
56,978
4,207
61,185
OpenAI
o3
1
/4
89.5s
7.5
29,681
6,221
35,902
Claude
claude-4-sonnet
0
/4
186.1s
17.3
728,460
6,455
734,914
DeepSeek
deepseek-chat
0
/4
331.1s
22.3
181,805
4,827
186,631
Gemini
gemini-2-5-pro
0
/4
72.0s
2.3
8,057
7,249
15,306
Grok
grok-4
0
/4
192.7s
16.0
-
-
-
MoonshotAI
k2
0
/4
136.3s
14.3
89,597
2,803
92,400
Qwen
qwen-3-coder
0
/4
165.6s
29.5
306,040
5,252
311,292

Task State

Enum "mpaa_rating" { "G" "PG" "PG-13" "R" "NC-17" } Table "customer" { "customer_id" int4 [pk, not null, increment] "store_id" int2 [not null] "first_name" varchar(45) [not null] "last_name" varchar(45) [not null] "email" varchar(50) "address_id" int2 [not null] "activebool" bool [not null, default: true] "create_date" date [not null, default: `('now'::text)::date`] "last_update" timestamp [default: `now()`] "active" int4 Indexes { address_id [type: btree, name: "idx_fk_address_id"] store_id [type: btree, name: "idx_fk_store_id"] last_name [type: btree, name: "idx_last_name"] } } Table "actor" { "actor_id" int4 [pk, not null, increment] "first_name" varchar(45) [not null] "last_name" varchar(45) [not null] "last_update" timestamp [not null, default: `now()`] Indexes { last_name [type: btree, name: "idx_actor_last_name"] } } Table "category" { "category_id" int4 [pk, not null, increment] "name" varchar(25) [not null] "last_update" timestamp [not null, default: `now()`] } Table "film" { "film_id" int4 [pk, not null, increment] "title" varchar(255) [not null] "description" text "release_year" int4 "language_id" int2 [not null] "rental_duration" int2 [not null, default: 3] "rental_rate" numeric(4,2) [not null, default: 4.99] "length" int2 "replacement_cost" numeric(5,2) [not null, default: 19.99] "rating" mpaa_rating [default: 'G'] "last_update" timestamp [not null, default: `now()`] "special_features" "text[]" "fulltext" tsvector [not null] Indexes { fulltext [type: gist, name: "film_fulltext_idx"] language_id [type: btree, name: "idx_fk_language_id"] title [type: btree, name: "idx_title"] } } Table "film_actor" { "actor_id" int2 [not null] "film_id" int2 [not null] "last_update" timestamp [not null, default: `now()`] Indexes { (actor_id, film_id) [type: btree, name: "film_actor_pkey"] film_id [type: btree, name: "idx_fk_film_id"] } } Table "film_category" { "film_id" int2 [not null] "category_id" int2 [not null] "last_update" timestamp [not null, default: `now()`] Indexes { (film_id, category_id) [type: btree, name: "film_category_pkey"] } } Table "address" { "address_id" int4 [pk, not null, increment] "address" varchar(50) [not null] "address2" varchar(50) "district" varchar(20) [not null] "city_id" int2 [not null] "postal_code" varchar(10) "phone" varchar(20) [not null] "last_update" timestamp [not null, default: `now()`] Indexes { city_id [type: btree, name: "idx_fk_city_id"] } } Table "city" { "city_id" int4 [pk, not null, increment] "city" varchar(50) [not null] "country_id" int2 [not null] "last_update" timestamp [not null, default: `now()`] Indexes { country_id [type: btree, name: "idx_fk_country_id"] } } Table "country" { "country_id" int4 [pk, not null, increment] "country" varchar(50) [not null] "last_update" timestamp [not null, default: `now()`] } Table "inventory" { "inventory_id" int4 [pk, not null, increment] "film_id" int2 [not null] "store_id" int2 [not null] "last_update" timestamp [not null, default: `now()`] Indexes { (store_id, film_id) [type: btree, name: "idx_store_id_film_id"] } } Table "language" { "language_id" int4 [pk, not null, increment] "name" bpchar(20) [not null] "last_update" timestamp [not null, default: `now()`] } Table "payment" { "payment_id" int4 [pk, not null, increment] "customer_id" int2 [not null] "staff_id" int2 [not null] "rental_id" int4 [not null] "amount" numeric(5,2) [not null] "payment_date" timestamp [not null] Indexes { rental_id [type: btree, name: "idx_fk_rental_id"] staff_id [type: btree, name: "idx_fk_staff_id"] } } Table "rental" { "rental_id" int4 [pk, not null, increment] "rental_date" timestamp [not null] "inventory_id" int4 [not null] "customer_id" int2 [not null] "return_date" timestamp "staff_id" int2 [not null] "last_update" timestamp [not null, default: `now()`] Indexes { (rental_date, inventory_id, customer_id) [type: btree, name: "idx_unq_rental_rental_date_inventory_id_customer_id"] inventory_id [type: btree, name: "idx_fk_inventory_id"] } } Table "staff" { "staff_id" int4 [pk, not null, increment] "first_name" varchar(45) [not null] "last_name" varchar(45) [not null] "address_id" int2 [not null] "email" varchar(50) "store_id" int2 [not null] "active" bool [not null, default: true] "username" varchar(16) [not null] "password" varchar(40) "last_update" timestamp [not null, default: `now()`] "picture" bytea } Table "store" { "store_id" int4 [pk, not null, increment] "manager_staff_id" int2 [unique, not null] "address_id" int2 [not null] "last_update" timestamp [not null, default: `now()`] } Ref "fk_address_city":"city"."city_id" < "address"."city_id" Ref "fk_city":"country"."country_id" < "city"."country_id" Ref "customer_address_id_fkey":"address"."address_id" < "customer"."address_id" [update: cascade, delete: restrict] Ref "film_language_id_fkey":"language"."language_id" < "film"."language_id" [update: cascade, delete: restrict] Ref "film_actor_actor_id_fkey":"actor"."actor_id" < "film_actor"."actor_id" [update: cascade, delete: restrict] Ref "film_actor_film_id_fkey":"film"."film_id" < "film_actor"."film_id" [update: cascade, delete: restrict] Ref "film_category_category_id_fkey":"category"."category_id" < "film_category"."category_id" [update: cascade, delete: restrict] Ref "film_category_film_id_fkey":"film"."film_id" < "film_category"."film_id" [update: cascade, delete: restrict] Ref "inventory_film_id_fkey":"film"."film_id" < "inventory"."film_id" [update: cascade, delete: restrict] Ref "payment_customer_id_fkey":"customer"."customer_id" < "payment"."customer_id" [update: cascade, delete: restrict] Ref "payment_rental_id_fkey":"rental"."rental_id" < "payment"."rental_id" [update: cascade, delete: set null] Ref "payment_staff_id_fkey":"staff"."staff_id" < "payment"."staff_id" [update: cascade, delete: restrict] Ref "rental_customer_id_fkey":"customer"."customer_id" < "rental"."customer_id" [update: cascade, delete: restrict] Ref "rental_inventory_id_fkey":"inventory"."inventory_id" < "rental"."inventory_id" [update: cascade, delete: restrict] Ref "rental_staff_id_key":"staff"."staff_id" < "rental"."staff_id" Ref "staff_address_id_fkey":"address"."address_id" < "staff"."address_id" [update: cascade, delete: restrict] Ref "store_address_id_fkey":"address"."address_id" < "store"."address_id" [update: cascade, delete: restrict] Ref "store_manager_staff_id_fkey":"staff"."staff_id" < "store"."manager_staff_id" [update: cascade, delete: restrict]

Instruction



Verify

*.py
Python
"""
Verification script for PostgreSQL Task 3: Fix Customer Analysis Query
"""

import os
import sys
import psycopg2
from decimal import Decimal

def get_connection_params() -> dict:
    """Get database connection parameters."""
    return {
        "host": os.getenv("POSTGRES_HOST", "localhost"),
        "port": int(os.getenv("POSTGRES_PORT", 5432)),
        "database": os.getenv("POSTGRES_DATABASE"),
        "user": os.getenv("POSTGRES_USERNAME"),
        "password": os.getenv("POSTGRES_PASSWORD")
    }

def rows_match(actual_row, expected_row):
    """Compare two rows with appropriate tolerance for decimals and floats."""
    if len(actual_row) != len(expected_row):
        return False
    
    for actual, expected in zip(actual_row, expected_row):
        if isinstance(actual, (Decimal, float)) and isinstance(expected, (Decimal, float)):
            # Use higher tolerance for floating point comparisons
            if abs(float(actual) - float(expected)) > 0.1:
                return False
        elif actual != expected:
            return False
    
    return True

def verify_customer_analysis_fixed_table(conn) -> bool:
    """Verify the customer_analysis_fixed table results."""
    with conn.cursor() as cur:
        # Get actual results from the created table
        cur.execute("""
            SELECT customer_id, customer_name, customer_city, customer_country,
                   total_rentals, unique_films, total_spent, favorite_category,
                   favorite_actor, avg_rental_duration, customer_tier,
                   most_popular_film_in_region, regional_film_rental_count
            FROM customer_analysis_fixed
            ORDER BY total_spent DESC, total_rentals DESC, customer_name ASC
        """)
        actual_results = cur.fetchall()
        
        # Execute ground truth query (the corrected version)
        cur.execute("""
            WITH paid_rentals AS (
            SELECT DISTINCT
                    r.rental_id,
                    r.customer_id,
                    r.inventory_id,
                    r.rental_date,
                    r.return_date
            FROM rental r
            JOIN payment p ON p.rental_id = r.rental_id
            ),
            payments_by_customer AS (
            SELECT pr.customer_id, SUM(p.amount) AS total_spent
            FROM paid_rentals pr
            JOIN payment p ON p.rental_id = pr.rental_id
            GROUP BY pr.customer_id
            ),
            customer_basic_stats AS (
            SELECT
                c.customer_id,
                c.first_name || ' ' || c.last_name AS customer_name,
                ci.city AS customer_city,
                co.country AS customer_country,
                COUNT(DISTINCT pr.rental_id) AS total_rentals,
                COUNT(DISTINCT i.film_id) AS unique_films,
                pbc.total_spent,
                AVG(EXTRACT(EPOCH FROM (pr.return_date - pr.rental_date)) / 86400.0) AS avg_rental_duration
            FROM customer c
            JOIN address a ON c.address_id = a.address_id
            JOIN city ci ON a.city_id = ci.city_id
            JOIN country co ON ci.country_id = co.country_id
            JOIN paid_rentals pr ON pr.customer_id = c.customer_id
            JOIN inventory i ON pr.inventory_id = i.inventory_id
            JOIN payments_by_customer pbc ON pbc.customer_id = c.customer_id
            WHERE c.email IS NOT NULL
            GROUP BY c.customer_id, c.first_name, c.last_name, ci.city, co.country, pbc.total_spent
            HAVING COUNT(DISTINCT pr.rental_id) >= 15
            ),
            customer_categories AS (
            SELECT
                pr.customer_id,
                cat.name AS category_name,
                COUNT(*) AS category_count,
                ROW_NUMBER() OVER (
                    PARTITION BY pr.customer_id
                    ORDER BY COUNT(*) DESC, cat.name ASC
                ) AS rn
            FROM paid_rentals pr
            JOIN inventory i ON pr.inventory_id = i.inventory_id
            JOIN film f ON i.film_id = f.film_id
            JOIN film_category fc ON f.film_id = fc.film_id
            JOIN category cat ON fc.category_id = cat.category_id
            JOIN customer c ON pr.customer_id = c.customer_id
            WHERE c.email IS NOT NULL
            GROUP BY pr.customer_id, cat.name
            ),
            customer_actors AS (
            SELECT
                pr.customer_id,
                (a.first_name || ' ' || a.last_name) AS actor_name,
                COUNT(*) AS actor_count,
                ROW_NUMBER() OVER (
                    PARTITION BY pr.customer_id
                    ORDER BY COUNT(*) DESC, (a.first_name || ' ' || a.last_name) ASC
                ) AS rn
            FROM paid_rentals pr
            JOIN inventory i ON pr.inventory_id = i.inventory_id
            JOIN film f ON i.film_id = f.film_id
            JOIN film_actor fa ON f.film_id = fa.film_id
            JOIN actor a ON fa.actor_id = a.actor_id
            JOIN customer c ON pr.customer_id = c.customer_id
            WHERE c.email IS NOT NULL
            GROUP BY pr.customer_id, a.first_name, a.last_name
            ),
            regional_popular_films AS (
            SELECT
                co.country,
                f.title,
                COUNT(DISTINCT pr.rental_id) AS rental_count,
                ROW_NUMBER() OVER (
                    PARTITION BY co.country
                    ORDER BY COUNT(DISTINCT pr.rental_id) DESC, f.title ASC
                ) AS rn
            FROM paid_rentals pr
            JOIN customer c ON pr.customer_id = c.customer_id
            JOIN address a ON c.address_id = a.address_id
            JOIN city ci ON a.city_id = ci.city_id
            JOIN country co ON ci.country_id = co.country_id
            JOIN inventory i ON pr.inventory_id = i.inventory_id
            JOIN film f ON i.film_id = f.film_id
            WHERE c.email IS NOT NULL
            GROUP BY co.country, f.title
            )
            SELECT
                cbs.customer_id,
                cbs.customer_name,
                cbs.customer_city,
                cbs.customer_country,
                cbs.total_rentals,
                cbs.unique_films,
                cbs.total_spent,
                cc.category_name AS favorite_category,
                ca.actor_name AS favorite_actor,
                cbs.avg_rental_duration,
                CASE
                WHEN cbs.total_spent >= 150 THEN 'Premium'
                WHEN cbs.total_spent >= 75  THEN 'Standard'
                ELSE 'Basic'
                END AS customer_tier,
                rpf.title AS most_popular_film_in_region,
                rpf.rental_count AS regional_film_rental_count
            FROM customer_basic_stats cbs
            LEFT JOIN customer_categories cc
            ON cbs.customer_id = cc.customer_id AND cc.rn = 1
            LEFT JOIN customer_actors ca
            ON cbs.customer_id = ca.customer_id AND ca.rn = 1
            LEFT JOIN regional_popular_films rpf
            ON cbs.customer_country = rpf.country AND rpf.rn = 1
            ORDER BY cbs.total_spent DESC, cbs.total_rentals DESC, cbs.customer_name ASC;
        """)
        expected_results = cur.fetchall()

        if len(actual_results) != len(expected_results):
            print(f"❌ Expected {len(expected_results)} rows, got {len(actual_results)}")
            return False

        mismatches = 0
        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):
            if not rows_match(actual, expected):
                if mismatches < 5:  # Only show first 5 mismatches
                    print(f"❌ Row {i+1} mismatch:")
                    print(f"   Expected: {expected}")
                    print(f"   Actual:   {actual}")
                mismatches += 1

        if mismatches > 0:
            print(f"❌ Total mismatches: {mismatches}")
            return False

        print(f"✅ Query results are correct ({len(actual_results)} rows)")
        return True

def main():
    """Main verification function."""
    print("=" * 70)
    print("PostgreSQL Task 3 Verification: Fix Customer Analysis Query")
    print("=" * 70)

    # Get connection parameters
    conn_params = get_connection_params()

    if not conn_params["database"]:
        print("❌ No database specified")
        sys.exit(1)

    try:
        # Connect to database
        conn = psycopg2.connect(**conn_params)


        # Verify results
        success = verify_customer_analysis_fixed_table(conn)

        conn.close()

        if success:
            print("\n🎉 Task verification: PASS")
            print("   - Query was successfully debugged and fixed")
            print("   - All 587 rows match the expected results")
            sys.exit(0)
        else:
            print("\n❌ Task verification: FAIL")
            print("   - The query still has issues")
            print("   - Please review the duplicate counting problem")
            sys.exit(1)

    except psycopg2.Error as e:
        print(f"❌ Database error: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"❌ Verification error: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()