Customer Analytics Optimization

L3
ModelContextProtocolPostgresDVD Rental

Optimize slow customer analytics query with correlated subqueries causing timeout issues in reporting dashboard.

Created by Lingxiao Du
2025-08-20
Performance Optimization

Model Ranking

Click on the dots to view the trajectory of each task run
Model
Run Results
Pass@4
Pass^4
Avg Time
Avg Turns
Input Tokens
Output Tokens
Total Tokens
Claude
claude-sonnet-4
4
/4
157.1s
19.5
226,481
5,220
231,700
Claude
claude-sonnet-4-high
4
/4
138.3s
19.5
661,468
5,342
666,810
Claude
claude-sonnet-4-low
4
/4
127.0s
20.0
242,051
5,792
247,843
DeepSeek
deepseek-chat
4
/4
280.8s
20.3
293,968
3,473
297,441
OpenAI
gpt-5-medium
4
/4
291.2s
10.8
66,059
13,207
79,265
OpenAI
gpt-5-mini-medium
4
/4
61.5s
9.8
53,886
6,310
60,196
Grok
grok-code-fast-1
4
/4
38.4s
12.0
90,317
4,100
94,416
MoonshotAI
kimi-k2-0711
4
/4
181.3s
16.3
96,654
3,695
100,349
MoonshotAI
kimi-k2-0905
4
/4
266.9s
15.5
119,757
3,307
123,064
Qwen
qwen-3-coder-plus
4
/4
62.2s
16.8
177,347
3,066
180,413
Qwen
qwen-3-max
4
/4
52.6s
13.0
97,283
1,566
98,849
OpenAI
gpt-5-low
3
/4
275.3s
10.0
41,760
15,116
56,875
OpenAI
gpt-5-mini-high
3
/4
214.4s
13.3
110,004
25,494
135,498
Grok
grok-4
3
/4
159.0s
11.0
87,368
6,118
93,486
OpenAI
gpt-5-high
2
/4
748.5s
11.5
83,212
27,715
110,927
Claude
claude-opus-4-1
1
/1
--
263.7s
14.0
362,694
4,122
366,816
OpenAI
gpt-5-nano-high
1
/4
115.3s
3.8
13,728
24,473
38,201
OpenAI
gpt-oss-120b
1
/4
25.1s
3.5
10,181
1,355
11,536
OpenAI
o3
1
/4
53.0s
6.3
16,688
3,329
20,017
Gemini
gemini-2-5-flash
0
/4
21.2s
4.5
8,447
2,291
10,738
Gemini
gemini-2-5-pro
0
/4
60.5s
3.8
11,830
5,112
16,942
Z.ai
glm-4-5
0
/4
164.3s
23.3
179,739
4,378
184,117
OpenAI
gpt-4-1
0
/4
14.7s
4.3
6,816
480
7,296
OpenAI
gpt-4-1-mini
0
/4
15.0s
4.5
6,895
358
7,252
OpenAI
gpt-4-1-nano
0
/4
12.7s
4.3
7,098
533
7,631
OpenAI
gpt-5-mini-low
0
/4
18.2s
3.0
2,989
974
3,963
OpenAI
gpt-5-nano-low
0
/4
50.4s
4.8
7,931
9,637
17,568
OpenAI
gpt-5-nano-medium
0
/4
55.7s
3.0
7,998
11,178
19,175
OpenAI
o4-mini
0
/4
40.5s
5.8
10,920
3,294
14,214

Task State

Enum "mpaa_rating" { "G" "PG" "PG-13" "R" "NC-17" } Table "customer" { "customer_id" int4 [pk, not null, increment] "store_id" int2 [not null] "first_name" varchar(45) [not null] "last_name" varchar(45) [not null] "email" varchar(50) "address_id" int2 [not null] "activebool" bool [not null, default: true] "create_date" date [not null, default: `('now'::text)::date`] "last_update" timestamp [default: `now()`] "active" int4 Indexes { address_id [type: btree, name: "idx_fk_address_id"] store_id [type: btree, name: "idx_fk_store_id"] last_name [type: btree, name: "idx_last_name"] } } Table "actor" { "actor_id" int4 [pk, not null, increment] "first_name" varchar(45) [not null] "last_name" varchar(45) [not null] "last_update" timestamp [not null, default: `now()`] Indexes { last_name [type: btree, name: "idx_actor_last_name"] } } Table "category" { "category_id" int4 [pk, not null, increment] "name" varchar(25) [not null] "last_update" timestamp [not null, default: `now()`] } Table "film" { "film_id" int4 [pk, not null, increment] "title" varchar(255) [not null] "description" text "release_year" int4 "language_id" int2 [not null] "rental_duration" int2 [not null, default: 3] "rental_rate" numeric(4,2) [not null, default: 4.99] "length" int2 "replacement_cost" numeric(5,2) [not null, default: 19.99] "rating" mpaa_rating [default: 'G'] "last_update" timestamp [not null, default: `now()`] "special_features" "text[]" "fulltext" tsvector [not null] Indexes { fulltext [type: gist, name: "film_fulltext_idx"] language_id [type: btree, name: "idx_fk_language_id"] title [type: btree, name: "idx_title"] } } Table "film_actor" { "actor_id" int2 [not null] "film_id" int2 [not null] "last_update" timestamp [not null, default: `now()`] Indexes { (actor_id, film_id) [type: btree, name: "film_actor_pkey"] film_id [type: btree, name: "idx_fk_film_id"] } } Table "film_category" { "film_id" int2 [not null] "category_id" int2 [not null] "last_update" timestamp [not null, default: `now()`] Indexes { (film_id, category_id) [type: btree, name: "film_category_pkey"] } } Table "address" { "address_id" int4 [pk, not null, increment] "address" varchar(50) [not null] "address2" varchar(50) "district" varchar(20) [not null] "city_id" int2 [not null] "postal_code" varchar(10) "phone" varchar(20) [not null] "last_update" timestamp [not null, default: `now()`] Indexes { city_id [type: btree, name: "idx_fk_city_id"] } } Table "city" { "city_id" int4 [pk, not null, increment] "city" varchar(50) [not null] "country_id" int2 [not null] "last_update" timestamp [not null, default: `now()`] Indexes { country_id [type: btree, name: "idx_fk_country_id"] } } Table "country" { "country_id" int4 [pk, not null, increment] "country" varchar(50) [not null] "last_update" timestamp [not null, default: `now()`] } Table "inventory" { "inventory_id" int4 [pk, not null, increment] "film_id" int2 [not null] "store_id" int2 [not null] "last_update" timestamp [not null, default: `now()`] Indexes { (store_id, film_id) [type: btree, name: "idx_store_id_film_id"] } } Table "language" { "language_id" int4 [pk, not null, increment] "name" bpchar(20) [not null] "last_update" timestamp [not null, default: `now()`] } Table "payment" { "payment_id" int4 [pk, not null, increment] "customer_id" int2 [not null] "staff_id" int2 [not null] "rental_id" int4 [not null] "amount" numeric(5,2) [not null] "payment_date" timestamp [not null] Indexes { rental_id [type: btree, name: "idx_fk_rental_id"] staff_id [type: btree, name: "idx_fk_staff_id"] } } Table "rental" { "rental_id" int4 [pk, not null, increment] "rental_date" timestamp [not null] "inventory_id" int4 [not null] "customer_id" int2 [not null] "return_date" timestamp "staff_id" int2 [not null] "last_update" timestamp [not null, default: `now()`] Indexes { (rental_date, inventory_id, customer_id) [type: btree, name: "idx_unq_rental_rental_date_inventory_id_customer_id"] inventory_id [type: btree, name: "idx_fk_inventory_id"] } } Table "staff" { "staff_id" int4 [pk, not null, increment] "first_name" varchar(45) [not null] "last_name" varchar(45) [not null] "address_id" int2 [not null] "email" varchar(50) "store_id" int2 [not null] "active" bool [not null, default: true] "username" varchar(16) [not null] "password" varchar(40) "last_update" timestamp [not null, default: `now()`] "picture" bytea } Table "store" { "store_id" int4 [pk, not null, increment] "manager_staff_id" int2 [unique, not null] "address_id" int2 [not null] "last_update" timestamp [not null, default: `now()`] } Ref "fk_address_city":"city"."city_id" < "address"."city_id" Ref "fk_city":"country"."country_id" < "city"."country_id" Ref "customer_address_id_fkey":"address"."address_id" < "customer"."address_id" [update: cascade, delete: restrict] Ref "film_language_id_fkey":"language"."language_id" < "film"."language_id" [update: cascade, delete: restrict] Ref "film_actor_actor_id_fkey":"actor"."actor_id" < "film_actor"."actor_id" [update: cascade, delete: restrict] Ref "film_actor_film_id_fkey":"film"."film_id" < "film_actor"."film_id" [update: cascade, delete: restrict] Ref "film_category_category_id_fkey":"category"."category_id" < "film_category"."category_id" [update: cascade, delete: restrict] Ref "film_category_film_id_fkey":"film"."film_id" < "film_category"."film_id" [update: cascade, delete: restrict] Ref "inventory_film_id_fkey":"film"."film_id" < "inventory"."film_id" [update: cascade, delete: restrict] Ref "payment_customer_id_fkey":"customer"."customer_id" < "payment"."customer_id" [update: cascade, delete: restrict] Ref "payment_rental_id_fkey":"rental"."rental_id" < "payment"."rental_id" [update: cascade, delete: set null] Ref "payment_staff_id_fkey":"staff"."staff_id" < "payment"."staff_id" [update: cascade, delete: restrict] Ref "rental_customer_id_fkey":"customer"."customer_id" < "rental"."customer_id" [update: cascade, delete: restrict] Ref "rental_inventory_id_fkey":"inventory"."inventory_id" < "rental"."inventory_id" [update: cascade, delete: restrict] Ref "rental_staff_id_key":"staff"."staff_id" < "rental"."staff_id" Ref "staff_address_id_fkey":"address"."address_id" < "staff"."address_id" [update: cascade, delete: restrict] Ref "store_address_id_fkey":"address"."address_id" < "store"."address_id" [update: cascade, delete: restrict] Ref "store_manager_staff_id_fkey":"staff"."staff_id" < "store"."manager_staff_id" [update: cascade, delete: restrict]

Instruction

Optimize slow customer analytics query in the DVD rental database.

Background

The business intelligence team is running customer analytics reports, but one of their critical queries has become extremely slow. The query that used to run in milliseconds is now taking over a second to complete, causing timeout issues in their reporting dashboard.

Your Task

Analyze and optimize the performance of this customer analytics query:

SQL
SELECT 
    c.customer_id,
    c.first_name,
    c.last_name,
    c.email,
    COUNT(DISTINCT p.payment_id) as total_payments,
    SUM(p.amount) as total_spent,
    AVG(p.amount) as avg_payment,
    COUNT(DISTINCT EXTRACT(month FROM p.payment_date)) as active_months,
    MAX(p.payment_date) as last_payment,
    MIN(p.payment_date) as first_payment,
    (SELECT COUNT(*) FROM payment p2 WHERE p2.customer_id = c.customer_id AND p2.amount > 5.0) as high_value_payments,
    (SELECT SUM(amount) FROM payment p3 WHERE p3.customer_id = c.customer_id AND p3.payment_date >= '2007-03-01') as recent_spending
FROM customer c
JOIN payment p ON c.customer_id = p.customer_id
WHERE c.active = 1
GROUP BY c.customer_id, c.first_name, c.last_name, c.email
HAVING COUNT(p.payment_id) >= 10
ORDER BY total_spent DESC, total_payments DESC;

The query is currently taking over 1000ms to execute and has a very high cost in the execution plan. The team needs this optimized urgently as it's blocking their daily reporting processes.

Requirements

  • Use EXPLAIN ANALYZE to identify performance bottlenecks
  • Implement appropriate database optimizations
  • Ensure queries return accurate results after optimization
  • Document your optimization approach and performance improvements


Verify

*.py
Python
"""
Verification script for PostgreSQL Task 1: Customer Payment Query Optimization
"""

import os
import sys
import psycopg2

def get_connection_params() -> dict:
    """Get database connection parameters."""
    return {
        "host": os.getenv("POSTGRES_HOST", "localhost"),
        "port": int(os.getenv("POSTGRES_PORT", 5432)),
        "database": os.getenv("POSTGRES_DATABASE"),
        "user": os.getenv("POSTGRES_USERNAME"),
        "password": os.getenv("POSTGRES_PASSWORD")
    }

def check_payment_customer_id_index(conn) -> bool:
    """Check if there's any index on payment.customer_id column."""
    with conn.cursor() as cur:
        cur.execute("""
            SELECT indexname, indexdef 
            FROM pg_indexes 
            WHERE schemaname = 'public' 
            AND tablename = 'payment'
            AND indexdef LIKE '%customer_id%'
        """)
        indexes = cur.fetchall()
        print(indexes)
        return len(indexes) > 0, indexes

def main():
    """Main verification function."""
    print("=" * 60)
    print("PostgreSQL Task 1 Verification: Customer Payment Query Optimization")
    print("=" * 60)
    
    # Get connection parameters
    conn_params = get_connection_params()
    
    if not conn_params["database"]:
        print("❌ No database specified")
        sys.exit(1)
    
    try:
        # Connect to database
        conn = psycopg2.connect(**conn_params)
        
        print("\n🔍 Checking for customer_id index on payment table...")
        
        # Check if any index exists on payment.customer_id
        has_index, indexes = check_payment_customer_id_index(conn)
        
        if has_index:
            print("✅ Found index(es) on payment.customer_id:")
            for index_name, index_def in indexes:
                print(f"   - {index_name}: {index_def}")
        else:
            print("❌ No index found on payment.customer_id column")
        
        conn.close()
        
        if has_index:
            print(f"\n🎉 Task verification: PASS")
            print(f"   - Index on payment.customer_id exists")
            sys.exit(0)
        else:
            print(f"\n❌ Task verification: FAIL")
            print(f"   - No index found on payment.customer_id")
            print(f"   - Create an index on payment(customer_id) to optimize the queries")
            sys.exit(1)
            
    except psycopg2.Error as e:
        print(f"❌ Database error: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"❌ Verification error: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()