Customer Analytics Optimization
L3
PostgresDVD Rental
Optimize slow customer analytics query with correlated subqueries causing timeout issues in reporting dashboard.
Created by Lingxiao Du
2025-08-20
Performance Optimization
Model Ranking
Click on the dots to view the trajectory of each task run
Task State
Enum "mpaa_rating" {
"G"
"PG"
"PG-13"
"R"
"NC-17"
}
Table "customer" {
"customer_id" int4 [pk, not null, increment]
"store_id" int2 [not null]
"first_name" varchar(45) [not null]
"last_name" varchar(45) [not null]
"email" varchar(50)
"address_id" int2 [not null]
"activebool" bool [not null, default: true]
"create_date" date [not null, default: `('now'::text)::date`]
"last_update" timestamp [default: `now()`]
"active" int4
Indexes {
address_id [type: btree, name: "idx_fk_address_id"]
store_id [type: btree, name: "idx_fk_store_id"]
last_name [type: btree, name: "idx_last_name"]
}
}
Table "actor" {
"actor_id" int4 [pk, not null, increment]
"first_name" varchar(45) [not null]
"last_name" varchar(45) [not null]
"last_update" timestamp [not null, default: `now()`]
Indexes {
last_name [type: btree, name: "idx_actor_last_name"]
}
}
Table "category" {
"category_id" int4 [pk, not null, increment]
"name" varchar(25) [not null]
"last_update" timestamp [not null, default: `now()`]
}
Table "film" {
"film_id" int4 [pk, not null, increment]
"title" varchar(255) [not null]
"description" text
"release_year" int4
"language_id" int2 [not null]
"rental_duration" int2 [not null, default: 3]
"rental_rate" numeric(4,2) [not null, default: 4.99]
"length" int2
"replacement_cost" numeric(5,2) [not null, default: 19.99]
"rating" mpaa_rating [default: 'G']
"last_update" timestamp [not null, default: `now()`]
"special_features" "text[]"
"fulltext" tsvector [not null]
Indexes {
fulltext [type: gist, name: "film_fulltext_idx"]
language_id [type: btree, name: "idx_fk_language_id"]
title [type: btree, name: "idx_title"]
}
}
Table "film_actor" {
"actor_id" int2 [not null]
"film_id" int2 [not null]
"last_update" timestamp [not null, default: `now()`]
Indexes {
(actor_id, film_id) [type: btree, name: "film_actor_pkey"]
film_id [type: btree, name: "idx_fk_film_id"]
}
}
Table "film_category" {
"film_id" int2 [not null]
"category_id" int2 [not null]
"last_update" timestamp [not null, default: `now()`]
Indexes {
(film_id, category_id) [type: btree, name: "film_category_pkey"]
}
}
Table "address" {
"address_id" int4 [pk, not null, increment]
"address" varchar(50) [not null]
"address2" varchar(50)
"district" varchar(20) [not null]
"city_id" int2 [not null]
"postal_code" varchar(10)
"phone" varchar(20) [not null]
"last_update" timestamp [not null, default: `now()`]
Indexes {
city_id [type: btree, name: "idx_fk_city_id"]
}
}
Table "city" {
"city_id" int4 [pk, not null, increment]
"city" varchar(50) [not null]
"country_id" int2 [not null]
"last_update" timestamp [not null, default: `now()`]
Indexes {
country_id [type: btree, name: "idx_fk_country_id"]
}
}
Table "country" {
"country_id" int4 [pk, not null, increment]
"country" varchar(50) [not null]
"last_update" timestamp [not null, default: `now()`]
}
Table "inventory" {
"inventory_id" int4 [pk, not null, increment]
"film_id" int2 [not null]
"store_id" int2 [not null]
"last_update" timestamp [not null, default: `now()`]
Indexes {
(store_id, film_id) [type: btree, name: "idx_store_id_film_id"]
}
}
Table "language" {
"language_id" int4 [pk, not null, increment]
"name" bpchar(20) [not null]
"last_update" timestamp [not null, default: `now()`]
}
Table "payment" {
"payment_id" int4 [pk, not null, increment]
"customer_id" int2 [not null]
"staff_id" int2 [not null]
"rental_id" int4 [not null]
"amount" numeric(5,2) [not null]
"payment_date" timestamp [not null]
Indexes {
rental_id [type: btree, name: "idx_fk_rental_id"]
staff_id [type: btree, name: "idx_fk_staff_id"]
}
}
Table "rental" {
"rental_id" int4 [pk, not null, increment]
"rental_date" timestamp [not null]
"inventory_id" int4 [not null]
"customer_id" int2 [not null]
"return_date" timestamp
"staff_id" int2 [not null]
"last_update" timestamp [not null, default: `now()`]
Indexes {
(rental_date, inventory_id, customer_id) [type: btree, name: "idx_unq_rental_rental_date_inventory_id_customer_id"]
inventory_id [type: btree, name: "idx_fk_inventory_id"]
}
}
Table "staff" {
"staff_id" int4 [pk, not null, increment]
"first_name" varchar(45) [not null]
"last_name" varchar(45) [not null]
"address_id" int2 [not null]
"email" varchar(50)
"store_id" int2 [not null]
"active" bool [not null, default: true]
"username" varchar(16) [not null]
"password" varchar(40)
"last_update" timestamp [not null, default: `now()`]
"picture" bytea
}
Table "store" {
"store_id" int4 [pk, not null, increment]
"manager_staff_id" int2 [unique, not null]
"address_id" int2 [not null]
"last_update" timestamp [not null, default: `now()`]
}
Ref "fk_address_city":"city"."city_id" < "address"."city_id"
Ref "fk_city":"country"."country_id" < "city"."country_id"
Ref "customer_address_id_fkey":"address"."address_id" < "customer"."address_id" [update: cascade, delete: restrict]
Ref "film_language_id_fkey":"language"."language_id" < "film"."language_id" [update: cascade, delete: restrict]
Ref "film_actor_actor_id_fkey":"actor"."actor_id" < "film_actor"."actor_id" [update: cascade, delete: restrict]
Ref "film_actor_film_id_fkey":"film"."film_id" < "film_actor"."film_id" [update: cascade, delete: restrict]
Ref "film_category_category_id_fkey":"category"."category_id" < "film_category"."category_id" [update: cascade, delete: restrict]
Ref "film_category_film_id_fkey":"film"."film_id" < "film_category"."film_id" [update: cascade, delete: restrict]
Ref "inventory_film_id_fkey":"film"."film_id" < "inventory"."film_id" [update: cascade, delete: restrict]
Ref "payment_customer_id_fkey":"customer"."customer_id" < "payment"."customer_id" [update: cascade, delete: restrict]
Ref "payment_rental_id_fkey":"rental"."rental_id" < "payment"."rental_id" [update: cascade, delete: set null]
Ref "payment_staff_id_fkey":"staff"."staff_id" < "payment"."staff_id" [update: cascade, delete: restrict]
Ref "rental_customer_id_fkey":"customer"."customer_id" < "rental"."customer_id" [update: cascade, delete: restrict]
Ref "rental_inventory_id_fkey":"inventory"."inventory_id" < "rental"."inventory_id" [update: cascade, delete: restrict]
Ref "rental_staff_id_key":"staff"."staff_id" < "rental"."staff_id"
Ref "staff_address_id_fkey":"address"."address_id" < "staff"."address_id" [update: cascade, delete: restrict]
Ref "store_address_id_fkey":"address"."address_id" < "store"."address_id" [update: cascade, delete: restrict]
Ref "store_manager_staff_id_fkey":"staff"."staff_id" < "store"."manager_staff_id" [update: cascade, delete: restrict]
Instruction
Optimize slow customer analytics query in the DVD rental database.
Background
The business intelligence team is running customer analytics reports, but one of their critical queries has become extremely slow. The query that used to run in milliseconds is now taking over a second to complete, causing timeout issues in their reporting dashboard.
Your Task
Analyze and optimize the performance of this customer analytics query:
SELECT
c.customer_id,
c.first_name,
c.last_name,
c.email,
COUNT(DISTINCT p.payment_id) as total_payments,
SUM(p.amount) as total_spent,
AVG(p.amount) as avg_payment,
COUNT(DISTINCT EXTRACT(month FROM p.payment_date)) as active_months,
MAX(p.payment_date) as last_payment,
MIN(p.payment_date) as first_payment,
(SELECT COUNT(*) FROM payment p2 WHERE p2.customer_id = c.customer_id AND p2.amount > 5.0) as high_value_payments,
(SELECT SUM(amount) FROM payment p3 WHERE p3.customer_id = c.customer_id AND p3.payment_date >= '2007-03-01') as recent_spending
FROM customer c
JOIN payment p ON c.customer_id = p.customer_id
WHERE c.active = 1
GROUP BY c.customer_id, c.first_name, c.last_name, c.email
HAVING COUNT(p.payment_id) >= 10
ORDER BY total_spent DESC, total_payments DESC;The query is currently taking over 1000ms to execute and has a very high cost in the execution plan. The team needs this optimized urgently as it's blocking their daily reporting processes.
Requirements
- Use
EXPLAIN ANALYZEto identify performance bottlenecks - Implement appropriate database optimizations
- Ensure queries return accurate results after optimization
- Document your optimization approach and performance improvements
Verify
Python