Customer Data Migration

L3
ModelContextProtocolPostgresChinook

Migrate customer data from acquired company MelodyMart into Chinook database using bulk operations and business logic.

Created by Lingxiao Du
2025-08-12
Data MigrationTransactional Operations

Model Ranking

Click on the dots to view the trajectory of each task run
Model
Run Results
Pass@4
Pass^4
Avg Time
Avg Turns
Input Tokens
Output Tokens
Total Tokens
OpenAI
gpt-5
4
/4
218.9s
7.5
106,355
16,374
122,729
Grok
grok-4
4
/4
446.3s
6.3
-
-
-
Claude
claude-4-sonnet
3
/4
574.7s
18.8
451,851
24,565
476,416
Gemini
gemini-2-5-pro
1
/4
280.6s
5.0
137,152
27,511
164,663
Claude
claude-4-1-opus
0
/1
--
1022.3s
19.0
386,018
18,977
404,995
DeepSeek
deepseek-chat
0
/4
223.4s
4.0
45,955
129
46,083
MoonshotAI
k2
0
/4
125.0s
4.5
52,013
1,445
53,457
OpenAI
o3
0
/4
46.5s
3.3
35,181
2,943
38,124
Qwen
qwen-3-coder
0
/4
266.9s
13.8
373,426
12,141
385,567

Task State

Table "Album" { "AlbumId" int4 [pk, not null] "Title" varchar(160) [not null] "ArtistId" int4 [not null] Indexes { ArtistId [type: btree, name: "IFK_AlbumArtistId"] } } Table "Artist" { "ArtistId" int4 [pk, not null] "Name" varchar(120) } Table "Customer" { "CustomerId" int4 [pk, not null] "FirstName" varchar(40) [not null] "LastName" varchar(20) [not null] "Company" varchar(80) "Address" varchar(70) "City" varchar(40) "State" varchar(40) "Country" varchar(40) "PostalCode" varchar(10) "Phone" varchar(24) "Fax" varchar(24) "Email" varchar(60) [not null] "SupportRepId" int4 Indexes { SupportRepId [type: btree, name: "IFK_CustomerSupportRepId"] } } Table "Employee" { "EmployeeId" int4 [pk, not null] "LastName" varchar(20) [not null] "FirstName" varchar(20) [not null] "Title" varchar(30) "ReportsTo" int4 "BirthDate" timestamp "HireDate" timestamp "Address" varchar(70) "City" varchar(40) "State" varchar(40) "Country" varchar(40) "PostalCode" varchar(10) "Phone" varchar(24) "Fax" varchar(24) "Email" varchar(60) Indexes { ReportsTo [type: btree, name: "IFK_EmployeeReportsTo"] } } Table "Genre" { "GenreId" int4 [pk, not null] "Name" varchar(120) } Table "Invoice" { "InvoiceId" int4 [pk, not null] "CustomerId" int4 [not null] "InvoiceDate" timestamp [not null] "BillingAddress" varchar(70) "BillingCity" varchar(40) "BillingState" varchar(40) "BillingCountry" varchar(40) "BillingPostalCode" varchar(10) "Total" numeric(10,2) [not null] Indexes { CustomerId [type: btree, name: "IFK_InvoiceCustomerId"] } } Table "InvoiceLine" { "InvoiceLineId" int4 [pk, not null] "InvoiceId" int4 [not null] "TrackId" int4 [not null] "UnitPrice" numeric(10,2) [not null] "Quantity" int4 [not null] Indexes { InvoiceId [type: btree, name: "IFK_InvoiceLineInvoiceId"] TrackId [type: btree, name: "IFK_InvoiceLineTrackId"] } } Table "MediaType" { "MediaTypeId" int4 [pk, not null] "Name" varchar(120) } Table "Playlist" { "PlaylistId" int4 [pk, not null] "Name" varchar(120) } Table "PlaylistTrack" { "PlaylistId" int4 [not null] "TrackId" int4 [not null] Indexes { (PlaylistId, TrackId) [type: btree, name: "PK_PlaylistTrack"] TrackId [type: btree, name: "IFK_PlaylistTrackTrackId"] } } Table "Track" { "TrackId" int4 [pk, not null] "Name" varchar(200) [not null] "AlbumId" int4 "MediaTypeId" int4 [not null] "GenreId" int4 "Composer" varchar(220) "Milliseconds" int4 [not null] "Bytes" int4 "UnitPrice" numeric(10,2) [not null] Indexes { AlbumId [type: btree, name: "IFK_TrackAlbumId"] GenreId [type: btree, name: "IFK_TrackGenreId"] MediaTypeId [type: btree, name: "IFK_TrackMediaTypeId"] } } Ref "FK_AlbumArtistId":"Artist"."ArtistId" < "Album"."ArtistId" Ref "FK_CustomerSupportRepId":"Employee"."EmployeeId" < "Customer"."SupportRepId" Ref "FK_EmployeeReportsTo":"Employee"."EmployeeId" < "Employee"."ReportsTo" Ref "FK_InvoiceCustomerId":"Customer"."CustomerId" < "Invoice"."CustomerId" Ref "FK_InvoiceLineInvoiceId":"Invoice"."InvoiceId" < "InvoiceLine"."InvoiceId" Ref "FK_InvoiceLineTrackId":"Track"."TrackId" < "InvoiceLine"."TrackId" Ref "FK_PlaylistTrackPlaylistId":"Playlist"."PlaylistId" < "PlaylistTrack"."PlaylistId" Ref "FK_PlaylistTrackTrackId":"Track"."TrackId" < "PlaylistTrack"."TrackId" Ref "FK_TrackAlbumId":"Album"."AlbumId" < "Track"."AlbumId" Ref "FK_TrackGenreId":"Genre"."GenreId" < "Track"."GenreId" Ref "FK_TrackMediaTypeId":"MediaType"."MediaTypeId" < "Track"."MediaTypeId"

Instruction



Verify

*.py
Python
"""
Verification script for PostgreSQL Task 2: Customer Data Migration
"""

import os
import sys
import psycopg2
import pickle

def get_connection_params() -> dict:
    """Get database connection parameters."""
    return {
        "host": os.getenv("POSTGRES_HOST", "localhost"),
        "port": int(os.getenv("POSTGRES_PORT", 5432)),
        "database": os.getenv("POSTGRES_DATABASE"),
        "user": os.getenv("POSTGRES_USERNAME"),
        "password": os.getenv("POSTGRES_PASSWORD")
    }

def load_expected_customers():
    """Load the expected customer data from pickle file."""
    import os
    script_dir = os.path.dirname(os.path.abspath(__file__))
    pkl_path = os.path.join(script_dir, 'customer_data.pkl')
    
    try:
        with open(pkl_path, 'rb') as f:
            return pickle.load(f)
    except FileNotFoundError:
        print(f"❌ customer_data.pkl not found at {pkl_path}. Please generate customer data first.")
        return None
    except Exception as e:
        print(f"❌ Error loading customer data: {e}")
        return None

def verify_migrated_customers(conn, expected_customers) -> bool:
    """Verify migrated customers by comparing with expected data as sets."""
    with conn.cursor() as cur:
        # Get all customers with ID > 59 (the migrated ones)
        cur.execute('''
            SELECT "FirstName", "LastName", "Company", "Address", "City", 
                   "State", "Country", "PostalCode", "Phone", "Email", 
                   "SupportRepId", "Fax"
            FROM "Customer" 
            WHERE "CustomerId" > 59
        ''')
        
        actual_customers = cur.fetchall()
        
        if len(actual_customers) != len(expected_customers):
            print(f"❌ Expected {len(expected_customers)} migrated customers, found {len(actual_customers)}")
            return False
        
        # Convert expected customers to tuples for set comparison
        expected_tuples = set()
        for expected in expected_customers:
            expected_tuple = (
                expected['FirstName'], expected['LastName'], expected['Company'],
                expected['Address'], expected['City'], expected['State'],
                expected['Country'], expected['PostalCode'], expected['Phone'], 
                expected['Email'], 3, None  # SupportRepId=3, Fax=None
            )
            expected_tuples.add(expected_tuple)
        
        # Convert actual customers to set with proper type conversion
        actual_tuples = set()
        for row in actual_customers:
            # Convert all fields to strings for consistent comparison
            actual_tuple = (
                str(row[0]) if row[0] is not None else '',  # FirstName
                str(row[1]) if row[1] is not None else '',  # LastName  
                str(row[2]) if row[2] is not None else '',  # Company
                str(row[3]) if row[3] is not None else '',  # Address
                str(row[4]) if row[4] is not None else '',  # City
                str(row[5]) if row[5] is not None else '',  # State
                str(row[6]) if row[6] is not None else '',  # Country
                str(row[7]) if row[7] is not None else '',  # PostalCode
                str(row[8]) if row[8] is not None else '',  # Phone
                str(row[9]) if row[9] is not None else '',  # Email
                int(row[10]) if row[10] is not None else None,  # SupportRepId
                row[11]  # Fax (should be None)
            )
            actual_tuples.add(actual_tuple)
        
        # Check if sets are equal
        if actual_tuples != expected_tuples:
            missing_in_actual = expected_tuples - actual_tuples
            extra_in_actual = actual_tuples - expected_tuples
            
            print(f"❌ Customer data sets don't match!")
            if missing_in_actual:
                print(f"   Missing {len(missing_in_actual)} expected customers")
                for missing in list(missing_in_actual)[:3]:  # Show first 3
                    print(f"   Missing: {missing[0]} {missing[1]} - {missing[2]}")
                if len(missing_in_actual) > 3:
                    print(f"   ... and {len(missing_in_actual) - 3} more")
            
            if extra_in_actual:
                print(f"   Found {len(extra_in_actual)} unexpected customers")
                for extra in list(extra_in_actual)[:3]:  # Show first 3
                    print(f"   Extra: {extra[0]} {extra[1]} - {extra[2]}")
                if len(extra_in_actual) > 3:
                    print(f"   ... and {len(extra_in_actual) - 3} more")
            
            return False
        
        print(f"✅ All {len(expected_customers)} customers migrated correctly")
        print(f"✅ All customers assigned to SupportRepId 3")
        print(f"✅ All customers have Fax field set to NULL")
        print(f"✅ Customer data sets match exactly (order-independent)")
        
        return True

def main():
    """Main verification function."""
    print("=" * 60)
    print("Verifying Customer Data Migration Task")
    print("=" * 60)

    # Load expected customer data
    expected_customers = load_expected_customers()
    if not expected_customers:
        sys.exit(1)
    
    print(f"Loaded {len(expected_customers)} expected customer records")

    # Get connection parameters
    conn_params = get_connection_params()

    if not conn_params["database"]:
        print("❌ No database specified")
        sys.exit(1)

    try:
        # Connect to database
        conn = psycopg2.connect(**conn_params)

        # Verify migration
        success = verify_migrated_customers(conn, expected_customers)

        conn.close()

        if success:
            print("\n🎉 Task verification: PASS")
            sys.exit(0)
        else:
            print("\n❌ Task verification: FAIL")
            sys.exit(1)

    except psycopg2.Error as e:
        print(f"❌ Database error: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"❌ Verification error: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()