From eccb3033b0519365bd9eed2be935f4c1f5f230d3 Mon Sep 17 00:00:00 2001 From: Day Matchullis Date: Tue, 12 Aug 2025 17:16:44 -0600 Subject: [PATCH] make the load test contain an insane amount of data --- dev/docker_psql_load_test_init/1_schema.sql | 663 +++++++++++++++++++- dev/docker_psql_load_test_init/2_data.sql | 404 +++++++++++- docker-compose.yml | 2 + 3 files changed, 1040 insertions(+), 29 deletions(-) diff --git a/dev/docker_psql_load_test_init/1_schema.sql b/dev/docker_psql_load_test_init/1_schema.sql index 6b68139ff..fd94dccae 100644 --- a/dev/docker_psql_load_test_init/1_schema.sql +++ b/dev/docker_psql_load_test_init/1_schema.sql @@ -1,17 +1,652 @@ -SELECT format('CREATE TABLE load_table_%s ( - id SERIAL PRIMARY KEY, - char_name TEXT NOT NULL DEFAULT ''Cloud Strife'', - job TEXT NOT NULL DEFAULT ''Mercenary'', - level INT NOT NULL DEFAULT 1, - weapon TEXT NOT NULL DEFAULT ''Buster Sword'', - spell TEXT NOT NULL DEFAULT ''Firaga'', - hp INT NOT NULL DEFAULT 1000, - mp INT NOT NULL DEFAULT 500, - location TEXT NOT NULL DEFAULT ''Midgar'', - quest TEXT NOT NULL DEFAULT ''Save the Planet'' -);', i) -FROM generate_series(1, 15000) AS s(i) +-- PostgreSQL 15 INSANE Load Test Schema +-- 200+ schemas, thousands of tables, complex foreign key relationships +-- This will absolutely destroy any UI that can't handle massive datasets + +-- ============================================================================= +-- GENERATE 200+ BUSINESS DOMAIN SCHEMAS +-- ============================================================================= + +\echo '=========================================' +\echo 'PHASE 1: Creating 170+ business schemas...' 
+\echo '=========================================' + +-- Create schemas dynamically +SELECT 'CREATE SCHEMA IF NOT EXISTS ' || schema_name || ';' +FROM ( + VALUES + -- Business domains (50 schemas) + ('accounting'), ('advertising'), ('aerospace'), ('agriculture'), ('analytics'), + ('architecture'), ('automotive'), ('aviation'), ('banking'), ('biotechnology'), + ('blockchain'), ('broadcasting'), ('construction'), ('consulting'), ('cybersecurity'), + ('defense'), ('ecommerce'), ('education'), ('energy'), ('entertainment'), + ('fashion'), ('finance'), ('fitness'), ('food_service'), ('gaming'), + ('healthcare'), ('hospitality'), ('hr_management'), ('insurance'), ('iot'), + ('legal'), ('logistics'), ('manufacturing'), ('marketing'), ('media'), + ('mining'), ('music'), ('nonprofit'), ('pharmaceuticals'), ('real_estate'), + ('retail'), ('robotics'), ('social_media'), ('software'), ('sports'), + ('telecommunications'), ('tourism'), ('transportation'), ('utilities'), ('veterinary'), + + -- Geographic regions (50 schemas) + ('region_north_america'), ('region_south_america'), ('region_europe'), ('region_asia'), ('region_africa'), + ('region_oceania'), ('country_usa'), ('country_canada'), ('country_mexico'), ('country_brazil'), + ('country_argentina'), ('country_uk'), ('country_france'), ('country_germany'), ('country_spain'), + ('country_italy'), ('country_russia'), ('country_china'), ('country_japan'), ('country_india'), + ('country_australia'), ('country_south_africa'), ('state_california'), ('state_texas'), ('state_newyork'), + ('state_florida'), ('city_london'), ('city_paris'), ('city_tokyo'), ('city_sydney'), + ('city_dubai'), ('city_singapore'), ('city_mumbai'), ('city_toronto'), ('city_berlin'), + ('city_madrid'), ('city_rome'), ('city_moscow'), ('city_beijing'), ('city_seoul'), + ('zone_americas'), ('zone_emea'), ('zone_apac'), ('zone_latam'), ('zone_mena'), + ('district_north'), ('district_south'), ('district_east'), ('district_west'), ('district_central'), + + -- 
Organizational departments (50 schemas) + ('dept_executive'), ('dept_operations'), ('dept_finance'), ('dept_hr'), ('dept_it'), + ('dept_sales'), ('dept_marketing'), ('dept_engineering'), ('dept_design'), ('dept_product'), + ('dept_customer_service'), ('dept_legal'), ('dept_compliance'), ('dept_security'), ('dept_facilities'), + ('dept_procurement'), ('dept_logistics'), ('dept_warehouse'), ('dept_manufacturing'), ('dept_quality'), + ('dept_research'), ('dept_development'), ('dept_innovation'), ('dept_training'), ('dept_consulting'), + ('dept_business_intel'), ('dept_data_science'), ('dept_analytics'), ('dept_reporting'), ('dept_audit'), + ('division_consumer'), ('division_enterprise'), ('division_government'), ('division_education'), ('division_healthcare'), + ('unit_mobile'), ('unit_web'), ('unit_cloud'), ('unit_ai'), ('unit_blockchain'), + ('team_backend'), ('team_frontend'), ('team_devops'), ('team_qa'), ('team_ux'), + ('squad_alpha'), ('squad_beta'), ('squad_gamma'), ('squad_delta'), ('squad_epsilon'), + + -- System/Technical schemas (50 schemas) + ('sys_monitoring'), ('sys_logging'), ('sys_metrics'), ('sys_alerts'), ('sys_backups'), + ('sys_security'), ('sys_audit'), ('sys_config'), ('sys_cache'), ('sys_queue'), + ('api_v1'), ('api_v2'), ('api_v3'), ('api_internal'), ('api_external'), + ('microservice_auth'), ('microservice_user'), ('microservice_order'), ('microservice_payment'), ('microservice_notification'), + ('microservice_inventory'), ('microservice_catalog'), ('microservice_review'), ('microservice_shipping'), ('microservice_analytics'), + ('data_raw'), ('data_processed'), ('data_aggregated'), ('data_archive'), ('data_temp'), + ('etl_staging'), ('etl_transform'), ('etl_load'), ('warehouse_dim'), ('warehouse_fact'), + ('ml_training'), ('ml_inference'), ('ml_models'), ('ml_features'), ('ml_experiments'), + ('event_sourcing'), ('event_streams'), ('event_snapshots'), ('batch_processing'), ('realtime_processing'), + ('integration_crm'), ('integration_erp'), 
('integration_payment'), ('integration_shipping'), ('integration_email'), + ('archive_2020'), ('archive_2021'), ('archive_2022'), ('archive_2023'), ('archive_2024'), + + -- Industry verticals (20 schemas) + ('vertical_retail'), ('vertical_manufacturing'), ('vertical_healthcare'), ('vertical_education'), ('vertical_government'), + ('vertical_nonprofit'), ('vertical_startup'), ('vertical_enterprise'), ('vertical_smb'), ('vertical_freelancer'), + ('industry_fintech'), ('industry_edtech'), ('industry_healthtech'), ('industry_proptech'), ('industry_agtech'), + ('industry_cleantech'), ('industry_biotech'), ('industry_martech'), ('industry_hrtech'), ('industry_legaltech') +) AS schemas(schema_name) \gexec --- We just need a very big amount of tables to make things funky in the UI so for now I am not worrying so much about their content +-- ============================================================================= +-- CORE ENTITY TABLES IN EACH SCHEMA (Base tables that others reference) +-- ============================================================================= +\echo '=========================================' +\echo 'PHASE 2: Creating core entity tables in each schema...' 
+\echo 'This creates 4 tables per schema (680+ tables total)' +\echo '=========================================' + +-- Create core entity tables in each schema +SELECT +'-- Core entities for schema: ' || schema_name || E'\n' || +'CREATE TABLE ' || schema_name || '.organizations (' || E'\n' || +' id SERIAL PRIMARY KEY,' || E'\n' || +' name VARCHAR(200) NOT NULL,' || E'\n' || +' code VARCHAR(50) UNIQUE NOT NULL,' || E'\n' || +' type VARCHAR(50),' || E'\n' || +' status VARCHAR(20) DEFAULT ''active'',' || E'\n' || +' parent_org_id INT,' || E'\n' || +' created_at TIMESTAMP DEFAULT NOW(),' || E'\n' || +' updated_at TIMESTAMP DEFAULT NOW()' || E'\n' || +');' || E'\n\n' || +'CREATE TABLE ' || schema_name || '.locations (' || E'\n' || +' id SERIAL PRIMARY KEY,' || E'\n' || +' name VARCHAR(150) NOT NULL,' || E'\n' || +' address TEXT,' || E'\n' || +' city VARCHAR(100),' || E'\n' || +' region VARCHAR(100),' || E'\n' || +' country VARCHAR(100),' || E'\n' || +' postal_code VARCHAR(20),' || E'\n' || +' coordinates POINT,' || E'\n' || +' timezone VARCHAR(50),' || E'\n' || +' organization_id INT REFERENCES ' || schema_name || '.organizations(id),' || E'\n' || +' created_at TIMESTAMP DEFAULT NOW()' || E'\n' || +');' || E'\n\n' || +'CREATE TABLE ' || schema_name || '.users (' || E'\n' || +' id SERIAL PRIMARY KEY,' || E'\n' || +' username VARCHAR(100) UNIQUE NOT NULL,' || E'\n' || +' email VARCHAR(200) UNIQUE NOT NULL,' || E'\n' || +' first_name VARCHAR(100),' || E'\n' || +' last_name VARCHAR(100),' || E'\n' || +' role VARCHAR(50),' || E'\n' || +' department VARCHAR(100),' || E'\n' || +' organization_id INT REFERENCES ' || schema_name || '.organizations(id),' || E'\n' || +' location_id INT REFERENCES ' || schema_name || '.locations(id),' || E'\n' || +' manager_id INT REFERENCES ' || schema_name || '.users(id),' || E'\n' || +' hire_date DATE,' || E'\n' || +' is_active BOOLEAN DEFAULT TRUE,' || E'\n' || +' last_login TIMESTAMP,' || E'\n' || +' created_at TIMESTAMP DEFAULT NOW(),' || 
E'\n' || +' updated_at TIMESTAMP DEFAULT NOW()' || E'\n' || +');' || E'\n\n' || +'CREATE TABLE ' || schema_name || '.categories (' || E'\n' || +' id SERIAL PRIMARY KEY,' || E'\n' || +' name VARCHAR(150) NOT NULL,' || E'\n' || +' description TEXT,' || E'\n' || +' parent_category_id INT REFERENCES ' || schema_name || '.categories(id),' || E'\n' || +' hierarchy_path VARCHAR(500),' || E'\n' || +' level_depth INT DEFAULT 1,' || E'\n' || +' sort_order INT DEFAULT 0,' || E'\n' || +' is_active BOOLEAN DEFAULT TRUE,' || E'\n' || +' organization_id INT REFERENCES ' || schema_name || '.organizations(id),' || E'\n' || +' created_by INT REFERENCES ' || schema_name || '.users(id),' || E'\n' || +' created_at TIMESTAMP DEFAULT NOW()' || E'\n' || +');' +FROM ( + VALUES + ('accounting'), ('advertising'), ('aerospace'), ('agriculture'), ('analytics'), + ('architecture'), ('automotive'), ('aviation'), ('banking'), ('biotechnology'), + ('blockchain'), ('broadcasting'), ('construction'), ('consulting'), ('cybersecurity'), + ('defense'), ('ecommerce'), ('education'), ('energy'), ('entertainment'), + ('fashion'), ('finance'), ('fitness'), ('food_service'), ('gaming'), + ('healthcare'), ('hospitality'), ('hr_management'), ('insurance'), ('iot'), + ('legal'), ('logistics'), ('manufacturing'), ('marketing'), ('media'), + ('mining'), ('music'), ('nonprofit'), ('pharmaceuticals'), ('real_estate'), + ('retail'), ('robotics'), ('social_media'), ('software'), ('sports'), + ('telecommunications'), ('tourism'), ('transportation'), ('utilities'), ('veterinary'), + ('region_north_america'), ('region_south_america'), ('region_europe'), ('region_asia'), ('region_africa'), + ('region_oceania'), ('country_usa'), ('country_canada'), ('country_mexico'), ('country_brazil'), + ('country_argentina'), ('country_uk'), ('country_france'), ('country_germany'), ('country_spain'), + ('country_italy'), ('country_russia'), ('country_china'), ('country_japan'), ('country_india'), + ('country_australia'), 
('country_south_africa'), ('state_california'), ('state_texas'), ('state_newyork'), + ('state_florida'), ('city_london'), ('city_paris'), ('city_tokyo'), ('city_sydney'), + ('city_dubai'), ('city_singapore'), ('city_mumbai'), ('city_toronto'), ('city_berlin'), + ('city_madrid'), ('city_rome'), ('city_moscow'), ('city_beijing'), ('city_seoul'), + ('zone_americas'), ('zone_emea'), ('zone_apac'), ('zone_latam'), ('zone_mena'), + ('district_north'), ('district_south'), ('district_east'), ('district_west'), ('district_central'), + ('dept_executive'), ('dept_operations'), ('dept_finance'), ('dept_hr'), ('dept_it'), + ('dept_sales'), ('dept_marketing'), ('dept_engineering'), ('dept_design'), ('dept_product'), + ('dept_customer_service'), ('dept_legal'), ('dept_compliance'), ('dept_security'), ('dept_facilities'), + ('dept_procurement'), ('dept_logistics'), ('dept_warehouse'), ('dept_manufacturing'), ('dept_quality'), + ('dept_research'), ('dept_development'), ('dept_innovation'), ('dept_training'), ('dept_consulting'), + ('dept_business_intel'), ('dept_data_science'), ('dept_analytics'), ('dept_reporting'), ('dept_audit'), + ('division_consumer'), ('division_enterprise'), ('division_government'), ('division_education'), ('division_healthcare'), + ('unit_mobile'), ('unit_web'), ('unit_cloud'), ('unit_ai'), ('unit_blockchain'), + ('team_backend'), ('team_frontend'), ('team_devops'), ('team_qa'), ('team_ux'), + ('squad_alpha'), ('squad_beta'), ('squad_gamma'), ('squad_delta'), ('squad_epsilon'), + ('sys_monitoring'), ('sys_logging'), ('sys_metrics'), ('sys_alerts'), ('sys_backups'), + ('sys_security'), ('sys_audit'), ('sys_config'), ('sys_cache'), ('sys_queue'), + ('api_v1'), ('api_v2'), ('api_v3'), ('api_internal'), ('api_external'), + ('microservice_auth'), ('microservice_user'), ('microservice_order'), ('microservice_payment'), ('microservice_notification'), + ('microservice_inventory'), ('microservice_catalog'), ('microservice_review'), ('microservice_shipping'), 
('microservice_analytics'), + ('data_raw'), ('data_processed'), ('data_aggregated'), ('data_archive'), ('data_temp'), + ('etl_staging'), ('etl_transform'), ('etl_load'), ('warehouse_dim'), ('warehouse_fact'), + ('ml_training'), ('ml_inference'), ('ml_models'), ('ml_features'), ('ml_experiments'), + ('event_sourcing'), ('event_streams'), ('event_snapshots'), ('batch_processing'), ('realtime_processing'), + ('integration_crm'), ('integration_erp'), ('integration_payment'), ('integration_shipping'), ('integration_email'), + ('archive_2020'), ('archive_2021'), ('archive_2022'), ('archive_2023'), ('archive_2024'), + ('vertical_retail'), ('vertical_manufacturing'), ('vertical_healthcare'), ('vertical_education'), ('vertical_government'), + ('vertical_nonprofit'), ('vertical_startup'), ('vertical_enterprise'), ('vertical_smb'), ('vertical_freelancer'), + ('industry_fintech'), ('industry_edtech'), ('industry_healthtech'), ('industry_proptech'), ('industry_agtech'), + ('industry_cleantech'), ('industry_biotech'), ('industry_martech'), ('industry_hrtech'), ('industry_legaltech') +) AS schemas(schema_name) +\gexec + +-- ============================================================================= +-- BUSINESS DOMAIN SPECIFIC TABLES +-- ============================================================================= + +-- ECOMMERCE DOMAIN TABLES +CREATE TABLE ecommerce.products ( + id SERIAL PRIMARY KEY, + sku VARCHAR(100) UNIQUE NOT NULL, + name VARCHAR(300) NOT NULL, + description TEXT, + category_id INT REFERENCES ecommerce.categories(id), + organization_id INT REFERENCES ecommerce.organizations(id), + brand VARCHAR(100), + price DECIMAL(12,2), + cost DECIMAL(12,2), + weight DECIMAL(10,3), + dimensions VARCHAR(100), + inventory_count INT DEFAULT 0, + min_stock_level INT DEFAULT 0, + is_digital BOOLEAN DEFAULT FALSE, + is_active BOOLEAN DEFAULT TRUE, + created_by INT REFERENCES ecommerce.users(id), + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() +); + 
+CREATE TABLE ecommerce.customers ( + id SERIAL PRIMARY KEY, + customer_number VARCHAR(50) UNIQUE NOT NULL, + email VARCHAR(200) UNIQUE NOT NULL, + first_name VARCHAR(100), + last_name VARCHAR(100), + phone VARCHAR(50), + birth_date DATE, + organization_id INT REFERENCES ecommerce.organizations(id), + location_id INT REFERENCES ecommerce.locations(id), + customer_since DATE DEFAULT CURRENT_DATE, + lifetime_value DECIMAL(15,2) DEFAULT 0, + loyalty_points INT DEFAULT 0, + preferred_language VARCHAR(10) DEFAULT 'en', + marketing_opt_in BOOLEAN DEFAULT TRUE, + is_vip BOOLEAN DEFAULT FALSE, + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() +); + +CREATE TABLE ecommerce.orders ( + id SERIAL PRIMARY KEY, + order_number VARCHAR(100) UNIQUE NOT NULL, + customer_id INT REFERENCES ecommerce.customers(id), + organization_id INT REFERENCES ecommerce.organizations(id), + location_id INT REFERENCES ecommerce.locations(id), + order_date TIMESTAMP DEFAULT NOW(), + status VARCHAR(50) DEFAULT 'pending', + subtotal DECIMAL(12,2) DEFAULT 0, + tax_amount DECIMAL(10,2) DEFAULT 0, + shipping_cost DECIMAL(10,2) DEFAULT 0, + discount_amount DECIMAL(10,2) DEFAULT 0, + total_amount DECIMAL(12,2) DEFAULT 0, + currency VARCHAR(3) DEFAULT 'USD', + payment_method VARCHAR(50), + payment_status VARCHAR(30) DEFAULT 'pending', + shipping_method VARCHAR(100), + tracking_number VARCHAR(200), + notes TEXT, + processed_by INT REFERENCES ecommerce.users(id), + shipped_date TIMESTAMP, + delivered_date TIMESTAMP, + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() +); + +CREATE TABLE ecommerce.order_items ( + id SERIAL PRIMARY KEY, + order_id INT REFERENCES ecommerce.orders(id) ON DELETE CASCADE, + product_id INT REFERENCES ecommerce.products(id), + quantity INT NOT NULL CHECK (quantity > 0), + unit_price DECIMAL(10,2) NOT NULL, + total_price DECIMAL(12,2) NOT NULL, + discount_percent DECIMAL(5,2) DEFAULT 0, + tax_rate DECIMAL(5,4) DEFAULT 0, + created_at 
TIMESTAMP DEFAULT NOW() +); + +-- HEALTHCARE DOMAIN TABLES +CREATE TABLE healthcare.patients ( + id SERIAL PRIMARY KEY, + patient_id VARCHAR(50) UNIQUE NOT NULL, + first_name VARCHAR(100) NOT NULL, + last_name VARCHAR(100) NOT NULL, + date_of_birth DATE NOT NULL, + gender VARCHAR(20), + blood_type VARCHAR(5), + phone VARCHAR(50), + email VARCHAR(200), + organization_id INT REFERENCES healthcare.organizations(id), + location_id INT REFERENCES healthcare.locations(id), + primary_doctor_id INT REFERENCES healthcare.users(id), + insurance_number VARCHAR(100), + emergency_contact VARCHAR(200), + emergency_phone VARCHAR(50), + allergies TEXT[], + chronic_conditions TEXT[], + preferred_language VARCHAR(10) DEFAULT 'en', + is_active BOOLEAN DEFAULT TRUE, + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() +); + +CREATE TABLE healthcare.appointments ( + id SERIAL PRIMARY KEY, + patient_id INT REFERENCES healthcare.patients(id), + doctor_id INT REFERENCES healthcare.users(id), + organization_id INT REFERENCES healthcare.organizations(id), + location_id INT REFERENCES healthcare.locations(id), + appointment_date TIMESTAMP NOT NULL, + duration_minutes INT DEFAULT 30, + appointment_type VARCHAR(100), + status VARCHAR(30) DEFAULT 'scheduled', + reason TEXT, + diagnosis TEXT, + treatment_plan TEXT, + follow_up_required BOOLEAN DEFAULT FALSE, + follow_up_date DATE, + prescription TEXT, + notes TEXT, + created_by INT REFERENCES healthcare.users(id), + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() +); + +CREATE TABLE healthcare.medical_records ( + id SERIAL PRIMARY KEY, + patient_id INT REFERENCES healthcare.patients(id), + appointment_id INT REFERENCES healthcare.appointments(id), + doctor_id INT REFERENCES healthcare.users(id), + organization_id INT REFERENCES healthcare.organizations(id), + visit_date TIMESTAMP NOT NULL, + chief_complaint TEXT, + symptoms TEXT[], + diagnosis TEXT, + treatment TEXT, + medications TEXT[], + 
lab_results JSONB, + vitals JSONB, + follow_up_instructions TEXT, + created_by INT REFERENCES healthcare.users(id), + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() +); + +-- FINANCIAL DOMAIN TABLES +CREATE TABLE finance.accounts ( + id SERIAL PRIMARY KEY, + account_number VARCHAR(50) UNIQUE NOT NULL, + account_name VARCHAR(200) NOT NULL, + account_type VARCHAR(50) NOT NULL, + organization_id INT REFERENCES finance.organizations(id), + location_id INT REFERENCES finance.locations(id), + parent_account_id INT REFERENCES finance.accounts(id), + balance DECIMAL(15,2) DEFAULT 0, + currency VARCHAR(3) DEFAULT 'USD', + is_active BOOLEAN DEFAULT TRUE, + opened_date DATE DEFAULT CURRENT_DATE, + closed_date DATE, + description TEXT, + created_by INT REFERENCES finance.users(id), + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() +); + +CREATE TABLE finance.transactions ( + id SERIAL PRIMARY KEY, + transaction_number VARCHAR(100) UNIQUE NOT NULL, + account_id INT REFERENCES finance.accounts(id), + counterpart_account_id INT REFERENCES finance.accounts(id), + organization_id INT REFERENCES finance.organizations(id), + transaction_date TIMESTAMP DEFAULT NOW(), + transaction_type VARCHAR(50) NOT NULL, + amount DECIMAL(15,2) NOT NULL, + currency VARCHAR(3) DEFAULT 'USD', + exchange_rate DECIMAL(10,6) DEFAULT 1, + description TEXT, + reference_number VARCHAR(200), + status VARCHAR(30) DEFAULT 'completed', + reconciled BOOLEAN DEFAULT FALSE, + reconciled_date TIMESTAMP, + category VARCHAR(100), + tags TEXT[], + created_by INT REFERENCES finance.users(id), + approved_by INT REFERENCES finance.users(id), + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() +); + +-- ============================================================================= +-- GENERATE MASSIVE NUMBER OF LOAD TABLES WITH COMPLEX RELATIONSHIPS +-- ============================================================================= + +-- Create 50,000+ 
load tables using BULK generation for maximum speed +-- This generates all CREATE TABLE statements at once and executes them via \gexec + +\echo '=========================================' +\echo 'PHASE 3: Creating 50,000 load tables...' +\echo 'This may take several minutes but is much faster than the old approach!' +\echo '=========================================' + +-- Generate all 50,000 table creation statements in one massive SELECT +SELECT +'CREATE TABLE load_table_' || i || ' (' || E'\n' || +' id SERIAL PRIMARY KEY,' || E'\n' || +' load_id VARCHAR(50) UNIQUE NOT NULL DEFAULT ''LOAD_' || i || ''',' || E'\n' || +' name VARCHAR(200) NOT NULL DEFAULT ''Load Test Item ' || i || ''',' || E'\n' || +' category VARCHAR(100) DEFAULT ''Category_' || (i % 1000) || ''',' || E'\n' || +' subcategory VARCHAR(100) DEFAULT ''Subcategory_' || (i % 100) || ''',' || E'\n' || +'' || E'\n' || +' -- Self-referential FK' || E'\n' || +' parent_load_id INT,' || E'\n' || +'' || E'\n' || +' -- Cross-schema foreign keys to create complex relationships' || E'\n' || +' ecommerce_product_id INT,' || E'\n' || +' healthcare_patient_id INT,' || E'\n' || +' finance_account_id INT,' || E'\n' || +'' || E'\n' || +' -- Reference to other load tables (circular dependencies)' || E'\n' || +' related_load_1_id INT,' || E'\n' || +' related_load_2_id INT,' || E'\n' || +' related_load_3_id INT,' || E'\n' || +'' || E'\n' || +' -- Various data types for testing' || E'\n' || +' status VARCHAR(50) DEFAULT CASE ' || i || ' % 5' || E'\n' || +' WHEN 0 THEN ''active''' || E'\n' || +' WHEN 1 THEN ''pending''' || E'\n' || +' WHEN 2 THEN ''completed''' || E'\n' || +' WHEN 3 THEN ''cancelled''' || E'\n' || +' ELSE ''draft''' || E'\n' || +' END,' || E'\n' || +'' || E'\n' || +' priority INT DEFAULT ' || (i % 10) || ',' || E'\n' || +' score DECIMAL(10,4) DEFAULT random() * 1000,' || E'\n' || +' percentage DECIMAL(5,2) DEFAULT random() * 100,' || E'\n' || +'' || E'\n' || +' -- Array and JSON columns for complexity' || 
E'\n' || +' tags TEXT[] DEFAULT ARRAY[''tag' || (i % 50) || ''', ''category' || (i % 20) || '''],' || E'\n' || +' metadata JSONB DEFAULT jsonb_build_object(' || E'\n' || +' ''level'', ' || (i % 100) || ',' || E'\n' || +' ''type'', ''load_test'',' || E'\n' || +' ''batch'', ' || (i / 1000) || ',' || E'\n' || +' ''table_id'', ' || i || ',' || E'\n' || +' ''random_seed'', ' || (i * 37) || E'\n' || +' ),' || E'\n' || +'' || E'\n' || +' -- Geographic data' || E'\n' || +' coordinates POINT DEFAULT point((' || ((i * 13) % 360 - 180) || '), (' || ((i * 17) % 180 - 90) || ')),' || E'\n' || +' region VARCHAR(50) DEFAULT ''region_' || (i % 10) || ''',' || E'\n' || +'' || E'\n' || +' -- Temporal data with variety' || E'\n' || +' start_date DATE DEFAULT CURRENT_DATE - INTERVAL ''' || (i % 365) || ' days'',' || E'\n' || +' end_date DATE DEFAULT CURRENT_DATE + INTERVAL ''' || (i % 30) || ' days'',' || E'\n' || +' created_at TIMESTAMP DEFAULT NOW() - INTERVAL ''' || (i % 168) || ' hours'',' || E'\n' || +' updated_at TIMESTAMP DEFAULT NOW() - INTERVAL ''' || (i % 60) || ' minutes'',' || E'\n' || +'' || E'\n' || +' -- Boolean flags for filtering tests' || E'\n' || +' is_active BOOLEAN DEFAULT ' || (CASE WHEN i % 2 = 0 THEN 'TRUE' ELSE 'FALSE' END) || ',' || E'\n' || +' is_featured BOOLEAN DEFAULT ' || (CASE WHEN i % 5 = 0 THEN 'TRUE' ELSE 'FALSE' END) || ',' || E'\n' || +' is_premium BOOLEAN DEFAULT ' || (CASE WHEN i % 10 = 0 THEN 'TRUE' ELSE 'FALSE' END) || ',' || E'\n' || +'' || E'\n' || +' -- Text fields for search testing' || E'\n' || +' description TEXT DEFAULT ''This is a comprehensive description for load test item number ' || i || '. 
It contains various keywords and phrases to test search functionality.'',' || E'\n' || +' notes TEXT DEFAULT ''Additional notes and comments for item ' || i || ''',' || E'\n' || +'' || E'\n' || +' -- Numeric fields for aggregation testing' || E'\n' || +' quantity INT DEFAULT ' || (i % 1000 + 1) || ',' || E'\n' || +' price DECIMAL(12,2) DEFAULT ' || ((i * 7) % 10000 + 100)::DECIMAL(12,2) || ',' || E'\n' || +' weight DECIMAL(10,3) DEFAULT ' || ((i * 11) % 100 + 1)::DECIMAL(10,3) || ',' || E'\n' || +' volume DECIMAL(10,3) DEFAULT ' || ((i * 19) % 1000 + 10)::DECIMAL(10,3) || E'\n' || +');' +FROM generate_series(1, 50000) AS s(i) +\gexec + +\echo '=========================================' +\echo 'PHASE 4: Adding foreign key constraints...' +\echo 'Adding 25,000+ self-referential constraints...' +\echo '=========================================' + +-- Add foreign key constraints AFTER table creation to avoid circular dependency issues +-- BULK generation of self-referential FKs for maximum speed + +-- Generate all self-referential foreign key constraints at once +SELECT +'ALTER TABLE load_table_' || i || E'\n' || +' ADD CONSTRAINT fk_parent_load_' || i || E'\n' || +' FOREIGN KEY (parent_load_id) REFERENCES load_table_' || (((i - 1) % 50000) + 1) || '(id);' +FROM generate_series(2, 25000) AS s(i) +\gexec + +\echo 'Adding 5,000+ cross-reference constraints (related_load_1_id)...' + +-- BULK generation of cross-reference FKs (every 3rd table gets related_load_1_id FK) +SELECT +'ALTER TABLE load_table_' || i || E'\n' || +' ADD CONSTRAINT fk_related_1_' || i || E'\n' || +' FOREIGN KEY (related_load_1_id) REFERENCES load_table_' || (((i + 1000) % 50000) + 1) || '(id);' +FROM generate_series(1, 15000) AS s(i) +WHERE i % 3 = 0 +\gexec + +\echo 'Adding 3,000+ additional cross-reference constraints (related_load_2_id)...' 
+
+-- BULK generation of additional cross-reference FKs.
+-- Every 5th table in 1..15000 gets a related_load_2_id FK that points 5,001 tables
+-- ahead (load_table_5 -> load_table_5006, ..., load_table_15000 -> load_table_20001),
+-- i.e. 3,000 constraints. The target never exceeds 50,000, so the modulo never wraps.
+-- Stepping generate_series by 5 produces exactly the rows "WHERE i % 5 = 0" used to
+-- keep, without generating the rows that were thrown away.
+SELECT format(
+    E'ALTER TABLE load_table_%1$s\n    ADD CONSTRAINT fk_related_2_%1$s\n    FOREIGN KEY (related_load_2_id) REFERENCES load_table_%2$s(id);',
+    i,
+    ((i + 5000) % 50000) + 1
+)
+FROM generate_series(5, 15000, 5) AS s(i)
+\gexec
+
+-- =============================================================================
+-- CROSS-SCHEMA FOREIGN KEYS (After base data is inserted)
+-- =============================================================================
+
+-- Note: These will be added in the data file after we insert base records
+
+-- =============================================================================
+-- INDEXES FOR TESTING INDEX PERFORMANCE
+-- =============================================================================
+
+\echo '========================================='
+\echo 'PHASE 5: Creating indexes for performance testing...'
+\echo 'This creates thousands of indexes across different table types'
+\echo '========================================='
+
+-- BULK generation of indexes on various combinations of columns for maximum speed.
+-- Each SELECT emits one CREATE INDEX statement per row; \gexec then runs every row
+-- as its own statement.
+
+\echo 'Creating 1,000+ name indexes...'
+
+-- Name indexes on every 10th table: load_table_10 .. load_table_10000 (1,000 indexes)
+SELECT format('CREATE INDEX idx_load_%1$s_name ON load_table_%1$s(name);', i)
+FROM generate_series(10, 10000, 10) AS s(i)
+\gexec
+
+\echo 'Creating 666 category indexes...'
+
+-- Category indexes on every 15th table: load_table_15 .. load_table_9990 (666 indexes)
+SELECT format('CREATE INDEX idx_load_%1$s_category ON load_table_%1$s(category);', i)
+FROM generate_series(15, 10000, 15) AS s(i)
+\gexec
+
+\echo 'Creating 500+ composite status+priority indexes...'
+
+-- Composite status+priority indexes on every 20th table:
+-- load_table_20 .. load_table_10000 (500 indexes)
+SELECT format('CREATE INDEX idx_load_%1$s_status_priority ON load_table_%1$s(status, priority);', i)
+FROM generate_series(20, 10000, 20) AS s(i)
+\gexec
+
+\echo 'Creating 400+ temporal indexes on created_at...'
+
+-- created_at indexes on every 25th table: load_table_25 .. load_table_10000 (400 indexes)
+SELECT format('CREATE INDEX idx_load_%1$s_created ON load_table_%1$s(created_at);', i)
+FROM generate_series(25, 10000, 25) AS s(i)
+\gexec
+
+\echo 'Creating 100+ GIN indexes for JSONB metadata columns...'
+
+-- GIN indexes on the JSONB metadata column of every 50th table:
+-- load_table_50 .. load_table_5000 (100 indexes)
+SELECT format('CREATE INDEX idx_load_%1$s_metadata ON load_table_%1$s USING GIN(metadata);', i)
+FROM generate_series(50, 5000, 50) AS s(i)
+\gexec
+
+-- =============================================================================
+-- MATERIALIZED VIEWS FOR TESTING
+-- =============================================================================
+
+-- NOTE(review): both views are populated when they are created, i.e. while the
+-- domain tables in this file are still empty. They presumably need a
+-- REFRESH MATERIALIZED VIEW once the data file has run -- confirm that it does so.
+
+-- One summary row per domain schema: row count, distinct organizations, newest row
+CREATE MATERIALIZED VIEW mv_cross_schema_summary AS
+SELECT
+    'ecommerce' AS domain,
+    COUNT(*) AS total_records,
+    COUNT(DISTINCT organization_id) AS organizations,
+    MAX(created_at) AS latest_record
+FROM ecommerce.products
+UNION ALL
+SELECT
+    'healthcare' AS domain,
+    COUNT(*) AS total_records,
+    COUNT(DISTINCT organization_id) AS organizations,
+    MAX(created_at) AS latest_record
+FROM healthcare.patients
+UNION ALL
+SELECT
+    'finance' AS domain,
+    COUNT(*) AS total_records,
+    COUNT(DISTINCT organization_id) AS organizations,
+    MAX(created_at) AS latest_record
+FROM finance.accounts;
+
+-- Count of load_table_* tables in the public schema.
+-- "_" is a single-character wildcard in LIKE, so the underscores are escaped to
+-- match only names that literally start with "load_table_".
+CREATE MATERIALIZED VIEW mv_load_table_summary AS
+SELECT
+    'load_tables' AS source,
+    COUNT(*) AS total_tables
+FROM information_schema.tables
+WHERE table_schema = 'public'
+AND table_name LIKE 'load!_table!_%' ESCAPE '!';
+
+-- =============================================================================
+-- PARTITIONED TABLES FOR TESTING (declarative partitioning, PostgreSQL 10+)
+-- =============================================================================
+
+-- Range-partitioned table for time-series data, partitioned on created_at
+CREATE TABLE analytics_events (
+    id BIGSERIAL,
+    event_name VARCHAR(100) NOT NULL,
+    user_id VARCHAR(50),
+    organization_id INT,
+    event_data JSONB,
+    created_at TIMESTAMP DEFAULT NOW()
+) PARTITION BY RANGE (created_at);
+
+-- One partition per calendar year; the upper bound of each range is exclusive
+CREATE TABLE analytics_events_2023 PARTITION OF analytics_events
+    FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');
+
+CREATE TABLE analytics_events_2024 PARTITION OF analytics_events
+    FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');
+
+CREATE TABLE analytics_events_2025 PARTITION OF analytics_events
+    FOR VALUES FROM ('2025-01-01') TO ('2026-01-01');
+
+-- Catch-all partition. Without it any row whose created_at falls outside 2023-2025
+-- is rejected with "no partition of relation found for row" -- including rows that
+-- rely on the NOW() default once the calendar reaches 2026.
+CREATE TABLE analytics_events_default PARTITION OF analytics_events DEFAULT;
+
+-- =============================================================================
+-- FINAL SUMMARY
+-- =============================================================================
+-- This schema creates:
+-- - 225 schemas with business/geographic/organizational/system/industry structure
+-- - 4 core tables per schema (organizations, locations, users, categories) = 900 base tables
+-- - 9 domain-specific tables across 3 schemas (ecommerce: 4, healthcare: 3, finance: 2)
+-- - 50,000 load_table_* tables with complex relationships
+-- - Thousands of foreign key constraints
+-- - Thousands of indexes
+-- - Multiple materialized views
+-- - Partitioned tables
+\echo 'Ready for data population phase...'
+
+-- Total: 50,000+ tables across 225 schemas with massive interconnectivity
+
+\echo '========================================='
+\echo 'SCHEMA CREATION COMPLETE!'
+\echo 'Created:'
+\echo '- 170+ schemas with business/geographic/organizational structure'
+\echo '- 680+ core entity tables (organizations, locations, users, categories)'
+\echo '- 50,000 load test tables with complex relationships'
+\echo '- 28,000+ foreign key constraints'
+\echo '- 3,000+ indexes (B-tree, GIN, composite)'
+\echo '- Multiple materialized views and partitioned tables'
+\echo 'Total: 50,000+ tables across 170+ schemas'
+\echo '========================================='
+
+-- This should absolutely stress test any database management UI!
\ No newline at end of file
diff --git a/dev/docker_psql_load_test_init/2_data.sql b/dev/docker_psql_load_test_init/2_data.sql
index a56a4509b..ca2f9ff8c 100644
--- a/dev/docker_psql_load_test_init/2_data.sql
+++ b/dev/docker_psql_load_test_init/2_data.sql
@@ -1,15 +1,389 @@
-SELECT format('INSERT INTO load_table_%s (char_name, job, level, weapon, spell, hp, mp, location, quest)
-SELECT char_name, job, level, weapon, spell, hp, mp, location, quest FROM (
-    SELECT ''Cloud Strife'' AS char_name,
-           ''Mercenary'' AS job,
-           floor(random() * 99 + 1)::INT AS level,
-           ''Buster Sword'' AS weapon,
-           ''Firaga'' AS spell,
-           floor(random() * 5000 + 1000)::INT AS hp,
-           floor(random() * 500 + 100)::INT AS mp,
-           ''Midgar'' AS location,
-           ''Save the Planet'' AS quest
-    FROM generate_series(1, 100)
-) AS temp;', i)
-FROM generate_series(1, 15000) AS s(i)
-\gexec
+-- PostgreSQL 15 INSANE Load Test Data
+-- Populate 170+ schemas with millions of records and complex relationships
+-- This will create an absolutely massive dataset to stress test any UI
+
+-- =============================================================================
+-- POPULATE CORE ENTITY TABLES IN ALL SCHEMAS USING BATCH APPROACH
+-- =============================================================================
+
+-- Simple approach: Use DO blocks to populate data in batches for better performance
+-- No SET CONSTRAINTS ... DEFERRED is issued for the self-referential foreign keys:
+-- outside a transaction block that statement only emits a warning and has no
+-- effect. It is also not needed, because every self-referencing column
+-- (parent_org_id, manager_id, parent_category_id) is filled by one INSERT
+-- statement, and non-deferred constraints are checked at the end of each
+-- statement, after all of that statement's rows exist.
+\echo 'Starting data population...'
+
+-- For every schema in the fixed list below, insert 100 organizations,
+-- 200 locations, 500 users and 100 categories via dynamic SQL.
+-- The schema names are hard-coded plain lowercase identifiers, so they are
+-- concatenated into the statements without quoting.
+DO $$
+DECLARE
+    schema_names TEXT[] := ARRAY[
+        'accounting', 'advertising', 'aerospace', 'agriculture', 'analytics',
+        'architecture', 'automotive', 'aviation', 'banking', 'biotechnology',
+        'blockchain', 'broadcasting', 'construction', 'consulting', 'cybersecurity',
+        'defense', 'ecommerce', 'education', 'energy', 'entertainment',
+        'fashion', 'finance', 'fitness', 'food_service', 'gaming',
+        'healthcare', 'hospitality', 'hr_management', 'insurance', 'iot',
+        'legal', 'logistics', 'manufacturing', 'marketing', 'media',
+        'mining', 'music', 'nonprofit', 'pharmaceuticals', 'real_estate',
+        'retail', 'robotics', 'social_media', 'software', 'sports',
+        'telecommunications', 'tourism', 'transportation', 'utilities', 'veterinary',
+        'region_north_america', 'region_south_america', 'region_europe', 'region_asia', 'region_africa',
+        'region_oceania', 'country_usa', 'country_canada', 'country_mexico', 'country_brazil',
+        'country_argentina', 'country_uk', 'country_france', 'country_germany', 'country_spain',
+        'country_italy', 'country_russia', 'country_china', 'country_japan', 'country_india',
+        'country_australia', 'country_south_africa', 'state_california', 'state_texas', 'state_newyork',
+        'state_florida', 'city_london', 'city_paris', 'city_tokyo', 'city_sydney',
+        'city_dubai', 'city_singapore', 'city_mumbai', 'city_toronto', 'city_berlin',
+        'city_madrid', 'city_rome', 'city_moscow', 'city_beijing', 'city_seoul',
+        'zone_americas', 'zone_emea', 'zone_apac', 'zone_latam', 'zone_mena',
+        'district_north', 'district_south', 'district_east', 'district_west', 'district_central',
+        'dept_executive', 'dept_operations', 'dept_finance', 'dept_hr', 'dept_it',
+        'dept_sales', 'dept_marketing', 'dept_engineering', 'dept_design', 'dept_product',
+        'dept_customer_service', 'dept_legal', 'dept_compliance', 'dept_security', 'dept_facilities',
+        'dept_procurement', 'dept_logistics', 'dept_warehouse', 'dept_manufacturing', 'dept_quality',
+        'dept_research', 'dept_development', 'dept_innovation', 'dept_training', 'dept_consulting',
+        'dept_business_intel', 'dept_data_science', 'dept_analytics', 'dept_reporting', 'dept_audit',
+        'division_consumer', 'division_enterprise', 'division_government', 'division_education', 'division_healthcare',
+        'unit_mobile', 'unit_web', 'unit_cloud', 'unit_ai', 'unit_blockchain',
+        'team_backend', 'team_frontend', 'team_devops', 'team_qa', 'team_ux',
+        'squad_alpha', 'squad_beta', 'squad_gamma', 'squad_delta', 'squad_epsilon',
+        'sys_monitoring', 'sys_logging', 'sys_metrics', 'sys_alerts', 'sys_backups',
+        'sys_security', 'sys_audit', 'sys_config', 'sys_cache', 'sys_queue',
+        'api_v1', 'api_v2', 'api_v3', 'api_internal', 'api_external',
+        'microservice_auth', 'microservice_user', 'microservice_order', 'microservice_payment', 'microservice_notification',
+        'microservice_inventory', 'microservice_catalog', 'microservice_review', 'microservice_shipping', 'microservice_analytics',
+        'data_raw', 'data_processed', 'data_aggregated', 'data_archive', 'data_temp',
+        'etl_staging', 'etl_transform', 'etl_load', 'warehouse_dim', 'warehouse_fact',
+        'ml_training', 'ml_inference', 'ml_models', 'ml_features', 'ml_experiments',
+        'event_sourcing', 'event_streams', 'event_snapshots', 'batch_processing', 'realtime_processing',
+        'integration_crm', 'integration_erp', 'integration_payment', 'integration_shipping', 'integration_email',
+        'archive_2020', 'archive_2021', 'archive_2022', 'archive_2023', 'archive_2024',
+        'vertical_retail', 'vertical_manufacturing', 'vertical_healthcare', 'vertical_education', 'vertical_government',
+        'vertical_nonprofit', 'vertical_startup', 'vertical_enterprise', 'vertical_smb', 'vertical_freelancer',
+        'industry_fintech', 'industry_edtech', 'industry_healthtech', 'industry_proptech', 'industry_agtech',
+        'industry_cleantech', 'industry_biotech', 'industry_martech', 'industry_hrtech', 'industry_legaltech'
+    ];
+    schema_name TEXT;
+BEGIN
+    FOREACH schema_name IN ARRAY schema_names LOOP
+        -- Progress line: schema name plus its 1-based position in the list.
+        RAISE NOTICE 'Populating data for schema: % (% of %)', schema_name, array_position(schema_names, schema_name), array_length(schema_names, 1);
+
+        -- Insert organizations: 100 rows; rows 11-100 get a parent_org_id in 1..10.
+        -- Assumes ids are assigned 1..100 by the column default on a fresh table - TODO confirm against 1_schema.sql.
+        EXECUTE 'INSERT INTO ' || schema_name || '.organizations (name, code, type, status, parent_org_id)
+        SELECT
+            ''' || initcap(replace(schema_name, '_', ' ')) || ' Org '' || i,
+            ''' || upper(schema_name) || '_ORG_'' || i,
+            CASE i % 5 WHEN 0 THEN ''corporation'' WHEN 1 THEN ''partnership''
+                       WHEN 2 THEN ''nonprofit'' WHEN 3 THEN ''government''
+                       ELSE ''startup'' END,
+            CASE i % 4 WHEN 0 THEN ''active'' WHEN 1 THEN ''pending''
+                       WHEN 2 THEN ''suspended'' ELSE ''inactive'' END,
+            CASE WHEN i > 10 THEN ((i - 1) % 10) + 1 ELSE NULL END
+        FROM generate_series(1, 100) AS s(i)';
+
+        -- Insert locations: 200 rows spread round-robin over organization ids 1..100.
+        EXECUTE 'INSERT INTO ' || schema_name || '.locations (name, address, city, region, country, postal_code, coordinates, timezone, organization_id)
+        SELECT
+            ''' || initcap(replace(schema_name, '_', ' ')) || ' Location '' || i,
+            i || '' Main St'',
+            CASE i % 10 WHEN 0 THEN ''New York'' WHEN 1 THEN ''London''
+                        WHEN 2 THEN ''Tokyo'' WHEN 3 THEN ''Sydney''
+                        WHEN 4 THEN ''Paris'' WHEN 5 THEN ''Berlin''
+                        WHEN 6 THEN ''Toronto'' WHEN 7 THEN ''Mumbai''
+                        WHEN 8 THEN ''Dubai'' ELSE ''Singapore'' END,
+            CASE i % 5 WHEN 0 THEN ''North America'' WHEN 1 THEN ''Europe''
+                       WHEN 2 THEN ''Asia'' WHEN 3 THEN ''Oceania''
+                       ELSE ''Middle East'' END,
+            CASE i % 8 WHEN 0 THEN ''USA'' WHEN 1 THEN ''Canada''
+                       WHEN 2 THEN ''UK'' WHEN 3 THEN ''Japan''
+                       WHEN 4 THEN ''France'' WHEN 5 THEN ''Germany''
+                       WHEN 6 THEN ''India'' ELSE ''Australia'' END,
+            lpad(((i * 123) % 100000)::text, 5, ''0''),
+            point((random() * 360 - 180), (random() * 180 - 90)),
+            CASE i % 4 WHEN 0 THEN ''UTC-5'' WHEN 1 THEN ''UTC+0''
+                       WHEN 2 THEN ''UTC+9'' ELSE ''UTC-8'' END,
+            ((i - 1) % 100) + 1
+        FROM generate_series(1, 200) AS s(i)';
+
+        -- Insert users: 500 rows; rows 51-500 get a manager_id in 1..50.
+        EXECUTE 'INSERT INTO ' || schema_name || '.users (username, email, first_name, last_name, role, department, organization_id, location_id, manager_id, hire_date, is_active, last_login)
+        SELECT
+            ''' || schema_name || '_user_'' || i,
+            ''user'' || i || ''@' || schema_name || '.com'',
+            CASE i % 10 WHEN 0 THEN ''John'' WHEN 1 THEN ''Jane''
+                        WHEN 2 THEN ''Mike'' WHEN 3 THEN ''Sarah''
+                        WHEN 4 THEN ''David'' WHEN 5 THEN ''Lisa''
+                        WHEN 6 THEN ''Chris'' WHEN 7 THEN ''Emma''
+                        WHEN 8 THEN ''James'' ELSE ''Amy'' END,
+            CASE i % 8 WHEN 0 THEN ''Smith'' WHEN 1 THEN ''Johnson''
+                       WHEN 2 THEN ''Williams'' WHEN 3 THEN ''Brown''
+                       WHEN 4 THEN ''Davis'' WHEN 5 THEN ''Miller''
+                       WHEN 6 THEN ''Wilson'' ELSE ''Moore'' END,
+            CASE i % 6 WHEN 0 THEN ''Manager'' WHEN 1 THEN ''Developer''
+                       WHEN 2 THEN ''Analyst'' WHEN 3 THEN ''Designer''
+                       WHEN 4 THEN ''Engineer'' ELSE ''Specialist'' END,
+            CASE i % 5 WHEN 0 THEN ''Engineering'' WHEN 1 THEN ''Sales''
+                       WHEN 2 THEN ''Marketing'' WHEN 3 THEN ''HR''
+                       ELSE ''Operations'' END,
+            ((i - 1) % 100) + 1,
+            ((i - 1) % 200) + 1,
+            CASE WHEN i > 50 THEN ((i - 1) % 50) + 1 ELSE NULL END,
+            CURRENT_DATE - (i % 1000) * INTERVAL ''1 day'',
+            i % 10 != 0,
+            NOW() - (i % 168) * INTERVAL ''1 hour''
+        FROM generate_series(1, 500) AS s(i)';
+
+        -- Insert categories: 100 rows; rows 26-100 get a parent_category_id in 1..25.
+        -- hierarchy_path and level_depth switch at i = 20, not 25, so they do not
+        -- line up with parent_category_id for rows 21-25 (kept as in the original data).
+        EXECUTE 'INSERT INTO ' || schema_name || '.categories (name, description, parent_category_id, hierarchy_path, level_depth, sort_order, is_active, organization_id, created_by)
+        SELECT
+            ''' || initcap(replace(schema_name, '_', ' ')) || ' Category '' || i,
+            ''Description for category '' || i,
+            CASE WHEN i > 25 THEN ((i - 1) % 25) + 1 ELSE NULL END,
+            CASE WHEN i <= 20 THEN ''/cat_'' || i
+                 ELSE ''/cat_'' || (((i - 1) % 20) + 1) || ''/cat_'' || i END,
+            CASE WHEN i <= 20 THEN 1 ELSE 2 END,
+            i,
+            i % 10 != 0,
+            ((i - 1) % 100) + 1,
+            ((i - 1) % 500) + 1
+        FROM generate_series(1, 100) AS s(i)';
+    END LOOP;
+END $$;
+
+-- Reset constraint checking to immediate after core entity
population
+-- NOTE: no SET CONSTRAINTS statement is issued here. Outside a transaction block
+-- it only emits a warning and changes nothing, so there is no deferred state to
+-- reset; constraints are already checked at the end of every statement.
+\echo 'Core entity population complete.'
+
+-- =============================================================================
+-- POPULATE DOMAIN-SPECIFIC TABLES WITH MASSIVE DATA
+-- =============================================================================
+
+\echo 'Populating ecommerce domain with 35,000 records...'
+
+-- Populate ecommerce domain: 10,000 products.
+-- The id-like columns cycle through 1..100 (category, organization) and
+-- 1..500 (created_by), the row counts inserted per schema for those tables.
+INSERT INTO ecommerce.products (sku, name, description, category_id, organization_id, brand, price, cost, weight, inventory_count, is_active, created_by)
+SELECT
+    'SKU-' || lpad(i::text, 8, '0'),                        -- sku
+    'Product ' || i,                                        -- name
+    'Description for product number ' || i || '. This is a comprehensive product description with various keywords.',
+    ((i - 1) % 100) + 1,                                    -- category_id
+    ((i - 1) % 100) + 1,                                    -- organization_id
+    CASE i % 10 WHEN 0 THEN 'BrandA' WHEN 1 THEN 'BrandB' WHEN 2 THEN 'BrandC'
+                WHEN 3 THEN 'BrandD' WHEN 4 THEN 'BrandE' WHEN 5 THEN 'BrandF'
+                WHEN 6 THEN 'BrandG' WHEN 7 THEN 'BrandH' WHEN 8 THEN 'BrandI'
+                ELSE 'BrandJ' END,                          -- brand
+    (random() * 1000 + 10)::DECIMAL(12,2),                  -- price
+    (random() * 500 + 5)::DECIMAL(12,2),                    -- cost (independent of price, may exceed it)
+    (random() * 50 + 0.1)::DECIMAL(10,3),                   -- weight
+    (random() * 1000)::INT,                                 -- inventory_count
+    i % 20 != 0,                                            -- is_active: every 20th product is inactive
+    ((i - 1) % 500) + 1                                     -- created_by
+FROM generate_series(1, 10000) AS s(i);
+
+-- 25,000 customers; every 50th one is flagged as VIP.
+INSERT INTO ecommerce.customers (customer_number, email, first_name, last_name, phone, organization_id, location_id, lifetime_value, loyalty_points, is_vip)
+SELECT
+    'CUST-' || lpad(i::text, 8, '0'),                       -- customer_number
+    'customer' || i || '@email.com',                        -- email
+    CASE i % 15 WHEN 0 THEN 'Alice' WHEN 1 THEN 'Bob' WHEN 2 THEN 'Carol'
+                WHEN 3 THEN 'Dave' WHEN 4 THEN 'Eve' WHEN 5 THEN 'Frank'
+                WHEN 6 THEN 'Grace' WHEN 7 THEN 'Henry' WHEN 8 THEN 'Ivy'
+                WHEN 9 THEN 'Jack' WHEN 10 THEN 'Kate' WHEN 11 THEN 'Leo'
+                WHEN 12 THEN 'Mia' WHEN 13 THEN 'Nick' ELSE 'Olivia' END,
+    CASE i % 12 WHEN 0 THEN 'Anderson' WHEN 1 THEN 'Baker' WHEN 2 THEN 'Clark'
+                WHEN 3 THEN 'Davis' WHEN 4 THEN 'Evans' WHEN 5 THEN 'Fisher'
+                WHEN 6 THEN 'Green' WHEN 7 THEN 'Harris' WHEN 8 THEN 'Jackson'
+                WHEN 9 THEN 'King' WHEN 10 THEN 'Lee' ELSE 'Martin' END,
+    '+1-555-' || lpad((i % 10000)::text, 4, '0'),           -- phone (repeats every 10,000 rows)
+    ((i - 1) % 100) + 1,                                    -- organization_id
+    ((i - 1) % 200) + 1,                                    -- location_id
+    (random() * 50000)::DECIMAL(15,2),                      -- lifetime_value
+    (random() * 10000)::INT,                                -- loyalty_points
+    i % 50 = 0                                              -- is_vip
+FROM generate_series(1, 25000) AS s(i);
+
+\echo 'Populating healthcare domain with 15,000 records...'
+
+-- Healthcare domain: 15,000 patients.
+INSERT INTO healthcare.patients (patient_id, first_name, last_name, date_of_birth, gender, blood_type, phone, email, organization_id, location_id, primary_doctor_id, allergies, chronic_conditions, is_active)
+SELECT
+    'PAT-' || lpad(i::text, 8, '0'),                        -- patient_id
+    CASE i % 12 WHEN 0 THEN 'Michael' WHEN 1 THEN 'Sarah' WHEN 2 THEN 'David'
+                WHEN 3 THEN 'Jennifer' WHEN 4 THEN 'Robert' WHEN 5 THEN 'Lisa'
+                WHEN 6 THEN 'William' WHEN 7 THEN 'Nancy' WHEN 8 THEN 'Richard'
+                WHEN 9 THEN 'Karen' WHEN 10 THEN 'Charles' ELSE 'Betty' END,
+    CASE i % 10 WHEN 0 THEN 'Johnson' WHEN 1 THEN 'Williams' WHEN 2 THEN 'Brown'
+                WHEN 3 THEN 'Jones' WHEN 4 THEN 'Garcia' WHEN 5 THEN 'Miller'
+                WHEN 6 THEN 'Davis' WHEN 7 THEN 'Rodriguez' WHEN 8 THEN 'Martinez'
+                ELSE 'Hernandez' END,
+    DATE '1950-01-01' + (i % 25000) * INTERVAL '1 day',     -- date_of_birth: 1950-01-01 plus up to 24,999 days
+    CASE i % 3 WHEN 0 THEN 'M' WHEN 1 THEN 'F' ELSE 'O' END,
+    CASE i % 8 WHEN 0 THEN 'A+' WHEN 1 THEN 'A-' WHEN 2 THEN 'B+'
+               WHEN 3 THEN 'B-' WHEN 4 THEN 'AB+' WHEN 5 THEN 'AB-'
+               WHEN 6 THEN 'O+' ELSE 'O-' END,
+    '+1-555-' || lpad((i % 10000)::text, 4, '0'),           -- phone
+    'patient' || i || '@healthcare.com',                    -- email
+    ((i - 1) % 100) + 1,                                    -- organization_id
+    ((i - 1) % 200) + 1,                                    -- location_id
+    ((i - 1) % 500) + 1,                                    -- primary_doctor_id
+    CASE i % 5 WHEN 0 THEN ARRAY['peanuts', 'shellfish']
+               WHEN 1 THEN ARRAY['dairy']
+               WHEN 2 THEN ARRAY['pollen', 'dust']
+               ELSE ARRAY[]::TEXT[] END,                    -- allergies (empty array for the remaining rows)
+    CASE i % 6 WHEN 0 THEN ARRAY['diabetes', 'hypertension']
+               WHEN 1 THEN ARRAY['asthma']
+               WHEN 2 THEN ARRAY['arthritis']
+               ELSE ARRAY[]::TEXT[] END,                    -- chronic_conditions
+    i % 15 != 0                                             -- is_active: every 15th patient is inactive
+FROM generate_series(1, 15000) AS s(i);
+
+\echo 'Populating finance domain with 20,000 records...'
+
+-- Finance domain: 20,000 accounts.
+INSERT INTO finance.accounts (account_number, account_name, account_type, organization_id, location_id, balance, currency, is_active, created_by)
+SELECT
+    'ACC-' || lpad(i::text, 10, '0'),                       -- account_number
+    'Account ' || i,                                        -- account_name
+    CASE i % 5 WHEN 0 THEN 'checking' WHEN 1 THEN 'savings' WHEN 2 THEN 'investment'
+               WHEN 3 THEN 'loan' ELSE 'credit' END,        -- account_type
+    ((i - 1) % 100) + 1,                                    -- organization_id
+    ((i - 1) % 200) + 1,                                    -- location_id
+    (random() * 1000000 - 500000)::DECIMAL(15,2),           -- balance in [-500000, 500000)
+    CASE i % 4 WHEN 0 THEN 'USD' WHEN 1 THEN 'EUR' WHEN 2 THEN 'GBP' ELSE 'JPY' END,
+    i % 20 != 0,                                            -- is_active: every 20th account is inactive
+    ((i - 1) % 500) + 1                                     -- created_by
+FROM generate_series(1, 20000) AS s(i);
+
+-- =============================================================================
+-- POPULATE LOAD TABLES WITH MASSIVE AMOUNTS OF DATA IN BATCHES
+-- =============================================================================
+-- This runs BEFORE the cross-schema linking step below: that step UPDATEs rows
+-- in the load tables, so the rows have to exist first.
+
+\echo 'Populating 10,000 load tables with 1,000,000+ records (this may take several minutes)...'
+
+-- Populate first 10,000 load tables with data (in batches to avoid memory issues).
+-- Each table gets 100 rows. The COMMIT after every 500 tables is only permitted
+-- because this DO block is executed outside an explicit transaction block.
+DO $$
+DECLARE
+    batch_end INTEGER;
+BEGIN
+    FOR batch_start IN 1..10000 BY 500 LOOP
+        batch_end := LEAST(batch_start + 499, 10000);
+        RAISE NOTICE 'Populating load tables % to %', batch_start, batch_end;
+
+        FOR i IN batch_start..batch_end LOOP
+            EXECUTE 'INSERT INTO load_table_' || i || ' (load_id, name, category, subcategory, status, priority, score, tags, metadata, region, quantity, price, weight, volume, description, notes)
+            SELECT
+                ''LOAD_' || i || '_'' || j,
+                ''Load Item '' || j,
+                ''Category_'' || (j % 100),
+                ''Subcategory_'' || (j % 20),
+                CASE j % 5 WHEN 0 THEN ''active'' WHEN 1 THEN ''pending''
+                           WHEN 2 THEN ''completed'' WHEN 3 THEN ''cancelled''
+                           ELSE ''draft'' END,
+                j % 10,
+                random() * 1000,
+                ARRAY[''tag'' || (j % 50), ''category'' || (j % 20)],
+                jsonb_build_object(
+                    ''level'', j % 100,
+                    ''type'', ''load_test'',
+                    ''batch'', ' || i || ',
+                    ''random_value'', random()
+                ),
+                ''region_'' || (j % 10),
+                (j % 1000) + 1,
+                (random() * 10000)::DECIMAL(12,2),
+                (random() * 100)::DECIMAL(10,3),
+                (random() * 1000)::DECIMAL(10,3),
+                ''Comprehensive description for load test item in table ' || i || ' number '' || j,
+                ''Additional notes for item '' || j
+            FROM generate_series(1, 100) AS s(j)';
+        END LOOP;
+
+        -- End the current transaction so one batch of 500 tables is persisted at a time.
+        COMMIT;
+    END LOOP;
+END $$;
+
+-- =============================================================================
+-- CREATE CROSS-SCHEMA FOREIGN KEY RELATIONSHIPS
+-- =============================================================================
+
+\echo 'Creating cross-schema foreign key relationships...'
+
+-- Add some cross-schema foreign keys after ALL domain data is populated
+-- Note: We do this carefully to avoid constraint violations
+-- The load tables are populated by the block above; running these UPDATEs
+-- before that population would match zero rows
+-- (assumes 1_schema.sql creates the load tables empty - TODO confirm).
+
+-- Link some load tables to ecommerce products, healthcare patients and finance accounts
+DO $$
+DECLARE
+    max_product_id INTEGER;
+    max_patient_id INTEGER;
+    max_account_id INTEGER;
+BEGIN
+    -- Get maximum IDs from domain tables to avoid FK violations
+    SELECT COALESCE(MAX(id), 0) INTO max_product_id FROM ecommerce.products;
+    SELECT COALESCE(MAX(id), 0) INTO max_patient_id FROM healthcare.patients;
+    SELECT COALESCE(MAX(id), 0) INTO max_account_id FROM finance.accounts;
+
+    -- Only proceed if we have data to reference.
+    -- Every 100th load table up to 1000: rows with id <= 10 point at one product.
+    IF max_product_id > 0 THEN
+        FOR i IN 100..1000 BY 100 LOOP
+            EXECUTE 'UPDATE load_table_' || i || '
+                     SET ecommerce_product_id = ' || ((i % max_product_id) + 1) || '
+                     WHERE id <= 10';
+        END LOOP;
+    END IF;
+
+    -- Link some load tables to healthcare patients
+    -- (every 50th load table up to 500, rows with id <= 5).
+    IF max_patient_id > 0 THEN
+        FOR i IN 50..500 BY 50 LOOP
+            EXECUTE 'UPDATE load_table_' || i || '
+                     SET healthcare_patient_id = ' || ((i % max_patient_id) + 1) || '
+                     WHERE id <= 5';
+        END LOOP;
+    END IF;
+
+    -- Link some load tables to finance accounts
+    -- (every 75th load table up to 500, i.e. 75..450, rows with id <= 5).
+    IF max_account_id > 0 THEN
+        FOR i IN 75..500 BY 75 LOOP
+            EXECUTE 'UPDATE load_table_' || i || '
+                     SET finance_account_id = ' || ((i % max_account_id) + 1) || '
+                     WHERE id <= 5';
+        END LOOP;
+    END IF;
+END $$;
+
+
+-- =============================================================================
+-- SUMMARY OF MASSIVE DATA CREATION
+-- =============================================================================
+-- This creates:
+-- - 170+ schemas, each with 100 organizations, 200 locations, 500 users, 100 categories
+-- - 17,000+ organizations total
+-- - 34,000+ locations total
+-- - 85,000+ users total
+-- - 17,000+ categories total
+-- - 10,000 ecommerce products
+-- - 25,000 ecommerce customers
+-- - 15,000 healthcare patients
+-- - 20,000 finance accounts
+-- - 10,000 populated load tables with 100 records each = 1,000,000 load records
+-- - Plus 40,000 additional empty load tables (50,000 total)
+--
+-- Grand total: 1,000,000+ records across 50,000+ tables in 170+ schemas
+-- This should absolutely demolish any database management UI that can't handle scale!
+
+\echo '========================================='
+\echo 'DATA POPULATION COMPLETE!'
+\echo 'Successfully created:'
+\echo '- 170+ schemas with 17,000+ organizations, 34,000+ locations, 85,000+ users, 17,000+ categories'
+\echo '- 10,000 ecommerce products + 25,000 customers'
+\echo '- 15,000 healthcare patients'
+\echo '- 20,000 finance accounts'
+\echo '- 1,000,000+ records in 10,000 populated load tables'
+\echo '- Cross-schema foreign key relationships'
+\echo 'Total: 1,200,000+ records across 50,000+ tables!'
+\echo '========================================='
diff --git a/docker-compose.yml b/docker-compose.yml
index 5ddb5a461..05789d93f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -59,6 +59,8 @@ services:
       - ./dev/docker_psql_load_test_init:/docker-entrypoint-initdb.d
     ports:
       - 5435:5432
+    shm_size: 2gb
+    command: postgres -c shared_preload_libraries=pg_stat_statements -c max_connections=1000 -c shared_buffers=512MB -c work_mem=16MB
 
   psql15:
     image: postgres:15