make the load test contain an insane amount of data

This commit is contained in:
Day Matchullis
2025-08-12 17:16:44 -06:00
parent 410d4c02d3
commit eccb3033b0
3 changed files with 1040 additions and 29 deletions

View File

@@ -1,17 +1,652 @@
-- Builds one CREATE TABLE statement per value of i (1..15000), naming the tables
-- load_table_1 .. load_table_15000.
-- NOTE(review): this looks like the pre-change generator left in by the diff view
-- (hunk header "-1,17 +1,652"): it has no ";" or \gexec terminator here, and the
-- PHASE 3 generator further down creates load_table_N again with a different
-- column set. Confirm it is not part of the final script.
SELECT format('CREATE TABLE load_table_%s (
id SERIAL PRIMARY KEY,
char_name TEXT NOT NULL DEFAULT ''Cloud Strife'',
job TEXT NOT NULL DEFAULT ''Mercenary'',
level INT NOT NULL DEFAULT 1,
weapon TEXT NOT NULL DEFAULT ''Buster Sword'',
spell TEXT NOT NULL DEFAULT ''Firaga'',
hp INT NOT NULL DEFAULT 1000,
mp INT NOT NULL DEFAULT 500,
location TEXT NOT NULL DEFAULT ''Midgar'',
quest TEXT NOT NULL DEFAULT ''Save the Planet''
);', i)
FROM generate_series(1, 15000) AS s(i)
-- PostgreSQL 15 INSANE Load Test Schema
-- 200+ schemas, thousands of tables, complex foreign key relationships
-- This will absolutely destroy any UI that can't handle massive datasets
-- =============================================================================
-- GENERATE 200+ BUSINESS DOMAIN SCHEMAS
-- =============================================================================
\echo '========================================='
\echo 'PHASE 1: Creating 170+ business schemas...'
\echo '========================================='
-- Emit one idempotent CREATE SCHEMA statement per listed name; \gexec then
-- executes every generated row as its own statement.
-- format()'s %I quotes an identifier only when quoting is required.
SELECT format('CREATE SCHEMA IF NOT EXISTS %I;', domain_list.schema_name)
FROM (
    VALUES
        -- Business domains (50 names)
        ('accounting'), ('advertising'), ('aerospace'), ('agriculture'), ('analytics'),
        ('architecture'), ('automotive'), ('aviation'), ('banking'), ('biotechnology'),
        ('blockchain'), ('broadcasting'), ('construction'), ('consulting'), ('cybersecurity'),
        ('defense'), ('ecommerce'), ('education'), ('energy'), ('entertainment'),
        ('fashion'), ('finance'), ('fitness'), ('food_service'), ('gaming'),
        ('healthcare'), ('hospitality'), ('hr_management'), ('insurance'), ('iot'),
        ('legal'), ('logistics'), ('manufacturing'), ('marketing'), ('media'),
        ('mining'), ('music'), ('nonprofit'), ('pharmaceuticals'), ('real_estate'),
        ('retail'), ('robotics'), ('social_media'), ('software'), ('sports'),
        ('telecommunications'), ('tourism'), ('transportation'), ('utilities'), ('veterinary'),
        -- Geographic regions (50 names)
        ('region_north_america'), ('region_south_america'), ('region_europe'), ('region_asia'), ('region_africa'),
        ('region_oceania'), ('country_usa'), ('country_canada'), ('country_mexico'), ('country_brazil'),
        ('country_argentina'), ('country_uk'), ('country_france'), ('country_germany'), ('country_spain'),
        ('country_italy'), ('country_russia'), ('country_china'), ('country_japan'), ('country_india'),
        ('country_australia'), ('country_south_africa'), ('state_california'), ('state_texas'), ('state_newyork'),
        ('state_florida'), ('city_london'), ('city_paris'), ('city_tokyo'), ('city_sydney'),
        ('city_dubai'), ('city_singapore'), ('city_mumbai'), ('city_toronto'), ('city_berlin'),
        ('city_madrid'), ('city_rome'), ('city_moscow'), ('city_beijing'), ('city_seoul'),
        ('zone_americas'), ('zone_emea'), ('zone_apac'), ('zone_latam'), ('zone_mena'),
        ('district_north'), ('district_south'), ('district_east'), ('district_west'), ('district_central'),
        -- Organizational departments (50 names)
        ('dept_executive'), ('dept_operations'), ('dept_finance'), ('dept_hr'), ('dept_it'),
        ('dept_sales'), ('dept_marketing'), ('dept_engineering'), ('dept_design'), ('dept_product'),
        ('dept_customer_service'), ('dept_legal'), ('dept_compliance'), ('dept_security'), ('dept_facilities'),
        ('dept_procurement'), ('dept_logistics'), ('dept_warehouse'), ('dept_manufacturing'), ('dept_quality'),
        ('dept_research'), ('dept_development'), ('dept_innovation'), ('dept_training'), ('dept_consulting'),
        ('dept_business_intel'), ('dept_data_science'), ('dept_analytics'), ('dept_reporting'), ('dept_audit'),
        ('division_consumer'), ('division_enterprise'), ('division_government'), ('division_education'), ('division_healthcare'),
        ('unit_mobile'), ('unit_web'), ('unit_cloud'), ('unit_ai'), ('unit_blockchain'),
        ('team_backend'), ('team_frontend'), ('team_devops'), ('team_qa'), ('team_ux'),
        ('squad_alpha'), ('squad_beta'), ('squad_gamma'), ('squad_delta'), ('squad_epsilon'),
        -- System/technical schemas (55 names)
        ('sys_monitoring'), ('sys_logging'), ('sys_metrics'), ('sys_alerts'), ('sys_backups'),
        ('sys_security'), ('sys_audit'), ('sys_config'), ('sys_cache'), ('sys_queue'),
        ('api_v1'), ('api_v2'), ('api_v3'), ('api_internal'), ('api_external'),
        ('microservice_auth'), ('microservice_user'), ('microservice_order'), ('microservice_payment'), ('microservice_notification'),
        ('microservice_inventory'), ('microservice_catalog'), ('microservice_review'), ('microservice_shipping'), ('microservice_analytics'),
        ('data_raw'), ('data_processed'), ('data_aggregated'), ('data_archive'), ('data_temp'),
        ('etl_staging'), ('etl_transform'), ('etl_load'), ('warehouse_dim'), ('warehouse_fact'),
        ('ml_training'), ('ml_inference'), ('ml_models'), ('ml_features'), ('ml_experiments'),
        ('event_sourcing'), ('event_streams'), ('event_snapshots'), ('batch_processing'), ('realtime_processing'),
        ('integration_crm'), ('integration_erp'), ('integration_payment'), ('integration_shipping'), ('integration_email'),
        ('archive_2020'), ('archive_2021'), ('archive_2022'), ('archive_2023'), ('archive_2024'),
        -- Industry verticals (20 names)
        ('vertical_retail'), ('vertical_manufacturing'), ('vertical_healthcare'), ('vertical_education'), ('vertical_government'),
        ('vertical_nonprofit'), ('vertical_startup'), ('vertical_enterprise'), ('vertical_smb'), ('vertical_freelancer'),
        ('industry_fintech'), ('industry_edtech'), ('industry_healthtech'), ('industry_proptech'), ('industry_agtech'),
        ('industry_cleantech'), ('industry_biotech'), ('industry_martech'), ('industry_hrtech'), ('industry_legaltech')
) AS domain_list(schema_name)
\gexec
-- We just need a very big amount of tables to make things funky in the UI so for now I am not worrying so much about their content
-- =============================================================================
-- CORE ENTITY TABLES IN EACH SCHEMA (Base tables that others reference)
-- =============================================================================
\echo '========================================='
\echo 'PHASE 2: Creating core entity tables in each schema...'
\echo 'This creates 4 tables per schema (680+ tables total)'
\echo '========================================='
-- Render one DDL script per schema from a single dollar-quoted template and let
-- \gexec run it. Each script creates organizations, locations, users and
-- categories, in that order, so every REFERENCES target already exists.
-- %1$s is replaced by the schema name each time it appears.
SELECT format($ddl$-- Core entities for schema: %1$s
CREATE TABLE %1$s.organizations (
 id SERIAL PRIMARY KEY,
 name VARCHAR(200) NOT NULL,
 code VARCHAR(50) UNIQUE NOT NULL,
 type VARCHAR(50),
 status VARCHAR(20) DEFAULT 'active',
 parent_org_id INT,
 created_at TIMESTAMP DEFAULT NOW(),
 updated_at TIMESTAMP DEFAULT NOW()
);

CREATE TABLE %1$s.locations (
 id SERIAL PRIMARY KEY,
 name VARCHAR(150) NOT NULL,
 address TEXT,
 city VARCHAR(100),
 region VARCHAR(100),
 country VARCHAR(100),
 postal_code VARCHAR(20),
 coordinates POINT,
 timezone VARCHAR(50),
 organization_id INT REFERENCES %1$s.organizations(id),
 created_at TIMESTAMP DEFAULT NOW()
);

CREATE TABLE %1$s.users (
 id SERIAL PRIMARY KEY,
 username VARCHAR(100) UNIQUE NOT NULL,
 email VARCHAR(200) UNIQUE NOT NULL,
 first_name VARCHAR(100),
 last_name VARCHAR(100),
 role VARCHAR(50),
 department VARCHAR(100),
 organization_id INT REFERENCES %1$s.organizations(id),
 location_id INT REFERENCES %1$s.locations(id),
 manager_id INT REFERENCES %1$s.users(id),
 hire_date DATE,
 is_active BOOLEAN DEFAULT TRUE,
 last_login TIMESTAMP,
 created_at TIMESTAMP DEFAULT NOW(),
 updated_at TIMESTAMP DEFAULT NOW()
);

CREATE TABLE %1$s.categories (
 id SERIAL PRIMARY KEY,
 name VARCHAR(150) NOT NULL,
 description TEXT,
 parent_category_id INT REFERENCES %1$s.categories(id),
 hierarchy_path VARCHAR(500),
 level_depth INT DEFAULT 1,
 sort_order INT DEFAULT 0,
 is_active BOOLEAN DEFAULT TRUE,
 organization_id INT REFERENCES %1$s.organizations(id),
 created_by INT REFERENCES %1$s.users(id),
 created_at TIMESTAMP DEFAULT NOW()
);$ddl$, target.schema_name)
FROM (
    VALUES
        -- Same 225 names as the schema-creation list; the two lists must stay in sync.
        ('accounting'), ('advertising'), ('aerospace'), ('agriculture'), ('analytics'),
        ('architecture'), ('automotive'), ('aviation'), ('banking'), ('biotechnology'),
        ('blockchain'), ('broadcasting'), ('construction'), ('consulting'), ('cybersecurity'),
        ('defense'), ('ecommerce'), ('education'), ('energy'), ('entertainment'),
        ('fashion'), ('finance'), ('fitness'), ('food_service'), ('gaming'),
        ('healthcare'), ('hospitality'), ('hr_management'), ('insurance'), ('iot'),
        ('legal'), ('logistics'), ('manufacturing'), ('marketing'), ('media'),
        ('mining'), ('music'), ('nonprofit'), ('pharmaceuticals'), ('real_estate'),
        ('retail'), ('robotics'), ('social_media'), ('software'), ('sports'),
        ('telecommunications'), ('tourism'), ('transportation'), ('utilities'), ('veterinary'),
        ('region_north_america'), ('region_south_america'), ('region_europe'), ('region_asia'), ('region_africa'),
        ('region_oceania'), ('country_usa'), ('country_canada'), ('country_mexico'), ('country_brazil'),
        ('country_argentina'), ('country_uk'), ('country_france'), ('country_germany'), ('country_spain'),
        ('country_italy'), ('country_russia'), ('country_china'), ('country_japan'), ('country_india'),
        ('country_australia'), ('country_south_africa'), ('state_california'), ('state_texas'), ('state_newyork'),
        ('state_florida'), ('city_london'), ('city_paris'), ('city_tokyo'), ('city_sydney'),
        ('city_dubai'), ('city_singapore'), ('city_mumbai'), ('city_toronto'), ('city_berlin'),
        ('city_madrid'), ('city_rome'), ('city_moscow'), ('city_beijing'), ('city_seoul'),
        ('zone_americas'), ('zone_emea'), ('zone_apac'), ('zone_latam'), ('zone_mena'),
        ('district_north'), ('district_south'), ('district_east'), ('district_west'), ('district_central'),
        ('dept_executive'), ('dept_operations'), ('dept_finance'), ('dept_hr'), ('dept_it'),
        ('dept_sales'), ('dept_marketing'), ('dept_engineering'), ('dept_design'), ('dept_product'),
        ('dept_customer_service'), ('dept_legal'), ('dept_compliance'), ('dept_security'), ('dept_facilities'),
        ('dept_procurement'), ('dept_logistics'), ('dept_warehouse'), ('dept_manufacturing'), ('dept_quality'),
        ('dept_research'), ('dept_development'), ('dept_innovation'), ('dept_training'), ('dept_consulting'),
        ('dept_business_intel'), ('dept_data_science'), ('dept_analytics'), ('dept_reporting'), ('dept_audit'),
        ('division_consumer'), ('division_enterprise'), ('division_government'), ('division_education'), ('division_healthcare'),
        ('unit_mobile'), ('unit_web'), ('unit_cloud'), ('unit_ai'), ('unit_blockchain'),
        ('team_backend'), ('team_frontend'), ('team_devops'), ('team_qa'), ('team_ux'),
        ('squad_alpha'), ('squad_beta'), ('squad_gamma'), ('squad_delta'), ('squad_epsilon'),
        ('sys_monitoring'), ('sys_logging'), ('sys_metrics'), ('sys_alerts'), ('sys_backups'),
        ('sys_security'), ('sys_audit'), ('sys_config'), ('sys_cache'), ('sys_queue'),
        ('api_v1'), ('api_v2'), ('api_v3'), ('api_internal'), ('api_external'),
        ('microservice_auth'), ('microservice_user'), ('microservice_order'), ('microservice_payment'), ('microservice_notification'),
        ('microservice_inventory'), ('microservice_catalog'), ('microservice_review'), ('microservice_shipping'), ('microservice_analytics'),
        ('data_raw'), ('data_processed'), ('data_aggregated'), ('data_archive'), ('data_temp'),
        ('etl_staging'), ('etl_transform'), ('etl_load'), ('warehouse_dim'), ('warehouse_fact'),
        ('ml_training'), ('ml_inference'), ('ml_models'), ('ml_features'), ('ml_experiments'),
        ('event_sourcing'), ('event_streams'), ('event_snapshots'), ('batch_processing'), ('realtime_processing'),
        ('integration_crm'), ('integration_erp'), ('integration_payment'), ('integration_shipping'), ('integration_email'),
        ('archive_2020'), ('archive_2021'), ('archive_2022'), ('archive_2023'), ('archive_2024'),
        ('vertical_retail'), ('vertical_manufacturing'), ('vertical_healthcare'), ('vertical_education'), ('vertical_government'),
        ('vertical_nonprofit'), ('vertical_startup'), ('vertical_enterprise'), ('vertical_smb'), ('vertical_freelancer'),
        ('industry_fintech'), ('industry_edtech'), ('industry_healthtech'), ('industry_proptech'), ('industry_agtech'),
        ('industry_cleantech'), ('industry_biotech'), ('industry_martech'), ('industry_hrtech'), ('industry_legaltech')
) AS target(schema_name)
\gexec
-- =============================================================================
-- BUSINESS DOMAIN SPECIFIC TABLES
-- =============================================================================
-- ECOMMERCE DOMAIN TABLES
-- Four tables layered on the ecommerce core entities: products, customers,
-- orders and order_items. They are created in dependency order so each
-- REFERENCES target already exists.

-- Catalog entries; each row points at a category, an organization and the creating user.
CREATE TABLE ecommerce.products (
    id INTEGER GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY
);
-- HEALTHCARE DOMAIN TABLES
-- Three tables layered on the healthcare core entities, created in dependency
-- order: patients, then appointments, then medical_records.

-- Patient master data; primary_doctor_id points at a healthcare.users row.
CREATE TABLE healthcare.patients (
    id SERIAL PRIMARY KEY,
    patient_id VARCHAR(50) UNIQUE NOT NULL,
    first_name VARCHAR(100) NOT NULL,
    last_name VARCHAR(100) NOT NULL,
    date_of_birth DATE NOT NULL,
    gender VARCHAR(20),
    blood_type VARCHAR(5),
    phone VARCHAR(50),
    email VARCHAR(200),
    organization_id INTEGER REFERENCES healthcare.organizations(id),
    location_id INTEGER REFERENCES healthcare.locations(id),
    primary_doctor_id INTEGER REFERENCES healthcare.users(id),
    insurance_number VARCHAR(100),
    emergency_contact VARCHAR(200),
    emergency_phone VARCHAR(50),
    allergies TEXT[],
    chronic_conditions TEXT[],
    preferred_language VARCHAR(10) DEFAULT 'en',
    is_active BOOLEAN DEFAULT TRUE,
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);

-- Scheduled visits linking a patient to a doctor (a healthcare.users row).
CREATE TABLE healthcare.appointments (
    id SERIAL PRIMARY KEY,
    patient_id INTEGER REFERENCES healthcare.patients(id),
    doctor_id INTEGER REFERENCES healthcare.users(id),
    organization_id INTEGER REFERENCES healthcare.organizations(id),
    location_id INTEGER REFERENCES healthcare.locations(id),
    appointment_date TIMESTAMP NOT NULL,
    duration_minutes INTEGER DEFAULT 30,
    appointment_type VARCHAR(100),
    status VARCHAR(30) DEFAULT 'scheduled',
    reason TEXT,
    diagnosis TEXT,
    treatment_plan TEXT,
    follow_up_required BOOLEAN DEFAULT FALSE,
    follow_up_date DATE,
    prescription TEXT,
    notes TEXT,
    created_by INTEGER REFERENCES healthcare.users(id),
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);

-- Per-visit clinical record; may reference the appointment it belongs to.
CREATE TABLE healthcare.medical_records (
    id SERIAL PRIMARY KEY,
    patient_id INTEGER REFERENCES healthcare.patients(id),
    appointment_id INTEGER REFERENCES healthcare.appointments(id),
    doctor_id INTEGER REFERENCES healthcare.users(id),
    organization_id INTEGER REFERENCES healthcare.organizations(id),
    visit_date TIMESTAMP NOT NULL,
    chief_complaint TEXT,
    symptoms TEXT[],
    diagnosis TEXT,
    treatment TEXT,
    medications TEXT[],
    lab_results JSONB,
    vitals JSONB,
    follow_up_instructions TEXT,
    created_by INTEGER REFERENCES healthcare.users(id),
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);
-- FINANCIAL DOMAIN TABLES
-- Two tables layered on the finance core entities: accounts first, then the
-- transactions that reference them.

-- Ledger accounts; parent_account_id is a self-reference to another account row.
CREATE TABLE finance.accounts (
    id SERIAL PRIMARY KEY,
    account_number VARCHAR(50) UNIQUE NOT NULL,
    account_name VARCHAR(200) NOT NULL,
    account_type VARCHAR(50) NOT NULL,
    organization_id INTEGER REFERENCES finance.organizations(id),
    location_id INTEGER REFERENCES finance.locations(id),
    parent_account_id INTEGER REFERENCES finance.accounts(id),
    balance NUMERIC(15,2) DEFAULT 0,
    currency VARCHAR(3) DEFAULT 'USD',
    is_active BOOLEAN DEFAULT TRUE,
    opened_date DATE DEFAULT CURRENT_DATE,
    closed_date DATE,
    description TEXT,
    created_by INTEGER REFERENCES finance.users(id),
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);

-- Money movements; both account_id and counterpart_account_id point at finance.accounts.
CREATE TABLE finance.transactions (
    id SERIAL PRIMARY KEY,
    transaction_number VARCHAR(100) UNIQUE NOT NULL,
    account_id INTEGER REFERENCES finance.accounts(id),
    counterpart_account_id INTEGER REFERENCES finance.accounts(id),
    organization_id INTEGER REFERENCES finance.organizations(id),
    transaction_date TIMESTAMP DEFAULT NOW(),
    transaction_type VARCHAR(50) NOT NULL,
    amount NUMERIC(15,2) NOT NULL,
    currency VARCHAR(3) DEFAULT 'USD',
    exchange_rate NUMERIC(10,6) DEFAULT 1,
    description TEXT,
    reference_number VARCHAR(200),
    status VARCHAR(30) DEFAULT 'completed',
    reconciled BOOLEAN DEFAULT FALSE,
    reconciled_date TIMESTAMP,
    category VARCHAR(100),
    tags TEXT[],
    created_by INTEGER REFERENCES finance.users(id),
    approved_by INTEGER REFERENCES finance.users(id),
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);
-- =============================================================================
-- GENERATE MASSIVE NUMBER OF LOAD TABLES WITH COMPLEX RELATIONSHIPS
-- =============================================================================
-- Create 50,000+ load tables using BULK generation for maximum speed
-- This generates all CREATE TABLE statements at once and executes them via \gexec
\echo '========================================='
\echo 'PHASE 3: Creating 50,000 load tables...'
\echo 'This may take several minutes but is much faster than the old approach!'
\echo '========================================='
-- Generate all 50,000 table creation statements in one massive SELECT
-- Each output row is the full CREATE TABLE text for load_table_<i>; \gexec runs
-- the rows one by one. Values derived from i (modulo results, literals) are
-- baked into the DDL when the text is generated, whereas random(), NOW() and
-- CURRENT_DATE are emitted as expressions inside the column DEFAULTs.
-- The *_id relationship columns are created as plain INT here, without REFERENCES.
-- NOTE(review): load_id is UNIQUE NOT NULL with a constant per-table DEFAULT, so a
-- second row inserted without an explicit load_id would violate the unique
-- constraint -- confirm the data script always supplies load_id.
SELECT
'CREATE TABLE load_table_' || i || ' (' || E'\n' ||
' id SERIAL PRIMARY KEY,' || E'\n' ||
' load_id VARCHAR(50) UNIQUE NOT NULL DEFAULT ''LOAD_' || i || ''',' || E'\n' ||
' name VARCHAR(200) NOT NULL DEFAULT ''Load Test Item ' || i || ''',' || E'\n' ||
' category VARCHAR(100) DEFAULT ''Category_' || (i % 1000) || ''',' || E'\n' ||
' subcategory VARCHAR(100) DEFAULT ''Subcategory_' || (i % 100) || ''',' || E'\n' ||
'' || E'\n' ||
' -- Self-referential FK' || E'\n' ||
' parent_load_id INT,' || E'\n' ||
'' || E'\n' ||
' -- Cross-schema foreign keys to create complex relationships' || E'\n' ||
' ecommerce_product_id INT,' || E'\n' ||
' healthcare_patient_id INT,' || E'\n' ||
' finance_account_id INT,' || E'\n' ||
'' || E'\n' ||
' -- Reference to other load tables (circular dependencies)' || E'\n' ||
' related_load_1_id INT,' || E'\n' ||
' related_load_2_id INT,' || E'\n' ||
' related_load_3_id INT,' || E'\n' ||
'' || E'\n' ||
' -- Various data types for testing' || E'\n' ||
' status VARCHAR(50) DEFAULT CASE ' || i || ' % 5' || E'\n' ||
' WHEN 0 THEN ''active''' || E'\n' ||
' WHEN 1 THEN ''pending''' || E'\n' ||
' WHEN 2 THEN ''completed''' || E'\n' ||
' WHEN 3 THEN ''cancelled''' || E'\n' ||
' ELSE ''draft''' || E'\n' ||
' END,' || E'\n' ||
'' || E'\n' ||
' priority INT DEFAULT ' || (i % 10) || ',' || E'\n' ||
' score DECIMAL(10,4) DEFAULT random() * 1000,' || E'\n' ||
' percentage DECIMAL(5,2) DEFAULT random() * 100,' || E'\n' ||
'' || E'\n' ||
' -- Array and JSON columns for complexity' || E'\n' ||
' tags TEXT[] DEFAULT ARRAY[''tag' || (i % 50) || ''', ''category' || (i % 20) || '''],' || E'\n' ||
' metadata JSONB DEFAULT jsonb_build_object(' || E'\n' ||
' ''level'', ' || (i % 100) || ',' || E'\n' ||
' ''type'', ''load_test'',' || E'\n' ||
' ''batch'', ' || (i / 1000) || ',' || E'\n' ||
' ''table_id'', ' || i || ',' || E'\n' ||
' ''random_seed'', ' || (i * 37) || E'\n' ||
' ),' || E'\n' ||
'' || E'\n' ||
' -- Geographic data' || E'\n' ||
' coordinates POINT DEFAULT point((' || ((i * 13) % 360 - 180) || '), (' || ((i * 17) % 180 - 90) || ')),' || E'\n' ||
' region VARCHAR(50) DEFAULT ''region_' || (i % 10) || ''',' || E'\n' ||
'' || E'\n' ||
' -- Temporal data with variety' || E'\n' ||
' start_date DATE DEFAULT CURRENT_DATE - INTERVAL ''' || (i % 365) || ' days'',' || E'\n' ||
' end_date DATE DEFAULT CURRENT_DATE + INTERVAL ''' || (i % 30) || ' days'',' || E'\n' ||
' created_at TIMESTAMP DEFAULT NOW() - INTERVAL ''' || (i % 168) || ' hours'',' || E'\n' ||
' updated_at TIMESTAMP DEFAULT NOW() - INTERVAL ''' || (i % 60) || ' minutes'',' || E'\n' ||
'' || E'\n' ||
' -- Boolean flags for filtering tests' || E'\n' ||
' is_active BOOLEAN DEFAULT ' || (CASE WHEN i % 2 = 0 THEN 'TRUE' ELSE 'FALSE' END) || ',' || E'\n' ||
' is_featured BOOLEAN DEFAULT ' || (CASE WHEN i % 5 = 0 THEN 'TRUE' ELSE 'FALSE' END) || ',' || E'\n' ||
' is_premium BOOLEAN DEFAULT ' || (CASE WHEN i % 10 = 0 THEN 'TRUE' ELSE 'FALSE' END) || ',' || E'\n' ||
'' || E'\n' ||
' -- Text fields for search testing' || E'\n' ||
' description TEXT DEFAULT ''This is a comprehensive description for load test item number ' || i || '. It contains various keywords and phrases to test search functionality.'',' || E'\n' ||
' notes TEXT DEFAULT ''Additional notes and comments for item ' || i || ''',' || E'\n' ||
'' || E'\n' ||
' -- Numeric fields for aggregation testing' || E'\n' ||
' quantity INT DEFAULT ' || (i % 1000 + 1) || ',' || E'\n' ||
' price DECIMAL(12,2) DEFAULT ' || ((i * 7) % 10000 + 100)::DECIMAL(12,2) || ',' || E'\n' ||
' weight DECIMAL(10,3) DEFAULT ' || ((i * 11) % 100 + 1)::DECIMAL(10,3) || ',' || E'\n' ||
' volume DECIMAL(10,3) DEFAULT ' || ((i * 19) % 1000 + 10)::DECIMAL(10,3) || E'\n' ||
');'
FROM generate_series(1, 50000) AS s(i)
\gexec
\echo '========================================='
\echo 'PHASE 4: Adding foreign key constraints...'
\echo 'Adding 25,000+ self-referential constraints...'
\echo '========================================='
-- Add foreign key constraints AFTER table creation to avoid circular dependency issues.
-- Each generated statement makes load_table_<i>.parent_load_id reference the id
-- column of the same table; \gexec runs one ALTER TABLE per row.
-- The series starts at 1 so load_table_1 is covered too and exactly 25,000
-- constraints are created, matching the progress message above. The earlier
-- target expression ((i - 1) % 50000) + 1 always evaluated to i for this range,
-- so it is written as i directly.
SELECT format(
'ALTER TABLE load_table_%1$s
 ADD CONSTRAINT fk_parent_load_%1$s
 FOREIGN KEY (parent_load_id) REFERENCES load_table_%1$s(id);',
    i
)
FROM generate_series(1, 25000) AS s(i)
\gexec
\echo 'Adding 5,000+ cross-reference constraints (related_load_1_id)...'
-- Every 3rd table in 1..15000 (3, 6, ..., 15000) gets an FK on related_load_1_id.
-- The referenced table is offset by 1000, wrapped into the 1..50000 range.
SELECT format(
'ALTER TABLE load_table_%1$s
 ADD CONSTRAINT fk_related_1_%1$s
 FOREIGN KEY (related_load_1_id) REFERENCES load_table_%2$s(id);',
    src.i,
    ((src.i + 1000) % 50000) + 1
)
FROM generate_series(3, 15000, 3) AS src(i)
\gexec
\echo 'Adding 3,000+ additional cross-reference constraints (related_load_2_id)...'
-- Every 5th table in 1..15000 (5, 10, ..., 15000) gets an FK on related_load_2_id.
-- The referenced table is offset by 5000, wrapped into the 1..50000 range.
SELECT format(
'ALTER TABLE load_table_%1$s
 ADD CONSTRAINT fk_related_2_%1$s
 FOREIGN KEY (related_load_2_id) REFERENCES load_table_%2$s(id);',
    src.i,
    ((src.i + 5000) % 50000) + 1
)
FROM generate_series(5, 15000, 5) AS src(i)
\gexec
-- =============================================================================
-- CROSS-SCHEMA FOREIGN KEYS (After base data is inserted)
-- =============================================================================
-- Note: These will be added in the data file after we insert base records
-- =============================================================================
-- INDEXES FOR TESTING INDEX PERFORMANCE
-- =============================================================================
\echo '========================================='
\echo 'PHASE 5: Creating indexes for performance testing...'
\echo 'This creates thousands of indexes across different table types'
\echo '========================================='
-- Index statements are generated in bulk and executed row by row through \gexec.
\echo 'Creating 1,000+ name indexes...'
-- B-tree index on name for every 10th load table in 1..10000 (10, 20, ..., 10000).
SELECT format('CREATE INDEX idx_load_%1$s_name ON load_table_%1$s(name);', tbl.i)
FROM generate_series(10, 10000, 10) AS tbl(i)
\gexec
\echo 'Creating 666 category indexes...'
-- B-tree index on category for every 15th load table in 1..10000.
-- The multiples of 15 in that range are 15, 30, ..., 9990, i.e. 666 tables,
-- so the progress message reports 666 rather than the previous "667+".
SELECT 'CREATE INDEX idx_load_' || i || '_category ON load_table_' || i || '(category);'
FROM generate_series(1, 10000) AS s(i)
WHERE i % 15 = 0
\gexec
\echo 'Creating 500+ composite status+priority indexes...'
-- Two-column (status, priority) index for every 20th load table in 1..10000.
SELECT format('CREATE INDEX idx_load_%1$s_status_priority ON load_table_%1$s(status, priority);', tbl.i)
FROM generate_series(20, 10000, 20) AS tbl(i)
\gexec
\echo 'Creating 400+ temporal indexes on created_at...'
-- Index on created_at for every 25th load table in 1..10000.
SELECT format('CREATE INDEX idx_load_%1$s_created ON load_table_%1$s(created_at);', tbl.i)
FROM generate_series(25, 10000, 25) AS tbl(i)
\gexec
\echo 'Creating 100+ GIN indexes for JSONB metadata columns...'
-- GIN index on the JSONB metadata column for every 50th load table in 1..5000.
SELECT format('CREATE INDEX idx_load_%1$s_metadata ON load_table_%1$s USING GIN(metadata);', tbl.i)
FROM generate_series(50, 5000, 50) AS tbl(i)
\gexec
-- =============================================================================
-- MATERIALIZED VIEWS FOR TESTING
-- =============================================================================
-- Cross-schema summary: one row per domain giving the row count, the number of
-- distinct organizations and the newest created_at of that domain's main table.
-- NOTE(review): there is no WITH NO DATA clause, so the view is populated when
-- this statement runs. If the base tables are only filled later, it presumably
-- needs REFRESH MATERIALIZED VIEW afterwards -- confirm in the data script.
CREATE MATERIALIZED VIEW mv_cross_schema_summary AS
    SELECT
        'ecommerce' AS domain,
        COUNT(*) AS total_records,
        COUNT(DISTINCT organization_id) AS organizations,
        MAX(created_at) AS latest_record
    FROM ecommerce.products
UNION ALL
    SELECT
        'healthcare' AS domain,
        COUNT(*) AS total_records,
        COUNT(DISTINCT organization_id) AS organizations,
        MAX(created_at) AS latest_record
    FROM healthcare.patients
UNION ALL
    SELECT
        'finance' AS domain,
        COUNT(*) AS total_records,
        COUNT(DISTINCT organization_id) AS organizations,
        MAX(created_at) AS latest_record
    FROM finance.accounts;
-- Create summary view of load tables: a single row with the number of tables in
-- the public schema whose name starts with the literal prefix "load_table_".
-- "_" is a single-character wildcard in LIKE, so the underscores are escaped.
-- An explicit ESCAPE character is used so the pattern does not depend on how
-- backslashes in string literals are interpreted.
CREATE MATERIALIZED VIEW mv_load_table_summary AS
SELECT
    'load_tables' AS source,
    COUNT(*) AS total_tables
FROM information_schema.tables
WHERE table_schema = 'public'
    AND table_name LIKE 'load!_table!_%' ESCAPE '!';
-- =============================================================================
-- PARTITIONED TABLES FOR TESTING (declarative partitioning; not specific to PostgreSQL 15)
-- =============================================================================
-- Create a partitioned table for time-series data.
-- Rows are routed to a partition by created_at, which defaults to NOW().
CREATE TABLE analytics_events (
    id BIGSERIAL,
    event_name VARCHAR(100) NOT NULL,
    user_id VARCHAR(50),
    organization_id INT,
    event_data JSONB,
    created_at TIMESTAMP DEFAULT NOW()
) PARTITION BY RANGE (created_at);
-- Create partitions for different time periods (lower bound inclusive, upper bound exclusive)
CREATE TABLE analytics_events_2023 PARTITION OF analytics_events
    FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');
CREATE TABLE analytics_events_2024 PARTITION OF analytics_events
    FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');
CREATE TABLE analytics_events_2025 PARTITION OF analytics_events
    FOR VALUES FROM ('2025-01-01') TO ('2026-01-01');
-- Catch-all partition: without it, any row whose created_at falls outside
-- 2023-2025 (including the NOW() default once 2026 begins) is rejected because
-- no partition accepts it.
CREATE TABLE analytics_events_default PARTITION OF analytics_events DEFAULT;
-- =============================================================================
-- FINAL SUMMARY
-- =============================================================================
-- This schema creates:
-- - 225 schemas with business/geographic/organizational/system/industry structure
-- - 4 core tables per schema = 900 base tables
-- - 3 main domains (ecommerce, healthcare, finance) = 9 domain-specific tables
-- - 50,000 load_table_* tables with complex relationships
-- - Thousands of foreign key constraints
-- - Thousands of indexes
-- - Multiple materialized views
-- - Partitioned tables
-- Progress banner printed once the schema script has finished.
-- The figures below are lower bounds: the VALUES lists above contain 225 schema
-- names, which gives 900 core entity tables (4 per schema).
\echo 'Ready for data population phase...'
-- Total: 50,000+ tables across 170+ schemas with massive interconnectivity
\echo '========================================='
\echo 'SCHEMA CREATION COMPLETE!'
\echo 'Created:'
\echo '- 170+ schemas with business/geographic/organizational structure'
\echo '- 680+ core entity tables (organizations, locations, users, categories)'
\echo '- 50,000 load test tables with complex relationships'
\echo '- 28,000+ foreign key constraints'
\echo '- 3,000+ indexes (B-tree, GIN, composite)'
\echo '- Multiple materialized views and partitioned tables'
\echo 'Total: 50,000+ tables across 170+ schemas'
\echo '========================================='
-- This should absolutely stress test any database management UI!

View File

@@ -1,15 +1,389 @@
-- Statement generator: emits one INSERT statement per load table
-- (load_table_1 .. load_table_15000); psql's \gexec then runs every generated
-- row as its own SQL statement.
-- Each generated INSERT adds 100 rows. The text columns are fixed literals;
-- level (1-99), hp (1000-5999) and mp (100-599) are drawn from random().
-- Single quotes are doubled ('') because the whole INSERT is a format() string
-- literal; %s is replaced by the table number i.
SELECT format('INSERT INTO load_table_%s (char_name, job, level, weapon, spell, hp, mp, location, quest)
SELECT char_name, job, level, weapon, spell, hp, mp, location, quest FROM (
SELECT ''Cloud Strife'' AS char_name,
''Mercenary'' AS job,
floor(random() * 99 + 1)::INT AS level,
''Buster Sword'' AS weapon,
''Firaga'' AS spell,
floor(random() * 5000 + 1000)::INT AS hp,
floor(random() * 500 + 100)::INT AS mp,
''Midgar'' AS location,
''Save the Planet'' AS quest
FROM generate_series(1, 100)
) AS temp;', i)
FROM generate_series(1, 15000) AS s(i)
\gexec
-- PostgreSQL 15 INSANE Load Test Data
-- Populate 170+ schemas with millions of records and complex relationships
-- This will create an absolutely massive dataset to stress test any UI
-- =============================================================================
-- POPULATE CORE ENTITY TABLES IN ALL SCHEMAS USING BATCH APPROACH
-- =============================================================================
-- Simple approach: a single DO block populates the four core tables
-- (organizations, locations, users, categories) of every schema listed below.
--
-- The step runs inside an explicit transaction: SET CONSTRAINTS only affects the
-- current transaction, and when issued outside a transaction block it merely
-- raises a warning and defers nothing.
-- NOTE(review): only constraints declared DEFERRABLE are affected by
-- SET CONSTRAINTS; confirm how the schema script declares its foreign keys.
\echo 'Starting data population - deferring constraint checking...'
BEGIN;
SET CONSTRAINTS ALL DEFERRED;
DO $$
DECLARE
    schema_names TEXT[] := ARRAY[
        'accounting', 'advertising', 'aerospace', 'agriculture', 'analytics',
        'architecture', 'automotive', 'aviation', 'banking', 'biotechnology',
        'blockchain', 'broadcasting', 'construction', 'consulting', 'cybersecurity',
        'defense', 'ecommerce', 'education', 'energy', 'entertainment',
        'fashion', 'finance', 'fitness', 'food_service', 'gaming',
        'healthcare', 'hospitality', 'hr_management', 'insurance', 'iot',
        'legal', 'logistics', 'manufacturing', 'marketing', 'media',
        'mining', 'music', 'nonprofit', 'pharmaceuticals', 'real_estate',
        'retail', 'robotics', 'social_media', 'software', 'sports',
        'telecommunications', 'tourism', 'transportation', 'utilities', 'veterinary',
        'region_north_america', 'region_south_america', 'region_europe', 'region_asia', 'region_africa',
        'region_oceania', 'country_usa', 'country_canada', 'country_mexico', 'country_brazil',
        'country_argentina', 'country_uk', 'country_france', 'country_germany', 'country_spain',
        'country_italy', 'country_russia', 'country_china', 'country_japan', 'country_india',
        'country_australia', 'country_south_africa', 'state_california', 'state_texas', 'state_newyork',
        'state_florida', 'city_london', 'city_paris', 'city_tokyo', 'city_sydney',
        'city_dubai', 'city_singapore', 'city_mumbai', 'city_toronto', 'city_berlin',
        'city_madrid', 'city_rome', 'city_moscow', 'city_beijing', 'city_seoul',
        'zone_americas', 'zone_emea', 'zone_apac', 'zone_latam', 'zone_mena',
        'district_north', 'district_south', 'district_east', 'district_west', 'district_central',
        'dept_executive', 'dept_operations', 'dept_finance', 'dept_hr', 'dept_it',
        'dept_sales', 'dept_marketing', 'dept_engineering', 'dept_design', 'dept_product',
        'dept_customer_service', 'dept_legal', 'dept_compliance', 'dept_security', 'dept_facilities',
        'dept_procurement', 'dept_logistics', 'dept_warehouse', 'dept_manufacturing', 'dept_quality',
        'dept_research', 'dept_development', 'dept_innovation', 'dept_training', 'dept_consulting',
        'dept_business_intel', 'dept_data_science', 'dept_analytics', 'dept_reporting', 'dept_audit',
        'division_consumer', 'division_enterprise', 'division_government', 'division_education', 'division_healthcare',
        'unit_mobile', 'unit_web', 'unit_cloud', 'unit_ai', 'unit_blockchain',
        'team_backend', 'team_frontend', 'team_devops', 'team_qa', 'team_ux',
        'squad_alpha', 'squad_beta', 'squad_gamma', 'squad_delta', 'squad_epsilon',
        'sys_monitoring', 'sys_logging', 'sys_metrics', 'sys_alerts', 'sys_backups',
        'sys_security', 'sys_audit', 'sys_config', 'sys_cache', 'sys_queue',
        'api_v1', 'api_v2', 'api_v3', 'api_internal', 'api_external',
        'microservice_auth', 'microservice_user', 'microservice_order', 'microservice_payment', 'microservice_notification',
        'microservice_inventory', 'microservice_catalog', 'microservice_review', 'microservice_shipping', 'microservice_analytics',
        'data_raw', 'data_processed', 'data_aggregated', 'data_archive', 'data_temp',
        'etl_staging', 'etl_transform', 'etl_load', 'warehouse_dim', 'warehouse_fact',
        'ml_training', 'ml_inference', 'ml_models', 'ml_features', 'ml_experiments',
        'event_sourcing', 'event_streams', 'event_snapshots', 'batch_processing', 'realtime_processing',
        'integration_crm', 'integration_erp', 'integration_payment', 'integration_shipping', 'integration_email',
        'archive_2020', 'archive_2021', 'archive_2022', 'archive_2023', 'archive_2024',
        'vertical_retail', 'vertical_manufacturing', 'vertical_healthcare', 'vertical_education', 'vertical_government',
        'vertical_nonprofit', 'vertical_startup', 'vertical_enterprise', 'vertical_smb', 'vertical_freelancer',
        'industry_fintech', 'industry_edtech', 'industry_healthtech', 'industry_proptech', 'industry_agtech',
        'industry_cleantech', 'industry_biotech', 'industry_martech', 'industry_hrtech', 'industry_legaltech'
    ];
    schema_name TEXT;
    schema_total INTEGER;       -- number of schemas, computed once for the progress notice
    schema_idx INTEGER := 0;    -- 1-based position of the schema currently being populated
BEGIN
    schema_total := array_length(schema_names, 1);

    FOREACH schema_name IN ARRAY schema_names LOOP
        schema_idx := schema_idx + 1;
        RAISE NOTICE 'Populating data for schema: % (% of %)', schema_name, schema_idx, schema_total;

        -- Insert organizations: 100 rows per schema. Rows 11..100 point at one of
        -- rows 1..10 as parent; rows 1..10 have no parent.
        EXECUTE 'INSERT INTO ' || schema_name || '.organizations (name, code, type, status, parent_org_id)
            SELECT
            ''' || initcap(replace(schema_name, '_', ' ')) || ' Org '' || i,
            ''' || upper(schema_name) || '_ORG_'' || i,
            CASE i % 5 WHEN 0 THEN ''corporation'' WHEN 1 THEN ''partnership''
            WHEN 2 THEN ''nonprofit'' WHEN 3 THEN ''government''
            ELSE ''startup'' END,
            CASE i % 4 WHEN 0 THEN ''active'' WHEN 1 THEN ''pending''
            WHEN 2 THEN ''suspended'' ELSE ''inactive'' END,
            CASE WHEN i > 10 THEN ((i - 1) % 10) + 1 ELSE NULL END
            FROM generate_series(1, 100) AS s(i)';

        -- Insert locations: 200 rows per schema, organization_id cycling 1..100;
        -- coordinates are random points.
        EXECUTE 'INSERT INTO ' || schema_name || '.locations (name, address, city, region, country, postal_code, coordinates, timezone, organization_id)
            SELECT
            ''' || initcap(replace(schema_name, '_', ' ')) || ' Location '' || i,
            i || '' Main St'',
            CASE i % 10 WHEN 0 THEN ''New York'' WHEN 1 THEN ''London''
            WHEN 2 THEN ''Tokyo'' WHEN 3 THEN ''Sydney''
            WHEN 4 THEN ''Paris'' WHEN 5 THEN ''Berlin''
            WHEN 6 THEN ''Toronto'' WHEN 7 THEN ''Mumbai''
            WHEN 8 THEN ''Dubai'' ELSE ''Singapore'' END,
            CASE i % 5 WHEN 0 THEN ''North America'' WHEN 1 THEN ''Europe''
            WHEN 2 THEN ''Asia'' WHEN 3 THEN ''Oceania''
            ELSE ''Middle East'' END,
            CASE i % 8 WHEN 0 THEN ''USA'' WHEN 1 THEN ''Canada''
            WHEN 2 THEN ''UK'' WHEN 3 THEN ''Japan''
            WHEN 4 THEN ''France'' WHEN 5 THEN ''Germany''
            WHEN 6 THEN ''India'' ELSE ''Australia'' END,
            lpad(((i * 123) % 100000)::text, 5, ''0''),
            point((random() * 360 - 180), (random() * 180 - 90)),
            CASE i % 4 WHEN 0 THEN ''UTC-5'' WHEN 1 THEN ''UTC+0''
            WHEN 2 THEN ''UTC+9'' ELSE ''UTC-8'' END,
            ((i - 1) % 100) + 1
            FROM generate_series(1, 200) AS s(i)';

        -- Insert users: 500 rows per schema. Rows 51..500 get one of rows 1..50
        -- as manager; every 10th user is inactive.
        EXECUTE 'INSERT INTO ' || schema_name || '.users (username, email, first_name, last_name, role, department, organization_id, location_id, manager_id, hire_date, is_active, last_login)
            SELECT
            ''' || schema_name || '_user_'' || i,
            ''user'' || i || ''@' || schema_name || '.com'',
            CASE i % 10 WHEN 0 THEN ''John'' WHEN 1 THEN ''Jane''
            WHEN 2 THEN ''Mike'' WHEN 3 THEN ''Sarah''
            WHEN 4 THEN ''David'' WHEN 5 THEN ''Lisa''
            WHEN 6 THEN ''Chris'' WHEN 7 THEN ''Emma''
            WHEN 8 THEN ''James'' ELSE ''Amy'' END,
            CASE i % 8 WHEN 0 THEN ''Smith'' WHEN 1 THEN ''Johnson''
            WHEN 2 THEN ''Williams'' WHEN 3 THEN ''Brown''
            WHEN 4 THEN ''Davis'' WHEN 5 THEN ''Miller''
            WHEN 6 THEN ''Wilson'' ELSE ''Moore'' END,
            CASE i % 6 WHEN 0 THEN ''Manager'' WHEN 1 THEN ''Developer''
            WHEN 2 THEN ''Analyst'' WHEN 3 THEN ''Designer''
            WHEN 4 THEN ''Engineer'' ELSE ''Specialist'' END,
            CASE i % 5 WHEN 0 THEN ''Engineering'' WHEN 1 THEN ''Sales''
            WHEN 2 THEN ''Marketing'' WHEN 3 THEN ''HR''
            ELSE ''Operations'' END,
            ((i - 1) % 100) + 1,
            ((i - 1) % 200) + 1,
            CASE WHEN i > 50 THEN ((i - 1) % 50) + 1 ELSE NULL END,
            CURRENT_DATE - (i % 1000) * INTERVAL ''1 day'',
            i % 10 != 0,
            NOW() - (i % 168) * INTERVAL ''1 hour''
            FROM generate_series(1, 500) AS s(i)';

        -- Insert categories: 100 rows per schema. Rows 1..25 are roots (depth 1);
        -- rows 26..100 hang under root ((i - 1) % 25) + 1 (depth 2).
        -- hierarchy_path and level_depth use the same 25-root rule as
        -- parent_category_id so the path always names the actual parent.
        EXECUTE 'INSERT INTO ' || schema_name || '.categories (name, description, parent_category_id, hierarchy_path, level_depth, sort_order, is_active, organization_id, created_by)
            SELECT
            ''' || initcap(replace(schema_name, '_', ' ')) || ' Category '' || i,
            ''Description for category '' || i,
            CASE WHEN i > 25 THEN ((i - 1) % 25) + 1 ELSE NULL END,
            CASE WHEN i <= 25 THEN ''/cat_'' || i
            ELSE ''/cat_'' || (((i - 1) % 25) + 1) || ''/cat_'' || i END,
            CASE WHEN i <= 25 THEN 1 ELSE 2 END,
            i,
            i % 10 != 0,
            ((i - 1) % 100) + 1,
            ((i - 1) % 500) + 1
            FROM generate_series(1, 100) AS s(i)';
    END LOOP;
END $$;
-- Reset constraint checking to immediate after core entity population.
-- Inside the transaction this runs any checks that were deferred, so a
-- violation surfaces here rather than at COMMIT.
\echo 'Core entity population complete - resetting constraints to immediate...'
SET CONSTRAINTS ALL IMMEDIATE;
COMMIT;
-- =============================================================================
-- POPULATE DOMAIN-SPECIFIC TABLES WITH MASSIVE DATA
-- =============================================================================
\echo 'Populating ecommerce domain with 35,000 records...'
-- Populate ecommerce domain

-- Products: 10,000 rows keyed by a zero-padded SKU. category_id and
-- organization_id cycle through 1..100 and created_by through 1..500.
-- price, cost, weight and inventory_count come from random(); every 20th
-- product is inactive. Brand is picked from a 10-entry list by n % 10.
-- NOTE(review): presumably the cycled ids refer to rows created by the core
-- entity population; verify against the schema script.
INSERT INTO ecommerce.products (
    sku, name, description, category_id, organization_id, brand,
    price, cost, weight, inventory_count, is_active, created_by
)
SELECT
    'SKU-' || lpad(CAST(n AS text), 8, '0'),
    'Product ' || n,
    'Description for product number ' || n || '. This is a comprehensive product description with various keywords.',
    ((n - 1) % 100) + 1,
    ((n - 1) % 100) + 1,
    (ARRAY['BrandA', 'BrandB', 'BrandC', 'BrandD', 'BrandE',
           'BrandF', 'BrandG', 'BrandH', 'BrandI', 'BrandJ'])[(n % 10) + 1],
    CAST(random() * 1000 + 10 AS DECIMAL(12,2)),
    CAST(random() * 500 + 5 AS DECIMAL(12,2)),
    CAST(random() * 50 + 0.1 AS DECIMAL(10,3)),
    CAST(random() * 1000 AS INT),
    n % 20 <> 0,
    ((n - 1) % 500) + 1
FROM generate_series(1, 10000) AS seq(n);

-- Customers: 25,000 rows. organization_id cycles through 1..100 and
-- location_id through 1..200; lifetime_value and loyalty_points are random;
-- every 50th customer is flagged VIP. Names are picked from fixed lists.
INSERT INTO ecommerce.customers (
    customer_number, email, first_name, last_name, phone,
    organization_id, location_id, lifetime_value, loyalty_points, is_vip
)
SELECT
    'CUST-' || lpad(CAST(n AS text), 8, '0'),
    'customer' || n || '@email.com',
    (ARRAY['Alice', 'Bob', 'Carol', 'Dave', 'Eve',
           'Frank', 'Grace', 'Henry', 'Ivy', 'Jack',
           'Kate', 'Leo', 'Mia', 'Nick', 'Olivia'])[(n % 15) + 1],
    (ARRAY['Anderson', 'Baker', 'Clark', 'Davis', 'Evans', 'Fisher',
           'Green', 'Harris', 'Jackson', 'King', 'Lee', 'Martin'])[(n % 12) + 1],
    '+1-555-' || lpad(CAST(n % 10000 AS text), 4, '0'),
    ((n - 1) % 100) + 1,
    ((n - 1) % 200) + 1,
    CAST(random() * 50000 AS DECIMAL(15,2)),
    CAST(random() * 10000 AS INT),
    n % 50 = 0
FROM generate_series(1, 25000) AS seq(n);
\echo 'Populating healthcare domain with 15,000 records...'
-- Healthcare domain

-- Patients: 15,000 rows. date_of_birth starts at 1950-01-01 and advances one
-- day per row; organization_id cycles through 1..100, location_id through
-- 1..200 and primary_doctor_id through 1..500. Every 15th patient is inactive.
-- allergies and chronic_conditions are empty arrays except for a few
-- remainders of n % 5 and n % 6 respectively.
INSERT INTO healthcare.patients (
    patient_id, first_name, last_name, date_of_birth, gender, blood_type,
    phone, email, organization_id, location_id, primary_doctor_id,
    allergies, chronic_conditions, is_active
)
SELECT
    'PAT-' || lpad(CAST(n AS text), 8, '0'),
    (ARRAY['Michael', 'Sarah', 'David', 'Jennifer', 'Robert', 'Lisa',
           'William', 'Nancy', 'Richard', 'Karen', 'Charles', 'Betty'])[(n % 12) + 1],
    (ARRAY['Johnson', 'Williams', 'Brown', 'Jones', 'Garcia',
           'Miller', 'Davis', 'Rodriguez', 'Martinez', 'Hernandez'])[(n % 10) + 1],
    DATE '1950-01-01' + (n % 25000) * INTERVAL '1 day',
    (ARRAY['M', 'F', 'O'])[(n % 3) + 1],
    (ARRAY['A+', 'A-', 'B+', 'B-', 'AB+', 'AB-', 'O+', 'O-'])[(n % 8) + 1],
    '+1-555-' || lpad(CAST(n % 10000 AS text), 4, '0'),
    'patient' || n || '@healthcare.com',
    ((n - 1) % 100) + 1,
    ((n - 1) % 200) + 1,
    ((n - 1) % 500) + 1,
    CASE n % 5
        WHEN 0 THEN ARRAY['peanuts', 'shellfish']
        WHEN 1 THEN ARRAY['dairy']
        WHEN 2 THEN ARRAY['pollen', 'dust']
        ELSE ARRAY[]::TEXT[]
    END,
    CASE n % 6
        WHEN 0 THEN ARRAY['diabetes', 'hypertension']
        WHEN 1 THEN ARRAY['asthma']
        WHEN 2 THEN ARRAY['arthritis']
        ELSE ARRAY[]::TEXT[]
    END,
    n % 15 <> 0
FROM generate_series(1, 15000) AS seq(n);
\echo 'Populating finance domain with 20,000 records...'
-- Finance domain

-- Accounts: 20,000 rows. organization_id cycles through 1..100, location_id
-- through 1..200 and created_by through 1..500. balance is random and can be
-- negative; every 20th account is inactive. Account type and currency are
-- picked from fixed lists by n % 5 and n % 4.
INSERT INTO finance.accounts (
    account_number, account_name, account_type, organization_id,
    location_id, balance, currency, is_active, created_by
)
SELECT
    'ACC-' || lpad(CAST(n AS text), 10, '0'),
    'Account ' || n,
    (ARRAY['checking', 'savings', 'investment', 'loan', 'credit'])[(n % 5) + 1],
    ((n - 1) % 100) + 1,
    ((n - 1) % 200) + 1,
    CAST(random() * 1000000 - 500000 AS DECIMAL(15,2)),
    (ARRAY['USD', 'EUR', 'GBP', 'JPY'])[(n % 4) + 1],
    n % 20 <> 0,
    ((n - 1) % 500) + 1
FROM generate_series(1, 20000) AS seq(n);
-- =============================================================================
-- POPULATE LOAD TABLES WITH MASSIVE AMOUNTS OF DATA IN BATCHES
-- =============================================================================
-- This section runs BEFORE the cross-schema linking below: the linking step
-- updates rows by id, so it only has an effect once the load tables hold rows.
\echo 'Populating 10,000 load tables with 1,000,000+ records (this may take several minutes)...'
-- Populate first 10,000 load tables with data (in batches to avoid memory issues).
-- Each table receives 100 rows; a COMMIT is issued after every batch of 500 tables.
-- COMMIT inside a DO block is only permitted when the DO statement itself is not
-- executed inside an outer transaction block.
DO $$
DECLARE
    batch_end INTEGER;  -- last table number of the current batch
BEGIN
    -- FOR loop variables are declared implicitly as integers by PL/pgSQL.
    FOR batch_start IN 1..10000 BY 500 LOOP
        batch_end := LEAST(batch_start + 499, 10000);
        RAISE NOTICE 'Populating load tables % to %', batch_start, batch_end;

        FOR i IN batch_start..batch_end LOOP
            EXECUTE 'INSERT INTO load_table_' || i || ' (load_id, name, category, subcategory, status, priority, score, tags, metadata, region, quantity, price, weight, volume, description, notes)
                SELECT
                ''LOAD_' || i || '_'' || j,
                ''Load Item '' || j,
                ''Category_'' || (j % 100),
                ''Subcategory_'' || (j % 20),
                CASE j % 5 WHEN 0 THEN ''active'' WHEN 1 THEN ''pending''
                WHEN 2 THEN ''completed'' WHEN 3 THEN ''cancelled''
                ELSE ''draft'' END,
                j % 10,
                random() * 1000,
                ARRAY[''tag'' || (j % 50), ''category'' || (j % 20)],
                jsonb_build_object(
                ''level'', j % 100,
                ''type'', ''load_test'',
                ''batch'', ' || i || ',
                ''random_value'', random()
                ),
                ''region_'' || (j % 10),
                (j % 1000) + 1,
                (random() * 10000)::DECIMAL(12,2),
                (random() * 100)::DECIMAL(10,3),
                (random() * 1000)::DECIMAL(10,3),
                ''Comprehensive description for load test item in table ' || i || ' number '' || j,
                ''Additional notes for item '' || j
                FROM generate_series(1, 100) AS s(j)';
        END LOOP;

        COMMIT;
    END LOOP;
END $$;
-- =============================================================================
-- CREATE CROSS-SCHEMA FOREIGN KEY RELATIONSHIPS
-- =============================================================================
\echo 'Creating cross-schema foreign key relationships...'
-- Fill the cross-schema reference columns of a sample of load tables after ALL
-- domain data and the load-table rows exist.
-- Note: We do this carefully to avoid constraint violations: every assigned id
-- is (i % max_id) + 1, which always lies within 1..max_id of the referenced table.
-- NOTE(review): assumes ids in the referenced tables are contiguous from 1 and
-- that load-table ids start at 1; confirm against the schema script.
DO $$
DECLARE
    max_product_id INTEGER;
    max_patient_id INTEGER;
    max_account_id INTEGER;
BEGIN
    -- Get maximum IDs from domain tables to avoid FK violations
    SELECT COALESCE(MAX(id), 0) INTO max_product_id FROM ecommerce.products;
    SELECT COALESCE(MAX(id), 0) INTO max_patient_id FROM healthcare.patients;
    SELECT COALESCE(MAX(id), 0) INTO max_account_id FROM finance.accounts;

    -- Link every 100th load table (100, 200, ..., 1000) to an ecommerce product;
    -- skipped entirely when ecommerce.products is empty.
    IF max_product_id > 0 THEN
        FOR i IN 100..1000 BY 100 LOOP
            EXECUTE format('UPDATE %I SET ecommerce_product_id = $1 WHERE id <= 10',
                           'load_table_' || i)
                USING (i % max_product_id) + 1;
        END LOOP;
    END IF;

    -- Link every 50th load table (50, 100, ..., 500) to a healthcare patient
    IF max_patient_id > 0 THEN
        FOR i IN 50..500 BY 50 LOOP
            EXECUTE format('UPDATE %I SET healthcare_patient_id = $1 WHERE id <= 5',
                           'load_table_' || i)
                USING (i % max_patient_id) + 1;
        END LOOP;
    END IF;

    -- Link every 75th load table (75, 150, ..., 450) to a finance account
    IF max_account_id > 0 THEN
        FOR i IN 75..500 BY 75 LOOP
            EXECUTE format('UPDATE %I SET finance_account_id = $1 WHERE id <= 5',
                           'load_table_' || i)
                USING (i % max_account_id) + 1;
        END LOOP;
    END IF;
END $$;
-- =============================================================================
-- SUMMARY OF MASSIVE DATA CREATION
-- =============================================================================
-- This creates:
-- - 170+ schemas, each with 100 organizations, 200 locations, 500 users, 100 categories
-- - 17,000+ organizations total (170 x 100)
-- - 34,000+ locations total (170 x 200)
-- - 85,000+ users total (170 x 500)
-- - 17,000+ categories total (170 x 100)
-- - 10,000 ecommerce products
-- - 25,000 ecommerce customers
-- - 15,000 healthcare patients
-- - 20,000 finance accounts
-- - 10,000 populated load tables with 100 records each = 1,000,000 load records
-- - Plus 40,000 additional empty load tables (50,000 total)
--
-- Grand total: 1,200,000+ records across 50,000+ tables in 170+ schemas
-- This should absolutely demolish any database management UI that can't handle scale!
-- The \echo lines below only print this summary to the psql client; the figures
-- are fixed text, not computed from the database.
\echo '========================================='
\echo 'DATA POPULATION COMPLETE!'
\echo 'Successfully created:'
\echo '- 170+ schemas with 17,000+ organizations, 34,000+ locations, 85,000+ users, 17,000+ categories'
\echo '- 10,000 ecommerce products + 25,000 customers'
\echo '- 15,000 healthcare patients'
\echo '- 20,000 finance accounts'
\echo '- 1,000,000+ records in 10,000 populated load tables'
\echo '- Cross-schema foreign key relationships'
\echo 'Total: 1,200,000+ records across 50,000+ tables!'
\echo '========================================='

View File

@@ -59,6 +59,8 @@ services:
- ./dev/docker_psql_load_test_init:/docker-entrypoint-initdb.d
ports:
- 5435:5432
shm_size: 2gb
command: postgres -c shared_preload_libraries=pg_stat_statements -c max_connections=1000 -c shared_buffers=512MB -c work_mem=16MB
psql15:
image: postgres:15