SSB Benchmark性能测试
Star Schema Benchmark(以下简称SSB)是一个基于TPC-H的轻量级数据仓库场景下的星型模型测试集。该测试集为比较各类OLAP产品的基本性能指标提供了便利。本文为您介绍云原生数据仓库 AnalyticDB PostgreSQL 版在SSB 100 GB测试集上的性能表现。
前提条件
云原生数据仓库 AnalyticDB PostgreSQL 版实例和ECS实例位于同一VPC,详情请参见创建实例、创建ECS。
云原生数据仓库 AnalyticDB PostgreSQL 版实例和OSS存储空间Bucket位于同一地域,详情请参见创建存储空间。
已将ECS实例的私网IP地址添加到云原生数据仓库 AnalyticDB PostgreSQL 版实例的白名单中,详情请参见设置白名单。
已在ECS上安装psql,如需安装请参见psql。
使用阿里云账号或具备
AliyunGPDBFullAccess
、AliyunECSFullAccess
和AliyunOSSFullAccess
权限的RAM用户,并为其创建AccessKey ID和AccessKey Secret。详情请参见RAM用户、创建AccessKey。
测试环境
云原生数据仓库 AnalyticDB PostgreSQL 版实例规格 | ECS实例规格 |
|
|
测试数据生成
本次测试使用SSB提供的toolkit生成SSB数据。
完成工具的编译。
git clone https://github.com/vadimtk/ssb-dbgen.git cd ssb-dbgen make
生成100 GB测试数据。
FOR((i=1;i<=16;i++)); DO ./dbgen -s 100 -S $i -C 16 -f -T l & IF [ $(($i%16)) -eq 0 ] THEN wait echo $i FI DONE ./dbgen -s 100 -T p ./dbgen -s 100 -T c ./dbgen -s 100 -T s
使用ossutil将测试数据上传至OSS存储空间。ossutil工具的安装与使用,请参见命令行工具ossutil。
./ossutil64 cp -r <测试数据所在的路径> <OSS Bucket路径> --exclude "*.tbl*"
示例如下。
./ossutil64 cp -r /mnt/dbqgen/ oss://testBucketName --exclude "*.tbl*"
创建数据表。
-- 创建对应表 CREATE TABLE customer --客户信息表 ( C_CUSTKEY int, C_NAME text, C_ADDRESS text, C_CITY text, C_NATION text, C_REGION text, C_PHONE text, C_MKTSEGMENT text ) USING beam WITH(compresstype=auto, compresslevel=1) DISTRIBUTED BY(C_CUSTKEY) ORDER BY (C_CUSTKEY); CREATE TABLE lineorder --商品订单明细表 ( LO_ORDERKEY bigint, LO_LINENUMBER int, LO_CUSTKEY int, LO_PARTKEY int, LO_SUPPKEY int, LO_ORDERDATE Date, LO_ORDERPRIORITY text, LO_SHIPPRIORITY int, LO_QUANTITY int, LO_EXTENDEDPRICE int, LO_ORDTOTALPRICE int, LO_DISCOUNT int, LO_REVENUE int, LO_SUPPLYCOST int, LO_TAX int, LO_COMMITDATE Date, LO_SHIPMODE text ) USING beam WITH(compresstype=auto,compresslevel=1) DISTRIBUTED BY(LO_ORDERKEY) ORDER BY (LO_ORDERDATE, LO_ORDERKEY); CREATE TABLE part --零件信息表 ( P_PARTKEY int, P_NAME text, P_MFGR text, P_CATEGORY text, P_BRAND text, P_COLOR text, P_TYPE text, P_SIZE int, P_CONTAINER text ) USING beam WITH(compresstype=auto,compresslevel=1) DISTRIBUTED BY(P_PARTKEY) ORDER BY (P_PARTKEY); CREATE TABLE supplier --供应商信息表 ( S_SUPPKEY int, S_NAME text, S_ADDRESS text, S_CITY text, S_NATION text, S_REGION text, S_PHONE text ) USING beam WITH(compresstype=auto,compresslevel=1) DISTRIBUTED BY(S_SUPPKEY) ORDER BY (S_SUPPKEY);
创建OSS Foreign Table。利用OSS Foreign Table将OSS上的测试数据导入步骤4创建的表。
Endpoint
和Bucket
需指定为测试数据所在的OSS Bucket名称和Endpoint。获取Endpoint
和Bucket
请参见获取OSS Bucket信息。ID
和Key
即为阿里云账号或RAM用户的AccessKey ID和AccessKey Secret。获取AccessKey ID和AccessKey Secret请参见创建AccessKey。
CREATE SERVER oss_serv FOREIGN DATA WRAPPER oss_fdw OPTIONS ( endpoint 'oss-cn-hang****.aliyuncs.com', bucket 'testBucketName' ); CREATE USER MAPPING FOR PUBLIC SERVER oss_serv OPTIONS ( id 'LTAI5t5uGDuSBRXd6GLu****', key 'qlPCnIB2g8c3YbPWpHvMyNC5PX****' ); CREATE FOREIGN TABLE ext_customer ( C_CUSTKEY int, C_NAME text, C_ADDRESS text, C_CITY text, C_NATION text, C_REGION text, C_PHONE text, C_MKTSEGMENT text ) server oss_serv options ( filepath 'ssb100g/customer.tbl', format 'csv', DELIMITER '|' ); CREATE FOREIGN TABLE ext_lineorder ( LO_ORDERKEY bigint, LO_LINENUMBER int, LO_CUSTKEY int, LO_PARTKEY int, LO_SUPPKEY int, LO_ORDERDATE Date, LO_ORDERPRIORITY text, LO_SHIPPRIORITY int, LO_QUANTITY int, LO_EXTENDEDPRICE int, LO_ORDTOTALPRICE int, LO_DISCOUNT int, LO_REVENUE int, LO_SUPPLYCOST int, LO_TAX int, LO_COMMITDATE Date, LO_SHIPMODE text ) server oss_serv options ( prefix 'ssb100g/lineorder_', format 'csv', DELIMITER '|' ); CREATE FOREIGN TABLE ext_part ( P_PARTKEY int, P_NAME text, P_MFGR text, P_CATEGORY text, P_BRAND text, P_COLOR text, P_TYPE text, P_SIZE int, P_CONTAINER text ) server oss_serv options ( filepath 'ssb100g/part.tbl', format 'csv', DELIMITER '|' ); CREATE FOREIGN TABLE ext_supplier ( S_SUPPKEY int, S_NAME text, S_ADDRESS text, S_CITY text, S_NATION text, S_REGION text, S_PHONE text ) server oss_serv options ( filepath 'ssb100g/supplier.tbl', format 'csv', DELIMITER '|' ); --导入数据 INSERT INTO part SELECT * FROM ext_part; INSERT INTO supplier SELECT * FROM ext_supplier; INSERT INTO customer SELECT * FROM ext_customer; INSERT INTO lineorder SELECT * FROM ext_lineorder; --删除外表 DROP EXTERNAL TABLE ext_part; DROP EXTERNAL TABLE ext_supplier; DROP EXTERNAL TABLE ext_customer; DROP EXTERNAL TABLE ext_lineorder;
关联以上四张基础表生成flat表的数据。
CREATE TABLE lineorder_flat ( LO_ORDERKEY bigint NOT NULL, LO_LINENUMBER int NOT NULL, LO_CUSTKEY int NOT NULL, LO_PARTKEY int NOT NULL, LO_SUPPKEY int NOT NULL, LO_ORDERDATE Date NOT NULL, LO_ORDERPRIORITY text NOT NULL, LO_SHIPPRIORITY int NOT NULL, LO_QUANTITY int NOT NULL, LO_EXTENDEDPRICE int NOT NULL, LO_ORDTOTALPRICE int NOT NULL, LO_DISCOUNT int NOT NULL, LO_REVENUE int NOT NULL, LO_SUPPLYCOST int NOT NULL, LO_TAX int NOT NULL, LO_COMMITDATE Date NOT NULL, LO_SHIPMODE text NOT NULL, C_NAME text NOT NULL, C_ADDRESS text NOT NULL, C_CITY text NOT NULL, C_NATION text NOT NULL, C_REGION text NOT NULL, C_PHONE text NOT NULL, C_MKTSEGMENT text NOT NULL, S_NAME text NOT NULL, S_ADDRESS text NOT NULL, S_CITY text NOT NULL, S_NATION text NOT NULL, S_REGION text NOT NULL, S_PHONE text NOT NULL, P_NAME text NOT NULL, P_MFGR text NOT NULL, P_CATEGORY text NOT NULL, P_BRAND text NOT NULL, P_COLOR text NOT NULL, P_TYPE text NOT NULL, P_SIZE int NOT NULL, P_CONTAINER text NOT NULL ) USING beam WITH(compresstype=auto, compresslevel=1) DISTRIBUTED BY(LO_ORDERKEY) ORDER BY(LO_ORDERDATE, LO_ORDERKEY); INSERT INTO lineorder_flat SELECT l.LO_ORDERKEY AS LO_ORDERKEY, l.LO_LINENUMBER AS LO_LINENUMBER, l.LO_CUSTKEY AS LO_CUSTKEY, l.LO_PARTKEY AS LO_PARTKEY, l.LO_SUPPKEY AS LO_SUPPKEY, l.LO_ORDERDATE AS LO_ORDERDATE, l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY, l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY, l.LO_QUANTITY AS LO_QUANTITY, l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE, l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE, l.LO_DISCOUNT AS LO_DISCOUNT, l.LO_REVENUE AS LO_REVENUE, l.LO_SUPPLYCOST AS LO_SUPPLYCOST, l.LO_TAX AS LO_TAX, l.LO_COMMITDATE AS LO_COMMITDATE, l.LO_SHIPMODE AS LO_SHIPMODE, c.C_NAME AS C_NAME, c.C_ADDRESS AS C_ADDRESS, c.C_CITY AS C_CITY, c.C_NATION AS C_NATION, c.C_REGION AS C_REGION, c.C_PHONE AS C_PHONE, c.C_MKTSEGMENT AS C_MKTSEGMENT, s.S_NAME AS S_NAME, s.S_ADDRESS AS S_ADDRESS, s.S_CITY AS S_CITY, s.S_NATION AS S_NATION, s.S_REGION AS S_REGION, s.S_PHONE AS S_PHONE, p.P_NAME AS P_NAME, p.P_MFGR AS P_MFGR, p.P_CATEGORY AS P_CATEGORY, p.P_BRAND AS P_BRAND, p.P_COLOR AS P_COLOR, p.P_TYPE AS P_TYPE, p.P_SIZE AS P_SIZE, p.P_CONTAINER AS P_CONTAINER FROM lineorder AS l INNER JOIN customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY INNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY INNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY;
在执行SQL后,后台会对该表采集统计信息。统计信息采集完成后,可开始测试查询。如果以下查询不返回结果,说明统计信息已采集完成。
SELECT datname, query, state FROM pg_stat_activity WHERE state <> 'idle' AND backend_type LIKE '%autovacuum%';
测试查询
\timing
--Q1.1
SELECT SUM(lo_extendedprice * lo_discount) AS revenue
FROM lineorder_flat
WHERE lo_orderdate >= '1993-01-01' AND lo_orderdate <= '1993-12-31'
AND lo_discount BETWEEN 1 AND 3 AND lo_quantity < 25;
--Q1.2
SELECT SUM(lo_extendedprice * lo_discount) AS revenue
FROM lineorder_flat
WHERE lo_orderdate >= '1994-01-01' AND lo_orderdate <= '1994-01-31'
AND lo_discount BETWEEN 4 AND 6 AND lo_quantity BETWEEN 26 AND 35;
--Q1.3
SELECT SUM(lo_extendedprice * lo_discount) AS revenue
FROM lineorder_flat
WHERE extract(week FROM lo_orderdate) = 6 AND lo_orderdate >= '1994-01-01'
AND lo_orderdate <= '1994-12-31' AND lo_discount BETWEEN 5 AND 7
AND lo_quantity BETWEEN 26 AND 35;
--Q2.1
SELECT SUM(lo_revenue), extract(YEAR FROM lo_orderdate) AS YEAR, p_brand
FROM lineorder_flat
WHERE p_category = 'MFGR#12' AND s_region = 'AMERICA'
GROUP BY YEAR, p_brand
ORDER BY YEAR, p_brand
LIMIT 100;
--Q2.2
SELECT SUM(lo_revenue), extract(YEAR FROM lo_orderdate) AS YEAR, p_brand
FROM lineorder_flat
WHERE p_brand >= 'MFGR#2221' AND p_brand <= 'MFGR#2228' AND s_region = 'ASIA'
GROUP BY YEAR, p_brand
ORDER BY YEAR, p_brand
LIMIT 100;
--Q2.3
SELECT SUM(lo_revenue), extract(YEAR FROM lo_orderdate) AS YEAR, p_brand
FROM lineorder_flat
WHERE p_brand = 'MFGR#2239' AND s_region = 'EUROPE'
GROUP BY YEAR, p_brand
ORDER BY YEAR, p_brand
LIMIT 100;
--Q3.1
SELECT c_nation, s_nation, extract(YEAR FROM lo_orderdate) AS YEAR,
SUM(lo_revenue) AS revenue
FROM lineorder_flat
WHERE c_region = 'ASIA' AND s_region = 'ASIA' AND lo_orderdate >= '1992-01-01'
AND lo_orderdate <= '1997-12-31'
GROUP BY c_nation, s_nation, YEAR
ORDER BY YEAR ASC, revenue DESC
LIMIT 100;
--Q3.2
SELECT c_city, s_city, extract(YEAR FROM lo_orderdate) AS YEAR,
SUM(lo_revenue) AS revenue
FROM lineorder_flat
WHERE c_nation = 'UNITED STATES' AND s_nation = 'UNITED STATES'
AND lo_orderdate >= '1992-01-01' AND lo_orderdate <= '1997-12-31'
GROUP BY c_city, s_city, YEAR
ORDER BY YEAR ASC, revenue DESC
LIMIT 100;
--Q3.3
SELECT c_city, s_city, extract(YEAR FROM lo_orderdate) AS YEAR,
SUM(lo_revenue) AS revenue
FROM lineorder_flat
WHERE c_city IN ( 'UNITED KI1' ,'UNITED KI5')
AND s_city IN ( 'UNITED KI1' ,'UNITED KI5')
AND lo_orderdate >= '1992-01-01' AND lo_orderdate <= '1997-12-31'
GROUP BY c_city, s_city, YEAR
ORDER BY YEAR ASC, revenue DESC
LIMIT 100;
--Q3.4
SELECT c_city, s_city, extract(YEAR FROM lo_orderdate) AS YEAR,
SUM(lo_revenue) AS revenue
FROM lineorder_flat
WHERE c_city IN ('UNITED KI1', 'UNITED KI5')
AND s_city IN ( 'UNITED KI1', 'UNITED KI5')
AND lo_orderdate >= '1997-12-01' AND lo_orderdate <= '1997-12-31'
GROUP BY c_city, s_city, YEAR
ORDER BY YEAR ASC, revenue DESC
LIMIT 100;
--Q4.1
SELECT EXTRACT(YEAR FROM lo_orderdate) AS YEAR, c_nation,
SUM(lo_revenue - lo_supplycost) AS profit
FROM lineorder_flat
WHERE c_region = 'AMERICA' AND s_region = 'AMERICA'
AND p_mfgr IN ('MFGR#1', 'MFGR#2')
GROUP BY YEAR, c_nation
ORDER BY YEAR ASC,c_nation ASC LIMIT 100;
--Q4.2
SELECT extract(YEAR FROM lo_orderdate) AS YEAR, s_nation, p_category,
SUM(lo_revenue - lo_supplycost) AS profit
FROM lineorder_flat
WHERE c_region = 'AMERICA' AND s_region = 'AMERICA'
AND lo_orderdate >= '1997-01-01' AND lo_orderdate <= '1998-12-31'
AND p_mfgr IN ( 'MFGR#1' , 'MFGR#2')
GROUP BY YEAR, s_nation, p_category
ORDER BY YEAR ASC, s_nation ASC, p_category ASC
LIMIT 100;
--Q4.3
SELECT extract(YEAR FROM lo_orderdate) AS YEAR, s_city, p_brand,
SUM(lo_revenue - lo_supplycost) AS profit
FROM lineorder_flat
WHERE s_nation = 'UNITED STATES' AND lo_orderdate >= '1997-01-01'
AND lo_orderdate <= '1998-12-31' AND p_category = 'MFGR#14'
GROUP BY YEAR, s_city, p_brand
ORDER BY YEAR ASC, s_city ASC, p_brand ASC
LIMIT 100;
测试结果
测试查询所需时间如下。
Query | Time(ms) |
SUM | 6755.965 |
Q1.1 | 198.882 |
Q1.2 | 131.133 |
Q1.3 | 166.825 |
Q2.1 | 716.363 |
Q2.2 | 690.485 |
Q2.3 | 413.219 |
Q3.1 | 1119.772 |
Q3.2 | 772.267 |
Q3.3 | 441.492 |
Q3.4 | 43.653 |
Q4.1 | 1315.484 |
Q4.2 | 491.881 |
Q4.3 | 254.509 |