文档

TPC-DS测试集

更新时间:

本文介绍TPC-DS性能测试中将执行99个查询SQL,以下查询SQL源于TPC-DS基准,但不能等同于已发布的TPC-DS基准结果。

本文给出其中10个示例,如果需要全部查询SQL语句,请下载TPC-DS测试语句

  • SQL1

    WITH customer_total_return AS (
            SELECT sr_customer_sk AS ctr_customer_sk, sr_store_sk AS ctr_store_sk, SUM(SR_RETURN_AMT) AS ctr_total_return
            FROM store_returns, date_dim
            WHERE sr_returned_date_sk = d_date_sk
                AND d_year = 2000
            GROUP BY sr_customer_sk, sr_store_sk
        )
    SELECT c_customer_id
    FROM customer_total_return ctr1, store, customer
    WHERE ctr1.ctr_total_return > (
            SELECT AVG(ctr_total_return) * 1.2
            FROM customer_total_return ctr2
            WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk
        )
        AND s_store_sk = ctr1.ctr_store_sk
        AND s_state = 'TN'
        AND ctr1.ctr_customer_sk = c_customer_sk
    ORDER BY c_customer_id
    LIMIT 100;
  • SQL2

    WITH wscs AS (
      SELECT sold_date_sk, sales_price
      FROM (
       SELECT ws_sold_date_sk AS sold_date_sk, ws_ext_sales_price AS sales_price
       FROM web_sales
       UNION ALL
       SELECT cs_sold_date_sk AS sold_date_sk, cs_ext_sales_price AS sales_price
       FROM catalog_sales
      )
     ),
     wswscs AS (
      SELECT d_week_seq, SUM(CASE
        WHEN d_day_name = 'Sunday' THEN sales_price
        ELSE NULL
       END) AS sun_sales, SUM(CASE
        WHEN d_day_name = 'Monday' THEN sales_price
        ELSE NULL
       END) AS mon_sales
       , SUM(CASE
        WHEN d_day_name = 'Tuesday' THEN sales_price
        ELSE NULL
       END) AS tue_sales, SUM(CASE
        WHEN d_day_name = 'Wednesday' THEN sales_price
        ELSE NULL
       END) AS wed_sales
       , SUM(CASE
        WHEN d_day_name = 'Thursday' THEN sales_price
        ELSE NULL
       END) AS thu_sales, SUM(CASE
        WHEN d_day_name = 'Friday' THEN sales_price
        ELSE NULL
       END) AS fri_sales
       , SUM(CASE
        WHEN d_day_name = 'Saturday' THEN sales_price
        ELSE NULL
       END) AS sat_sales
      FROM wscs, date_dim
      WHERE d_date_sk = sold_date_sk
      GROUP BY d_week_seq
     )
    SELECT d_week_seq1, round(sun_sales1 / sun_sales2, 2)
     , round(mon_sales1 / mon_sales2, 2)
     , round(tue_sales1 / tue_sales2, 2)
     , round(wed_sales1 / wed_sales2, 2)
     , round(thu_sales1 / thu_sales2, 2)
     , round(fri_sales1 / fri_sales2, 2)
     , round(sat_sales1 / sat_sales2, 2)
    FROM (
     SELECT wswscs.d_week_seq AS d_week_seq1, sun_sales AS sun_sales1, mon_sales AS mon_sales1, tue_sales AS tue_sales1, wed_sales AS wed_sales1
      , thu_sales AS thu_sales1, fri_sales AS fri_sales1, sat_sales AS sat_sales1
     FROM wswscs, date_dim
     WHERE date_dim.d_week_seq = wswscs.d_week_seq
      AND d_year = 2001
    ) y, (
      SELECT wswscs.d_week_seq AS d_week_seq2, sun_sales AS sun_sales2, mon_sales AS mon_sales2, tue_sales AS tue_sales2, wed_sales AS wed_sales2
       , thu_sales AS thu_sales2, fri_sales AS fri_sales2, sat_sales AS sat_sales2
      FROM wswscs, date_dim
      WHERE date_dim.d_week_seq = wswscs.d_week_seq
       AND d_year = 2001 + 1
     ) z
    WHERE d_week_seq1 = d_week_seq2 - 53
    ORDER BY d_week_seq1;
  • SQL3

    SELECT dt.d_year, item.i_brand_id AS brand_id, item.i_brand AS brand, SUM(ss_ext_sales_price) AS sum_agg
    FROM date_dim dt, store_sales, item
    WHERE dt.d_date_sk = store_sales.ss_sold_date_sk
     AND store_sales.ss_item_sk = item.i_item_sk
     AND item.i_manufact_id = 128
     AND dt.d_moy = 11
    GROUP BY dt.d_year, item.i_brand, item.i_brand_id
    ORDER BY dt.d_year, sum_agg DESC, brand_id
    LIMIT 100;
  • SQL4

    WITH year_total AS (
      SELECT c_customer_id AS customer_id, c_first_name AS customer_first_name, c_last_name AS customer_last_name, c_preferred_cust_flag AS customer_preferred_cust_flag, c_birth_country AS customer_birth_country
       , c_login AS customer_login, c_email_address AS customer_email_address, d_year AS dyear
       , SUM((ss_ext_list_price - ss_ext_wholesale_cost - ss_ext_discount_amt + ss_ext_sales_price) / 2) AS year_total
       , 's' AS sale_type
      FROM customer, store_sales, date_dim
      WHERE c_customer_sk = ss_customer_sk
       AND ss_sold_date_sk = d_date_sk
      GROUP BY c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, c_login, c_email_address, d_year
      UNION ALL
      SELECT c_customer_id AS customer_id, c_first_name AS customer_first_name, c_last_name AS customer_last_name, c_preferred_cust_flag AS customer_preferred_cust_flag, c_birth_country AS customer_birth_country
       , c_login AS customer_login, c_email_address AS customer_email_address, d_year AS dyear
       , SUM((cs_ext_list_price - cs_ext_wholesale_cost - cs_ext_discount_amt + cs_ext_sales_price) / 2) AS year_total
       , 'c' AS sale_type
      FROM customer, catalog_sales, date_dim
      WHERE c_customer_sk = cs_bill_customer_sk
       AND cs_sold_date_sk = d_date_sk
      GROUP BY c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, c_login, c_email_address, d_year
      UNION ALL
      SELECT c_customer_id AS customer_id, c_first_name AS customer_first_name, c_last_name AS customer_last_name, c_preferred_cust_flag AS customer_preferred_cust_flag, c_birth_country AS customer_birth_country
       , c_login AS customer_login, c_email_address AS customer_email_address, d_year AS dyear
       , SUM((ws_ext_list_price - ws_ext_wholesale_cost - ws_ext_discount_amt + ws_ext_sales_price) / 2) AS year_total
       , 'w' AS sale_type
      FROM customer, web_sales, date_dim
      WHERE c_customer_sk = ws_bill_customer_sk
       AND ws_sold_date_sk = d_date_sk
      GROUP BY c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, c_login, c_email_address, d_year
     )
    SELECT t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name, t_s_secyear.customer_preferred_cust_flag
    FROM year_total t_s_firstyear, year_total t_s_secyear, year_total t_c_firstyear, year_total t_c_secyear, year_total t_w_firstyear, year_total t_w_secyear
    WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id
     AND t_s_firstyear.customer_id = t_c_secyear.customer_id
     AND t_s_firstyear.customer_id = t_c_firstyear.customer_id
     AND t_s_firstyear.customer_id = t_w_firstyear.customer_id
     AND t_s_firstyear.customer_id = t_w_secyear.customer_id
     AND t_s_firstyear.sale_type = 's'
     AND t_c_firstyear.sale_type = 'c'
     AND t_w_firstyear.sale_type = 'w'
     AND t_s_secyear.sale_type = 's'
     AND t_c_secyear.sale_type = 'c'
     AND t_w_secyear.sale_type = 'w'
     AND t_s_firstyear.dyear = 2001
     AND t_s_secyear.dyear = 2001 + 1
     AND t_c_firstyear.dyear = 2001
     AND t_c_secyear.dyear = 2001 + 1
     AND t_w_firstyear.dyear = 2001
     AND t_w_secyear.dyear = 2001 + 1
     AND t_s_firstyear.year_total > 0
     AND t_c_firstyear.year_total > 0
     AND t_w_firstyear.year_total > 0
     AND CASE
      WHEN t_c_firstyear.year_total > 0 THEN t_c_secyear.year_total / t_c_firstyear.year_total
      ELSE NULL
     END > CASE
      WHEN t_s_firstyear.year_total > 0 THEN t_s_secyear.year_total / t_s_firstyear.year_total
      ELSE NULL
     END
     AND CASE
      WHEN t_c_firstyear.year_total > 0 THEN t_c_secyear.year_total / t_c_firstyear.year_total
      ELSE NULL
     END > CASE
      WHEN t_w_firstyear.year_total > 0 THEN t_w_secyear.year_total / t_w_firstyear.year_total
      ELSE NULL
     END
    ORDER BY t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name, t_s_secyear.customer_preferred_cust_flag
    LIMIT 100;
  • SQL5

    WITH ssr AS (
      SELECT s_store_id, SUM(sales_price) AS sales, SUM(profit) AS profit
       , SUM(return_amt) AS RETURNS, SUM(net_loss) AS profit_loss
      FROM (
       SELECT ss_store_sk AS store_sk, ss_sold_date_sk AS date_sk, ss_ext_sales_price AS sales_price, ss_net_profit AS profit, CAST(0 AS decimal(7, 2)) AS return_amt
        , CAST(0 AS decimal(7, 2)) AS net_loss
       FROM store_sales
       UNION ALL
       SELECT sr_store_sk AS store_sk, sr_returned_date_sk AS date_sk, CAST(0 AS decimal(7, 2)) AS sales_price, CAST(0 AS decimal(7, 2)) AS profit, sr_return_amt AS return_amt
        , sr_net_loss AS net_loss
       FROM store_returns
      ) salesreturns, date_dim, store
      WHERE date_sk = d_date_sk
       AND d_date BETWEEN CAST('2000-08-23' AS date) AND CAST('2000-08-23' AS date) + INTERVAL '14' DAY
       AND store_sk = s_store_sk
      GROUP BY s_store_id
     ),
     csr AS (
      SELECT cp_catalog_page_id, SUM(sales_price) AS sales, SUM(profit) AS profit
       , SUM(return_amt) AS RETURNS, SUM(net_loss) AS profit_loss
      FROM (
       SELECT cs_catalog_page_sk AS page_sk, cs_sold_date_sk AS date_sk, cs_ext_sales_price AS sales_price, cs_net_profit AS profit, CAST(0 AS decimal(7, 2)) AS return_amt
        , CAST(0 AS decimal(7, 2)) AS net_loss
       FROM catalog_sales
       UNION ALL
       SELECT cr_catalog_page_sk AS page_sk, cr_returned_date_sk AS date_sk, CAST(0 AS decimal(7, 2)) AS sales_price, CAST(0 AS decimal(7, 2)) AS profit, cr_return_amount AS return_amt
        , cr_net_loss AS net_loss
       FROM catalog_returns
      ) salesreturns, date_dim, catalog_page
      WHERE date_sk = d_date_sk
       AND d_date BETWEEN CAST('2000-08-23' AS date) AND CAST('2000-08-23' AS date) + INTERVAL '14' DAY
       AND page_sk = cp_catalog_page_sk
      GROUP BY cp_catalog_page_id
     ),
     wsr AS (
      SELECT web_site_id, SUM(sales_price) AS sales, SUM(profit) AS profit
       , SUM(return_amt) AS RETURNS, SUM(net_loss) AS profit_loss
      FROM (
       SELECT ws_web_site_sk AS wsr_web_site_sk, ws_sold_date_sk AS date_sk, ws_ext_sales_price AS sales_price, ws_net_profit AS profit, CAST(0 AS decimal(7, 2)) AS return_amt
        , CAST(0 AS decimal(7, 2)) AS net_loss
       FROM web_sales
       UNION ALL
       SELECT ws_web_site_sk AS wsr_web_site_sk, wr_returned_date_sk AS date_sk, CAST(0 AS decimal(7, 2)) AS sales_price, CAST(0 AS decimal(7, 2)) AS profit, wr_return_amt AS return_amt
        , wr_net_loss AS net_loss
       FROM web_returns
        LEFT JOIN web_sales
        ON wr_item_sk = ws_item_sk
         AND wr_order_number = ws_order_number
      ) salesreturns, date_dim, web_site
      WHERE date_sk = d_date_sk
       AND d_date BETWEEN CAST('2000-08-23' AS date) AND CAST('2000-08-23' AS date) + INTERVAL '14' DAY
       AND wsr_web_site_sk = web_site_sk
      GROUP BY web_site_id
     )
    SELECT channel, id, SUM(sales) AS sales
     , SUM(RETURNS) AS RETURNS, SUM(profit) AS profit
    FROM (
     SELECT 'store channel' AS channel, 'store'
      OR s_store_id AS id, sales, RETURNS
      , profit - profit_loss AS profit
     FROM ssr
     UNION ALL
     SELECT 'catalog channel' AS channel, 'catalog_page'
      OR cp_catalog_page_id AS id, sales, RETURNS
      , profit - profit_loss AS profit
     FROM csr
     UNION ALL
     SELECT 'web channel' AS channel, 'web_site'
      OR web_site_id AS id, sales, RETURNS
      , profit - profit_loss AS profit
     FROM wsr
    ) x
    GROUP BY channel, id WITH ROLLUP
    ORDER BY channel, id
    LIMIT 100;
  • SQL6

    SELECT a.ca_state AS STATE, COUNT(*) AS cnt
    FROM customer_address a, customer c, store_sales s, date_dim d, item i
    WHERE a.ca_address_sk = c.c_current_addr_sk
        AND c.c_customer_sk = s.ss_customer_sk
        AND s.ss_sold_date_sk = d.d_date_sk
        AND s.ss_item_sk = i.i_item_sk
        AND d.d_month_seq = (
            SELECT DISTINCT d_month_seq
            FROM date_dim
            WHERE d_year = 2001
                AND d_moy = 1
        )
        AND i.i_current_price > 1.2 * (
            SELECT AVG(j.i_current_price)
            FROM item j
            WHERE j.i_category = i.i_category
        )
    GROUP BY a.ca_state
    HAVING COUNT(*) >= 10
    ORDER BY cnt, a.ca_state
    LIMIT 100;
  • SQL7

    SELECT i_item_id, AVG(ss_quantity) AS agg1, AVG(ss_list_price) AS agg2
      , AVG(ss_coupon_amt) AS agg3, AVG(ss_sales_price) AS agg4
    FROM store_sales, customer_demographics, date_dim, item, promotion
    WHERE ss_sold_date_sk = d_date_sk
      AND ss_item_sk = i_item_sk
      AND ss_cdemo_sk = cd_demo_sk
      AND ss_promo_sk = p_promo_sk
      AND cd_gender = 'M'
      AND cd_marital_status = 'S'
      AND cd_education_status = 'College'
      AND (p_channel_email = 'N'
        OR p_channel_event = 'N')
      AND d_year = 2000
    GROUP BY i_item_id
    ORDER BY i_item_id
    LIMIT 100;
  • SQL8

    SELECT s_store_name, SUM(ss_net_profit)
    FROM store_sales, date_dim, store, (
        SELECT ca_zip
        FROM (
          SELECT substr(ca_zip, 1, 5) AS ca_zip
          FROM customer_address
          WHERE substr(ca_zip, 1, 5) IN (    
            '24128','76232','65084','87816','83926','77556','20548','26231','43848','15126','91137','61265','98294','25782','17920','18426','98235','40081','84093','28577','55565','17183','54601','67897','22752','86284','18376','38607','45200','21756','29741','96765','23932','89360','29839','25989','28898','91068','72550','10390','18845','47770','82636','41367','76638','86198','81312','37126','39192','88424','72175','81426','53672','10445','42666','66864','66708','41248','48583','82276','18842','78890','49448','14089','38122','34425','79077','19849','43285','39861','66162','77610','13695','99543','83444','83041','12305','57665','68341','25003','57834','62878','49130','81096','18840','27700','23470','50412','21195','16021','76107','71954','68309','18119','98359','64544','10336','86379','27068','39736','98569','28915','24206','56529','57647','54917','42961','91110','63981','14922','36420','23006','67467','32754','30903','20260','31671','51798','72325','85816','68621','13955','36446','41766','68806','16725','15146','22744','35850','88086','51649','18270','52867','39972','96976','63792','11376','94898','13595','10516','90225','58943','39371','94945','28587','96576','57855','28488','26105','83933','25858','34322','44438','73171','30122','34102','22685','71256','78451','54364','13354','45375','40558','56458','28286','45266','47305','69399','83921','26233','11101','15371','69913','35942','15882','25631','24610','44165','99076','33786','70738','26653','14328','72305','62496','22152','10144','64147','48425','14663','21076','18799','30450','63089','81019','68893','24996','51200','51211','45692','92712','70466','79994','22437','25280','38935','71791','73134','56571','14060','19505','72425','56575','74351','68786','51650','20004','18383','76614','11634','18906','15765','41368','73241','76698','78567','97189','28545','76231','75691','22246','51061','90578','56691','68014','51103','94167','57047','14867','73520','15734','63435','25733','35474','24676','94627','53535','17879','15559','53268','59166','11928','59402','33282','45721','43933','68101','33515','36634','71286','19736','58058','55253','67473','41918','19515','36495','19430','22351','77191','91393','49156','50298','87501','18652','53179','18767','63193','23968','65164','68880','21286','72823','58470','67301','13394','31016','70372','67030','40604','24317','45748','39127','26065','77721','31029','31880','60576','24671','45549','13376','50016','33123','19769','22927','97789','46081','72151','15723','46136','51949','68100','96888','64528','14171','79777','28709','11489','25103','32213','78668','22245','15798','27156','37930','62971','21337','51622','67853','10567','38415','15455','58263','42029','60279','37125','56240','88190','50308','26859','64457','89091','82136','62377','36233','63837','58078','17043','30010','60099','28810','98025','29178','87343','73273','30469','64034','39516','86057','21309','90257','67875','40162','11356','73650','61810','72013','30431','22461','19512','13375','55307','30625','83849','68908','26689','96451','38193','46820','88885','84935','69035','83144','47537','56616','94983','48033','69952','25486','61547','27385','61860','58048','56910','16807','17871','35258','31387','35458','35576'  
          )
          INTERSECT
          SELECT ca_zip
          FROM (
            SELECT substr(ca_zip, 1, 5) AS ca_zip
              , COUNT(*) AS cnt
            FROM customer_address, customer
            WHERE ca_address_sk = c_current_addr_sk
              AND c_preferred_cust_flag = 'Y'
            GROUP BY ca_zip
            HAVING COUNT(*) > 10
          ) A1
        ) A2
      ) V1
    WHERE ss_store_sk = s_store_sk
      AND ss_sold_date_sk = d_date_sk
      AND d_qoy = 2
      AND d_year = 1998
      AND (substr(s_zip, 1, 2) = substr(V1.ca_zip, 1, 2))
    GROUP BY s_store_name
    ORDER BY s_store_name
    LIMIT 100;
  • SQL9

    SELECT CASE
                WHEN (
                      SELECT COUNT(*)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 1 AND 20
                ) > 74129 THEN (
                      SELECT AVG(ss_ext_discount_amt)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 1 AND 20
                )
                ELSE (
                      SELECT AVG(ss_net_paid)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 1 AND 20
                )
          END AS bucket1
          , CASE
                WHEN (
                      SELECT COUNT(*)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 21 AND 40
                ) > 122840 THEN (
                      SELECT AVG(ss_ext_discount_amt)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 21 AND 40
                )
                ELSE (
                      SELECT AVG(ss_net_paid)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 21 AND 40
                )
          END AS bucket2
          , CASE
                WHEN (
                      SELECT COUNT(*)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 41 AND 60
                ) > 56580 THEN (
                      SELECT AVG(ss_ext_discount_amt)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 41 AND 60
                )
                ELSE (
                      SELECT AVG(ss_net_paid)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 41 AND 60
                )
          END AS bucket3
          , CASE
                WHEN (
                      SELECT COUNT(*)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 61 AND 80
                ) > 10097 THEN (
                      SELECT AVG(ss_ext_discount_amt)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 61 AND 80
                )
                ELSE (
                      SELECT AVG(ss_net_paid)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 61 AND 80
                )
          END AS bucket4
          , CASE
                WHEN (
                      SELECT COUNT(*)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 81 AND 100
                ) > 165306 THEN (
                      SELECT AVG(ss_ext_discount_amt)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 81 AND 100
                )
                ELSE (
                      SELECT AVG(ss_net_paid)
                      FROM store_sales
                      WHERE ss_quantity BETWEEN 81 AND 100
                )
          END AS bucket5
    FROM reason
    WHERE r_reason_sk = 1;
  • SQL10

    SELECT cd_gender, cd_marital_status, cd_education_status, COUNT(*) AS cnt1
      , cd_purchase_estimate, COUNT(*) AS cnt2, cd_credit_rating
      , COUNT(*) AS cnt3, cd_dep_count
      , COUNT(*) AS cnt4, cd_dep_employed_count
      , COUNT(*) AS cnt5, cd_dep_college_count
      , COUNT(*) AS cnt6
    FROM customer c, customer_address ca, customer_demographics
    WHERE c.c_current_addr_sk = ca.ca_address_sk
      AND ca_county IN ('Rush County', 'Toole County', 'Jefferson County', 'Dona Ana County', 'La Porte County')
      AND cd_demo_sk = c.c_current_cdemo_sk
      AND EXISTS (
        SELECT *
        FROM store_sales, date_dim
        WHERE c.c_customer_sk = ss_customer_sk
          AND ss_sold_date_sk = d_date_sk
          AND d_year = 2002
          AND d_moy BETWEEN 1 AND 1 + 3
      )
      AND (EXISTS (
          SELECT *
          FROM web_sales, date_dim
          WHERE c.c_customer_sk = ws_bill_customer_sk
            AND ws_sold_date_sk = d_date_sk
            AND d_year = 2002
            AND d_moy BETWEEN 1 AND 1 + 3
        )
        OR EXISTS (
          SELECT *
          FROM catalog_sales, date_dim
          WHERE c.c_customer_sk = cs_ship_customer_sk
            AND cs_sold_date_sk = d_date_sk
            AND d_year = 2002
            AND d_moy BETWEEN 1 AND 1 + 3
        ))
    GROUP BY cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating, cd_dep_count, cd_dep_employed_count, cd_dep_college_count
    ORDER BY cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating, cd_dep_count, cd_dep_employed_count, cd_dep_college_count
    LIMIT 100;