维表关联

本文将为您介绍维表关联的使用。

维表关联(Lookup Join)

  • Lookup Join是维表 Join的一种实现方式,详情请参见维表JOIN语句

  • Fluss的主键表支持作为维表进行join,且join的条件需要包含维度表的全部主键。

  • Fluss的维表关联默认使用异步模式,吞吐更高,可通过 SQL Hint设置'lookup.async' = 'false',将维表关联改成同步模式。

关联两个维表的SQL示例

创建两个维表

CREATE TABLE `my-catalog`.`my_db`.`orders` (
  `o_orderkey` INT NOT NULL,
  `o_custkey` INT NOT NULL,
  `o_orderstatus` CHAR(1) NOT NULL,
  `o_totalprice` DECIMAL(15, 2) NOT NULL,
  `o_orderdate` DATE NOT NULL,
  `o_orderpriority` CHAR(15) NOT NULL,
  `o_clerk` CHAR(15) NOT NULL,
  `o_shippriority` INT NOT NULL,
  `o_comment` STRING NOT NULL,
  PRIMARY KEY (o_orderkey) NOT ENFORCED
);


CREATE TABLE `my-catalog`.`my_db`.`customer` (
  `c_custkey` INT NOT NULL,
  `c_name` STRING NOT NULL,
  `c_address` STRING NOT NULL,
  `c_nationkey` INT NOT NULL,
  `c_phone` CHAR(15) NOT NULL,
  `c_acctbal` DECIMAL(15, 2) NOT NULL,
  `c_mktsegment` CHAR(10) NOT NULL,
  `c_comment` STRING NOT NULL,
  PRIMARY KEY (c_custkey) NOT ENFORCED
);

两个维表关联

USE CATALOG `fluss-catalog`;
USE my_db;

CREATE TEMPORARY TABLE lookup_join_sink
(
  `orderkey`          INT NOT NULL,
  `custkey`          INT NOT NULL,
  `orderstatus`      CHAR(1) NOT NULL,
  `totalprice`       DECIMAL(15, 2) NOT NULL,
  `orderdate`        DATE NOT NULL,
  `orderpriority`    CHAR(15) NOT NULL,
  `clerk`            CHAR(15) NOT NULL,
  `shippriority`     INT NOT NULL,
  `order_comment`    STRING NOT NULL,
  `customer_name`    STRING NOT NULL,
  `customer_address` STRING NOT NULL,
  `nationkey`        INT NOT NULL,
  `phone`            CHAR(15) NOT NULL,
  `acctbal`          DECIMAL(15, 2) NOT NULL,
  `mktsegment`       CHAR(10) NOT NULL,
  `customer_comment` STRING NOT NULL
) WITH ('connector' = 'blackhole');

// lookup 异步模式 进行 join
INSERT INTO lookup_join_sink
SELECT *
FROM `orders` LEFT JOIN `customer` 
FOR SYSTEM_TIME AS OF PROCTIME()
ON `o`.`o_custkey` = `c`.`c_custkey`;

#########################################
// lookup 同步模式 进行 join 
INSERT INTO lookup_join_sink
SELECT *
FROM `orders` LEFT JOIN `customer` /*+ OPTIONS('lookup.async' = 'false') */ 
FOR SYSTEM_TIME AS OF PROCTIME()
ON `o`.`o_custkey` = `c`.`c_custkey`;