GCP Data Architect Series - Part IV
Implementing SCD Type 2 in BigQuery Let’s say customer address changes must be tracked historically.
AdminFollow
5 min•Feb 28, 2026
Views - 13

Step 1: Table Structure
CREATE TABLE analytics.dim_customer (
customer_key INT64,
customer_id STRING,
name STRING,
email STRING,
city STRING,
effective_start_date DATE,
effective_end_date DATE,
is_current BOOL
);
customer_key INT64,
customer_id STRING,
name STRING,
email STRING,
city STRING,
effective_start_date DATE,
effective_end_date DATE,
is_current BOOL
);
Step 2: Detect Changes
Compare staging vs current dimension.
WITH source AS (
SELECT * FROM staging.customers
),
current_dim AS (
SELECT *
FROM analytics.dim_customer
WHERE is_current = TRUE
)
SELECT s.*
FROM source s
LEFT JOIN current_dim d
ON s.customer_id = d.customer_id
WHERE d.customer_id IS NULL
OR s.name != d.name
OR s.city != d.city;
SELECT * FROM staging.customers
),
current_dim AS (
SELECT *
FROM analytics.dim_customer
WHERE is_current = TRUE
)
SELECT s.*
FROM source s
LEFT JOIN current_dim d
ON s.customer_id = d.customer_id
WHERE d.customer_id IS NULL
OR s.name != d.name
OR s.city != d.city;
Step 3: Close Existing Records
UPDATE analytics.dim_customer
SET effective_end_date = CURRENT_DATE(),
is_current = FALSE
WHERE customer_id IN (
SELECT s.customer_id
FROM staging.customers s
JOIN analytics.dim_customer d
ON s.customer_id = d.customer_id
WHERE d.is_current = TRUE
AND (s.name != d.name OR s.city != d.city)
);
SET effective_end_date = CURRENT_DATE(),
is_current = FALSE
WHERE customer_id IN (
SELECT s.customer_id
FROM staging.customers s
JOIN analytics.dim_customer d
ON s.customer_id = d.customer_id
WHERE d.is_current = TRUE
AND (s.name != d.name OR s.city != d.city)
);
Step 4: Insert New Version
INSERT INTO analytics.dim_customer
SELECT
GENERATE_UUID() AS customer_key,
s.customer_id,
s.name,
s.email,
s.city,
CURRENT_DATE(),
DATE '9999-12-31',
TRUE
FROM staging.customers s
LEFT JOIN analytics.dim_customer d
ON s.customer_id = d.customer_id
AND d.is_current = TRUE
WHERE d.customer_id IS NULL
OR s.name != d.name
OR s.city != d.city;
SELECT
GENERATE_UUID() AS customer_key,
s.customer_id,
s.name,
s.email,
s.city,
CURRENT_DATE(),
DATE '9999-12-31',
TRUE
FROM staging.customers s
LEFT JOIN analytics.dim_customer d
ON s.customer_id = d.customer_id
AND d.is_current = TRUE
WHERE d.customer_id IS NULL
OR s.name != d.name
OR s.city != d.city;
? BigQuery Optimization Tip
Because storage is cheap:
Don’t over-engineer SCD logic
Simpler MERGE statements often work best
Modern production pattern:
MERGE analytics.dim_customer T
USING staging.customers S
ON T.customer_id = S.customer_id
AND T.is_current = TRUE
WHEN MATCHED AND (
T.name != S.name OR
T.city != S.city
) THEN
UPDATE SET
is_current = FALSE,
effective_end_date = CURRENT_DATE()
WHEN NOT MATCHED THEN
INSERT (...)
USING staging.customers S
ON T.customer_id = S.customer_id
AND T.is_current = TRUE
WHEN MATCHED AND (
T.name != S.name OR
T.city != S.city
) THEN
UPDATE SET
is_current = FALSE,
effective_end_date = CURRENT_DATE()
WHEN NOT MATCHED THEN
INSERT (...)
Comments (0)
No comments yet.
