Skip to main content

Example SQL Queries for Onboarding.online Analytics

When working with Onboarding.online data in BigQuery, you can use SQL queries to track conversions, analyze form steps, and identify drop-offs during the onboarding process. Below are common use cases with sample queries.

A
Written by Alexander Karpovich
Updated over 3 weeks ago

Conversion by Form Steps

Goal:
Track the sequence of user actions across screens and calculate the conversion rate to the OnboardingFinished event.
​

-- 1. Sessions by onboardingId and date
WITH session_users AS (
SELECT
jsonPayload.userId AS userId,
PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%E*S%Ez', jsonPayload.eventtime) AS started_at
FROM `onboarding-online-prod.web_events_01a6dbc3_feda_4ab5_9679_924ca5b183d1.stderr_*`
WHERE jsonPayload.eventname = 'StartOnboarding'
AND JSON_VALUE(jsonPayload.eventargs, '$.onboardingId') = '30576f61-17c3-4a73-9488-58f0e902c119'
AND jsonPayload.userId IS NOT NULL
AND PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%E*S%Ez', jsonPayload.eventtime)
BETWEEN TIMESTAMP('2025-06-01 00:00:00') AND TIMESTAMP('2025-06-30 23:59:59')
),


-- 2. All UserUpdatedValue events (latest by screen(input))
ranked_inputs AS (
SELECT *,
ROW_NUMBER() OVER (
PARTITION BY jsonPayload.userId,
JSON_VALUE(jsonPayload.eventargs, '$.screenId'),
TO_JSON_STRING(JSON_QUERY(jsonPayload.eventargs, '$.userInputValue')
)
ORDER BY PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%E*S%Ez', jsonPayload.eventtime) DESC
) AS rn
FROM `onboarding-online-prod.web_events_01a6dbc3_feda_4ab5_9679_924ca5b183d1.stderr_*`
WHERE jsonPayload.eventname = 'UserUpdatedValue'
AND JSON_VALUE(jsonPayload.eventargs, '$.screenId') IS NOT NULL
AND JSON_QUERY(jsonPayload.eventargs, '$.userInputValue') IS NOT NULL
),


-- 3. Cleaned inputs
filtered_inputs AS (
SELECT
jsonPayload.userId AS userId,
JSON_VALUE(jsonPayload.eventargs, '$.screenId') AS screenId,
TO_JSON_STRING(JSON_QUERY(jsonPayload.eventargs, '$.userInputValue')) AS inputValue,
PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%E*S%Ez', jsonPayload.eventtime) AS eventtime
FROM ranked_inputs
WHERE rn = 1
),


-- 4. Group steps by userId
raw_chains AS (
SELECT
userId,
ARRAY_AGG(CONCAT(screenId, '(', inputValue, ')') ORDER BY eventtime) AS input_steps
FROM filtered_inputs
GROUP BY userId
),


-- 5. Remove consecutive duplicates
deduplicated_chains AS (
SELECT
userId,
ARRAY(
SELECT step
FROM UNNEST(input_steps) AS step WITH OFFSET pos
WHERE pos = 0 OR step != input_steps[OFFSET(pos - 1)]
) AS cleaned_steps
FROM raw_chains
),


-- 6. Combine cleaned steps into a single string
user_paths AS (
SELECT
u.userId,
ARRAY_TO_STRING(cleaned_steps, '->') AS input_path,
su.started_at
FROM deduplicated_chains u
JOIN session_users su ON u.userId = su.userId
),


-- 7. Users who reached OnboardingFinished
onboarding_finished AS (
SELECT DISTINCT jsonPayload.userId AS userId
FROM `onboarding-online-prod.web_events_01a6dbc3_feda_4ab5_9679_924ca5b183d1.stderr_*`
WHERE jsonPayload.eventname = 'OnboardingFinished'
AND jsonPayload.userId IS NOT NULL
)


-- 8. Final report
SELECT
input_path,
COUNT(*) AS total_users,
COUNT(IF(f.userId IS NOT NULL, 1, NULL)) AS conversions,
ROUND(SAFE_DIVIDE(COUNT(IF(f.userId IS NOT NULL, 1, NULL)), COUNT(*)) * 100, 2) AS conversion_percent
FROM user_paths p
LEFT JOIN onboarding_finished f
ON p.userId = f.userId
GROUP BY input_path
ORDER BY total_users DESC;

Screen Drop-Offs

Goal:
Measure how many users leave each screen to identify where drop-offs happen in the onboarding flow.

-- 1. Extract onboardingId + onboardingName and normalize URL (remove protocol and query)
WITH onboarding_meta AS (
SELECT
REGEXP_EXTRACT(jsonPayload.url, r'^(?:https?:\/\/)?([^?#]+)') AS clean_url,
JSON_VALUE(jsonPayload.eventargs, '$.onboardingId') AS onboardingId,
JSON_VALUE(jsonPayload.eventargs, '$.onboardingName') AS onboardingName,
ROW_NUMBER() OVER (
PARTITION BY REGEXP_EXTRACT(jsonPayload.url, r'^(?:https?:\/\/)?([^?#]+)')
ORDER BY jsonPayload.eventtime
) AS rn
FROM `onboarding-online.web_events_76172cd5_dc8e_49f4_8fa4_7001e815bed8.stderr_*`
WHERE jsonPayload.eventname = 'StartOnboarding'
AND JSON_VALUE(jsonPayload.eventargs, '$.onboardingId') IS NOT NULL
AND jsonPayload.url IS NOT NULL
),
unique_meta AS (
SELECT clean_url, onboardingId, onboardingName
FROM onboarding_meta
WHERE rn = 1
),


-- 2. Screen appearances
appears AS (
SELECT
REGEXP_EXTRACT(jsonPayload.url, r'^(?:https?:\/\/)?([^?#]+)') AS clean_url,
JSON_VALUE(jsonPayload.eventargs, '$.screenId') AS screenId
FROM `onboarding-online.web_events_76172cd5_dc8e_49f4_8fa4_7001e815bed8.stderr_*`
WHERE jsonPayload.eventname = 'ScreenDidAppear'
AND JSON_VALUE(jsonPayload.eventargs, '$.screenId') IS NOT NULL
AND jsonPayload.url IS NOT NULL
),


-- 3. Screen disappearances
disappears AS (
SELECT
REGEXP_EXTRACT(jsonPayload.url, r'^(?:https?:\/\/)?([^?#]+)') AS clean_url,
JSON_VALUE(jsonPayload.eventargs, '$.screenId') AS screenId
FROM `onboarding-online.web_events_76172cd5_dc8e_49f4_8fa4_7001e815bed8.stderr_*`
WHERE jsonPayload.eventname = 'ScreenDisappeared'
AND JSON_VALUE(jsonPayload.eventargs, '$.screenId') IS NOT NULL
AND jsonPayload.url IS NOT NULL
),


-- 4. Count appearances
appears_count AS (
SELECT
clean_url,
screenId,
COUNT(*) AS appeared
FROM appears
GROUP BY clean_url, screenId
),


-- 5. Count disappearances
disappears_count AS (
SELECT
clean_url,
screenId,
COUNT(*) AS disappeared
FROM disappears
GROUP BY clean_url, screenId
),


-- 6. Join by normalized URL
joined AS (
SELECT
a.clean_url,
a.screenId,
a.appeared,
IFNULL(d.disappeared, 0) AS disappeared,
a.appeared - IFNULL(d.disappeared, 0) AS dropped,
ROUND(SAFE_DIVIDE(a.appeared - IFNULL(d.disappeared, 0), a.appeared), 2) AS drop_rate
FROM appears_count a
LEFT JOIN disappears_count d
ON a.clean_url = d.clean_url AND a.screenId = d.screenId
)


-- 7. Final result with onboardingId
SELECT
m.onboardingId,
m.onboardingName,
j.screenId,
j.appeared,
j.disappeared,
j.dropped,
j.drop_rate
FROM joined j
LEFT JOIN unique_meta m
ON j.clean_url = m.clean_url
WHERE m.onboardingId IS NOT NULL
ORDER BY onboardingId, screenId;

Did this answer your question?