Таблицы для всех задач
Все задачи используют один датасет e-commerce маркетплейса. Вот основные таблицы:
-- Пользователи users (user_id, registration_date, acquisition_channel, platform) -- Заказы orders (order_id, user_id, order_date, amount, status) -- События (логи) events (event_id, user_id, event_name, event_time, platform) -- Значения status: 'completed', 'cancelled', 'refunded' -- Значения event_name: 'page_view', 'search', 'view_item', -- 'add_to_cart', 'begin_checkout', 'purchase'
WITH ranked AS ( SELECT user_id, order_id, order_date, amount, ROW_NUMBER() OVER ( PARTITION BY user_id ORDER BY order_date DESC ) AS rn FROM orders WHERE status = 'completed' ) SELECT user_id, order_id, order_date, amount FROM ranked WHERE rn = 1;
WITH monthly AS ( SELECT DATE_TRUNC('month', order_date) AS month, SUM(amount) AS revenue FROM orders WHERE status = 'completed' GROUP BY 1 ) SELECT month, revenue, LAG(revenue) OVER (ORDER BY month) AS prev_revenue, ROUND(100.0 * (revenue - LAG(revenue) OVER (ORDER BY month)) / LAG(revenue) OVER (ORDER BY month), 1) AS mom_pct FROM monthly;
SELECT DATE_TRUNC('month', u.registration_date) AS cohort, COUNT(DISTINCT u.user_id) AS cohort_size, ROUND(100.0 * COUNT(DISTINCT CASE WHEN e.event_time::DATE = u.registration_date + 7 THEN u.user_id END) / COUNT(DISTINCT u.user_id), 1) AS day7_retention FROM users u LEFT JOIN events e ON u.user_id = e.user_id GROUP BY 1 ORDER BY 1;
WITH f AS ( SELECT COUNT(DISTINCT CASE WHEN event_name='page_view' THEN user_id END) AS s1, COUNT(DISTINCT CASE WHEN event_name='view_item' THEN user_id END) AS s2, COUNT(DISTINCT CASE WHEN event_name='add_to_cart' THEN user_id END) AS s3, COUNT(DISTINCT CASE WHEN event_name='purchase' THEN user_id END) AS s4 FROM events WHERE event_time >= CURRENT_DATE - 30 ) SELECT s1, s2, s3, s4, ROUND(100.0*s2/s1,1) AS cr_1_2, ROUND(100.0*s3/NULLIF(s2,0),1) AS cr_2_3, ROUND(100.0*s4/NULLIF(s3,0),1) AS cr_3_4 FROM f;
WITH user_rev AS ( SELECT user_id, SUM(amount) AS total FROM orders WHERE status='completed' GROUP BY user_id ), with_decile AS ( SELECT *, NTILE(10) OVER (ORDER BY total DESC) AS decile FROM user_rev ) SELECT decile, COUNT(*) AS users, SUM(total) AS revenue, ROUND(100.0*SUM(total)/SUM(SUM(total)) OVER(),1) AS pct FROM with_decile GROUP BY decile ORDER BY decile;
WITH with_prev AS ( SELECT *, LAG(event_time) OVER ( PARTITION BY user_id ORDER BY event_time ) AS prev_time FROM events ), with_flag AS ( SELECT *, CASE WHEN prev_time IS NULL OR event_time - prev_time > INTERVAL '30 min' THEN 1 ELSE 0 END AS is_new FROM with_prev ) SELECT *, SUM(is_new) OVER ( PARTITION BY user_id ORDER BY event_time ) AS session_id FROM with_flag;
WITH c AS ( SELECT user_id, DATE_TRUNC('month', registration_date) AS cohort FROM users ), rev AS ( SELECT c.cohort, DATE_PART('month', AGE(DATE_TRUNC('month',o.order_date), c.cohort) )::INT AS period, SUM(o.amount) AS revenue, (SELECT COUNT(DISTINCT user_id) FROM c c2 WHERE c2.cohort=c.cohort) AS size FROM c JOIN orders o ON c.user_id=o.user_id AND o.status='completed' AND o.order_date >= c.cohort GROUP BY c.cohort, period ) SELECT cohort, period, ROUND(SUM(revenue) OVER ( PARTITION BY cohort ORDER BY period ) / size, 0) AS cumulative_ltv FROM rev ORDER BY cohort, period;
SELECT order_date::DATE AS day, SUM(amount) AS daily_rev, SUM(SUM(amount)) OVER ( PARTITION BY DATE_TRUNC('month', order_date) ORDER BY order_date::DATE ) AS mtd_total FROM orders WHERE status = 'completed' GROUP BY order_date::DATE, DATE_TRUNC('month', order_date) ORDER BY day;
WITH numbered AS ( SELECT user_id, amount, ROW_NUMBER() OVER ( PARTITION BY user_id ORDER BY order_date ) AS rn FROM orders WHERE status = 'completed' ) SELECT a.user_id, a.amount AS first_order, b.amount AS second_order FROM numbered a JOIN numbered b ON a.user_id = b.user_id AND a.rn = 1 AND b.rn = 2 WHERE b.amount > a.amount;
WITH calendar AS ( SELECT d::DATE FROM generate_series( CURRENT_DATE - 90, CURRENT_DATE, '1 day'::INTERVAL ) d ), daily_orders AS ( SELECT order_date::DATE AS day, COUNT(*) AS cnt FROM orders GROUP BY 1 ) SELECT c.d AS missing_day FROM calendar c LEFT JOIN daily_orders o ON c.d = o.day WHERE o.day IS NULL ORDER BY c.d;
Связанные материалы
Как готовиться к SQL-секции
На собеседовании в BigTech SQL-задача занимает 15–20 минут. Вас просят написать запрос на доске или в онлайн-редакторе. Оценивают: правильность, читаемость, умение объяснить логику и знание edge cases (NULL, деление на ноль, LEFT vs INNER JOIN).
Ключевые паттерны, которые закрывают 80% задач: ROW_NUMBER для топ-N по группам, LAG/LEAD для сравнения с предыдущим, SUM OVER для running total, COUNT DISTINCT CASE WHEN для условной агрегации, и generate_series + LEFT JOIN для заполнения пропусков.
Следующий шаг: решите все 10 задач самостоятельно, не подглядывая. Потом сравните. Если решили 7+ без ошибок — вы готовы к SQL-секции. Если меньше — перечитайте статью про оконные функции и попробуйте ещё раз.