Lý thuyết
Bài 11/17

Funnel Analysis

Phân tích conversion rates, drop-off points, và tối ưu hóa user journeys

Funnel Analysis

Conversion Funnel Analytics

1. Introduction

Funnel Analysis là gì?

Funnel Analysis theo dõi users qua các stages của một process (signup, purchase, etc.) để identify drop-off points và optimize conversion rates. Đây là foundation của growth analytics và product optimization.

1.1 Common Funnel Types

Text
1┌─────────────────────────────────────────────────────────┐
2│ Marketing Funnel (AIDA) │
3├─────────────────────────────────────────────────────────┤
4│ │
5│ ████████████████████████████████ Awareness (100%) │
6│ ██████████████████████ Interest (65%) │
7│ ████████████████ Desire (45%) │
8│ ██████████ Action (25%) │
9│ │
10├─────────────────────────────────────────────────────────┤
11│ E-commerce Funnel │
12├─────────────────────────────────────────────────────────┤
13│ │
14│ ████████████████████████████████ Visit (100%) │
15│ ██████████████████████████ View Product (75%) │
16│ ████████████████████ Add to Cart (50%) │
17│ ██████████████ Checkout (30%) │
18│ ████████ Purchase (18%) │
19│ │
20├─────────────────────────────────────────────────────────┤
21│ SaaS Signup Funnel │
22├─────────────────────────────────────────────────────────┤
23│ │
24│ ████████████████████████████████ Landing (100%) │
25│ ██████████████████████████ Signup Start (60%) │
26│ ████████████████████ Email Verify (40%) │
27│ ██████████████ Complete Profile (25%) │
28│ ████████ First Action (15%) │
29│ │
30└─────────────────────────────────────────────────────────┘

2. Data Preparation

2.1 Event-based Data

Python
1import pandas as pd
2import numpy as np
3import matplotlib.pyplot as plt
4from datetime import datetime, timedelta
5
# Generate a synthetic event log. Every user lands; each later stage is
# reached with a fixed probability *given* the previous stage was reached.
np.random.seed(42)
n_users = 10000

users = list(range(1, n_users + 1))
events = []

base_date = datetime(2024, 1, 1)

# (event name, probability of advancing, min gap in seconds, max gap in
# seconds - exclusive) for every stage after the landing.
later_stages = [
    ('view_product', 0.70, 10, 120),
    ('add_to_cart', 0.55, 30, 300),
    ('checkout', 0.60, 60, 600),
    ('purchase', 0.70, 30, 180),
]

for user_id in users:
    # Each user starts on a random day within a 90-day window.
    session_time = base_date + timedelta(days=np.random.randint(0, 90))

    # Stage 1: Landing (100%)
    events.append({
        'user_id': user_id,
        'event': 'landing',
        'timestamp': session_time,
    })

    for event_name, advance_prob, min_gap, max_gap in later_stages:
        # The user leaves the funnel at the first stage they fail to reach.
        if not np.random.random() < advance_prob:
            break
        session_time += timedelta(seconds=np.random.randint(min_gap, max_gap))
        events.append({
            'user_id': user_id,
            'event': event_name,
            'timestamp': session_time,
        })

events_df = pd.DataFrame(events)
print(f"Total events: {len(events_df)}")
print(events_df['event'].value_counts())

2.2 Define Funnel Steps

Python
# Funnel stages in order; a stage's list position is its rank.
funnel_steps = ['landing', 'view_product', 'add_to_cart', 'checkout', 'purchase']

# Attach the numeric rank of each event's stage to the event log.
step_order = dict(zip(funnel_steps, range(len(funnel_steps))))
events_df['step_order'] = events_df['event'].map(step_order)

# Per user: furthest stage reached plus first and last event timestamps.
user_progress = (
    events_df.groupby('user_id')
    .agg(
        max_step=('step_order', 'max'),
        first_event=('timestamp', 'min'),
        last_event=('timestamp', 'max'),
    )
    .reset_index()
)

# Seconds elapsed between a user's first and last recorded event.
journey_span = user_progress['last_event'] - user_progress['first_event']
user_progress['journey_time'] = journey_span.dt.total_seconds()

print(user_progress.head())

3. Basic Funnel Analysis

3.1 Calculate Conversion Rates

Python
def calculate_funnel(events_df, funnel_steps):
    """Calculate per-step funnel metrics.

    A user counts toward a step when they have at least one event with that
    name; the order in which the events occurred is not checked.

    Args:
        events_df: Event log with at least ``user_id`` and ``event`` columns.
        funnel_steps: Event names in funnel order.

    Returns:
        DataFrame with one row per step and the columns ``step``,
        ``step_number`` (1-based), ``count`` (distinct users at the step),
        ``overall_conversion`` (% of all distinct users in ``events_df``),
        ``step_conversion`` (% of the previous step's users; 100 for the
        first step) and ``drop_off`` (100 - step conversion; 0 for the first
        step). A rate whose denominator is zero is reported as 0.0 instead
        of raising ``ZeroDivisionError``.
    """
    total_users = events_df['user_id'].nunique()

    results = []
    prev_count = None
    for i, step in enumerate(funnel_steps):
        # Distinct users with at least one event of this step.
        reached = events_df.loc[events_df['event'] == step, 'user_id']
        count = int(reached.nunique())

        overall_rate = count / total_users * 100 if total_users else 0.0

        if i == 0:
            step_rate, drop_off = 100, 0
        elif prev_count:
            step_rate = count / prev_count * 100
            drop_off = round(100 - step_rate, 2)
        else:
            # Nobody reached the previous step, so there is nothing to
            # convert and nothing to lose.
            step_rate, drop_off = 0.0, 0.0

        results.append({
            'step': step,
            'step_number': i + 1,
            'count': count,
            'overall_conversion': round(overall_rate, 2),
            'step_conversion': round(step_rate, 2),
            'drop_off': drop_off,
        })
        prev_count = count

    return pd.DataFrame(results)
27
28funnel_df = calculate_funnel(events_df, funnel_steps)
29print("Funnel Analysis:")
30print(funnel_df)

3.2 Visualize Funnel

Python
def plot_funnel(funnel_df):
    """Plot user counts per step next to the conversion-rate curves.

    Left panel: horizontal bars of users per step. Right panel: overall
    conversion as a line and step conversion as bars. The figure is written
    to 'funnel_analysis.png' and then shown.

    Args:
        funnel_df: DataFrame with ``step``, ``count``, ``overall_conversion``
            and ``step_conversion`` columns.
    """
    fig, (count_ax, rate_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # Steps are passed in reverse so the first funnel step ends up on top.
    reversed_steps = funnel_df['step'][::-1]
    reversed_counts = funnel_df['count'][::-1]
    bars = count_ax.barh(reversed_steps, reversed_counts)
    count_ax.set_xlabel('Users')
    count_ax.set_title('Funnel: User Counts')

    # Write each count just to the right of its bar.
    for bar, count in zip(bars, reversed_counts):
        label_y = bar.get_y() + bar.get_height() / 2
        count_ax.text(bar.get_width() + 100, label_y, f'{count:,}', va='center')

    # Conversion-rate panel.
    positions = range(len(funnel_df))
    rate_ax.plot(positions, funnel_df['overall_conversion'],
                 marker='o', linewidth=2, label='Overall')
    rate_ax.bar(positions, funnel_df['step_conversion'], alpha=0.3, label='Step')
    rate_ax.set_xticks(positions)
    rate_ax.set_xticklabels(funnel_df['step'], rotation=45, ha='right')
    rate_ax.set_ylabel('Conversion Rate (%)')
    rate_ax.set_title('Conversion Rates')
    rate_ax.legend()
    rate_ax.set_ylim(0, 105)

    # Annotate the overall conversion slightly above each marker.
    for pos, overall in enumerate(funnel_df['overall_conversion']):
        rate_ax.text(pos, overall + 2, f'{overall}%', ha='center', fontsize=9)

    plt.tight_layout()
    plt.savefig('funnel_analysis.png', dpi=150)
    plt.show()
33
34plot_funnel(funnel_df)

3.3 Funnel Chart (Pyramid)

Python
def plot_funnel_pyramid(funnel_df):
    """Draw the funnel as a stack of centred bars, widest on top.

    Each step is a rectangle whose width is its user count relative to the
    largest count. The figure is written to 'funnel_pyramid.png' and shown.

    Args:
        funnel_df: DataFrame with ``step``, ``count``, ``overall_conversion``
            and ``drop_off`` columns.
    """
    fig, ax = plt.subplots(figsize=(10, 8))

    n_steps = len(funnel_df)
    widest = funnel_df['count'].max()

    # One shade of blue per step, from light to dark.
    shades = plt.cm.Blues(np.linspace(0.3, 0.9, n_steps))

    for i, (_, row) in enumerate(funnel_df.iterrows()):
        rel_width = row['count'] / widest
        x_left = (1 - rel_width) / 2  # centre the bar horizontally
        row_top = n_steps - i         # the first step occupies the top slot

        ax.add_patch(plt.Rectangle(
            (x_left, row_top - 1),
            rel_width, 0.8,
            facecolor=shades[i],
            edgecolor='white',
            linewidth=2,
        ))

        # Step name, user count and overall conversion inside the bar.
        caption = f"{row['step']}\n{row['count']:,} ({row['overall_conversion']}%)"
        ax.text(0.5, row_top - 0.6, caption,
                ha='center', va='center', fontsize=10, fontweight='bold')

        # Every step after the first gets a drop-off note on the right.
        if i == 0:
            continue
        ax.annotate(
            f"↓ {row['drop_off']}% drop",
            xy=(0.85, row_top + 0.1),
            fontsize=9, color='red',
        )

    ax.set_xlim(0, 1)
    ax.set_ylim(0, n_steps)
    ax.axis('off')
    ax.set_title('Conversion Funnel', fontsize=14, fontweight='bold')

    plt.tight_layout()
    plt.savefig('funnel_pyramid.png', dpi=150)
    plt.show()
44
45plot_funnel_pyramid(funnel_df)

4. Segmented Funnel Analysis

4.1 Add Segments

Python
1# Add user segments
2np.random.seed(42)
3user_segments = pd.DataFrame({
4 'user_id': range(1, n_users + 1),
5 'device': np.random.choice(['Mobile', 'Desktop', 'Tablet'], n_users, p=[0.55, 0.35, 0.10]),
6 'channel': np.random.choice(['Organic', 'Paid', 'Social', 'Email'], n_users, p=[0.35, 0.30, 0.20, 0.15]),
7 'is_new': np.random.choice([True, False], n_users, p=[0.7, 0.3])
8})
9
10events_df = events_df.merge(user_segments, on='user_id')
11print(events_df.head())

4.2 Funnel by Segment

Python
def funnel_by_segment(events_df, funnel_steps, segment_col):
    """Calculate the funnel separately for each value of a segment column.

    Args:
        events_df: Event log containing ``user_id``, ``event`` and
            ``segment_col``.
        funnel_steps: Event names in funnel order.
        segment_col: Name of the column to split by (e.g. ``'device'``).

    Returns:
        The per-segment results of ``calculate_funnel`` stacked into one
        DataFrame with an extra ``segment`` column. Rows whose segment value
        is missing are ignored. If there is no segment at all, an empty
        DataFrame with the expected columns is returned.
    """
    segment_funnels = []

    # dropna(): a NaN segment never equals itself, so filtering on it would
    # produce an empty slice rather than the rows with a missing segment.
    for segment in events_df[segment_col].dropna().unique():
        segment_events = events_df[events_df[segment_col] == segment]
        funnel = calculate_funnel(segment_events, funnel_steps)
        funnel['segment'] = segment
        segment_funnels.append(funnel)

    if not segment_funnels:
        # pd.concat raises on an empty list.
        return pd.DataFrame(columns=[
            'step', 'step_number', 'count', 'overall_conversion',
            'step_conversion', 'drop_off', 'segment',
        ])

    return pd.concat(segment_funnels, ignore_index=True)
12
# By device
device_funnel = funnel_by_segment(events_df, funnel_steps, 'device')
print("\nFunnel by Device:")
# pivot() sorts the index alphabetically; reindex restores funnel order.
device_pivot = device_funnel.pivot(
    index='step', columns='segment', values='overall_conversion'
).reindex(funnel_steps)
print(device_pivot)

# By channel
channel_funnel = funnel_by_segment(events_df, funnel_steps, 'channel')
print("\nFunnel by Channel:")
channel_pivot = channel_funnel.pivot(
    index='step', columns='segment', values='overall_conversion'
).reindex(funnel_steps)
print(channel_pivot)

4.3 Visualize Segment Comparison

Python
def plot_segment_comparison(funnel_df, segment_col):
    """Compare overall conversion per step across segments as grouped bars.

    The figure is written to ``funnel_by_<segment_col>.png`` and then shown.

    Args:
        funnel_df: Stacked per-segment funnel with ``segment``, ``step`` and
            ``overall_conversion`` columns (one row per segment and step).
        segment_col: Label used in the title, legend and file name.
    """
    segments = funnel_df['segment'].unique()
    n_segments = len(segments)

    # Take the step order from the data itself (first appearance) instead of
    # relying on a module-level ``funnel_steps`` variable.
    steps = list(funnel_df['step'].unique())

    fig, ax = plt.subplots(figsize=(12, 6))

    x = np.arange(len(steps))
    width = 0.8 / n_segments

    for i, segment in enumerate(segments):
        segment_data = funnel_df[funnel_df['segment'] == segment]
        # Align values to the step order; a step missing for this segment
        # becomes NaN (no bar) rather than shifting the remaining bars.
        rates = segment_data.set_index('step')['overall_conversion'].reindex(steps)
        # Centre the group of bars on each tick.
        offset = (i - n_segments / 2 + 0.5) * width
        ax.bar(x + offset, rates.to_numpy(), width, label=segment, alpha=0.8)

    ax.set_ylabel('Conversion Rate (%)')
    ax.set_title(f'Funnel Comparison by {segment_col}')
    ax.set_xticks(x)
    ax.set_xticklabels(steps, rotation=45, ha='right')
    ax.legend(title=segment_col)
    ax.set_ylim(0, 105)

    plt.tight_layout()
    plt.savefig(f'funnel_by_{segment_col}.png', dpi=150)
    plt.show()
27
28plot_segment_comparison(device_funnel, 'Device')
29plot_segment_comparison(channel_funnel, 'Channel')

5. Time-based Analysis

5.1 Time Between Steps

Python
def calculate_step_times(events_df, funnel_steps):
    """Calculate the time each user took between consecutive funnel steps.

    For every user the *earliest* timestamp of each event is used, so the
    result does not depend on the row order of ``events_df``.

    Args:
        events_df: Event log with ``user_id``, ``event`` and ``timestamp``
            (datetime) columns.
        funnel_steps: Event names in funnel order.

    Returns:
        Dict mapping ``'<step>_to_<next_step>'`` to a Series of durations in
        seconds, indexed by ``user_id`` and limited to users who have both
        steps. Pairs where either step never occurs are omitted.
    """
    # One row per user, one column per event, holding the earliest timestamp.
    first_seen = (
        events_df.groupby(['user_id', 'event'])['timestamp']
        .min()
        .unstack('event')
    )

    time_diffs = {}
    for step1, step2 in zip(funnel_steps, funnel_steps[1:]):
        if step1 not in first_seen.columns or step2 not in first_seen.columns:
            continue
        diff = (first_seen[step2] - first_seen[step1]).dt.total_seconds()
        # Users missing either step have NaT here and are dropped.
        time_diffs[f'{step1}_to_{step2}'] = diff.dropna()

    return time_diffs
22
23time_diffs = calculate_step_times(events_df, funnel_steps)
24
25# Summary statistics
26print("Time Between Steps (seconds):")
27for step, times in time_diffs.items():
28 print(f"\n{step}:")
29 print(f" Median: {times.median():.1f}s")
30 print(f" Mean: {times.mean():.1f}s")
31 print(f" 95th percentile: {times.quantile(0.95):.1f}s")

5.2 Time-to-Convert Analysis

Python
def time_to_convert_analysis(events_df):
    """Analyze the time from a user's first event to their last, for buyers.

    Plots a histogram and a cumulative curve of the time to convert (in
    minutes) and shows the figure.

    Args:
        events_df: Event log with ``user_id``, ``event`` and ``timestamp``
            (datetime) columns.

    Returns:
        DataFrame with one row per user who has a ``'purchase'`` event and
        the columns ``user_id``, ``first_event``, ``last_event``, ``events``
        (list of event names) and ``time_to_convert`` (seconds between the
        first and last event).
    """
    # First/last timestamp and the list of events per user.
    user_journey = (
        events_df.groupby('user_id')
        .agg(
            first_event=('timestamp', 'min'),
            last_event=('timestamp', 'max'),
            events=('event', list),
        )
        .reset_index()
    )

    # .copy() so the new column is written to an independent frame and not
    # to a slice of user_journey (avoids SettingWithCopyWarning).
    has_purchase = user_journey['events'].apply(lambda names: 'purchase' in names)
    purchasers = user_journey[has_purchase].copy()
    purchasers['time_to_convert'] = (
        purchasers['last_event'] - purchasers['first_event']
    ).dt.total_seconds()

    minutes = purchasers['time_to_convert'] / 60
    median_minutes = purchasers['time_to_convert'].median() / 60

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # Histogram
    axes[0].hist(minutes, bins=50, edgecolor='black')
    axes[0].set_xlabel('Time to Convert (minutes)')
    axes[0].set_ylabel('Users')
    axes[0].set_title('Time-to-Convert Distribution')
    axes[0].axvline(median_minutes, color='r', linestyle='--',
                    label=f"Median: {median_minutes:.1f}m")
    axes[0].legend()

    # Cumulative share of purchasers converted within a given time.
    sorted_times = np.sort(purchasers['time_to_convert'].values) / 60
    cumulative = np.arange(1, len(sorted_times) + 1) / len(sorted_times) * 100
    axes[1].plot(sorted_times, cumulative)
    axes[1].set_xlabel('Time (minutes)')
    axes[1].set_ylabel('Cumulative % Converted')
    axes[1].set_title('Cumulative Conversion by Time')
    # Reference lines at 50% and 80% of purchasers.
    axes[1].axhline(50, color='r', linestyle='--', alpha=0.5)
    axes[1].axhline(80, color='g', linestyle='--', alpha=0.5)

    plt.tight_layout()
    plt.show()

    return purchasers
40
41purchasers_df = time_to_convert_analysis(events_df)

5.3 Funnel Over Time

Python
def funnel_trend(events_df, funnel_steps, freq='W'):
    """Calculate funnel metrics for each time period, in chronological order.

    Events are assigned to a period by their own timestamp. The input frame
    is not modified.

    Args:
        events_df: Event log with ``user_id``, ``event`` and ``timestamp``
            (datetime) columns.
        funnel_steps: Event names in funnel order.
        freq: pandas period frequency alias (default ``'W'`` = weekly).

    Returns:
        The per-period results of ``calculate_funnel`` stacked into one
        DataFrame with an extra ``period`` column, ordered by period. An
        empty DataFrame with the expected columns if there are no events.
    """
    # Keep the period labels in a separate Series instead of adding a column
    # to the caller's DataFrame.
    periods = events_df['timestamp'].dt.to_period(freq)

    results = []
    # sorted(): unique() yields periods in order of first appearance, which
    # is arbitrary for an unsorted log and would scramble any trend plot.
    for period in sorted(periods.dropna().unique()):
        period_data = events_df[periods == period]
        funnel = calculate_funnel(period_data, funnel_steps)
        funnel['period'] = period
        results.append(funnel)

    if not results:
        # pd.concat raises on an empty list.
        return pd.DataFrame(columns=[
            'step', 'step_number', 'count', 'overall_conversion',
            'step_conversion', 'drop_off', 'period',
        ])

    return pd.concat(results, ignore_index=True)
14
trend = funnel_trend(events_df, funnel_steps, 'W')

# Plot conversion rate trend
fig, ax = plt.subplots(figsize=(14, 6))

# Sort chronologically before plotting: the x-axis and the fitted trend line
# both use row position as the time axis.
purchase_trend = trend[trend['step'] == 'purchase'].sort_values('period').copy()
purchase_trend['period'] = purchase_trend['period'].astype(str)

ax.plot(purchase_trend['period'], purchase_trend['overall_conversion'], marker='o')
ax.set_xlabel('Week')
ax.set_ylabel('Overall Conversion Rate (%)')
ax.set_title('Purchase Conversion Rate Over Time')
ax.tick_params(axis='x', rotation=45)

# Add a linear trend line fitted over the week index (0, 1, 2, ...).
week_index = np.arange(len(purchase_trend))
z = np.polyfit(week_index, purchase_trend['overall_conversion'], 1)
p = np.poly1d(z)
ax.plot(purchase_trend['period'], p(week_index), 'r--', label='Trend')
ax.legend()

plt.tight_layout()
plt.show()

6. Advanced Funnel Metrics

6.1 Funnel Velocity

Python
def calculate_velocity(events_df, funnel_steps):
    """Calculate funnel velocity metrics.

    Args:
        events_df: Event log with ``user_id``, ``event`` and ``timestamp``
            (datetime) columns.
        funnel_steps: Event names in funnel order.

    Returns:
        Dict with three entries, all durations in seconds:
        ``'median_step_times'`` (median per step pair, as returned by
        ``calculate_step_times``), ``'total_funnel_time'`` (median, mean and
        75th percentile of each user's first-to-last event span) and
        ``'conversion_windows'`` (50th/75th/90th percentile of that span
        among users with a ``'purchase'`` event).
    """
    metrics = {}

    # Median time per step
    time_diffs = calculate_step_times(events_df, funnel_steps)
    metrics['median_step_times'] = {k: v.median() for k, v in time_diffs.items()}

    # Total funnel time per user: last minus first timestamp. Computed with
    # vectorised group max/min rather than a Python lambda per group.
    per_user_ts = events_df.groupby('user_id')['timestamp']
    total_times = (per_user_ts.max() - per_user_ts.min()).dt.total_seconds()
    metrics['total_funnel_time'] = {
        'median': total_times.median(),
        'mean': total_times.mean(),
        'p75': total_times.quantile(0.75),
    }

    # Conversion window (time within which X% of purchasers convert)
    purchasers = events_df[events_df['event'] == 'purchase']['user_id'].unique()
    converter_times = total_times[total_times.index.isin(purchasers)]

    metrics['conversion_windows'] = {
        '50%': converter_times.quantile(0.5),
        '75%': converter_times.quantile(0.75),
        '90%': converter_times.quantile(0.90),
    }

    return metrics
30
31velocity = calculate_velocity(events_df, funnel_steps)
32
33print("Funnel Velocity Metrics:")
34print("\nMedian Time Between Steps (seconds):")
35for step, time in velocity['median_step_times'].items():
36 print(f" {step}: {time:.1f}s")
37
38print(f"\nTotal Funnel Time:")
39for metric, value in velocity['total_funnel_time'].items():
40 print(f" {metric}: {value:.1f}s")
41
42print(f"\nConversion Windows:")
43for pct, time in velocity['conversion_windows'].items():
44 print(f" {pct} of converters complete in: {time:.1f}s")

6.2 Drop-off Analysis

Python
def analyze_dropoff(events_df, funnel_steps):
    """Break down, per funnel step, the users whose journey ended there.

    A user "dropped at" a step when it is the furthest step (by position in
    ``funnel_steps``) for which they have an event. The step ranks are
    derived from ``funnel_steps`` here, so ``events_df`` does not need a
    precomputed ``step_order`` column.

    Args:
        events_df: Event log with ``user_id``, ``event``, ``device``,
            ``channel`` and ``is_new`` columns.
        funnel_steps: Event names in funnel order.

    Returns:
        Dict keyed by step name (the final step is excluded, and steps where
        nobody dropped are omitted). Each value holds ``count`` plus the
        value counts ``by_device``, ``by_channel`` and ``new_vs_returning``.
    """
    step_rank = {step: i for i, step in enumerate(funnel_steps)}

    # Furthest step reached by each user.
    user_max_step = (
        events_df['event'].map(step_rank)
        .groupby(events_df['user_id'])
        .max()
        .rename('step_order')
        .reset_index()
    )
    # Attach the user-level attributes carried on the event rows.
    user_max_step = user_max_step.merge(
        events_df[['user_id', 'device', 'channel', 'is_new']].drop_duplicates(),
        on='user_id',
    )

    results = {}

    for i, step in enumerate(funnel_steps[:-1]):
        # Users who dropped at this step
        dropped = user_max_step[user_max_step['step_order'] == i]
        if dropped.empty:
            continue

        results[step] = {
            'count': len(dropped),
            'by_device': dropped['device'].value_counts().to_dict(),
            'by_channel': dropped['channel'].value_counts().to_dict(),
            'new_vs_returning': dropped['is_new'].value_counts().to_dict(),
        }

    return results
24
25dropoff = analyze_dropoff(events_df, funnel_steps)
26
27print("Drop-off Analysis:")
28for step, details in dropoff.items():
29 print(f"\n{step.upper()} ({details['count']} users dropped)")
30 print(f" By Device: {details['by_device']}")
31 print(f" By Channel: {details['by_channel']}")

6.3 Funnel Health Score

Python
def funnel_health_score(funnel_df, benchmarks=None):
    """Score each funnel step against a benchmark and print a report.

    A step's score is its overall conversion as a percentage of the
    benchmark, capped at 100. Steps without a benchmark are skipped.

    Args:
        funnel_df: DataFrame with ``step`` and ``overall_conversion``
            columns.
        benchmarks: Optional dict mapping step name to the benchmark overall
            conversion in percent. Defaults to built-in e-commerce values.

    Returns:
        Tuple ``(health_df, overall_health)``. ``health_df`` has the columns
        ``step``, ``actual``, ``benchmark``, ``score`` and ``status``;
        ``overall_health`` is the mean score, or NaN when no step in
        ``funnel_df`` has a benchmark.
    """
    if benchmarks is None:
        # Default benchmarks
        benchmarks = {
            'view_product': 75,
            'add_to_cart': 35,
            'checkout': 20,
            'purchase': 15,
        }

    scores = []
    for _, row in funnel_df.iterrows():
        if row['step'] not in benchmarks:
            continue

        benchmark = benchmarks[row['step']]
        actual = row['overall_conversion']
        score = min(100, (actual / benchmark) * 100)

        # Met the benchmark / within 80% of it / further below.
        if actual >= benchmark:
            status = '✅'
        elif actual >= benchmark * 0.8:
            status = '⚠️'
        else:
            status = '❌'

        scores.append({
            'step': row['step'],
            'actual': actual,
            'benchmark': benchmark,
            'score': round(score, 1),
            'status': status,
        })

    # Fix the column set so an empty result still has a 'score' column.
    health_df = pd.DataFrame(
        scores, columns=['step', 'actual', 'benchmark', 'score', 'status']
    )
    overall_health = health_df['score'].mean() if scores else float('nan')

    print("Funnel Health Report:")
    print(health_df.to_string(index=False))
    print(f"\nOverall Health Score: {overall_health:.1f}/100")

    return health_df, overall_health
34
35health_df, overall_score = funnel_health_score(funnel_df)

7. Funnel Analysis SQL

7.1 Basic Funnel Query

SQL
-- Count users at each funnel step and the conversion rate between steps.
-- A user counts toward a step if they have at least one such event.
WITH funnel_stages AS (
    SELECT
        user_id,
        MAX(CASE WHEN event = 'landing' THEN 1 ELSE 0 END) AS landing,
        MAX(CASE WHEN event = 'view_product' THEN 1 ELSE 0 END) AS view_product,
        MAX(CASE WHEN event = 'add_to_cart' THEN 1 ELSE 0 END) AS add_to_cart,
        MAX(CASE WHEN event = 'checkout' THEN 1 ELSE 0 END) AS checkout,
        MAX(CASE WHEN event = 'purchase' THEN 1 ELSE 0 END) AS purchase
    FROM events
    GROUP BY user_id
)
SELECT
    SUM(landing) AS landing_users,
    SUM(view_product) AS view_product_users,
    SUM(add_to_cart) AS add_to_cart_users,
    SUM(checkout) AS checkout_users,
    SUM(purchase) AS purchase_users,
    -- NULLIF turns a zero denominator into NULL, so a step with no users
    -- yields a NULL rate instead of a division-by-zero error.
    ROUND(SUM(view_product) * 100.0 / NULLIF(SUM(landing), 0), 2) AS landing_to_view_rate,
    ROUND(SUM(add_to_cart) * 100.0 / NULLIF(SUM(view_product), 0), 2) AS view_to_cart_rate,
    ROUND(SUM(checkout) * 100.0 / NULLIF(SUM(add_to_cart), 0), 2) AS cart_to_checkout_rate,
    ROUND(SUM(purchase) * 100.0 / NULLIF(SUM(checkout), 0), 2) AS checkout_to_purchase_rate,
    ROUND(SUM(purchase) * 100.0 / NULLIF(SUM(landing), 0), 2) AS overall_conversion_rate
FROM funnel_stages;

7.2 Sequential Funnel

SQL
-- Strict sequential funnel (users must complete steps in order).
-- Each user's events are joined into one time-ordered path string; a user
-- reaches step N only if the path contains steps 1..N in that order.
WITH user_paths AS (
    SELECT
        user_id,
        STRING_AGG(event, '->' ORDER BY timestamp) AS path
    FROM events
    GROUP BY user_id
)
SELECT
    SUM(CASE WHEN path LIKE '%landing%'
             THEN 1 ELSE 0 END) AS step_1_landing,
    SUM(CASE WHEN path LIKE '%landing%->%view_product%'
             THEN 1 ELSE 0 END) AS step_2_view,
    SUM(CASE WHEN path LIKE '%landing%->%view_product%->%add_to_cart%'
             THEN 1 ELSE 0 END) AS step_3_cart,
    SUM(CASE WHEN path LIKE '%landing%->%view_product%->%add_to_cart%->%checkout%'
             THEN 1 ELSE 0 END) AS step_4_checkout,
    SUM(CASE WHEN path LIKE '%landing%->%view_product%->%add_to_cart%->%checkout%->%purchase%'
             THEN 1 ELSE 0 END) AS step_5_purchase
FROM user_paths;

7.3 Funnel by Segment

SQL
-- Funnel by device type: landing-to-purchase conversion per device.
WITH funnel_by_device AS (
    SELECT
        u.device,
        e.user_id,
        MAX(CASE WHEN e.event = 'landing' THEN 1 ELSE 0 END) AS landing,
        MAX(CASE WHEN e.event = 'purchase' THEN 1 ELSE 0 END) AS purchase
    FROM events e
    JOIN users u ON e.user_id = u.user_id
    GROUP BY u.device, e.user_id
)
SELECT
    device,
    COUNT(DISTINCT user_id) AS total_users,
    SUM(landing) AS landing_users,
    SUM(purchase) AS purchasers,
    -- NULLIF: a device with no landing events gets a NULL rate instead of
    -- raising a division-by-zero error.
    ROUND(SUM(purchase) * 100.0 / NULLIF(SUM(landing), 0), 2) AS conversion_rate
FROM funnel_by_device
GROUP BY device
-- Keep devices without a computable rate at the bottom.
ORDER BY conversion_rate DESC NULLS LAST;

8. Thực hành

Funnel Analysis Project

Exercise: Complete Funnel Analysis

Python
1# Build comprehensive funnel analysis:
2# 1. Calculate basic funnel metrics
3# 2. Segment by device and channel
4# 3. Analyze drop-off points
5# 4. Calculate time metrics
6# 5. Generate recommendations
7
8# YOUR CODE HERE
💡 Xem đáp án
Python
1import pandas as pd
2import numpy as np
3import matplotlib.pyplot as plt
4
class FunnelAnalyzer:
    """Funnel metrics, drop-off, timing, plots and a text report for an event log.

    Args:
        events_df: Event log with ``user_id``, ``event`` and ``timestamp``
            (datetime) columns. It is copied, never modified.
        funnel_steps: Event names in funnel order.
        user_segments: Optional DataFrame keyed by ``user_id`` whose other
            columns (e.g. device, channel) are left-joined onto the events.
    """

    def __init__(self, events_df, funnel_steps, user_segments=None):
        self.events = events_df.copy()
        self.steps = funnel_steps
        self.step_order = {step: i for i, step in enumerate(funnel_steps)}
        # Numeric rank of each event's step; NaN for events outside the funnel.
        self.events['step_order'] = self.events['event'].map(self.step_order)

        if user_segments is not None:
            self.events = self.events.merge(user_segments, on='user_id', how='left')

    @staticmethod
    def _pct(numerator, denominator):
        """Return numerator/denominator in percent (2 decimals); 0 if the denominator is 0."""
        return round(numerator / denominator * 100, 2) if denominator else 0

    @staticmethod
    def _users_at_step(events, step):
        """Return the number of distinct users with at least one ``step`` event."""
        return int(events.loc[events['event'] == step, 'user_id'].nunique())

    def basic_funnel(self):
        """Calculate basic funnel metrics.

        Returns:
            DataFrame with one row per step: ``step``, ``step_num``
            (1-based), ``users``, ``overall_rate`` (% of all users),
            ``step_rate`` (% of the previous step; of all users for the
            first step) and ``dropoff`` (% lost since the previous step; 0
            for the first step). Rates with a zero denominator are 0.
        """
        total_users = self.events['user_id'].nunique()

        results = []
        prev_count = total_users
        for i, step in enumerate(self.steps):
            count = self._users_at_step(self.events, step)
            has_prev = prev_count > 0 and i > 0

            results.append({
                'step': step,
                'step_num': i + 1,
                'users': count,
                'overall_rate': self._pct(count, total_users),
                'step_rate': self._pct(count, prev_count),
                'dropoff': round((1 - count / prev_count) * 100, 2) if has_prev else 0,
            })
            prev_count = count

        return pd.DataFrame(results)

    def segment_funnel(self, segment_col):
        """Calculate the funnel separately for each value of ``segment_col``.

        Returns:
            DataFrame with ``segment``, ``step``, ``users`` and ``rate``
            (% of the segment's users) columns. Missing segment values are
            ignored.
        """
        results = []
        for segment in self.events[segment_col].dropna().unique():
            segment_data = self.events[self.events[segment_col] == segment]
            segment_users = segment_data['user_id'].nunique()

            for step in self.steps:
                count = self._users_at_step(segment_data, step)
                results.append({
                    'segment': segment,
                    'step': step,
                    'users': count,
                    'rate': self._pct(count, segment_users),
                })

        return pd.DataFrame(results)

    def dropoff_analysis(self):
        """Analyze where users drop off.

        A user drops at a step when it is the furthest step they reached.

        Returns:
            DataFrame with ``dropped_at``, ``count`` and ``pct`` (% of all
            users) for every step except the last.
        """
        user_max = self.events.groupby('user_id')['step_order'].max()

        dropoff = []
        for i, step in enumerate(self.steps[:-1]):
            dropped = int((user_max == i).sum())
            dropoff.append({
                'dropped_at': step,
                'count': dropped,
                'pct': self._pct(dropped, len(user_max)),
            })

        return pd.DataFrame(dropoff)

    def time_analysis(self):
        """Analyze time between consecutive steps.

        Uses each user's earliest timestamp per event, so the result does
        not depend on the row order of the event log.

        Returns:
            Dict mapping ``'<step>→<next_step>'`` to a dict with the
            ``median``, ``p75`` and ``p90`` duration in seconds. Pairs where
            either step never occurs are omitted.
        """
        first_seen = (
            self.events.groupby(['user_id', 'event'])['timestamp']
            .min()
            .unstack('event')
        )

        times = {}
        for s1, s2 in zip(self.steps, self.steps[1:]):
            if s1 not in first_seen.columns or s2 not in first_seen.columns:
                continue
            # Users missing either step yield NaN, which the statistics skip.
            diff = (first_seen[s2] - first_seen[s1]).dt.total_seconds()
            times[f'{s1}→{s2}'] = {
                'median': round(diff.median(), 1),
                'p75': round(diff.quantile(0.75), 1),
                'p90': round(diff.quantile(0.90), 1),
            }

        return times

    def plot_funnel(self):
        """Visualize user counts, overall conversion and drop-off distribution.

        Writes the figure to 'funnel_complete.png' and shows it.
        """
        funnel = self.basic_funnel()

        fig, axes = plt.subplots(1, 3, figsize=(15, 5))

        # Bar chart (steps reversed so the first step ends up on top)
        bars = axes[0].barh(funnel['step'][::-1], funnel['users'][::-1], color='steelblue')
        axes[0].set_xlabel('Users')
        axes[0].set_title('Funnel Users')
        for bar, val in zip(bars, funnel['users'][::-1]):
            axes[0].text(bar.get_width() + 50, bar.get_y() + bar.get_height() / 2,
                         f'{val:,}', va='center')

        # Conversion rates
        x = range(len(funnel))
        axes[1].plot(x, funnel['overall_rate'], 'o-', label='Overall', linewidth=2)
        axes[1].set_xticks(x)
        axes[1].set_xticklabels(funnel['step'], rotation=45, ha='right')
        axes[1].set_ylabel('Conversion Rate (%)')
        axes[1].set_title('Conversion Rates')
        axes[1].set_ylim(0, 105)
        for i, rate in enumerate(funnel['overall_rate']):
            axes[1].text(i, rate + 3, f'{rate}%', ha='center', fontsize=9)

        # Drop-off
        dropoff = self.dropoff_analysis()
        axes[2].bar(dropoff['dropped_at'], dropoff['pct'], color='coral')
        axes[2].set_ylabel('% of Total Users')
        axes[2].set_title('Drop-off Distribution')
        axes[2].tick_params(axis='x', rotation=45)

        plt.tight_layout()
        plt.savefig('funnel_complete.png', dpi=150)
        plt.show()

    def generate_report(self):
        """Print a text report: overview, biggest drop-off, timings, recommendations."""
        funnel = self.basic_funnel()
        dropoff = self.dropoff_analysis()
        times = self.time_analysis()

        print("=" * 60)
        print("FUNNEL ANALYSIS REPORT")
        print("=" * 60)

        print("\n📊 OVERVIEW")
        print(f"Total Users: {funnel.iloc[0]['users']:,}")
        print(f"Final Conversions: {funnel.iloc[-1]['users']:,}")
        print(f"Overall Conversion Rate: {funnel.iloc[-1]['overall_rate']}%")

        print("\n📉 DROP-OFF ANALYSIS")
        # A single-step funnel has no drop-off rows, and idxmax() would raise.
        if not dropoff.empty:
            biggest_drop = dropoff.loc[dropoff['count'].idxmax()]
            print(f"Biggest Drop: {biggest_drop['dropped_at']} ({biggest_drop['pct']}% of users)")

        print("\n⏱️ TIME METRICS (seconds)")
        for step, metrics in times.items():
            print(f" {step}: median={metrics['median']}s, p75={metrics['p75']}s")

        print("\n💡 RECOMMENDATIONS")
        # Flag every step after the first that loses more than 30% of the
        # users who reached the previous step.
        for _, row in funnel.iterrows():
            if row['step_num'] > 1 and row['dropoff'] > 30:
                print(f" ⚠️ High drop-off at {row['step']} ({row['dropoff']}%)")
                print(" → Consider UX improvements or reducing friction")

        print("\n" + "=" * 60)
160
161
# Generate test data
np.random.seed(42)
n_users = 10000
events = []
base_date = pd.Timestamp('2024-01-01')

funnel_steps = ['landing', 'view_product', 'add_to_cart', 'checkout', 'purchase']
# Probability of reaching each step given the previous one was reached.
probs = [1.0, 0.70, 0.55, 0.60, 0.70]

for uid in range(1, n_users + 1):
    # Each user starts on a random day within a 90-day window.
    t = base_date + pd.Timedelta(days=np.random.randint(0, 90))

    for i, step in enumerate(funnel_steps):
        if i == 0 or np.random.random() < probs[i]:
            events.append({
                'user_id': uid,
                'event': step,
                'timestamp': t
            })
            t += pd.Timedelta(seconds=np.random.randint(10, 300))
        else:
            # The user leaves the funnel at the first step they fail to reach.
            break

events_df = pd.DataFrame(events)

# Add segments
segments = pd.DataFrame({
    'user_id': range(1, n_users + 1),
    'device': np.random.choice(['Mobile', 'Desktop', 'Tablet'], n_users, p=[0.55, 0.35, 0.10]),
    'channel': np.random.choice(['Organic', 'Paid', 'Social'], n_users, p=[0.4, 0.35, 0.25])
})

# Run analysis
analyzer = FunnelAnalyzer(events_df, funnel_steps, segments)

# Generate visualizations
analyzer.plot_funnel()

# Generate report
analyzer.generate_report()

# Segment analysis
print("\n📱 FUNNEL BY DEVICE:")
device_funnel = analyzer.segment_funnel('device')
# pivot() sorts the index alphabetically; reindex restores funnel order.
print(device_funnel.pivot(index='step', columns='segment', values='rate').reindex(funnel_steps))

9. Tổng kết

| Topic | Key Concepts |
|---|---|
| Funnel Basics | Steps, conversion rates, drop-off |
| Segmentation | Device, channel, user type analysis |
| Time Analysis | Step duration, velocity, conversion windows |
| Advanced | Health scores, optimization opportunities |

Bài tiếp theo: Predictive Analytics Basics