Funnel Analysis
1. Introduction
Funnel Analysis là gì?
Funnel Analysis theo dõi users qua các stages của một process (signup, purchase, etc.) để identify drop-off points và optimize conversion rates. Đây là foundation của growth analytics và product optimization.
1.1 Common Funnel Types
Text
┌─────────────────────────────────────────────────────────┐
│                 Marketing Funnel (AIDA)                 │
├─────────────────────────────────────────────────────────┤
│                                                         │
│  ████████████████████████████████  Awareness (100%)     │
│       ██████████████████████       Interest (65%)       │
│          ████████████████          Desire (45%)         │
│             ██████████             Action (25%)         │
│                                                         │
├─────────────────────────────────────────────────────────┤
│                    E-commerce Funnel                    │
├─────────────────────────────────────────────────────────┤
│                                                         │
│  ████████████████████████████████  Visit (100%)         │
│     ██████████████████████████     View Product (75%)   │
│        ████████████████████        Add to Cart (50%)    │
│           ██████████████           Checkout (30%)       │
│              ████████              Purchase (18%)       │
│                                                         │
├─────────────────────────────────────────────────────────┤
│                   SaaS Signup Funnel                    │
├─────────────────────────────────────────────────────────┤
│                                                         │
│  ████████████████████████████████  Landing (100%)       │
│     ██████████████████████████     Signup Start (60%)   │
│        ████████████████████        Email Verify (40%)   │
│           ██████████████           Complete Profile     │
│              ████████              First Action (15%)   │
│                                                         │
└─────────────────────────────────────────────────────────┘
2. Data Preparation
2.1 Event-based Data
Python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Generate sample event data
np.random.seed(42)
n_users = 10000

users = list(range(1, n_users + 1))
events = []

base_date = datetime(2024, 1, 1)

# Stages after the landing event, in funnel order:
# (event name, probability of continuing from the previous stage,
#  lower/upper bound in seconds of the delay drawn before the event).
later_stages = [
    ('view_product', 0.70, 10, 120),
    ('add_to_cart', 0.55, 30, 300),
    ('checkout', 0.60, 60, 600),
    ('purchase', 0.70, 30, 180),
]

for user_id in users:
    # Each user's session starts on a random day within the 90 days after base_date
    session_time = base_date + timedelta(days=np.random.randint(0, 90))

    # Stage 1: every user records a landing event
    events.append({
        'user_id': user_id,
        'event': 'landing',
        'timestamp': session_time
    })

    for stage_name, pass_prob, min_delay, max_delay in later_stages:
        # A failed draw ends this user's journey; no later stage is attempted
        if np.random.random() >= pass_prob:
            break
        session_time += timedelta(seconds=np.random.randint(min_delay, max_delay))
        events.append({
            'user_id': user_id,
            'event': stage_name,
            'timestamp': session_time
        })

events_df = pd.DataFrame(events)
print(f"Total events: {len(events_df)}")
print(events_df['event'].value_counts())

# Next section: 2.2 Define Funnel Steps
Python
# Define funnel sequence
funnel_steps = ['landing', 'view_product', 'add_to_cart', 'checkout', 'purchase']

# Map each step name to its 0-based position in the funnel
step_order = dict(zip(funnel_steps, range(len(funnel_steps))))
events_df['step_order'] = events_df['event'].map(step_order)

# One row per user: furthest step reached plus first/last event timestamps
user_progress = (
    events_df.groupby('user_id')
    .agg(
        max_step=('step_order', 'max'),
        first_event=('timestamp', 'min'),
        last_event=('timestamp', 'max'),
    )
    .reset_index()
)

# Seconds elapsed between a user's first and last recorded event
journey_span = user_progress['last_event'] - user_progress['first_event']
user_progress['journey_time'] = journey_span.dt.total_seconds()

print(user_progress.head())

# Next section: 3. Basic Funnel Analysis
# 3.1 Calculate Conversion Rates
3.1 Calculate Conversion Rates
Python
def calculate_funnel(events_df, funnel_steps):
    """Calculate per-step funnel metrics.

    Args:
        events_df: DataFrame with at least ``user_id`` and ``event`` columns.
        funnel_steps: Ordered list of event names that make up the funnel.

    Returns:
        DataFrame with one row per step and the columns ``step``,
        ``step_number`` (1-based), ``count`` (distinct users with that
        event), ``overall_conversion`` (% of all distinct users),
        ``step_conversion`` (% of the previous step's users) and
        ``drop_off`` (100 - step_conversion). The first step always has
        ``step_conversion`` 100 and ``drop_off`` 0.

    When there are no users at all, or nobody reached the previous step,
    the affected rates are reported as 0 instead of raising
    ``ZeroDivisionError``.
    """
    total_users = events_df['user_id'].nunique()
    # Distinct users per event name, computed once instead of scanning
    # every user's event set again for each step.
    users_per_event = events_df.groupby('event')['user_id'].nunique()

    results = []
    prev_count = 0
    for i, step in enumerate(funnel_steps):
        count = int(users_per_event.get(step, 0))
        overall_rate = count / total_users * 100 if total_users else 0.0

        if i == 0:
            step_rate, drop_off = 100, 0
        elif prev_count:
            step_rate = count / prev_count * 100
            drop_off = round(100 - step_rate, 2)
        else:
            # Nobody reached the previous step, so there is nothing to
            # convert from and nothing to drop.
            step_rate, drop_off = 0.0, 0.0

        results.append({
            'step': step,
            'step_number': i + 1,
            'count': count,
            'overall_conversion': round(overall_rate, 2),
            'step_conversion': round(step_rate, 2),
            'drop_off': drop_off
        })
        prev_count = count

    return pd.DataFrame(results)


funnel_df = calculate_funnel(events_df, funnel_steps)
print("Funnel Analysis:")
print(funnel_df)

# Next section: 3.2 Visualize Funnel
Python
def plot_funnel(funnel_df):
    """Plot user counts and conversion rates for a funnel side by side.

    Reads the ``step``, ``count``, ``overall_conversion`` and
    ``step_conversion`` columns of ``funnel_df``, saves the figure to
    ``funnel_analysis.png`` and shows it.
    """
    fig, (count_ax, rate_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # Left panel: horizontal bars, rows reversed before plotting
    step_labels = funnel_df['step'][::-1]
    step_counts = funnel_df['count'][::-1]
    bars = count_ax.barh(step_labels, step_counts)
    count_ax.set_xlabel('Users')
    count_ax.set_title('Funnel: User Counts')

    # Write each count just past the end of its bar
    for bar, count in zip(bars, step_counts):
        label_x = bar.get_width() + 100
        label_y = bar.get_y() + bar.get_height()/2
        count_ax.text(label_x, label_y, f'{count:,}', va='center')

    # Right panel: overall conversion as a line over step conversion bars
    positions = range(len(funnel_df))
    rate_ax.plot(positions, funnel_df['overall_conversion'],
                 marker='o', linewidth=2, label='Overall')
    rate_ax.bar(positions, funnel_df['step_conversion'], alpha=0.3, label='Step')
    rate_ax.set_xticks(positions)
    rate_ax.set_xticklabels(funnel_df['step'], rotation=45, ha='right')
    rate_ax.set_ylabel('Conversion Rate (%)')
    rate_ax.set_title('Conversion Rates')
    rate_ax.legend()
    rate_ax.set_ylim(0, 105)

    # Label every point of the overall-conversion line with its value
    for pos, overall in enumerate(funnel_df['overall_conversion']):
        rate_ax.text(pos, overall + 2, f'{overall}%', ha='center', fontsize=9)

    plt.tight_layout()
    plt.savefig('funnel_analysis.png', dpi=150)
    plt.show()


plot_funnel(funnel_df)

# Next section: 3.3 Funnel Chart (Pyramid)
Python
def plot_funnel_pyramid(funnel_df):
    """Draw the funnel as stacked, horizontally centred bars.

    Each step becomes one rectangle whose width is its ``count`` relative
    to the largest count. The figure is saved to ``funnel_pyramid.png``
    and shown.
    """
    fig, ax = plt.subplots(figsize=(10, 8))

    n_steps = len(funnel_df)
    widest_count = funnel_df['count'].max()
    colors = plt.cm.Blues(np.linspace(0.3, 0.9, n_steps))

    for i, (_, row) in enumerate(funnel_df.iterrows()):
        # Width as a fraction of the widest step, centred on x = 0.5
        width = row['count'] / widest_count
        left = (1 - width) / 2

        # The first row of funnel_df is drawn in the top slot
        ax.add_patch(plt.Rectangle(
            (left, n_steps - i - 1),
            width, 0.8,
            facecolor=colors[i],
            edgecolor='white',
            linewidth=2
        ))

        # Step name, user count and overall conversion inside the bar
        ax.text(0.5, n_steps - i - 0.6,
                f"{row['step']}\n{row['count']:,} ({row['overall_conversion']}%)",
                ha='center', va='center', fontsize=10, fontweight='bold')

        # Every step after the first also gets its drop-off annotated
        if i > 0:
            ax.annotate(
                f"↓ {row['drop_off']}% drop",
                xy=(0.85, n_steps - i + 0.1),
                fontsize=9, color='red'
            )

    ax.set_xlim(0, 1)
    ax.set_ylim(0, n_steps)
    ax.axis('off')
    ax.set_title('Conversion Funnel', fontsize=14, fontweight='bold')

    plt.tight_layout()
    plt.savefig('funnel_pyramid.png', dpi=150)
    plt.show()


plot_funnel_pyramid(funnel_df)

# Next section: 4. Segmented Funnel Analysis
# 4.1 Add Segments
4.1 Add Segments
Python
# Add user segments
np.random.seed(42)

# Draw the attributes in this fixed order (device, channel, is_new) so the
# seeded random stream is consumed the same way on every run.
device_values = np.random.choice(
    ['Mobile', 'Desktop', 'Tablet'], n_users, p=[0.55, 0.35, 0.10])
channel_values = np.random.choice(
    ['Organic', 'Paid', 'Social', 'Email'], n_users, p=[0.35, 0.30, 0.20, 0.15])
is_new_values = np.random.choice([True, False], n_users, p=[0.7, 0.3])

user_segments = pd.DataFrame({
    'user_id': range(1, n_users + 1),
    'device': device_values,
    'channel': channel_values,
    'is_new': is_new_values
})

# Attach the segment attributes to every event row of the matching user
events_df = events_df.merge(user_segments, on='user_id')
print(events_df.head())

# Next section: 4.2 Funnel by Segment
Python
def funnel_by_segment(events_df, funnel_steps, segment_col):
    """Calculate the funnel separately for every value of ``segment_col``.

    Args:
        events_df: Event DataFrame that contains ``segment_col``.
        funnel_steps: Ordered list of funnel event names.
        segment_col: Name of the column to split the events by.

    Returns:
        The per-segment ``calculate_funnel`` results stacked into one
        DataFrame, with an added ``segment`` column.
    """
    # Missing segment values are skipped: comparing against NaN selects no
    # rows, which would only produce an empty funnel.
    segment_values = events_df[segment_col].dropna().unique()
    frames = [
        calculate_funnel(
            events_df[events_df[segment_col] == segment], funnel_steps
        ).assign(segment=segment)
        for segment in segment_values
    ]
    return pd.concat(frames, ignore_index=True)


# pivot() sorts its index, which would list the steps alphabetically;
# reindex restores funnel order in the printed tables.

# By device
device_funnel = funnel_by_segment(events_df, funnel_steps, 'device')
print("\nFunnel by Device:")
device_pivot = device_funnel.pivot(
    index='step', columns='segment', values='overall_conversion'
).reindex(funnel_steps)
print(device_pivot)

# By channel
channel_funnel = funnel_by_segment(events_df, funnel_steps, 'channel')
print("\nFunnel by Channel:")
channel_pivot = channel_funnel.pivot(
    index='step', columns='segment', values='overall_conversion'
).reindex(funnel_steps)
print(channel_pivot)

# Next section: 4.3 Visualize Segment Comparison
Python
def plot_segment_comparison(funnel_df, segment_col):
    """Compare overall conversion per step across segments as grouped bars.

    Args:
        funnel_df: Stacked per-segment funnel with ``segment``, ``step``
            and ``overall_conversion`` columns.
        segment_col: Label used in the chart title, the legend title and
            the output file name (``funnel_by_<segment_col>.png``).
    """
    segments = funnel_df['segment'].unique()
    n_segments = len(segments)

    # Take the step order from the data itself rather than from a global,
    # so the function works for any funnel definition.
    steps = funnel_df['step'].drop_duplicates().tolist()

    fig, ax = plt.subplots(figsize=(12, 6))

    x = np.arange(len(steps))
    width = 0.8 / n_segments

    for i, segment in enumerate(segments):
        # Align this segment's rates to the shared step order explicitly
        # instead of relying on the row order of funnel_df.
        rates = (
            funnel_df.loc[funnel_df['segment'] == segment]
            .set_index('step')['overall_conversion']
            .reindex(steps)
        )
        # Spread the segments' bars symmetrically around each tick
        offset = (i - n_segments/2 + 0.5) * width
        ax.bar(x + offset, rates.to_numpy(), width, label=segment, alpha=0.8)

    ax.set_ylabel('Conversion Rate (%)')
    ax.set_title(f'Funnel Comparison by {segment_col}')
    ax.set_xticks(x)
    ax.set_xticklabels(steps, rotation=45, ha='right')
    ax.legend(title=segment_col)
    ax.set_ylim(0, 105)

    plt.tight_layout()
    plt.savefig(f'funnel_by_{segment_col}.png', dpi=150)
    plt.show()


plot_segment_comparison(device_funnel, 'Device')
plot_segment_comparison(channel_funnel, 'Channel')

# Next section: 5. Time-based Analysis
# 5.1 Time Between Steps
5.1 Time Between Steps
Python
def calculate_step_times(events_df, funnel_steps):
    """Calculate the time users take between consecutive funnel steps.

    Args:
        events_df: DataFrame with ``user_id``, ``event`` and ``timestamp``
            columns.
        funnel_steps: Ordered list of funnel event names.

    Returns:
        Dict mapping ``'<step>_to_<next step>'`` to a Series of durations
        in seconds, one value per user who has both events. Pairs whose
        events never occur in the data are omitted.
    """
    # One row per user, one column per event. 'min' picks the earliest
    # occurrence whatever the row order, whereas 'first' would depend on
    # how events_df happens to be sorted.
    user_events = events_df.pivot_table(
        index='user_id',
        columns='event',
        values='timestamp',
        aggfunc='min'
    )

    time_diffs = {}
    for step1, step2 in zip(funnel_steps, funnel_steps[1:]):
        if step1 in user_events.columns and step2 in user_events.columns:
            diff = (user_events[step2] - user_events[step1]).dt.total_seconds()
            # Users who never reached step2 have no duration; drop them
            time_diffs[f'{step1}_to_{step2}'] = diff.dropna()

    return time_diffs


time_diffs = calculate_step_times(events_df, funnel_steps)

# Summary statistics
print("Time Between Steps (seconds):")
for transition, times in time_diffs.items():
    print(f"\n{transition}:")
    print(f" Median: {times.median():.1f}s")
    print(f" Mean: {times.mean():.1f}s")
    print(f" 95th percentile: {times.quantile(0.95):.1f}s")

# Next section: 5.2 Time-to-Convert Analysis
Python
def time_to_convert_analysis(events_df):
    """Analyze the time from a user's first event to their last event.

    Only users with a ``purchase`` event are included. Plots a histogram
    and a cumulative curve of the durations (in minutes) and shows the
    figure.

    Args:
        events_df: DataFrame with ``user_id``, ``event`` and ``timestamp``
            columns.

    Returns:
        DataFrame with one row per purchasing user and the columns
        ``user_id``, ``first_event``, ``last_event``, ``events`` and
        ``time_to_convert`` (seconds).
    """
    # First/last timestamp and the list of event names per user
    user_journey = events_df.groupby('user_id').agg({
        'timestamp': ['min', 'max'],
        'event': list
    }).reset_index()
    user_journey.columns = ['user_id', 'first_event', 'last_event', 'events']

    # .copy() makes the filtered frame independent, so adding a column
    # below does not write into a slice of user_journey.
    has_purchase = user_journey['events'].apply(lambda x: 'purchase' in x)
    purchasers = user_journey[has_purchase].copy()
    purchasers['time_to_convert'] = (
        purchasers['last_event'] - purchasers['first_event']
    ).dt.total_seconds()

    minutes = purchasers['time_to_convert'] / 60
    median_minutes = purchasers['time_to_convert'].median() / 60

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # Histogram
    axes[0].hist(minutes, bins=50, edgecolor='black')
    axes[0].set_xlabel('Time to Convert (minutes)')
    axes[0].set_ylabel('Users')
    axes[0].set_title('Time-to-Convert Distribution')
    axes[0].axvline(median_minutes, color='r',
                    linestyle='--', label=f"Median: {median_minutes:.1f}m")
    axes[0].legend()

    # Cumulative share of purchasers converted within a given time
    sorted_times = np.sort(purchasers['time_to_convert'].values) / 60
    cumulative = np.arange(1, len(sorted_times) + 1) / len(sorted_times) * 100
    axes[1].plot(sorted_times, cumulative)
    axes[1].set_xlabel('Time (minutes)')
    axes[1].set_ylabel('Cumulative % Converted')
    axes[1].set_title('Cumulative Conversion by Time')
    # Reference lines at 50% and 80% of purchasers
    axes[1].axhline(50, color='r', linestyle='--', alpha=0.5)
    axes[1].axhline(80, color='g', linestyle='--', alpha=0.5)

    plt.tight_layout()
    plt.show()

    return purchasers


purchasers_df = time_to_convert_analysis(events_df)

# Next section: 5.3 Funnel Over Time
Python
def funnel_trend(events_df, funnel_steps, freq='W'):
    """Calculate funnel metrics for each time period, in chronological order.

    Events are assigned to periods by their own ``timestamp``.

    Args:
        events_df: DataFrame with ``user_id``, ``event`` and ``timestamp``
            columns. It is not modified.
        funnel_steps: Ordered list of funnel event names.
        freq: pandas period frequency string (default ``'W'``, weekly).

    Returns:
        The per-period ``calculate_funnel`` results stacked into one
        DataFrame, with an added ``period`` column, sorted by period.
    """
    # Group on a separate Series instead of adding a 'period' column, so
    # the caller's DataFrame is left untouched.
    periods = events_df['timestamp'].dt.to_period(freq).rename('period')

    results = []
    # groupby sorts its keys, so periods come out oldest first. Iterating
    # in order of appearance would scramble the time axis of any plot
    # built from the result.
    for period, period_data in events_df.groupby(periods):
        funnel = calculate_funnel(period_data, funnel_steps)
        funnel['period'] = period
        results.append(funnel)

    trend_df = pd.concat(results, ignore_index=True)
    return trend_df


trend = funnel_trend(events_df, funnel_steps, 'W')

# Plot conversion rate trend
fig, ax = plt.subplots(figsize=(14, 6))

purchase_trend = trend[trend['step'] == 'purchase'].copy()
purchase_trend['period'] = purchase_trend['period'].astype(str)

ax.plot(purchase_trend['period'], purchase_trend['overall_conversion'], marker='o')
ax.set_xlabel('Week')
ax.set_ylabel('Overall Conversion Rate (%)')
ax.set_title('Purchase Conversion Rate Over Time')
ax.tick_params(axis='x', rotation=45)

# Add trend line: least-squares straight line over the period index
period_index = np.arange(len(purchase_trend))
z = np.polyfit(period_index, purchase_trend['overall_conversion'], 1)
p = np.poly1d(z)
ax.plot(purchase_trend['period'], p(period_index), 'r--', label='Trend')
ax.legend()

plt.tight_layout()
plt.show()

# Next section: 6. Advanced Funnel Metrics
# 6.1 Funnel Velocity
6.1 Funnel Velocity
Python
def calculate_velocity(events_df, funnel_steps):
    """Calculate funnel velocity metrics.

    Args:
        events_df: DataFrame with ``user_id``, ``event`` and ``timestamp``
            columns.
        funnel_steps: Ordered list of funnel event names.

    Returns:
        Dict with three entries, all durations in seconds:
        ``median_step_times`` (median per step transition),
        ``total_funnel_time`` (median, mean and 75th percentile of each
        user's first-to-last-event span) and ``conversion_windows``
        (50th/75th/90th percentile of that span for users with a
        ``purchase`` event).
    """
    metrics = {}

    # Median time per step
    time_diffs = calculate_step_times(events_df, funnel_steps)
    metrics['median_step_times'] = {k: v.median() for k, v in time_diffs.items()}

    # Total funnel time: span between each user's first and last event,
    # computed with vectorized group aggregates rather than a Python
    # lambda per user.
    user_timestamps = events_df.groupby('user_id')['timestamp']
    total_times = (user_timestamps.max() - user_timestamps.min()).dt.total_seconds()
    metrics['total_funnel_time'] = {
        'median': total_times.median(),
        'mean': total_times.mean(),
        'p75': total_times.quantile(0.75)
    }

    # Conversion window (time within which X% convert)
    purchasers = events_df.loc[events_df['event'] == 'purchase', 'user_id'].unique()
    converter_times = total_times[total_times.index.isin(purchasers)]

    metrics['conversion_windows'] = {
        '50%': converter_times.quantile(0.5),
        '75%': converter_times.quantile(0.75),
        '90%': converter_times.quantile(0.90)
    }

    return metrics


velocity = calculate_velocity(events_df, funnel_steps)

print("Funnel Velocity Metrics:")
print("\nMedian Time Between Steps (seconds):")
for step, seconds in velocity['median_step_times'].items():
    print(f" {step}: {seconds:.1f}s")

print(f"\nTotal Funnel Time:")
for metric, value in velocity['total_funnel_time'].items():
    print(f" {metric}: {value:.1f}s")

print(f"\nConversion Windows:")
for pct, seconds in velocity['conversion_windows'].items():
    print(f" {pct} of converters complete in: {seconds:.1f}s")

# Next section: 6.2 Drop-off Analysis
Python
def analyze_dropoff(events_df, funnel_steps):
    """Break down, per funnel step, the users whose journey ended there.

    A user "dropped at" a step when it is the furthest step of
    ``funnel_steps`` they reached. The final step is not reported.

    Args:
        events_df: DataFrame with ``user_id``, ``event``, ``device``,
            ``channel`` and ``is_new`` columns.
        funnel_steps: Ordered list of funnel event names.

    Returns:
        Dict keyed by step name (only steps with at least one dropped
        user), each value holding ``count`` and the value counts
        ``by_device``, ``by_channel`` and ``new_vs_returning``.
    """
    # Derive step positions from the funnel_steps argument rather than
    # requiring the caller to have added a 'step_order' column beforehand.
    step_index = {step: i for i, step in enumerate(funnel_steps)}
    reached = events_df['event'].map(step_index)
    user_max_step = (
        reached.groupby(events_df['user_id']).max()
        .rename('step_order')
        .reset_index()
    )
    user_max_step = user_max_step.merge(
        events_df[['user_id', 'device', 'channel', 'is_new']].drop_duplicates(),
        on='user_id'
    )

    results = {}

    for i, step in enumerate(funnel_steps[:-1]):
        # Users who dropped at this step
        dropped = user_max_step[user_max_step['step_order'] == i]

        if len(dropped) > 0:
            results[step] = {
                'count': len(dropped),
                'by_device': dropped['device'].value_counts().to_dict(),
                'by_channel': dropped['channel'].value_counts().to_dict(),
                'new_vs_returning': dropped['is_new'].value_counts().to_dict()
            }

    return results


dropoff = analyze_dropoff(events_df, funnel_steps)

print("Drop-off Analysis:")
for step, details in dropoff.items():
    print(f"\n{step.upper()} ({details['count']} users dropped)")
    print(f" By Device: {details['by_device']}")
    print(f" By Channel: {details['by_channel']}")

# Next section: 6.3 Funnel Health Score
Python
def funnel_health_score(funnel_df, benchmarks=None):
    """Score each funnel step against a benchmark conversion rate.

    Args:
        funnel_df: DataFrame with ``step`` and ``overall_conversion``
            columns.
        benchmarks: Optional dict mapping step name to its target overall
            conversion in percent. Steps without a benchmark are skipped.
            Defaults to the built-in targets below.

    Returns:
        Tuple ``(health_df, overall_health)``. ``health_df`` has the
        columns ``step``, ``actual``, ``benchmark``, ``score`` (0-100,
        capped at 100) and ``status``; ``overall_health`` is the mean
        score, or NaN when no step has a benchmark.

    The report is also printed.
    """
    if benchmarks is None:
        # Default benchmarks
        benchmarks = {
            'view_product': 75,
            'add_to_cart': 35,
            'checkout': 20,
            'purchase': 15
        }

    scores = []
    for step, actual in zip(funnel_df['step'], funnel_df['overall_conversion']):
        if step not in benchmarks:
            continue
        benchmark = benchmarks[step]
        # A target of zero (or less) is met by any conversion rate; score
        # it as 100 rather than dividing by zero.
        score = 100.0 if benchmark <= 0 else min(100, (actual / benchmark) * 100)
        if actual >= benchmark:
            status = '✅'
        elif actual >= benchmark * 0.8:
            # Within 20% of the target
            status = '⚠️'
        else:
            status = '❌'
        scores.append({
            'step': step,
            'actual': actual,
            'benchmark': benchmark,
            'score': round(score, 1),
            'status': status
        })

    # Explicit columns keep 'score' present even when no step matched
    health_df = pd.DataFrame(
        scores, columns=['step', 'actual', 'benchmark', 'score', 'status'])
    overall_health = health_df['score'].mean() if scores else float('nan')

    print("Funnel Health Report:")
    print(health_df.to_string(index=False))
    print(f"\nOverall Health Score: {overall_health:.1f}/100")

    return health_df, overall_health


health_df, overall_score = funnel_health_score(funnel_df)

# Next section: 7. Funnel Analysis SQL
# 7.1 Basic Funnel Query
7.1 Basic Funnel Query
SQL
-- Count users at each funnel step
-- funnel_stages: one row per user with a 0/1 flag per funnel event.
WITH funnel_stages AS (
    SELECT
        user_id,
        MAX(CASE WHEN event = 'landing' THEN 1 ELSE 0 END) AS landing,
        MAX(CASE WHEN event = 'view_product' THEN 1 ELSE 0 END) AS view_product,
        MAX(CASE WHEN event = 'add_to_cart' THEN 1 ELSE 0 END) AS add_to_cart,
        MAX(CASE WHEN event = 'checkout' THEN 1 ELSE 0 END) AS checkout,
        MAX(CASE WHEN event = 'purchase' THEN 1 ELSE 0 END) AS purchase
    FROM events
    GROUP BY user_id
)
-- Summing the flags gives users per step. NULLIF turns an empty
-- denominator into NULL, so a step nobody reached yields a NULL rate
-- instead of a division-by-zero error.
SELECT
    SUM(landing) AS landing_users,
    SUM(view_product) AS view_product_users,
    SUM(add_to_cart) AS add_to_cart_users,
    SUM(checkout) AS checkout_users,
    SUM(purchase) AS purchase_users,
    ROUND(SUM(view_product) * 100.0 / NULLIF(SUM(landing), 0), 2) AS landing_to_view_rate,
    ROUND(SUM(add_to_cart) * 100.0 / NULLIF(SUM(view_product), 0), 2) AS view_to_cart_rate,
    ROUND(SUM(checkout) * 100.0 / NULLIF(SUM(add_to_cart), 0), 2) AS cart_to_checkout_rate,
    ROUND(SUM(purchase) * 100.0 / NULLIF(SUM(checkout), 0), 2) AS checkout_to_purchase_rate,
    ROUND(SUM(purchase) * 100.0 / NULLIF(SUM(landing), 0), 2) AS overall_conversion_rate
FROM funnel_stages;

-- Next section: 7.2 Sequential Funnel
SQL
-- Strict sequential funnel (users must complete steps in order)
-- user_paths: one row per user, with that user's events joined into a
-- single '->'-separated string in timestamp order.
-- NOTE(review): ORDER BY inside STRING_AGG looks like PostgreSQL syntax;
-- confirm the target database.
WITH user_paths AS (
    SELECT
        user_id,
        STRING_AGG(event, '->' ORDER BY timestamp) AS path
    FROM events
    GROUP BY user_id
),
-- sequential_check: a step counts as reached only if its name appears in
-- the path after all earlier steps. The '%' between names lets other
-- events occur in between.
-- NOTE(review): '_' is a single-character wildcard in LIKE, so names such
-- as view_product match slightly more loosely than a literal comparison;
-- confirm this is acceptable for the event names in use.
sequential_check AS (
    SELECT
        user_id,
        path,
        CASE WHEN path LIKE '%landing%' THEN 1 ELSE 0 END AS reached_landing,
        CASE WHEN path LIKE '%landing%->%view_product%' THEN 1 ELSE 0 END AS reached_view,
        CASE WHEN path LIKE '%landing%->%view_product%->%add_to_cart%' THEN 1 ELSE 0 END AS reached_cart,
        CASE WHEN path LIKE '%landing%->%view_product%->%add_to_cart%->%checkout%' THEN 1 ELSE 0 END AS reached_checkout,
        CASE WHEN path LIKE '%landing%->%view_product%->%add_to_cart%->%checkout%->%purchase%' THEN 1 ELSE 0 END AS reached_purchase
    FROM user_paths
)
-- Users per step under the in-order requirement
SELECT
    SUM(reached_landing) AS step_1_landing,
    SUM(reached_view) AS step_2_view,
    SUM(reached_cart) AS step_3_cart,
    SUM(reached_checkout) AS step_4_checkout,
    SUM(reached_purchase) AS step_5_purchase
FROM sequential_check;

-- Next section: 7.3 Funnel by Segment
SQL
-- Funnel by device type
-- funnel_by_device: one row per (device, user) with 0/1 flags for the
-- landing and purchase events.
WITH funnel_by_device AS (
    SELECT
        u.device,
        e.user_id,
        MAX(CASE WHEN e.event = 'landing' THEN 1 ELSE 0 END) AS landing,
        MAX(CASE WHEN e.event = 'purchase' THEN 1 ELSE 0 END) AS purchase
    FROM events e
    JOIN users u ON e.user_id = u.user_id
    GROUP BY u.device, e.user_id
)
-- NULLIF makes the rate NULL instead of raising a division-by-zero error
-- for a device whose users have no landing event.
SELECT
    device,
    COUNT(DISTINCT user_id) AS total_users,
    SUM(landing) AS landing_users,
    SUM(purchase) AS purchasers,
    ROUND(SUM(purchase) * 100.0 / NULLIF(SUM(landing), 0), 2) AS conversion_rate
FROM funnel_by_device
GROUP BY device
ORDER BY conversion_rate DESC;

-- Next section: 8. Thực hành (Practice)
Funnel Analysis Project
Exercise: Complete Funnel Analysis
Python
# Build comprehensive funnel analysis:
# 1. Calculate basic funnel metrics
# 2. Segment by device and channel
# 3. Analyze drop-off points
# 4. Calculate time metrics
# 5. Generate recommendations

# YOUR CODE HERE

💡 Xem đáp án
Python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


class FunnelAnalyzer:
    """Funnel metrics, drop-off, timing, plots and a text report for event data.

    Args:
        events_df: DataFrame with ``user_id``, ``event`` and ``timestamp``
            columns. It is copied, so the caller's frame is not modified.
        funnel_steps: Ordered list of event names that make up the funnel.
        user_segments: Optional DataFrame keyed by ``user_id`` whose
            columns are left-joined onto the events.
    """

    def __init__(self, events_df, funnel_steps, user_segments=None):
        self.events = events_df.copy()
        self.steps = funnel_steps
        # 0-based position of every step, also stored on each event row
        self.step_order = {step: i for i, step in enumerate(funnel_steps)}
        self.events['step_order'] = self.events['event'].map(self.step_order)

        if user_segments is not None:
            self.events = self.events.merge(user_segments, on='user_id', how='left')

    @staticmethod
    def _users_per_step(events, steps):
        """Return (distinct users overall, list of distinct users per step)."""
        total_users = events['user_id'].nunique()
        per_event = events.groupby('event')['user_id'].nunique()
        return total_users, [int(per_event.get(step, 0)) for step in steps]

    def basic_funnel(self):
        """Calculate basic funnel metrics.

        Returns:
            DataFrame with one row per step: ``step``, ``step_num``
            (1-based), ``users``, ``overall_rate`` (% of all users),
            ``step_rate`` (% of the previous step's users; the first step
            is relative to all users) and ``dropoff`` (% lost from the
            previous step, 0 for the first step). Rates are 0 when their
            denominator is 0.
        """
        total_users, counts = self._users_per_step(self.events, self.steps)

        results = []
        prev_count = total_users

        for i, (step, count) in enumerate(zip(self.steps, counts)):
            results.append({
                'step': step,
                'step_num': i + 1,
                'users': count,
                # Guard the empty-data case instead of dividing by zero
                'overall_rate': round(count / total_users * 100, 2) if total_users else 0.0,
                'step_rate': round(count / prev_count * 100, 2) if prev_count > 0 else 0,
                'dropoff': round((1 - count / prev_count) * 100, 2) if prev_count > 0 and i > 0 else 0
            })
            prev_count = count

        return pd.DataFrame(results)

    def segment_funnel(self, segment_col):
        """Calculate the funnel separately for each value of ``segment_col``.

        Rows with a missing segment value are ignored.

        Returns:
            Long-format DataFrame with ``segment``, ``step``, ``users``
            and ``rate`` (% of the segment's users) columns.
        """
        results = []
        for segment in self.events[segment_col].dropna().unique():
            segment_data = self.events[self.events[segment_col] == segment]
            total_users, counts = self._users_per_step(segment_data, self.steps)

            for step, count in zip(self.steps, counts):
                results.append({
                    'segment': segment,
                    'step': step,
                    'users': count,
                    'rate': round(count / total_users * 100, 2) if total_users else 0.0
                })

        return pd.DataFrame(results)

    def dropoff_analysis(self):
        """Analyze where users drop off.

        A user drops at a step when it is the furthest funnel step they
        reached. The final step is not reported.

        Returns:
            DataFrame with ``dropped_at``, ``count`` and ``pct`` (% of all
            users) columns.
        """
        user_max = self.events.groupby('user_id')['step_order'].max()

        dropoff = []
        for step in self.steps[:-1]:
            # Look the position up in self.step_order so it agrees with the
            # step_order column; when a step name is repeated in the
            # funnel, the mapping keeps only its last position.
            dropped = (user_max == self.step_order[step]).sum()
            dropoff.append({
                'dropped_at': step,
                'count': dropped,
                'pct': round(dropped / len(user_max) * 100, 2)
            })

        return pd.DataFrame(dropoff)

    def time_analysis(self):
        """Analyze time between consecutive steps.

        Returns:
            Dict mapping ``'<step>→<next step>'`` to the ``median``,
            ``p75`` and ``p90`` duration in seconds. Pairs whose events
            never occur in the data are omitted.
        """
        # 'min' picks each user's earliest occurrence of an event whatever
        # the row order; 'first' would depend on how the events are sorted.
        pivot = self.events.pivot_table(
            index='user_id',
            columns='event',
            values='timestamp',
            aggfunc='min'
        )

        times = {}
        for s1, s2 in zip(self.steps, self.steps[1:]):
            if s1 in pivot.columns and s2 in pivot.columns:
                diff = (pivot[s2] - pivot[s1]).dt.total_seconds()
                times[f'{s1}→{s2}'] = {
                    'median': round(diff.median(), 1),
                    'p75': round(diff.quantile(0.75), 1),
                    'p90': round(diff.quantile(0.90), 1)
                }

        return times

    def plot_funnel(self):
        """Visualize user counts, overall conversion and drop-off distribution.

        Saves the figure to ``funnel_complete.png`` and shows it.
        """
        funnel = self.basic_funnel()

        fig, axes = plt.subplots(1, 3, figsize=(15, 5))

        # Bar chart of users per step, rows reversed before plotting
        users_reversed = funnel['users'][::-1]
        bars = axes[0].barh(funnel['step'][::-1], users_reversed, color='steelblue')
        axes[0].set_xlabel('Users')
        axes[0].set_title('Funnel Users')
        for bar, val in zip(bars, users_reversed):
            axes[0].text(bar.get_width() + 50, bar.get_y() + bar.get_height()/2,
                         f'{val:,}', va='center')

        # Conversion rates
        x = range(len(funnel))
        axes[1].plot(x, funnel['overall_rate'], 'o-', label='Overall', linewidth=2)
        axes[1].set_xticks(x)
        axes[1].set_xticklabels(funnel['step'], rotation=45, ha='right')
        axes[1].set_ylabel('Conversion Rate (%)')
        axes[1].set_title('Conversion Rates')
        axes[1].set_ylim(0, 105)
        for i, rate in enumerate(funnel['overall_rate']):
            axes[1].text(i, rate + 3, f'{rate}%', ha='center', fontsize=9)

        # Drop-off
        dropoff = self.dropoff_analysis()
        axes[2].bar(dropoff['dropped_at'], dropoff['pct'], color='coral')
        axes[2].set_ylabel('% of Total Users')
        axes[2].set_title('Drop-off Distribution')
        axes[2].tick_params(axis='x', rotation=45)

        plt.tight_layout()
        plt.savefig('funnel_complete.png', dpi=150)
        plt.show()

    def generate_report(self):
        """Print a complete analysis report.

        Covers the overview, the biggest drop-off, time metrics and a
        recommendation for each step that loses more than 30% of the
        previous step's users.
        """
        funnel = self.basic_funnel()
        dropoff = self.dropoff_analysis()
        times = self.time_analysis()

        print("=" * 60)
        print("FUNNEL ANALYSIS REPORT")
        print("=" * 60)

        print(f"\n📊 OVERVIEW")
        print(f"Total Users: {funnel.iloc[0]['users']:,}")
        print(f"Final Conversions: {funnel.iloc[-1]['users']:,}")
        print(f"Overall Conversion Rate: {funnel.iloc[-1]['overall_rate']}%")

        print(f"\n📉 DROP-OFF ANALYSIS")
        # A single-step funnel has no drop-off rows; idxmax would raise
        if not dropoff.empty:
            biggest_drop = dropoff.loc[dropoff['count'].idxmax()]
            print(f"Biggest Drop: {biggest_drop['dropped_at']} ({biggest_drop['pct']}% of users)")

        print(f"\n⏱️ TIME METRICS (seconds)")
        for step, metrics in times.items():
            print(f" {step}: median={metrics['median']}s, p75={metrics['p75']}s")

        print(f"\n💡 RECOMMENDATIONS")
        # Identify problem areas
        for _, row in funnel.iterrows():
            if row['step_num'] > 1 and row['dropoff'] > 30:
                print(f" ⚠️ High drop-off at {row['step']} ({row['dropoff']}%)")
                print(f" → Consider UX improvements or reducing friction")

        print("\n" + "=" * 60)


# Generate test data
np.random.seed(42)
n_users = 10000
events = []
base_date = pd.Timestamp('2024-01-01')

funnel_steps = ['landing', 'view_product', 'add_to_cart', 'checkout', 'purchase']
# Probability of reaching each step given the previous one was reached
probs = [1.0, 0.70, 0.55, 0.60, 0.70]

for uid in range(1, n_users + 1):
    t = base_date + pd.Timedelta(days=np.random.randint(0, 90))

    for i, step in enumerate(funnel_steps):
        # The first step always happens; no random draw is made for it
        if i == 0 or np.random.random() < probs[i]:
            events.append({
                'user_id': uid,
                'event': step,
                'timestamp': t
            })
            t += pd.Timedelta(seconds=np.random.randint(10, 300))
        else:
            break

events_df = pd.DataFrame(events)

# Add segments
segments = pd.DataFrame({
    'user_id': range(1, n_users + 1),
    'device': np.random.choice(['Mobile', 'Desktop', 'Tablet'], n_users, p=[0.55, 0.35, 0.10]),
    'channel': np.random.choice(['Organic', 'Paid', 'Social'], n_users, p=[0.4, 0.35, 0.25])
})

# Run analysis
analyzer = FunnelAnalyzer(events_df, funnel_steps, segments)

# Generate visualizations
analyzer.plot_funnel()

# Generate report
analyzer.generate_report()

# Segment analysis
print("\n📱 FUNNEL BY DEVICE:")
device_funnel = analyzer.segment_funnel('device')
print(device_funnel.pivot(index='step', columns='segment', values='rate'))

# Next section: 9. Tổng kết (Summary)
| Topic | Key Concepts |
|---|---|
| Funnel Basics | Steps, conversion rates, drop-off |
| Segmentation | Device, channel, user type analysis |
| Time Analysis | Step duration, velocity, conversion windows |
| Advanced | Health scores, optimization opportunities |
Bài tiếp theo: Predictive Analytics Basics
