rybbit/mockdata/index.js
Bill Yang 5cc4e08f80 Optimize session event generation and data handling for improved performance
- Refactored the session event generation function to enhance speed and reduce memory usage.
- Implemented pre-allocation of arrays and caching of session data to streamline event creation.
- Simplified session data generation using pre-generated arrays for faster random selection of parameters.
- Enhanced timestamp generation logic to reflect realistic traffic patterns, improving event accuracy.
- Adjusted batch processing for event handling to optimize memory management and throughput.
2025-03-23 23:23:11 -07:00

1357 lines
42 KiB
JavaScript

const { createClient } = require("@clickhouse/client");
const crypto = require("crypto");
const { DateTime } = require("luxon");
const { faker } = require("@faker-js/faker");
require("dotenv").config();
// ClickHouse connection; host, database and password all come from
// environment variables.
const clickhouse = createClient(
  (({ CLICKHOUSE_HOST, CLICKHOUSE_DB, CLICKHOUSE_PASSWORD }) => ({
    host: CLICKHOUSE_HOST,
    database: CLICKHOUSE_DB,
    password: CLICKHOUSE_PASSWORD,
  }))(process.env)
);
// Command line arguments: [daysInPast] [eventsPerDay]
const args = process.argv.slice(2);

/**
 * Parse a non-negative integer command line argument.
 *
 * @param {string|undefined} rawValue - Raw argv entry; a missing or empty
 *   value selects the fallback.
 * @param {number} fallback - Value used when the argument is not supplied.
 * @param {string} label - Argument name used in the error message.
 * @returns {number} The parsed count.
 * @throws {Error} If the value does not start with a non-negative integer.
 */
function parseCountArg(rawValue, fallback, label) {
  if (rawValue === undefined || rawValue === "") {
    return fallback;
  }
  const parsed = Number.parseInt(rawValue, 10);
  // Without this check a typo such as "3o" or "abc" would propagate NaN into
  // every loop bound and quietly generate nothing.
  if (Number.isNaN(parsed) || parsed < 0) {
    throw new Error(
      `Invalid ${label}: expected a non-negative integer, got "${rawValue}"`
    );
  }
  return parsed;
}

const daysInPast = parseCountArg(args[0], 30, "daysInPast");
const eventsPerDay = parseCountArg(args[1], 5000000, "eventsPerDay");
console.log(
  `Generating mock data for ${daysInPast} days with approximately ${eventsPerDay} events per day`
);
// Site identifier stamped on every generated event
const SITE_ID = 5;
// Pool of 10,000 random user IDs that sessions are assigned from
const userIds = [];
while (userIds.length < 10000) {
  userIds.push(faker.string.uuid());
}
// Storefront identity (used for page titles and the event hostname)
const SITE_NAME = "ShopEase";
const SITE_DOMAIN = "shopease.com";
// Catalog of 50 fake products with IDs 1000-1049; each product later gets a
// stable /product/<id>-<slug> page path
const products = [];
for (let productId = 1000; productId < 1050; productId += 1) {
  const name = faker.commerce.productName();
  const category = faker.commerce.department();
  const subcategory = faker.commerce.productAdjective();
  products.push({
    id: productId,
    name,
    category,
    subcategory,
    // URL-friendly form of the name: runs of non-alphanumerics become "-"
    slug: name.toLowerCase().replace(/[^a-z0-9]+/g, "-"),
    price: faker.commerce.price({ min: 10, max: 1000 }),
  });
}
// Distinct category / subcategory names, in first-seen order
const categories = Array.from(
  new Set(products.map(({ category }) => category))
);
const subcategories = Array.from(
  new Set(products.map(({ subcategory }) => subcategory))
);
// Weighted list of navigable pages. The index layout is relied on by the
// session generator: entries 0-14 are the fixed site pages below, followed by
// one entry per category, then one entry per product.
const pagePaths = [
  { path: "/", weight: 20, title: `${SITE_NAME} - Online Shopping` },
  ...[
    ["/products", 15, "All Products"],
    ["/categories", 10, "Shop by Category"],
    ["/sale", 12, "Sale Items"],
    ["/new-arrivals", 8, "New Arrivals"],
    ["/cart", 7, "Your Shopping Cart"],
    ["/checkout", 5, "Checkout"],
    ["/account", 5, "Your Account"],
    ["/wishlist", 4, "Your Wishlist"],
    ["/order-history", 3, "Order History"],
    ["/about", 2, "About Us"],
    ["/contact", 2, "Contact Us"],
    ["/faq", 2, "FAQ"],
    ["/shipping", 2, "Shipping Policy"],
    ["/returns", 2, "Returns & Refunds"],
  ].map(([path, weight, heading]) => ({
    path,
    weight,
    title: `${heading} - ${SITE_NAME}`,
  })),
];
// One landing page per product category
for (const category of categories) {
  pagePaths.push({
    path: `/category/${category.toLowerCase().replace(/[^a-z0-9]+/g, "-")}`,
    weight: 6,
    title: `${category} - ${SITE_NAME}`,
    category,
  });
}
// One detail page per product (weighted high; these drive the e-commerce flow)
for (const product of products) {
  const { id, slug, name } = product;
  pagePaths.push({
    path: `/product/${id}-${slug}`,
    weight: 9,
    title: `${name} - ${SITE_NAME}`,
    product,
  });
}
// Weighted custom-event templates, written as [name, weight, properties].
// Empty property objects are placeholders for product details.
const customEvents = [
  ["page-view", 25, { page: "home" }],
  ["product-view", 20, {}],
  ["add-to-cart", 15, {}],
  ["remove-from-cart", 5, {}],
  ["begin-checkout", 8, { items_count: 1, value: 0 }],
  ["checkout-step", 7, { step: 1, option: "" }],
  [
    "purchase",
    5,
    { transaction_id: "", value: 0, currency: "USD", items: [] },
  ],
  ["product-click", 12, { list_name: "recommended" }],
  ["search", 10, { search_term: "" }],
  ["filter-products", 8, { filter: "price", value: "100-200" }],
  ["add-to-wishlist", 6, {}],
  ["share-product", 4, { method: "email" }],
  ["select-promotion", 5, { promotion_name: "Summer Sale" }],
  ["sign-up", 3, { method: "email" }],
  ["login", 6, { method: "site" }],
  ["view-cart", 10, { items_count: 0, value: 0 }],
].map(([name, weight, properties]) => ({ name, weight, properties }));
// Weighted query-string variants, keyed by the page path (or path prefix)
// they apply to. Rows are [param, weight].
const tabParams = {};
for (const [pathKey, variants] of [
  [
    "/product/",
    [
      ["?variant=large", 30],
      ["?variant=medium", 25],
      ["?variant=small", 20],
      ["?color=blue", 15],
      ["?color=black", 15],
      ["", 20],
    ],
  ],
  [
    "/products",
    [
      ["?sort=price-asc", 25],
      ["?sort=price-desc", 20],
      ["?sort=newest", 20],
      ["?sort=popular", 20],
      ["", 15],
    ],
  ],
  [
    "/category/",
    [
      ["?filter=new", 20],
      ["?filter=sale", 25],
      ["?filter=in-stock", 20],
      ["?sort=price-low", 15],
      ["", 20],
    ],
  ],
]) {
  tabParams[pathKey] = variants.map(([param, weight]) => ({ param, weight }));
}
// Weighted traffic sources, written as [url, weight]. An empty URL means
// direct traffic.
const referrers = [
  ["", 30], // Direct
  ["https://www.google.com/", 25],
  ["https://www.facebook.com/", 10],
  ["https://www.instagram.com/", 12],
  ["https://pinterest.com/", 8],
  ["https://www.amazon.com/", 5],
  ["https://www.youtube.com/", 5],
  ["https://www.tiktok.com/", 4],
  ["https://twitter.com/", 3],
  ["https://www.bing.com/", 3],
  ["https://www.retailmenot.com/", 2],
  ["https://slickdeals.net/", 2],
  ["https://www.dealsplus.com/", 1],
  [`https://email.${SITE_DOMAIN}/`, 10], // Email campaigns
  ["https://www.pricegrabber.com/", 1],
].map(([url, weight]) => ({ url, weight }));
// Weighted browser share, written as [name, weight, versions]
const browsers = [
  ["Chrome", 60, ["133", "134", "135", "132"]],
  ["Firefox", 15, ["124", "123", "122"]],
  ["Safari", 10, ["17", "16", "15"]],
  ["Edge", 8, ["122", "121", "120"]],
  ["Opera", 4, ["105", "104", "103"]],
  ["Yandex", 3, ["25", "24", "23"]],
].map(([name, weight, versions]) => ({ name, weight, versions }));
// Weighted operating-system share, written as [name, weight, versions]
const operatingSystems = [
  ["Windows", 65, ["10", "11", "7"]],
  ["Android", 15, ["13", "12", "11", "10"]],
  ["iOS", 10, ["17", "16", "15"]],
  ["macOS", 7, ["14", "13", "12"]],
  ["Linux", 3, ["5.15", "5.10", "5.8"]],
].map(([name, weight, versions]) => ({ name, weight, versions }));
// Weighted screen sizes, written as [width, height, weight]
const screenResolutions = [
  [1920, 1080, 30],
  [1366, 768, 20],
  [1536, 864, 15],
  [2560, 1440, 10],
  [1440, 900, 8],
  [1024, 768, 5],
  [3840, 2160, 5],
  [1280, 720, 3],
  // Mobile resolutions
  [414, 896, 3],
  [375, 667, 3],
  [360, 740, 3],
  [412, 915, 2],
  [390, 844, 2],
  [768, 1024, 1], // iPad
].map(([width, height, weight]) => ({ width, height, weight }));
// Weighted browser language codes, written as a code -> weight map and
// expanded into { code, weight } records in the same order
const languages = Object.entries({
  "en-US": 30,
  en: 15,
  ru: 12,
  de: 8,
  pl: 7,
  es: 6,
  fr: 6,
  "zh-CN": 5,
  it: 4,
  cs: 3,
  "pt-BR": 2,
  ja: 2,
}).map(([code, weight]) => ({ code, weight }));
// Weighted countries with their weighted regions.
// Each row is [country, countryWeight, [[regionSuffix, regionWeight], ...]];
// region codes are expanded to "<country>-<suffix>" (ISO 3166-2 style).
const geoData = [
  [
    "US",
    25,
    [
      ["NY", 20],
      ["CA", 18],
      ["TX", 15],
      ["FL", 12],
      ["IL", 10],
      ["PA", 8],
      ["OH", 7],
      ["MI", 5],
      ["GA", 5],
    ],
  ],
  [
    "RU",
    20,
    [
      ["MOW", 25],
      ["SPE", 15],
      ["KDA", 10],
      ["KYA", 10],
      ["NVS", 8],
      ["SVE", 8],
      ["IRK", 5],
      ["YAR", 5],
    ],
  ],
  [
    "DE",
    10,
    [
      ["BY", 20],
      ["NW", 18],
      ["BW", 15],
      ["NI", 12],
      ["HE", 10],
      ["BE", 8],
    ],
  ],
  [
    "PL",
    8,
    [
      ["MZ", 20],
      ["DS", 15],
      ["WP", 15],
      ["SL", 12],
      ["MA", 10],
    ],
  ],
  [
    "GB",
    7,
    [
      ["ENG", 80],
      ["SCT", 10],
      ["WLS", 7],
      ["NIR", 3],
    ],
  ],
  [
    "FR",
    6,
    [
      ["IDF", 25],
      ["ARA", 15],
      ["HDF", 12],
      ["NAQ", 10],
      ["OCC", 10],
    ],
  ],
  [
    "CA",
    5,
    [
      ["ON", 40],
      ["QC", 25],
      ["BC", 15],
      ["AB", 10],
    ],
  ],
  [
    "CN",
    4,
    [
      ["11", 20], // Beijing
      ["31", 20], // Shanghai
      ["44", 15], // Guangdong
      ["51", 10], // Sichuan
      ["33", 10], // Zhejiang
      ["32", 10], // Jiangsu
    ],
  ],
  [
    "AU",
    3,
    [
      ["NSW", 35],
      ["VIC", 30],
      ["QLD", 20],
      ["WA", 15],
    ],
  ],
  [
    "ES",
    3,
    [
      ["MD", 25],
      ["CT", 20],
      ["AN", 18],
      ["VA", 12],
    ],
  ],
  [
    "IT",
    2,
    [
      ["25", 20], // Lombardy
      ["62", 15], // Lazio
      ["21", 15], // Piedmont
      ["34", 12], // Veneto
    ],
  ],
  [
    "CZ",
    2,
    [
      ["PR", 35],
      ["JM", 20],
      ["ST", 15],
    ],
  ],
  [
    "AT",
    2,
    [
      ["9", 35], // Vienna
      ["6", 20], // Styria
      ["4", 15], // Upper Austria
    ],
  ],
  [
    "JP",
    2,
    [
      ["13", 30], // Tokyo
      ["27", 15], // Osaka
      ["14", 10], // Kanagawa
    ],
  ],
  [
    "BR",
    1,
    [
      ["SP", 35],
      ["RJ", 20],
      ["MG", 15],
    ],
  ],
].map(([country, weight, regionRows]) => ({
  country,
  weight,
  regions: regionRows.map(([suffix, regionWeight]) => ({
    code: `${country}-${suffix}`,
    weight: regionWeight,
  })),
}));
/**
 * Pick one entry from a list, with probability proportional to its `weight`.
 *
 * @param {Array<{weight: number}>} items - Candidates; each needs a numeric `weight`.
 * @returns {*} The chosen entry. Falls back to `items[0]` if the walk never
 *   reaches zero (so an empty list yields `undefined`).
 */
function weightedRandom(items) {
  let totalWeight = 0;
  for (const { weight } of items) {
    totalWeight += weight;
  }
  // Draw a point in [0, totalWeight) and walk the list, consuming each
  // entry's weight until the point is used up.
  let remaining = Math.random() * totalWeight;
  for (let i = 0; i < items.length; i += 1) {
    remaining -= items[i].weight;
    if (remaining <= 0) {
      return items[i];
    }
  }
  return items[0]; // Fallback
}
/**
 * Generate a random version-4 UUID string
 * (e.g. "3b241101-e2bb-4255-8caf-4136c566a962").
 *
 * Uses Node's built-in generator from the `crypto` module this file already
 * requires, instead of a hand-rolled Math.random() template.
 *
 * @returns {string} A lowercase, hyphenated v4 UUID.
 */
function generateUUID() {
  return crypto.randomUUID();
}
/**
 * Pick a random time of day for `date`, biased towards busy hours.
 *
 * @param {object} date - Day to place the time in; must expose
 *   `set({ hour, minute, second })` (presumably a Luxon DateTime — confirm
 *   against callers).
 * @returns {*} Whatever `date.set(...)` returns for the chosen time.
 */
function generateTimeForDay(date) {
  // Relative traffic per hour, listed from 9 AM and wrapping past midnight to
  // 8 AM: busiest at 2-3 PM and 7-8 PM, quietest at 2-5 AM.
  const weightsFrom9am = [
    5, 6, 7, 8, 9, 10, 10, 9, 8, 9, 10, 10, 9, 7, 5, // 9 AM - 11 PM
    3, 2, 1, 1, 1, 1, 2, 3, 4, // 12 AM - 8 AM
  ];
  const hourlyTraffic = weightsFrom9am.map((weight, offset) => ({
    hour: (9 + offset) % 24,
    weight,
  }));
  const { hour } = weightedRandom(hourlyTraffic);
  // Minutes and seconds are uniform within the chosen hour
  const minute = Math.floor(Math.random() * 60);
  const second = Math.floor(Math.random() * 60);
  return date.set({ hour, minute, second });
}
/**
 * Format the estimated time remaining as "Ns", "Mm Ss" or "Hh Mm".
 *
 * The estimate is padded by up to 30% as progress approaches 100%, on the
 * assumption that generation slows down as more events accumulate.
 *
 * @param {number} currentCount - Events generated so far.
 * @param {number} targetCount - Total events to generate.
 * @param {number} eventsPerSecond - Current generation rate.
 * @param {number} elapsedSeconds - Seconds since generation started.
 * @returns {string} Human-readable ETA, "calculating..." while the rate is
 *   not yet usable, or "0s" once the target has been reached.
 */
function formatETA(currentCount, targetCount, eventsPerSecond, elapsedSeconds) {
  // `!(x > 0)` also rejects NaN, which a plain `x <= 0` would let through
  if (!(eventsPerSecond > 0) || elapsedSeconds < 1) return "calculating...";
  const remainingEvents = targetCount - currentCount;
  // Reaching or overshooting the target must not print a negative ETA
  if (!(remainingEvents > 0)) return "0s";
  // Use a dynamic slowdown factor based on how far along we are:
  // gradually add up to a 30% buffer as we progress
  const progress = currentCount / targetCount;
  const slowdownFactor = 1 + progress * 0.3;
  const adjustedRate = eventsPerSecond / slowdownFactor;
  // Round up once, before splitting into units, so the parts can never
  // read "60s" or "1m 60s"
  const totalSeconds = Math.ceil(remainingEvents / adjustedRate);
  if (totalSeconds < 60) {
    return `${totalSeconds}s`;
  }
  if (totalSeconds < 3600) {
    return `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
  }
  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor((totalSeconds % 3600) / 60);
  return `${hours}h ${minutes}m`;
}
/**
 * Format an elapsed duration as "Ns", "Mm Ss" or "Hh Mm Ss".
 * Fractional seconds are truncated.
 *
 * @param {number} seconds - Elapsed time in seconds.
 * @returns {string} Human-readable duration.
 */
function formatTime(seconds) {
  if (seconds < 60) {
    return `${Math.floor(seconds)}s`;
  }
  const secondsPart = Math.floor(seconds % 60);
  if (seconds < 3600) {
    return `${Math.floor(seconds / 60)}m ${secondsPart}s`;
  }
  const hoursPart = Math.floor(seconds / 3600);
  const minutesPart = Math.floor((seconds % 3600) / 60);
  return `${hoursPart}h ${minutesPart}m ${secondsPart}s`;
}
/**
 * Optimized version of generateSessionEvents for speed.
 *
 * Simulates one browsing session on the mock store and returns its events in
 * chronological order: a "pageview" event for every page visited, plus
 * "custom_event" rows (product-view, add-to-cart, view-cart, begin-checkout,
 * purchase) that depend on the page visited and on the cart state built up
 * during the session.
 *
 * @param {string} userId - Copied into every event as `user_id`.
 * @param {string} sessionId - Copied into every event as `session_id`.
 * @param {object} startTime - Session start; must support `.plus()`,
 *   `.toFormat()` and `>` comparison (the visible caller passes a Luxon
 *   DateTime).
 * @param {object} sessionData - Per-session attributes read here: browser,
 *   browserVersion, os, osVersion, language, screenWidth, screenHeight,
 *   deviceType, country, iso3166, referrer.
 * @returns {object[]} The generated event rows (at most MAX_EVENTS).
 */
function generateSessionEventsOptimized(
userId,
sessionId,
startTime,
sessionData
) {
// Pre-allocate array for events - this is much faster than dynamic growth
// A session has at most 8 pageviews and each pageview emits at most 3 events
// (24 in total), so 36 slots leaves headroom
const MAX_EVENTS = 36;
const events = new Array(MAX_EVENTS);
let eventCount = 0;
// Session duration calculation
const sessionDuration = Math.floor(Math.random() * 1800) + 60; // 60-1859 seconds (roughly 1-31 minutes)
const numberOfPageviews = Math.floor(Math.random() * 8) + 1; // 1-8 pageviews per session (reduced for speed)
// Cache commonly used values to avoid repeated property lookups
const siteId = SITE_ID;
const hostname = SITE_DOMAIN;
const browser = sessionData.browser;
const browserVersion = sessionData.browserVersion;
const os = sessionData.os;
const osVersion = sessionData.osVersion;
const language = sessionData.language;
const screenWidth = sessionData.screenWidth;
const screenHeight = sessionData.screenHeight;
const deviceType = sessionData.deviceType;
const country = sessionData.country;
const iso3166 = sessionData.iso3166;
const initialReferrer = sessionData.referrer;
const dateFormat = "yyyy-MM-dd HH:mm:ss";
// Track session state
let currentPath = null;
let currentProduct = null;
let cartItems = [];
let hasPurchased = false;
let currentTime = startTime;
// Create a fast template event creator function - much faster than creating and copying objects
// Appends one event row stamped with the current value of `currentTime`;
// callers advance `currentTime` before calling to space events apart
function createEvent(
pathname,
querystring,
title,
referrer,
type,
eventName,
props
) {
if (eventCount >= MAX_EVENTS) return; // Safety check
events[eventCount++] = {
site_id: siteId,
session_id: sessionId,
user_id: userId,
hostname: hostname,
browser: browser,
browser_version: browserVersion,
operating_system: os,
operating_system_version: osVersion,
language: language,
screen_width: screenWidth,
screen_height: screenHeight,
device_type: deviceType,
country: country,
iso_3166_2: iso3166,
timestamp: currentTime.toFormat(dateFormat),
pathname: pathname,
querystring: querystring || "",
page_title: title,
referrer: referrer || "",
type: type,
event_name: eventName || "",
properties: props || "",
};
}
// Fast entry page selection - avoid complex weighted random for most common case
// First page - entry point - optimize for most common case
// NOTE: the hard-coded indexes below rely on the order of the first 15
// entries of pagePaths (0 = "/", 1 = "/products", 3 = "/sale")
const entryPageIndex = Math.floor(Math.random() * 10);
if (entryPageIndex < 3) {
// 30% homepage
currentPath = pagePaths[0]; // homepage
} else if (entryPageIndex < 6) {
// 30% products
currentPath = pagePaths[1]; // products
} else if (entryPageIndex < 8) {
// 20% sale
currentPath = pagePaths[3]; // sale
} else {
// 20% uniformly random among the 15 fixed site pages (indexes 0-14)
currentPath = pagePaths[Math.floor(Math.random() * 15)];
}
// Run through the pageviews for this session
for (let i = 0; i < numberOfPageviews; i++) {
// Advance time - simplify time calculation (30-59 seconds per page)
const timeAdvance = 30 + Math.floor(Math.random() * 30);
currentTime = currentTime.plus({ seconds: timeAdvance });
// Check if we've exceeded session duration (the first pageview is always emitted)
if (i > 0 && currentTime > startTime.plus({ seconds: sessionDuration })) {
break;
}
// Get querystring - simplified
let querystring = "";
if (Math.random() < 0.2) {
// Reduced probability for speed; only product pages and /products get a
// querystring, every other path keeps the empty string
if (currentPath.path.startsWith("/product/")) {
// Always the same fixed product variant
querystring = "?variant=medium";
} else if (currentPath.path === "/products") {
// Always the same fixed sort order
querystring = "?sort=newest";
}
}
// Add pageview event - always happens
// Only the first pageview of the session carries the session referrer
createEvent(
currentPath.path,
querystring,
currentPath.title,
i === 0 ? initialReferrer : "",
"pageview",
"",
""
);
// Update product for product pages
if (currentPath.path.startsWith("/product/")) {
currentProduct = currentPath.product;
// Add product view event - always on product pages
currentTime = currentTime.plus({ seconds: 2 });
// Simplified properties creation - avoid building complex objects
const productProps = JSON.stringify({
product_id: currentProduct.id,
product_name: currentProduct.name,
category: currentProduct.category,
price: currentProduct.price,
currency: "USD",
});
createEvent(
currentPath.path,
querystring,
currentPath.title,
"",
"custom_event",
"product-view",
productProps
);
// Add to cart (30% chance), at most once per product per session
if (Math.random() < 0.3 && !cartItems.includes(currentProduct)) {
currentTime = currentTime.plus({ seconds: 5 });
cartItems.push(currentProduct);
createEvent(
currentPath.path,
querystring,
currentPath.title,
"",
"custom_event",
"add-to-cart",
JSON.stringify({
product_id: currentProduct.id,
product_name: currentProduct.name,
category: currentProduct.category,
price: currentProduct.price,
quantity: 1,
currency: "USD",
})
);
}
}
// Cart view/checkout - simplified for speed
else if (currentPath.path === "/cart" && cartItems.length > 0) {
// Calculate cart total once; prices are parsed with parseFloat and the
// total is serialized as a 2-decimal string
const cartTotal = cartItems
.reduce((sum, item) => sum + parseFloat(item.price), 0)
.toFixed(2);
createEvent(
currentPath.path,
querystring,
currentPath.title,
"",
"custom_event",
"view-cart",
JSON.stringify({
items_count: cartItems.length,
value: cartTotal,
currency: "USD",
})
);
}
// Purchase flow - simplified for speed
else if (
currentPath.path === "/checkout" &&
cartItems.length > 0 &&
!hasPurchased
) {
// Calculate cart total once
const cartTotal = cartItems
.reduce((sum, item) => sum + parseFloat(item.price), 0)
.toFixed(2);
createEvent(
currentPath.path,
querystring,
currentPath.title,
"",
"custom_event",
"begin-checkout",
JSON.stringify({
items_count: cartItems.length,
value: cartTotal,
currency: "USD",
})
);
// Purchase - always on the last planned pageview, otherwise 60% chance
// (the inner !hasPurchased repeats the enclosing branch condition)
if (
(i === numberOfPageviews - 1 || Math.random() < 0.6) &&
!hasPurchased
) {
currentTime = currentTime.plus({ seconds: 15 });
// Short pseudo-random transaction ID: up to 6 uppercase base-36 characters
const transactionId = Math.random()
.toString(36)
.substring(2, 8)
.toUpperCase();
// Simplified purchase event - avoid creating complex JSON structures
createEvent(
currentPath.path,
querystring,
currentPath.title,
"",
"custom_event",
"purchase",
JSON.stringify({
transaction_id: transactionId,
value: cartTotal,
tax: (parseFloat(cartTotal) * 0.08).toFixed(2),
shipping: "9.99",
currency: "USD",
items_count: cartItems.length,
})
);
// The cart is not emptied after a purchase; hasPurchased only prevents a
// second checkout/purchase in the same session
hasPurchased = true;
}
}
// Simplified navigation logic for speed - use faster random selection
// Branches are tested in order against one shared random draw, so the
// percentages in the comments below describe each branch's slice of navRand
// given that the earlier branches did not apply
if (i < numberOfPageviews - 1) {
const navRand = Math.random();
// Faster navigation logic - avoid complex condition checking
if (cartItems.length > 0 && navRand < 0.3) {
// 30% go to cart if items exist
currentPath = pagePaths.find((p) => p.path === "/cart") || pagePaths[0];
} else if (
currentPath.path === "/cart" &&
cartItems.length > 0 &&
navRand < 0.6
) {
// 30% go to checkout from cart
currentPath =
pagePaths.find((p) => p.path === "/checkout") || pagePaths[0];
} else if (navRand < 0.4) {
// navRand below 0.4 (40% with an empty cart): jump to a page at index 15
// or later - pagePaths is built so that category pages start at index 15
// and product pages follow them, so either kind can be picked
const productIndex =
15 + Math.floor(Math.random() * (pagePaths.length - 15));
currentPath = pagePaths[productIndex];
} else if (navRand < 0.7) {
// 30% go to one of the main pages (indexes 0-5)
currentPath = pagePaths[Math.floor(Math.random() * 6)];
} else {
// 30% random navigation
currentPath = pagePaths[Math.floor(Math.random() * pagePaths.length)];
}
}
}
// Return only the populated part of the array
return events.slice(0, eventCount);
}
/**
 * Build the attributes that stay constant for a whole session: browser, OS,
 * screen size, language, referrer, location and derived device type.
 *
 * @returns {{browser: string, browserVersion: string, os: string,
 *   osVersion: string, screenWidth: number, screenHeight: number,
 *   language: string, referrer: string, country: string, iso3166: string,
 *   deviceType: string}} Randomly generated session attributes.
 */
function generateSessionData() {
  // Uniform pick from a plain list
  const pickOne = (list) => list[Math.floor(Math.random() * list.length)];

  // Browser and OS come from the weighted tables; versions are uniform
  const { name: browserName, versions: browserVersions } =
    weightedRandom(browsers);
  const browserVersion = pickOne(browserVersions);
  const { name: osName, versions: osVersions } =
    weightedRandom(operatingSystems);
  const osVersion = pickOne(osVersions);
  const { width, height } = weightedRandom(screenResolutions);

  // Language: a base code, with a random country suffix half of the time
  const baseLanguage = pickOne([
    "en",
    "es",
    "fr",
    "de",
    "it",
    "ru",
    "zh",
    "ja",
    "pt",
    "nl",
  ]);
  const countrySuffix =
    Math.random() < 0.5 ? "" : `-${faker.location.countryCode()}`;
  const language = baseLanguage + countrySuffix;

  const { url: referrer } = weightedRandom(referrers);

  // Location comes from Faker: US gets a state abbreviation, every other
  // country gets the first three letters of a fake county name, upper-cased
  const country = faker.location.countryCode();
  const region =
    country === "US"
      ? faker.location.state({ abbreviated: true })
      : faker.location.county().slice(0, 3).toUpperCase();

  // Device type: mobile OSes are Tablet above 768px wide, otherwise Mobile;
  // any other OS counts as Mobile at 1024px or narrower, else Desktop
  const isMobileOs = osName === "Android" || osName === "iOS";
  let deviceType;
  if (isMobileOs) {
    deviceType = width > 768 ? "Tablet" : "Mobile";
  } else {
    deviceType = width <= 1024 ? "Mobile" : "Desktop";
  }

  return {
    browser: browserName,
    browserVersion,
    os: osName,
    osVersion,
    screenWidth: width,
    screenHeight: height,
    language,
    referrer,
    country,
    iso3166: country && region ? `${country}-${region}` : country,
    deviceType,
  };
}
// Function to generate events for a specific day
async function generateEventsForDay(date, targetEventsCount) {
// Instead of one large array, use a collection of batch arrays
const eventBatches = [];
const BATCH_SIZE = 100000; // Store events in manageable chunks
let currentBatch = [];
// Active sessions map to track ongoing sessions
const activeSessions = new Map();
// Vary the count by ±15% to make it more realistic
const variation = Math.random() * 0.3 - 0.15; // -15% to +15%
const actualEventsCount = Math.round(targetEventsCount * (1 + variation));
console.log(
`Generating ${actualEventsCount.toLocaleString()} events for ${date.toFormat(
"yyyy-MM-dd"
)}`
);
// Generate 4-10x more events than sessions (averaging 5-6 events per session)
const eventsPerSession = 6;
let eventCount = 0;
const sessionsToGenerate = Math.ceil(actualEventsCount / eventsPerSession);
console.log(
`Preparing ${sessionsToGenerate.toLocaleString()} sessions with timestamps...`
);
// For session data creation - cache common data to avoid recreating for every session
// Pre-generate some session data arrays for faster random selection
const pregenSessionData = {
browsers: Array.from({ length: 1000 }, () => {
const browser = weightedRandom(browsers);
return {
name: browser.name,
version:
browser.versions[Math.floor(Math.random() * browser.versions.length)],
};
}),
operatingSystems: Array.from({ length: 1000 }, () => {
const os = weightedRandom(operatingSystems);
return {
name: os.name,
version: os.versions[Math.floor(Math.random() * os.versions.length)],
};
}),
resolutions: Array.from({ length: 100 }, () =>
weightedRandom(screenResolutions)
),
referrers: Array.from({ length: 100 }, () => weightedRandom(referrers).url),
countries: Array.from({ length: 100 }, () => {
const country = faker.location.countryCode();
let region = "";
if (country === "US") {
region = faker.location.state({ abbreviated: true });
} else {
region = faker.location.county().slice(0, 3).toUpperCase();
}
return { country, region };
}),
languages: Array.from({ length: 100 }, () => {
const languageCodes = [
"en",
"es",
"fr",
"de",
"it",
"ru",
"zh",
"ja",
"pt",
"nl",
];
return (
languageCodes[Math.floor(Math.random() * languageCodes.length)] +
(Math.random() < 0.5 ? "" : "-" + faker.location.countryCode())
);
}),
};
// Pre-generate timestamps for the day with realistic distribution in batches
// This reduces memory pressure while maintaining chronological order
const TIMESTAMP_BATCH_SIZE = 100000;
let totalTimestampsGenerated = 0;
let timestampBatch = [];
// For tracking progress - modified to limit logs to once per second
const startGenerationTime = Date.now();
let lastProgressTime = startGenerationTime;
const MIN_LOG_INTERVAL_MS = 1000; // Minimum 1 second between progress logs
// For improved ETA calculation
let recentRates = [];
const MAX_RATES_TO_TRACK = 5; // Keep track of the last 5 generation rates
console.log(`Beginning event generation in optimized batches...`);
// Faster UUID generation
const fastUUID = () => {
return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (c) => {
const r = (Math.random() * 16) | 0;
return (c === "x" ? r : (r & 0x3) | 0x8).toString(16);
});
};
// Initial progress message
console.log(
`Progress: 0% | Generated 0 of ${actualEventsCount.toLocaleString()} events | 0 events/sec | Time: 0s | ETA: calculating...`
);
// Process in batches to optimize memory usage and performance
while (totalTimestampsGenerated < sessionsToGenerate) {
// Generate next batch of timestamps
const batchSize = Math.min(
TIMESTAMP_BATCH_SIZE,
sessionsToGenerate - totalTimestampsGenerated
);
timestampBatch = [];
// Generate timestamps - using a more optimized approach
const dateJSDate = date.toJSDate();
for (let i = 0; i < batchSize; i++) {
// Simplified time generation - avoid full weightedRandom for every timestamp
const hour = 9 + Math.floor(Math.random() * 12); // Focus on 9am-9pm where most traffic happens
const minute = Math.floor(Math.random() * 60);
const second = Math.floor(Math.random() * 60);
timestampBatch.push(
DateTime.fromJSDate(dateJSDate).set({ hour, minute, second })
);
}
// Sort this batch chronologically
timestampBatch.sort((a, b) => a.toMillis() - b.toMillis());
// Process each timestamp to generate sessions and events
for (let idx = 0; idx < timestampBatch.length; idx++) {
const timestamp = timestampBatch[idx];
const sessionIdx = totalTimestampsGenerated + idx;
// Check if it's time to show a progress update
const currentTime = Date.now();
const timeSinceLastLog = currentTime - lastProgressTime;
// Only show a progress update if it's been at least MIN_LOG_INTERVAL_MS since the last one
// Or if this is the last session
if (
timeSinceLastLog >= MIN_LOG_INTERVAL_MS ||
sessionIdx === sessionsToGenerate - 1
) {
const elapsedSeconds = (currentTime - startGenerationTime) / 1000;
// Calculate progress based on actual events generated, not sessions
const percentComplete = Math.min(
((eventCount / actualEventsCount) * 100).toFixed(1),
100.0
);
// Format elapsed time
const elapsedFormatted = formatTime(elapsedSeconds);
// Calculate current generation rate
const currentIntervalRate =
eventCount > 0 ? Math.round(eventCount / elapsedSeconds) : 0;
// Store recent rates for weighted average calculation
if (timeSinceLastLog > 0) {
// Calculate rate just for this interval
const newEventsInThisInterval =
eventCount -
(recentRates.length > 0
? eventCount -
(elapsedSeconds - timeSinceLastLog / 1000) * currentIntervalRate
: 0);
const intervalRate = Math.round(
newEventsInThisInterval / (timeSinceLastLog / 1000)
);
// Add to recent rates, keeping only the most recent ones
recentRates.push(intervalRate);
if (recentRates.length > MAX_RATES_TO_TRACK) {
recentRates.shift(); // Remove oldest rate
}
}
// Calculate weighted average of recent rates (more weight to recent rates)
let weightedRate = currentIntervalRate;
if (recentRates.length > 0) {
let totalWeight = 0;
let weightedSum = 0;
for (let i = 0; i < recentRates.length; i++) {
const weight = i + 1; // More recent rates get higher weights
weightedSum += recentRates[i] * weight;
totalWeight += weight;
}
weightedRate = Math.round(weightedSum / totalWeight);
}
console.log(
`Progress: ${percentComplete}% | Generated ${eventCount.toLocaleString()} of ${actualEventsCount.toLocaleString()} events ` +
`| ${weightedRate.toLocaleString()} events/sec | Time: ${elapsedFormatted} | ETA: ${formatETA(
eventCount,
actualEventsCount,
weightedRate,
elapsedSeconds
)}`
);
lastProgressTime = currentTime;
}
// Optimized session generation logic
// Decide whether to continue an existing session or create a new one
const now = timestamp.toMillis();
let sessionId, userId, sessionData;
// First expire old sessions (more than 30 minutes old)
if (activeSessions.size > 100) {
// Only check when we have many active sessions
for (const [sid, sessionInfo] of activeSessions.entries()) {
if (now - sessionInfo.lastActivity > 30 * 60 * 1000) {
activeSessions.delete(sid);
}
}
}
// 20% chance to continue an existing session if any are active and not too many sessions
if (
activeSessions.size > 0 &&
activeSessions.size < 1000 &&
Math.random() < 0.2
) {
// Continue an existing session (select one of the first 50 to keep it fast)
const activeSids = Array.from(activeSessions.keys()).slice(0, 50);
sessionId = activeSids[Math.floor(Math.random() * activeSids.length)];
const sessionInfo = activeSessions.get(sessionId);
userId = sessionInfo.userId;
sessionData = sessionInfo.data;
// Update last activity timestamp
sessionInfo.lastActivity = now;
} else {
// Create a new session
sessionId = fastUUID(); // Using faster UUID generation
userId = userIds[Math.floor(Math.random() * userIds.length)];
// Faster session data creation using pre-generated arrays
const browserIdx = Math.floor(Math.random() * 1000);
const osIdx = Math.floor(Math.random() * 1000);
const resIdx = Math.floor(Math.random() * 100);
const countryIdx = Math.floor(Math.random() * 100);
const langIdx = Math.floor(Math.random() * 100);
const refIdx = Math.floor(Math.random() * 100);
const browserInfo = pregenSessionData.browsers[browserIdx];
const osInfo = pregenSessionData.operatingSystems[osIdx];
const resolution = pregenSessionData.resolutions[resIdx];
const { country, region } = pregenSessionData.countries[countryIdx];
// Determine device type based on screen resolution and OS - simplified
let deviceType = "Desktop";
if (osInfo.name === "Android" || osInfo.name === "iOS") {
deviceType = resolution.width > 768 ? "Tablet" : "Mobile";
} else if (resolution.width <= 1024) {
deviceType = "Mobile";
}
sessionData = {
browser: browserInfo.name,
browserVersion: browserInfo.version,
os: osInfo.name,
osVersion: osInfo.version,
screenWidth: resolution.width,
screenHeight: resolution.height,
language: pregenSessionData.languages[langIdx],
referrer: pregenSessionData.referrers[refIdx],
country: country,
iso3166: country && region ? `${country}-${region}` : country,
deviceType: deviceType,
};
// Add to active sessions
activeSessions.set(sessionId, {
userId,
data: sessionData,
lastActivity: now,
});
}
// Generate events for this session using optimized event generation
const sessionEvents = generateSessionEventsOptimized(
userId,
sessionId,
timestamp,
sessionData
);
// Add these events to our collection using batch arrays
const newEventCount = sessionEvents.length;
// Add events to the current batch, creating new batches as needed
for (let i = 0; i < newEventCount; i++) {
// If current batch is full, add it to batches and create a new one
if (currentBatch.length >= BATCH_SIZE) {
eventBatches.push(currentBatch);
currentBatch = [];
}
// Add event to current batch
currentBatch.push(sessionEvents[i]);
eventCount++;
}
// Check if we've reached our target event count
if (eventCount >= actualEventsCount) {
break;
}
}
totalTimestampsGenerated += timestampBatch.length;
// If we've reached our target event count, break out
if (eventCount >= actualEventsCount) {
break;
}
}
// Don't forget to add the last batch if it has any events
if (currentBatch.length > 0) {
eventBatches.push(currentBatch);
}
// Final progress update
const finalGenTime = Date.now();
const totalGenSeconds = ((finalGenTime - startGenerationTime) / 1000).toFixed(
2
);
// Flatten all batches into a single array, but only up to the actualEventsCount
// This is more memory-efficient than pre-allocating a huge array
let finalEvents = [];
let remainingEvents = Math.min(eventCount, actualEventsCount);
for (const batch of eventBatches) {
if (remainingEvents <= 0) break;
const eventsToTake = Math.min(batch.length, remainingEvents);
finalEvents = finalEvents.concat(batch.slice(0, eventsToTake));
remainingEvents -= eventsToTake;
}
const generationEndTime = Date.now();
const generationSeconds = (
(generationEndTime - startGenerationTime) /
1000
).toFixed(2);
const genEventsPerSecond = Math.round(finalEvents.length / generationSeconds);
console.log(
`Generated ${finalEvents.length.toLocaleString()} events in ${generationSeconds} seconds ` +
`(${genEventsPerSecond.toLocaleString()} events/sec). Starting insertion...`
);
// Add timing measurement
const startTime = Date.now();
let totalInserted = 0;
// Increase batch size for higher throughput
const INSERTION_BATCH_SIZE = 50000; // Increased from 10000 to 50000
// Create batches
const insertionBatches = [];
for (let i = 0; i < finalEvents.length; i += INSERTION_BATCH_SIZE) {
insertionBatches.push(finalEvents.slice(i, i + INSERTION_BATCH_SIZE));
}
// Maximum number of parallel inserts (based on CPU cores)
const MAX_PARALLEL = 3; // Using 3 of the 4 cores for parallelism
// Process batches with controlled parallelism
for (let i = 0; i < insertionBatches.length; i += MAX_PARALLEL) {
const batchPromises = [];
// Create a set of promises for parallel execution
for (let j = 0; j < MAX_PARALLEL && i + j < insertionBatches.length; j++) {
const batchIndex = i + j;
const batch = insertionBatches[batchIndex];
batchPromises.push(
(async () => {
const batchStartTime = Date.now();
try {
await clickhouse.insert({
table: "pageviews",
values: batch,
format: "JSONEachRow",
});
const batchEndTime = Date.now();
const batchSeconds = (batchEndTime - batchStartTime) / 1000;
const batchSpeed = Math.round(batch.length / batchSeconds);
return {
batchIndex,
batchSize: batch.length,
batchSpeed,
success: true,
};
} catch (error) {
console.error(`Error inserting batch ${batchIndex + 1}:`, error);
throw error;
}
})()
);
}
// Wait for this group of batches to complete
try {
const results = await Promise.all(batchPromises);
// Update metrics
for (const result of results) {
if (result.success) {
totalInserted += result.batchSize;
const totalElapsed = (Date.now() - startTime) / 1000;
const averageSpeed = Math.round(totalInserted / totalElapsed);
console.log(
`Inserted batch ${result.batchIndex + 1} of ${
insertionBatches.length
} | ` +
`Batch speed: ${result.batchSpeed.toLocaleString()} events/sec | ` +
`Avg speed: ${averageSpeed.toLocaleString()} events/sec`
);
}
}
} catch (error) {
console.error("Error in batch processing:", error);
throw error;
}
}
const endTime = Date.now();
const totalSeconds = (endTime - startTime) / 1000;
const overallSpeed = Math.round(finalEvents.length / totalSeconds);
console.log(
`Day completed in ${totalSeconds.toFixed(2)} seconds | ` +
`Overall insertion speed: ${overallSpeed.toLocaleString()} events/sec | ` +
`CPU cores utilized: ${MAX_PARALLEL} of 4`
);
return finalEvents.length;
}
// Main function to generate all the data.
// Walks back `daysInPast` days from the moment the run started (most recent day
// first), awaits generateEventsForDay for each one, and keeps a running total of
// the per-day counts it returns. Terminates the process with exit code 0 when
// every day has completed. Any rejection from generateEventsForDay propagates
// to the caller, since nothing here catches it.
async function generateMockData() {
  let totalEvents = 0;
  // Read the clock once. Each day is awaited and can take a long time, so
  // calling DateTime.now() inside the loop lets a run that crosses midnight
  // shift the remaining offsets by one day: a calendar day would be generated
  // twice and the oldest requested day would be skipped.
  const runStart = DateTime.now();
  // Generate data for each day, starting from the most recent
  for (let day = 0; day < daysInPast; day++) {
    const date = runStart.minus({ days: day });
    const eventsCount = await generateEventsForDay(date, eventsPerDay);
    totalEvents += eventsCount;
    console.log(
      `Completed day ${day + 1} of ${daysInPast}. Total events: ${totalEvents}`
    );
  }
  console.log(
    `Mock data generation complete. Generated ${totalEvents} events across ${daysInPast} days.`
  );
  // Exit explicitly once the run is done instead of waiting for the event loop to drain.
  process.exit(0);
}
// Start the data generation. If the returned promise rejects, the error is
// logged and the process exits with status 1.
generateMockData().catch(function handleFatalError(failure) {
  console.error("Error generating mock data:", failure);
  process.exit(1);
});