Machine Coding Problem

Video Recommendation Engine (LLD)

macoAllmlfeature-servingranking
Commonly Asked By:NetflixYouTubeByteDance

Functional Scope (In-Scope)

  • Multi-Stage Recommendation Pipeline: Implements Candidate Retrieval (approximate filtering) followed by heavy Ranking scoring.
  • Embedding-Based Cosine Match: Performs spatial vector similarity scores between user interest profiles and video vector models.
  • Bubble & Diversity Filters: Caps recommendations per creator or category to prevent users falling into monotonous feedback loops.
  • Asynchronous Live Feedback Loop: Dynamic shift calibration moving the user's preference embedding vectors closer to high-watch retention videos.

Explicit Boundaries (Out-of-Scope)

  • Deep Learning Training Architectures: Focuses on serving pre-computed models and performing real-time vector arithmetic.
  • Heavy Video Transcoding / CDN: Handled by storage layers; recommendation logic operates strictly over meta features.

Production reference implementations demonstrating vector distance solvers, multi-stage pipelines, and real-time online updates in Java and Python:

// ─── JAVA BLUEPRINT ──────────────────────────────────────────────────────────
import java.util.*;
import java.util.concurrent.*;
import java.util.stream.Collectors;

class VideoFeature {
    private final String videoId;
    private final String title;
    private final String category;
    private final String creatorId;
    private final double[] embedding;
    private int views;
    private int likes;
    private final long createdAt;

    public VideoFeature(String videoId, String title, String category, String creatorId, double[] embedding) {
        this.videoId = videoId;
        this.title = title;
        this.category = category;
        this.creatorId = creatorId;
        this.embedding = embedding;
        this.views = 1; // Avoid divide-by-zero
        this.likes = 0;
        this.createdAt = System.currentTimeMillis();
    }

    public String getVideoId() { return videoId; }
    public String getTitle() { return title; }
    public String getCategory() { return category; }
    public String getCreatorId() { return creatorId; }
    public double[] getEmbedding() { return embedding; }
    public int getViews() { return views; }
    public int getLikes() { return likes; }
    public long getCreatedAt() { return createdAt; }

    public void incrementViews() { this.views++; }
    public void incrementLikes() { this.likes++; }
    public double getEngagementRate() { return (double) likes / views; }
}

class UserFeature {
    private final String userId;
    private final List<String> watchHistory;
    private final Map<String, Integer> categoryPreferences;
    private final double[] embedding;
    private long lastUpdated;

    public UserFeature(String userId, double[] initialEmbedding) {
        this.userId = userId;
        this.watchHistory = new CopyOnWriteArrayList<>();
        this.categoryPreferences = new ConcurrentHashMap<>();
        this.embedding = Arrays.copyOf(initialEmbedding, initialEmbedding.length);
        this.lastUpdated = System.currentTimeMillis();
    }

    public String getUserId() { return userId; }
    public List<String> getWatchHistory() { return watchHistory; }
    public Map<String, Integer> getCategoryPreferences() { return categoryPreferences; }
    public double[] getEmbedding() { return embedding; }
    public long getLastUpdated() { return lastUpdated; }

    public void addWatchedVideo(String videoId, String category) {
        watchHistory.add(videoId);
        categoryPreferences.merge(category, 1, Integer::sum);
        lastUpdated = System.currentTimeMillis();
    }

    // Dynamic user embedding shift based on watched video embedding
    public void shiftEmbeddingTowards(double[] videoEmbedding, double learningRate) {
        for (int i = 0; i < embedding.length; i++) {
            embedding[i] = embedding[i] + learningRate * (videoEmbedding[i] - embedding[i]);
        }
        normalizeEmbedding();
        lastUpdated = System.currentTimeMillis();
    }

    private void normalizeEmbedding() {
        double sumSq = 0.0;
        for (double val : embedding) {
            sumSq += val * val;
        }
        double norm = Math.sqrt(sumSq);
        if (norm > 0) {
            for (int i = 0; i < embedding.length; i++) {
                embedding[i] /= norm;
            }
        }
    }
}

class EmbeddingMath {
    public static double cosineSimilarity(double[] vecA, double[] vecB) {
        if (vecA == null || vecB == null || vecA.length != vecB.length) return 0.0;
        double dotProduct = 0.0;
        double normA = 0.0;
        double normB = 0.0;
        for (int i = 0; i < vecA.length; i++) {
            dotProduct += vecA[i] * vecB[i];
            normA += vecA[i] * vecA[i];
            normB += vecB[i] * vecB[i];
        }
        if (normA == 0 || normB == 0) return 0.0;
        return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
    }
}

// Stage 1: Retrieval (Candidate Generation Pool)
class CandidateGenerator {
    private final ConcurrentHashMap<String, VideoFeature> videoCatalog = new ConcurrentHashMap<>();

    public void addVideo(VideoFeature video) {
        videoCatalog.put(video.getVideoId(), video);
    }

    public List<VideoFeature> generateCandidates(UserFeature user, int limit) {
        // Collect subset using embedding similarity (ANN replacement) + fallback fresh videos
        List<VideoFeature> pool = new ArrayList<>(videoCatalog.values());
        
        // Filter out videos the user has already watched
        Set<String> watched = new HashSet<>(user.getWatchHistory());
        
        return pool.stream()
            .filter(v -> !watched.contains(v.getVideoId()))
            .sorted((v1, v2) -> {
                double sim1 = EmbeddingMath.cosineSimilarity(user.getEmbedding(), v1.getEmbedding());
                double sim2 = EmbeddingMath.cosineSimilarity(user.getEmbedding(), v2.getEmbedding());
                return Double.compare(sim2, sim1); // Highest similarity first
            })
            .limit(limit * 3) // Retrieve larger candidate pool for next ranking stage
            .collect(Collectors.toList());
    }
}

// Stage 2: Heavyweight Multi-Feature Ranking Model
class RankingModel {
    private final double wSimilarity = 0.6;
    private final double wEngagement = 0.3;
    private final double wFreshness = 0.1;

    public double calculateRankScore(UserFeature user, VideoFeature video) {
        // Feature 1: Semantic Embedding Match
        double similarity = EmbeddingMath.cosineSimilarity(user.getEmbedding(), video.getEmbedding());
        
        // Scale cosine sim from [-1, 1] to [0, 1]
        double normalizedSim = (similarity + 1.0) / 2.0;

        // Feature 2: Historical Engagement (Likes/Views)
        double engagement = video.getEngagementRate();

        // Feature 3: Freshness (Half-life decay score)
        double timeDeltaHrs = (System.currentTimeMillis() - video.getCreatedAt()) / (1000.0 * 3600.0);
        double freshness = 1.0 / (1.0 + (timeDeltaHrs / 24.0)); // decaying over hours

        return (wSimilarity * normalizedSim) + (wEngagement * engagement) + (wFreshness * freshness);
    }
}

// Stage 3: Post-Processing Diversity & Bubble Filter
class DiversityFilter {
    // Deduplicates recommendations matching creators or categories to bypass echo chambers
    public static List<VideoFeature> applyDiversity(
        List<VideoFeature> rankedCandidates, 
        Map<VideoFeature, Double> scores, 
        int categoryCap, 
        int targetLimit
    ) {
        List<VideoFeature> diversified = new ArrayList<>();
        Map<String, Integer> categoryCounts = new HashMap<>();
        Map<String, Integer> creatorCounts = new HashMap<>();

        // Penalize repetitive items
        List<VideoFeature> candidates = new ArrayList<>(rankedCandidates);
        candidates.sort((v1, v2) -> {
            double s1 = scores.getOrDefault(v1, 0.0);
            double s2 = scores.getOrDefault(v2, 0.0);
            return Double.compare(s2, s1);
        });

        for (VideoFeature video : candidates) {
            if (diversified.size() >= targetLimit) break;

            String cat = video.getCategory();
            String creator = video.getCreatorId();

            int catCount = categoryCounts.getOrDefault(cat, 0);
            int creatorCount = creatorCounts.getOrDefault(creator, 0);

            // Skip or apply penalty if category or creator cap has been exceeded
            if (catCount < categoryCap && creatorCount < 2) {
                diversified.add(video);
                categoryCounts.put(cat, catCount + 1);
                creatorCounts.put(creator, creatorCount + 1);
            }
        }
        return diversified;
    }
}

// Complete Orchestrator Recommendation Engine
class RecommendationService {
    private final CandidateGenerator candidateGenerator = new CandidateGenerator();
    private final RankingModel rankingModel = new RankingModel();
    private final ConcurrentHashMap<String, UserFeature> userFeatureStore = new ConcurrentHashMap<>();

    public void registerUser(String userId, double[] initialEmbedding) {
        userFeatureStore.put(userId, new UserFeature(userId, initialEmbedding));
    }

    public void addVideoToCatalog(VideoFeature video) {
        candidateGenerator.addVideo(video);
    }

    public UserFeature getUser(String userId) {
        return userFeatureStore.get(userId);
    }

    // Multi-stage pipeline: Retrieval -> Heavy Ranking -> Diversity Filter
    public List<VideoFeature> getRecommendations(String userId, int targetCount) {
        UserFeature user = userFeatureStore.get(userId);
        if (user == null) return Collections.emptyList();

        // 1. Candidate Retrieval (Retrieves 3x target count)
        List<VideoFeature> candidates = candidateGenerator.generateCandidates(user, targetCount);

        // 2. Heavyweight Scoring
        Map<VideoFeature, Double> scores = new HashMap<>();
        for (VideoFeature video : candidates) {
            scores.put(video, rankingModel.calculateRankScore(user, video));
        }

        // Sort by raw scores
        candidates.sort((v1, v2) -> Double.compare(scores.get(v2), scores.get(v1)));

        // 3. Diversity Filtering
        return DiversityFilter.applyDiversity(candidates, scores, 2, targetCount);
    }

    // Dynamic Online Feedback Loop
    public void recordWatchEvent(String userId, VideoFeature video, double completionRatio, boolean liked) {
        UserFeature user = userFeatureStore.get(userId);
        if (user == null) return;

        // Increment engagement counters on video
        video.incrementViews();
        if (liked) {
            video.incrementLikes();
        }

        // Record history
        user.addWatchedVideo(video.getVideoId(), video.getCategory());

        // Shift user embedding vector slowly if they watched a major portion of the video (e.g. > 50%)
        if (completionRatio > 0.5) {
            double learningRate = 0.1 * completionRatio;
            user.shiftEmbeddingTowards(video.getEmbedding(), learningRate);
        }
    }
}

public class Main {
    public static void main(String[] args) {
        System.out.println("=== JAVA VIDEO RECOMMENDATION ENGINE SIMULATION ===");
        RecommendationService service = new RecommendationService();

        // Register user who likes Tech (interest vector [0.1, 0.9])
        service.registerUser("user-101", new double[]{0.1, 0.9});

        // Add videos
        VideoFeature v1 = new VideoFeature("v1", "TypeScript advanced tricks", "Tech", "creator-a", new double[]{0.2, 0.9});
        VideoFeature v2 = new VideoFeature("v2", "Elden Ring Speedrun", "Gaming", "creator-b", new double[]{0.9, 0.1});
        service.addVideoToCatalog(v1);
        service.addVideoToCatalog(v2);

        // Get Initial Recommendations
        List<VideoFeature> recs1 = service.getRecommendations("user-101", 2);
        System.out.println("Initial recommendations for user-101:");
        for (VideoFeature v : recs1) {
            System.out.println(" - " + v.getTitle() + " (" + v.getCategory() + ")");
        }

        // Record high watch completion for Gaming video (v2) to shift interest
        System.out.println("\nUser watches Elden Ring video fully...");
        service.recordWatchEvent("user-101", v2, 1.0, true);

        // Get Recommendations again
        List<VideoFeature> recs2 = service.getRecommendations("user-101", 2);
        System.out.println("Recommendations after watch event:");
        for (VideoFeature v : recs2) {
            System.out.println(" - " + v.getTitle() + " (" + v.getCategory() + ")");
        }

        System.out.println("=== END OF JAVA SIMULATION ===");
    }
}