Machine Coding Problem

Search Engine with Trie

maco30maco60macoAllsearchtrie-suggestionsinverted-indextf-idf-strategyboolean-query-intersections
Commonly Asked By:GoogleMicrosoftYahoo

Functional Scope (In-Scope)

  • Trie Prefix Autocomplete: Build efficient Trie prefix indexes returning top-5 matching keywords.
  • Inverted Posting Index: Map individual word terms to document IDs and occurrence frequencies.
  • Dynamic Ranking Strategies (Strategy Pattern): Support switching algorithms (TF-IDF vs Simple term occurrence counts) dynamically.
  • AND Intersection Queries: Perform sorted posting lists intersections in O(N+M) complexity.

Explicit Boundaries (Out-of-Scope)

  • No Intelligent NLP Synonym Parsing: Excludes lemmatization, semantic word embedding checks, or multilingual parsers.
  • No Real crawling System: Bypasses web page scrapers, HTML parsers, or outbound HTTP fetching pipelines.

Robust reference designs demonstrating Trie prefixes and TF-IDF posting in Java and Python:

// ─── JAVA BLUEPRINT ──────────────────────────────────────────────────────────
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.locks.*;

class TrieNode {
    private final Map<Character, TrieNode> children = new ConcurrentHashMap<>();
    private final List<String> suggestions = new CopyOnWriteArrayList<>();
    private final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();

    public Map<Character, TrieNode> getChildren() { return children; }
    
    public List<String> getSuggestions() {
        rwLock.readLock().lock();
        try {
            return new ArrayList<>(suggestions);
        } finally {
            rwLock.readLock().unlock();
        }
    }

    public void addSuggestion(String word) {
        rwLock.writeLock().lock();
        try {
            if (!suggestions.contains(word)) {
                suggestions.add(word);
                // Sort suggestions by length to simulate relevance
                suggestions.sort(Comparator.comparingInt(String::length));
                if (suggestions.size() > 5) {
                    suggestions.remove(suggestions.size() - 1);
                }
            }
        } finally {
            rwLock.writeLock().unlock();
        }
    }
}

class Trie {
    private final TrieNode root = new TrieNode();

    public void insert(String word) {
        TrieNode curr = root;
        for (char c : word.toLowerCase().toCharArray()) {
            curr = curr.getChildren().computeIfAbsent(c, k -> new TrieNode());
            curr.addSuggestion(word);
        }
    }

    public List<String> getSuggestions(String prefix) {
        TrieNode curr = root;
        for (char c : prefix.toLowerCase().toCharArray()) {
            curr = curr.getChildren().get(c);
            if (curr == null) return Collections.emptyList();
        }
        return curr.getSuggestions();
    }
}

class Posting {
    private final String docId;
    private final int termFrequency;

    public Posting(String docId, int termFrequency) {
        this.docId = docId;
        this.termFrequency = termFrequency;
    }

    public String getDocId() { return docId; }
    public int getTermFrequency() { return termFrequency; }
}

class SearchResult {
    private final String docId;
    private final double score;

    public SearchResult(String docId, double score) {
        this.docId = docId;
        this.score = score;
    }

    public String getDocId() { return docId; }
    public double getScore() { return score; }
}

// ─── STRATEGY PATTERN (RANKING) ──────────────────────────────────────────────
interface RankingStrategy {
    List<SearchResult> rank(List<String> terms, Map<String, List<Posting>> index, int totalDocuments);
}

class TFIDFRankingStrategy implements RankingStrategy {
    @Override
    public List<SearchResult> rank(List<String> terms, Map<String, List<Posting>> index, int totalDocs) {
        Map<String, Double> scores = new HashMap<>();

        for (String term : terms) {
            List<Posting> postings = index.getOrDefault(term.toLowerCase(), Collections.emptyList());
            if (postings.isEmpty()) continue;

            // IDF = ln(1 + totalDocs / documentFrequency)
            double idf = Math.log(1.0 + (double) totalDocs / postings.size());

            for (Posting posting : postings) {
                // TF-IDF = tf * idf
                double tfIdf = posting.getTermFrequency() * idf;
                scores.put(posting.getDocId(), scores.getOrDefault(posting.getDocId(), 0.0) + tfIdf);
            }
        }

        List<SearchResult> results = new ArrayList<>();
        for (Map.Entry<String, Double> entry : scores.entrySet()) {
            results.add(new SearchResult(entry.getKey(), entry.getValue()));
        }
        results.sort((a, b) -> Double.compare(b.getScore(), a.getScore()));
        return results;
    }
}

class SimpleFrequencyRankingStrategy implements RankingStrategy {
    @Override
    public List<SearchResult> rank(List<String> terms, Map<String, List<Posting>> index, int totalDocs) {
        Map<String, Integer> scores = new HashMap<>();
        for (String term : terms) {
            List<Posting> postings = index.getOrDefault(term.toLowerCase(), Collections.emptyList());
            for (Posting p : postings) {
                scores.put(p.getDocId(), scores.getOrDefault(p.getDocId(), 0) + p.getTermFrequency());
            }
        }

        List<SearchResult> results = new ArrayList<>();
        for (Map.Entry<String, Integer> entry : scores.entrySet()) {
            results.add(new SearchResult(entry.getKey(), entry.getValue()));
        }
        results.sort((a, b) -> Double.compare(b.getScore(), a.getScore()));
        return results;
    }
}

class SearchEngine {
    private final Trie autocompleteTrie = new Trie();
    private final Map<String, List<Posting>> invertedIndex = new ConcurrentHashMap<>();
    private final Set<String> registeredDocIds = ConcurrentHashMap.newKeySet();
    private RankingStrategy rankingStrategy;

    public SearchEngine(RankingStrategy rankingStrategy) {
        this.rankingStrategy = rankingStrategy;
    }

    public void setRankingStrategy(RankingStrategy rankingStrategy) {
        this.rankingStrategy = rankingStrategy;
    }

    public void indexDocument(String docId, String content) {
        registeredDocIds.add(docId);
        String[] words = content.toLowerCase().split("\\W+");

        Map<String, Integer> freqs = new HashMap<>();
        for (String word : words) {
            if (word.trim().isEmpty()) continue;
            freqs.put(word, freqs.getOrDefault(word, 0) + 1);
            autocompleteTrie.insert(word);
        }

        for (Map.Entry<String, Integer> entry : freqs.entrySet()) {
            String word = entry.getKey();
            int freq = entry.getValue();
            invertedIndex.compute(word, (k, postings) -> {
                if (postings == null) postings = new CopyOnWriteArrayList<>();
                postings.add(new Posting(docId, freq));
                return postings;
            });
        }
    }

    public List<String> autocomplete(String prefix) {
        return autocompleteTrie.getSuggestions(prefix);
    }

    public List<SearchResult> search(String query) {
        // Simple space-separated AND query resolver
        String[] terms = query.toLowerCase().split("\\s+");
        List<String> queryTerms = Arrays.asList(terms);
        return rankingStrategy.rank(queryTerms, invertedIndex, registeredDocIds.size());
    }
}

public class Main {
    public static void main(String[] args) {
        System.out.println("=== Search Engine with Trie ===");
        SearchEngine engine = new SearchEngine(new TFIDFRankingStrategy());

        engine.indexDocument("DOC-1", "Concurrency in Java is highly complex and beautiful.");
        engine.indexDocument("DOC-2", "Java offers thread-safe collections and safe locks.");
        engine.indexDocument("DOC-3", "Trie autocomplete supports real-time fast suggestions.");

        // Autocomplete
        System.out.println("Autocomplete for 'con': " + engine.autocomplete("con"));
        System.out.println("Autocomplete for 'ja': " + engine.autocomplete("ja"));

        // Search with TF-IDF Strategy
        System.out.println("--- Search Results for 'Java' (TF-IDF) ---");
        for (SearchResult res : engine.search("Java")) {
            System.out.println(String.format("Doc ID: %s, Score: %.4f", res.getDocId(), res.getScore()));
        }

        // Change Strategy to simple frequency
        engine.setRankingStrategy(new SimpleFrequencyRankingStrategy());
        System.out.println("--- Search Results for 'Java' (Simple Frequency) ---");
        for (SearchResult res : engine.search("Java")) {
            System.out.println(String.format("Doc ID: %s, Score: %.4f", res.getDocId(), res.getScore()));
        }
    }
}