# Sort in place: most popular first (higher source_count), then higher
# quality. Quality labels are compared case-insensitively; a missing
# quality maps to the None entry and an unrecognised label falls back to 0.
quality_order = {"4k": 4, "1080p": 3, "720p": 2, "480p": 1, None: 0}
matches.sort(
    key=lambda x: (
        # Negated so that larger values sort first with the default
        # ascending order.
        -x["source_count"],
        -quality_order.get(x["quality"].lower() if x["quality"] else None, 0),
    )
)
results.append( "source": "Filmy4wap", "title": title, "year": year, "language": language, "quality": quality, "url": href, ) return results
@classmethod def search(cls, query: str) -> List[Dict[str, Any]]: url = cls.SEARCH_URL.format(query=query.replace(" ", "+")) soup = BeautifulSoup(cls._get(url).text, "html.parser") cards = soup.select("div.result-item") results = [] for c in cards: a = c.select_one("a.title") if not a: continue title = a.get_text(strip=True) href = cls._clean_link(a["href"])
# Some sites embed details in data‑attributes: year = c.get("data-year") language = c.get("data-language") quality = c.get("data-quality") # Sort by most‑popular (higher source_count) → higher
# Apply matching logic matches = match_results(deduped, query_norm)
results.append( "source": "FilmyFly", "title": title, "year": year, "language": language, "quality": quality, "url": href, ) return results
# Deduplicate by URL (same file may appear on multiple sites) seen_urls = set() deduped = [] for entry in raw: if entry["url"] in seen_urls: continue seen_urls.add(entry["url"]) deduped.append(entry) ) ) results.append( "source": "Filmy4wap"
query_str = " ".join(args.title) data = search_movie(query_str)
# ----------------------------------------------------------------------
# 1️⃣ Helper – normalise user query
# ----------------------------------------------------------------------
def normalize(text: str) -> str:
    """Return a lower-cased, ASCII-folded, whitespace-collapsed form of *text*.

    The steps are applied in this order:

    1. NFKD-decompose the text so accented letters split into a base letter
       plus combining marks.
    2. Encode to ASCII ignoring errors, which drops the combining marks and
       every other non-ASCII character.
    3. Delete every character that is not a word character (letters, digits,
       underscore), whitespace, or a hyphen.
    4. Collapse whitespace runs to a single space, strip both ends and
       lower-case the result.

    An input made up only of non-ASCII characters or punctuation therefore
    normalises to the empty string.
    """
    text = unicodedata.normalize("NFKD", text)
    text = text.encode("ascii", "ignore").decode()
    text = re.sub(r"[^\w\s-]", "", text)  # keep hyphens (some titles use them)
    text = re.sub(r"\s+", " ", text).strip()
    return text.lower()
# Fuzzy fallback – we score against the **title** only. titles = [r["title"] for r in results] scored = process.extract( query_norm, titles, scorer=fuzz.token_sort_ratio, limit=None, ) matched_titles = title for title, score, _ in scored if score >= min_fuzzy return [r for r in results if r["title"] in matched_titles] ) return results @classmethod def search(cls
# ---------------------------------------------------------------------- # 4️⃣ Orchestrator – pull everything together # ---------------------------------------------------------------------- def search_movie(query: str) -> Dict[str, Any]: """ Core function – call it from your UI, API endpoint or CLI. Returns a dict with: - query (original) - normalized_query - total_matches - results (list, deduped) """ query_norm = normalize(query)
class FilmyFlyScraper(BaseScraper):
    """Scraper configuration for the FilmyFly search page."""

    # URL template for a search request. The ``{query}`` placeholder is
    # required: ``search`` builds the request URL with
    # ``SEARCH_URL.format(query=...)``, and without the placeholder that call
    # returns the template unchanged, so the search term is never sent.
    # NOTE(review): placeholder position restored from the ``/search/query``
    # path; confirm against the site's actual search route.
    SEARCH_URL = "https://www.filmyfly.in/search/{query}"
# Year & language are usually in a <p> like "2022 | Hindi | 720p"
meta = c.select_one("p.movie-meta")
year, language, quality = None, None, None
if meta:
    parts = [p.strip() for p in meta.get_text(separator="|").split("|")]
    for p in parts:
        if not p:
            # Adjacent or trailing separators produce empty segments; skip
            # them so they cannot overwrite a real quality value with "".
            continue
        if re.fullmatch(r"\d{4}", p):
            # Exactly four digits is treated as the release year.
            year = p
        elif p.lower() in {"hindi", "english", "telugu", "marathi"}:
            language = p
        else:
            # Anything else is taken to be the quality label; if several
            # segments land here, the last one wins.
            quality = p