1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
import os
import re

import requests
from bs4 import BeautifulSoup
def is_adult_content(url):
    """
    Fetch a page while presenting an iPhone User-Agent and report whether
    its visible text contains any of the adult-content keywords.

    :param url: URL of the site to inspect
    :return: True when at least one keyword occurs in the lower-cased page
        text; False when none occurs, when the HTTP status is not 200, or
        when the request raises ``requests.RequestException``
    """
    mobile_user_agent = (
        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Version/14.0 "
        "Mobile/15E148 Safari/537.36"
    )
    request_headers = {"User-Agent": mobile_user_agent}

    # Substrings searched for in the lower-cased page text.
    flagged_terms = (
        "porn", "sex", "xxx", "adult", "erotic", "nude", "hentai", "18+",
        "爱情", "女神", "线路", "澳门", "vip", "撸",
        "麻豆", "天美", "精东", "糖心", "有码", "无码",
    )

    try:
        reply = requests.get(url, headers=request_headers, timeout=10)
        if reply.status_code != 200:
            print(f"访问失败: {url}, 状态码: {reply.status_code}")
            return False

        # Decode the body using the encoding requests guesses from the
        # content itself instead of the one it picked by default.
        reply.encoding = reply.apparent_encoding

        page_text = BeautifulSoup(reply.text, "html.parser").get_text().lower()
        return any(term in page_text for term in flagged_terms)
    except requests.RequestException as err:
        print(f"请求异常: {url}, 错误: {err}")
        return False
def check_domains(file_path="domain_results.txt"):
    """
    Read a list of domains and check each one for adult content.

    Every non-blank line of ``file_path`` is treated as one domain. A line
    that does not already start with ``http://`` or ``https://`` is prefixed
    with ``http://`` before being passed to ``is_adult_content``. URLs that
    are flagged are appended to ``data/sex.txt`` (its directory is created
    on demand), and one status line is printed per domain.

    :param file_path: path of the domain list file, one domain per line
    :return: None
    """
    output_path = "data/sex.txt"

    # Only the input read is guarded: a FileNotFoundError raised later (for
    # example while opening the output file) must not be reported as a
    # missing input file.
    try:
        with open(file_path, "r") as f:
            domains = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        print(f"文件 {file_path} 不存在。")
        return

    for domain in domains:
        # Avoid building "http://http://..." for entries that already
        # carry a scheme.
        if domain.lower().startswith(("http://", "https://")):
            full_url = domain
        else:
            full_url = f"http://{domain}"

        if is_adult_content(full_url):
            print(f"⚠️ 检测到成人内容: {domain}")
            # Create the output directory lazily so nothing is written to
            # disk unless there is a hit to record.
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            # Append per hit so results survive an interrupted run.
            with open(output_path, "a", encoding="utf-8") as out:
                out.write(full_url + "\n")
        else:
            print(f"✅ 安全: {domain}")
# Script entry: run the adult-content check over the domains listed in
# data/test.txt.
# NOTE(review): no `if __name__ == "__main__":` guard is visible here, so
# these statements appear to run on import as well — confirm that is intended.
test_file = "data/test.txt"
check_domains(test_file)
|