上传文件至 /

2025-01-20 17:08:21 +08:00
parent 7e4d3d9ced
commit 23fe0488cd
1 changed files with 51 additions and 0 deletions
--- a/pachong.py
+++ b/pachong.py
@@ -0,0 +1,51 @@
+import requests
+from bs4 import BeautifulSoup
+import time
+import random
+
+def pachong(url):
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
+    }
+
+    try:
+        response = requests.get(url, headers=headers)
+        response.raise_for_status()  # 检查请求是否成功
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        # 提取所有链接
+        links = []
+        for a_tag in soup.find_all("a", href=True):
+            link = a_tag["href"]
+            links.append(link)
+
+        print(f"从 {url} 爬取到 {len(links)} 个链接。")
+        return links
+
+    except requests.RequestException as e:
+        print(f"无法访问 {url}，错误：{e}")
+        return []
+
+def main():
+    # 用户输入多个网站的 URL
+    urls = input("请输入多个网站的 URL，用逗号分隔：").strip().split(",")
+
+    for url in urls:
+        url = url.strip()  # 去除多余的空格
+        if url:  # 确保 URL 不为空
+            print(f"正在爬取 {url}...")
+            links = pachong(url)
+
+            # 为每个网站创建一个独立的文件
+            filename = f"{url.replace('https://', '').replace('www.', '').replace('/', '_')}.txt"
+            with open(filename, "w", encoding="utf-8") as file:
+                for link in links:
+                    file.write(link + "\n")
+
+            print(f"链接已保存到 {filename}")
+            time.sleep(random.uniform(1, 3))  # 随机延迟，避免对服务器造成过大压力
+
+    print("所有网站的链接已分别保存到对应的文件中。")
+
+# Run the crawler only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()