From 23fe0488cd151713812218848b474e8560ee3fb2 Mon Sep 17 00:00:00 2001
From: xt66642 <136733984@qq.com>
Date: Mon, 20 Jan 2025 17:08:21 +0800
Subject: [PATCH] Upload file to /
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pachong.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 pachong.py

diff --git a/pachong.py b/pachong.py
new file mode 100644
index 0000000..4cc365b
--- /dev/null
+++ b/pachong.py
@@ -0,0 +1,51 @@
+import requests
+from bs4 import BeautifulSoup
+import time
+import random
+
+def pachong(url):
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
+    }
+
+    try:
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()  # Raise an exception if the request failed
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        # Extract all links on the page
+        links = []
+        for a_tag in soup.find_all("a", href=True):
+            link = a_tag["href"]
+            links.append(link)
+
+        print(f"Crawled {len(links)} links from {url}.")
+        return links
+
+    except requests.RequestException as e:
+        print(f"Failed to access {url}: {e}")
+        return []
+
+def main():
+    # Read multiple site URLs from user input
+    urls = input("Enter one or more site URLs, separated by commas: ").strip().split(",")
+
+    for url in urls:
+        url = url.strip()  # Strip surrounding whitespace
+        if url:  # Skip empty entries
+            print(f"Crawling {url}...")
+            links = pachong(url)
+
+            # Write each site's links to its own file
+            filename = f"{url.replace('https://', '').replace('http://', '').replace('www.', '').replace('/', '_')}.txt"
+            with open(filename, "w", encoding="utf-8") as file:
+                for link in links:
+                    file.write(link + "\n")
+
+            print(f"Links saved to {filename}")
+            time.sleep(random.uniform(1, 3))  # Random delay to avoid hammering the server
+
+    print("Links for every site have been saved to separate files.")
+
+if __name__ == "__main__":
+    main()
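
Note (a sketch, not part of the patch above): a_tag["href"] frequently yields relative paths such as "/about", so the saved files can mix relative and absolute URLs. One minimal way to normalize them is urllib.parse.urljoin from the standard library; the helper name extract_links below is hypothetical, not from the patch.

    from urllib.parse import urljoin

    import requests
    from bs4 import BeautifulSoup

    def extract_links(url: str) -> list[str]:
        # Hypothetical helper, not in the patch: fetch the page and resolve
        # every href against the page URL, so relative paths like "/about"
        # become absolute URLs.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        return [urljoin(url, a["href"]) for a in soup.find_all("a", href=True)]

With this, extract_links("https://example.com") would return fully qualified URLs no matter how each href is written in the page.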
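
A second hedged sketch: the chained str.replace calls that build filename still let through characters that are invalid on some filesystems (":" from a port, "?" and "&" from a query string). One possible alternative based on urllib.parse.urlparse, assuming Python 3.9+ for str.removeprefix; the name safe_filename is an assumption, not in the patch.

    from urllib.parse import urlparse

    def safe_filename(url: str) -> str:
        # Hypothetical helper, not in the patch: keep only characters that
        # are safe on common filesystems; everything else (":", "?", "&", ...)
        # becomes an underscore.
        parsed = urlparse(url)
        raw = (parsed.netloc + parsed.path).removeprefix("www.")
        cleaned = "".join(c if c.isalnum() or c in ".-" else "_" for c in raw)
        return f"{cleaned or 'site'}.txt"

For example, safe_filename("https://example.com/a?q=1") returns "example.com_a.txt", which is writable on Windows as well, unlike a name containing "?".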