pip install requests beautifulsoup4
import requests
from bs4 import BeautifulSoup


def seo_scraper(url):
    """Fetch *url* and print the page's title and meta description.

    Args:
        url: Absolute URL of the page to inspect.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # A timeout keeps the script from hanging forever on an unresponsive host.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    # <title> may be missing, or present with no single string child
    # (``.string`` is None in that case), so fall back in both situations.
    title = (soup.title.string if soup.title else None) or 'No Title'

    # Use .get(): a <meta name="description"> tag without a ``content``
    # attribute would raise KeyError with subscript access.
    meta_description = soup.find('meta', attrs={'name': 'description'})
    description = (
        meta_description.get('content') if meta_description else None
    ) or 'No Description'

    print(f"網站標題: {title}")
    print(f"網站描述: {description}")


# Example usage
seo_scraper('https://example.com')
範例二、提取 SEO 相關資訊,如標題、描述、H1 標籤及所有內部連結
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse


def seo_analyze(url):
    """Print basic SEO information for the page at *url*.

    Reports the page title, the meta description, the text of every <h1>
    tag, and the set of internal links (links whose host matches the host
    of *url*). Request failures are reported on stdout instead of raised.

    Args:
        url: Absolute URL of the page to analyze.
    """
    try:
        # A timeout keeps the script from hanging on an unresponsive host.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # treat 4xx/5xx responses as failures
    except requests.RequestException as exc:
        print(f"請求失敗: {exc}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    # Title: fall back when <title> is missing or has no single string child.
    title = (soup.title.string if soup.title else None) or 'No Title'
    print(f"網站標題: {title}")

    # Meta description: .get() avoids KeyError when ``content`` is absent.
    meta_description = soup.find('meta', attrs={'name': 'description'})
    description = (
        meta_description.get('content') if meta_description else None
    ) or 'No Description'
    print(f"網站描述: {description}")

    # H1 tags
    h1_texts = [h1.get_text(strip=True) for h1 in soup.find_all('h1')]
    print(f"H1 標籤: {h1_texts or 'No H1 Tags'}")

    # Internal links: compare hosts rather than testing whether the start
    # URL is a substring of the link. The substring test wrongly accepts
    # external URLs that merely contain the site URL (e.g. in a query
    # string) and rejects same-site links when *url* itself has a path.
    site_host = urlparse(url).netloc
    resolved_links = (
        urljoin(url, a_tag['href'])
        for a_tag in soup.find_all('a', href=True)
    )
    internal_links = {
        link for link in resolved_links if urlparse(link).netloc == site_host
    }

    print("\n內部連結:")
    # Sorted so the output is stable from run to run.
    for link in sorted(internal_links):
        print(link)


# Example usage
website_url = 'https://example.com'  # replace with the URL to analyze
seo_analyze(website_url)
範例三、檢查圖片的 Alt 屬性,確保圖片有適當的描述
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse


# NOTE(review): the heading above this snippet describes an image
# alt-attribute check, but the code is the same title / description / H1 /
# internal-link analysis as the previous example and inspects no <img> tags.
# Confirm which snippet was intended here.
def seo_analyze(url):
    """Print basic SEO information for the page at *url*.

    Reports the page title, the meta description, the text of every <h1>
    tag, and the set of internal links (links whose host matches the host
    of *url*). Request failures are reported on stdout instead of raised.

    Args:
        url: Absolute URL of the page to analyze.
    """
    try:
        # A timeout keeps the script from hanging on an unresponsive host.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # treat 4xx/5xx responses as failures
    except requests.RequestException as exc:
        print(f"請求失敗: {exc}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    # Title: fall back when <title> is missing or has no single string child.
    title = (soup.title.string if soup.title else None) or 'No Title'
    print(f"網站標題: {title}")

    # Meta description: .get() avoids KeyError when ``content`` is absent.
    meta_description = soup.find('meta', attrs={'name': 'description'})
    description = (
        meta_description.get('content') if meta_description else None
    ) or 'No Description'
    print(f"網站描述: {description}")

    # H1 tags
    h1_texts = [h1.get_text(strip=True) for h1 in soup.find_all('h1')]
    print(f"H1 標籤: {h1_texts or 'No H1 Tags'}")

    # Internal links: compare hosts rather than testing whether the start
    # URL is a substring of the link. The substring test wrongly accepts
    # external URLs that merely contain the site URL (e.g. in a query
    # string) and rejects same-site links when *url* itself has a path.
    site_host = urlparse(url).netloc
    resolved_links = (
        urljoin(url, a_tag['href'])
        for a_tag in soup.find_all('a', href=True)
    )
    internal_links = {
        link for link in resolved_links if urlparse(link).netloc == site_host
    }

    print("\n內部連結:")
    # Sorted so the output is stable from run to run.
    for link in sorted(internal_links):
        print(link)


# Example usage
website_url = 'https://example.com'  # replace with the URL to analyze
seo_analyze(website_url)
範例四、提取所有標題 (H1–H6),來分析標題結構是否符合 SEO 最佳實踐
import requests
from bs4 import BeautifulSoup


def extract_headings(url):
    """Print every H1-H6 heading of the page at *url* and check the nesting.

    Headings are listed in the order they appear in the document, then
    passed to ``analyze_heading_structure``. Request failures are reported
    on stdout instead of raised.

    Args:
        url: Absolute URL of the page to inspect.
    """
    try:
        # A timeout keeps the script from hanging on an unresponsive host.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # treat 4xx/5xx responses as failures
    except requests.RequestException as exc:
        print(f"請求失敗: {exc}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    heading_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']

    # One find_all() call with the whole tag list yields the headings in
    # document order. Collecting them tag by tag (all H1, then all H2, ...)
    # would group them by level and hide real level jumps from the
    # structure analysis below.
    headings = [
        (heading.name.upper(), heading.get_text(strip=True))
        for heading in soup.find_all(heading_tags)
    ]

    if not headings:
        print("未找到任何標題標籤。")
        return

    # Print the heading outline.
    print(f"共找到 {len(headings)} 個標題標籤:\n")
    for level, text in headings:
        print(f"{level}: {text}")

    # Check the heading hierarchy.
    analyze_heading_structure(headings)


def analyze_heading_structure(headings):
    """Report places where the heading level jumps by more than one step.

    Args:
        headings: Sequence of ``(level, text)`` pairs, where ``level`` is
            one of ``"H1"`` .. ``"H6"``, in the order to be checked.
    """
    print("\n🔍 標題結構分析:")
    previous_level = 0
    level_map = {"H1": 1, "H2": 2, "H3": 3, "H4": 4, "H5": 5, "H6": 6}

    for level, text in headings:
        current_level = level_map[level]

        # A jump is a heading more than one level deeper than its
        # predecessor; the first heading has no predecessor to compare with.
        if previous_level and current_level > previous_level + 1:
            print(f"⚠️ 標題層級跳躍:從 {previous_level} 跳到 {current_level} - '{text}'")

        previous_level = current_level

    print("✅ 標題層級檢查完成。")


# Example usage
website_url = 'https://example.com'  # replace with the URL to check
extract_headings(website_url)
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse


def seo_score(url):
    """Print a 0-100 SEO score for the page at *url* with the failed checks.

    Points awarded: title (15), meta description (15), H1 (10), image alt
    attributes (15, or 5 when only some images have one), at least three
    internal links (15), at least one external link (10), HTTPS (20).
    Request failures are reported on stdout instead of raised.

    Args:
        url: Absolute URL of the page to score.
    """
    try:
        # A timeout keeps the script from hanging on an unresponsive host.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"❌ 請求失敗: {exc}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    print(f"\n🔍 正在分析網站: {url}\n")

    # Running score and the list of failed checks.
    score = 0
    max_score = 100
    deductions = []

    # 1. Title tag. A whitespace-only title counts as missing.
    title = soup.title.string if soup.title else None
    if title and title.strip():
        score += 15
    else:
        deductions.append("❌ 缺少標題標籤 (Title) (-15分)")

    # 2. Meta description
    meta_desc = soup.find("meta", attrs={"name": "description"})
    if meta_desc and meta_desc.get("content"):
        score += 15
    else:
        deductions.append("❌ 缺少 Meta 描述 (-15分)")

    # 3. H1 tag
    if soup.find("h1"):
        score += 10
    else:
        deductions.append("❌ 缺少 H1 標籤 (-10分)")

    # 4. Image alt attributes: full marks only when every image has one.
    images = soup.find_all("img")
    images_with_alt = [img for img in images if img.get("alt")]
    if images and len(images_with_alt) == len(images):
        score += 15
    elif images:
        deductions.append("❌ 部分圖片缺少 Alt 屬性 (-10分)")
        score += 5
    else:
        deductions.append("❌ 未找到圖片 (-15分)")

    # 5 & 6. Classify every link in a single pass. Only http(s) links with
    # a host count as external; mailto:, tel: and javascript: hrefs have an
    # empty host and would otherwise be mistaken for external links.
    domain = urlparse(url).netloc
    internal_links = set()
    external_links = set()
    for link in soup.find_all("a", href=True):
        full_url = urljoin(url, link["href"])
        parsed = urlparse(full_url)
        if parsed.netloc == domain:
            internal_links.add(full_url)
        elif parsed.scheme in ("http", "https") and parsed.netloc:
            external_links.add(full_url)

    # 5. Internal link count
    if len(internal_links) >= 3:
        score += 15
    else:
        deductions.append("❌ 內部連結少於 3 個 (-15分)")

    # 6. External link count
    if external_links:
        score += 10
    else:
        deductions.append("❌ 缺少外部連結 (-10分)")

    # 7. HTTPS
    if urlparse(url).scheme == "https":
        score += 20
    else:
        deductions.append("❌ 未使用 HTTPS (-20分)")

    # Report the result.
    print(f"✅ SEO 總分: {score}/{max_score}\n")
    if deductions:
        print("🔻 扣分項目:")
        for deduction in deductions:
            print(deduction)
    else:
        print("🎉 恭喜!所有 SEO 檢查項目都通過了。")


# Example usage
website_url = 'https://example.com'  # replace with the URL to analyze
seo_score(website_url)