finished web crawler
parent 416282d722
commit b265161131
@@ -0,0 +1,31 @@
import requests
from bs4 import BeautifulSoup


def fetch_page(url):
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None


def parse_links(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    links = [a.get('href') for a in soup.find_all('a', href=True)]
    return links


def web_crawler(start_url):
    html_content = fetch_page(start_url)
    if html_content:
        links = parse_links(html_content)
        return links
    return []


start_url = input('Enter a url: ')
found_links = web_crawler(start_url)
for link in found_links:
    print("Found link:", link)
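
A minimal usage sketch (not part of the commit, and assuming requests and beautifulsoup4 are installed): web_crawler can also be called with a URL directly instead of reading from input(), and relative hrefs can be resolved against the start URL with urllib.parse.urljoin:

# Hypothetical usage of the functions above; the start_url value is an example.
from urllib.parse import urljoin

start_url = "https://example.com"
for link in web_crawler(start_url):
    # urljoin turns relative hrefs (e.g. "/about") into absolute URLs.
    print("Found link:", urljoin(start_url, link))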