finished web crawler

EggMan 2024-04-25 10:58:39 -04:00
parent 416282d722
commit b265161131


@ -0,0 +1,31 @@
import requests
from bs4 import BeautifulSoup


def fetch_page(url):
    """Download a page and return its HTML, or None if the request fails."""
    try:
        # A timeout keeps the crawler from hanging on an unresponsive server.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None


def parse_links(html_content):
    """Extract the href value of every anchor tag in the page."""
    soup = BeautifulSoup(html_content, 'html.parser')
    links = [a.get('href') for a in soup.find_all('a', href=True)]
    return links


def web_crawler(start_url):
    """Fetch the start page and return the links found on it."""
    html_content = fetch_page(start_url)
    if html_content:
        links = parse_links(html_content)
        return links
    return []


start_url = input('Enter a url: ')
found_links = web_crawler(start_url)
for link in found_links:
    print("Found link:", link)