finished web crawler
parent 416282d722
commit b265161131
@@ -0,0 +1,31 @@
import requests
from bs4 import BeautifulSoup


def fetch_page(url):
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None


def parse_links(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    links = [a.get('href') for a in soup.find_all('a', href=True)]
    return links


def web_crawler(start_url):
    html_content = fetch_page(start_url)
    if html_content:
        links = parse_links(html_content)
        return links
    return []


start_url = input('Enter a url: ')
found_links = web_crawler(start_url)
for link in found_links:
    print("Found link:", link)
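
A minimal usage sketch (not part of the commit, and assuming requests and beautifulsoup4 are installed): web_crawler can also be called with a URL directly instead of reading from input(), and relative hrefs can be resolved against the start URL with urllib.parse.urljoin:

# Hypothetical usage of the functions above; the start_url value is an example.
from urllib.parse import urljoin

start_url = "https://example.com"
for link in web_crawler(start_url):
    # urljoin turns relative hrefs (e.g. "/about") into absolute URLs.
    print("Found link:", urljoin(start_url, link))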