finished web crawler
This commit is contained in:
parent
416282d722
commit
b265161131
@ -0,0 +1,31 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
def fetch_page(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up. It is
            passed straight to ``requests.get``; without a timeout a
            stalled server would block the crawler indefinitely.

    Returns:
        The response body as a string, or ``None`` if the request failed
        (connection problem, timeout, malformed URL, or an HTTP error
        status). The failure is reported on standard output.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx statuses into exceptions so they share the error path.
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None
    return response.text
|
||||||
|
|
||||||
|
def parse_links(html_content):
    """Collect the ``href`` value of every anchor tag in *html_content*.

    Args:
        html_content: HTML markup to scan.

    Returns:
        A list of ``href`` attribute values, in document order. Values are
        returned exactly as written in the markup (relative links are not
        resolved).
    """
    document = BeautifulSoup(html_content, 'html.parser')
    hrefs = []
    # href=True restricts the search to anchors that carry an href attribute.
    for anchor in document.find_all('a', href=True):
        hrefs.append(anchor.get('href'))
    return hrefs
|
||||||
|
|
||||||
|
def web_crawler(start_url):
    """Fetch *start_url* and return the links found on that page.

    Args:
        start_url: Address of the page to crawl.

    Returns:
        The list produced by ``parse_links`` for the fetched page, or an
        empty list when the fetch failed or the page body was empty.
    """
    page = fetch_page(start_url)
    # fetch_page yields None on failure; an empty body is treated the same.
    if not page:
        return []
    return parse_links(page)
|
||||||
|
|
||||||
|
def main():
    """Prompt for a URL, crawl it, and print every link found on the page."""
    # Strip stray whitespace so a trailing space or newline in the typed
    # address does not end up in the request.
    start_url = input('Enter a url: ').strip()
    found_links = web_crawler(start_url)
    for link in found_links:
        print("Found link:", link)


# Run the interactive crawl only when executed as a script, so importing
# this module does not block on input() or trigger a network request.
if __name__ == "__main__":
    main()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user