From b2651611311fcbd5569801f00311618fd7407236 Mon Sep 17 00:00:00 2001
From: EggMan
Date: Thu, 25 Apr 2024 10:58:39 -0400
Subject: [PATCH] finished web crawler

---
Revision notes: fetch_page() gained a request timeout, the interactive
prompt moved behind a __main__ guard, and docstrings were added. The
commit and blob ids in this patch predate these changes.

 WebCrawler.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/WebCrawler.py b/WebCrawler.py
index e69de29..8cdccd5 100644
--- a/WebCrawler.py
+++ b/WebCrawler.py
@@ -0,0 +1,56 @@
+"""Fetch a single web page and list the hyperlinks it contains."""
+
+import requests
+from bs4 import BeautifulSoup
+
+
+def fetch_page(url, timeout=10):
+    """Download *url* and return the response body as text.
+
+    Args:
+        url: Address of the page to request.
+        timeout: Seconds to wait for the server before giving up. Without
+            a timeout, ``requests`` may wait indefinitely.
+
+    Returns:
+        The response body as text, or ``None`` when the request fails or
+        the server answers with an error status (the error is printed).
+    """
+    try:
+        response = requests.get(url, timeout=timeout)
+        response.raise_for_status()
+        return response.text
+    except requests.RequestException as e:
+        print(f"Error fetching {url}: {e}")
+        return None
+
+
+def parse_links(html_content):
+    """Return the ``href`` value of every anchor tag that has one."""
+    soup = BeautifulSoup(html_content, 'html.parser')
+    return [a.get('href') for a in soup.find_all('a', href=True)]
+
+
+def web_crawler(start_url):
+    """Return the links found on the page at *start_url*.
+
+    An empty list is returned when the page could not be fetched or its
+    body is empty.
+    """
+    html_content = fetch_page(start_url)
+    if not html_content:
+        return []
+    return parse_links(html_content)
+
+
+def main():
+    """Prompt for a URL and print each link found on that page."""
+    start_url = input('Enter a url: ')
+    for link in web_crawler(start_url):
+        print("Found link:", link)
+
+
+# Run the prompt only when executed as a script so that importing this
+# module does not block on input().
+if __name__ == "__main__":
+    main()