import re

from selenium import webdriver
from selenium.webdriver.common.by import By
# Scrape unique email addresses from the anchor links of a single page.
url = "https://www.example.com"

# Start a Chrome-driven browser session and load the target page.
driver = webdriver.Chrome()
try:
    driver.get(url)

    # Collect every anchor element carrying an href attribute.
    # (find_elements_by_xpath was removed in Selenium 4; use find_elements + By.)
    links = driver.find_elements(By.XPATH, "//a[@href]")

    # Extract email-looking hrefs, de-duplicated while preserving order.
    emails = []
    for link in links:
        href = link.get_attribute("href")
        # Keep only hrefs that look like an email address (e.g. mailto: links).
        if re.match(r"[^@]+@[^@]+\.[^@]+", href):
            email = href.replace("mailto:", "")
            if email not in emails:
                emails.append(email)

    # Print the list of emails found.
    for email in emails:
        print(email)
finally:
    # Always release the browser, even if navigation or scraping raised.
    driver.quit()
#################### Script 2: spider pages, logging scraped URLs and emails found:
# --- Spider setup: one shared browser plus module-level bookkeeping sets ---
from selenium import webdriver
import re
# Initialize webdriver (launches a real Chrome session)
driver = webdriver.Chrome()
# Set the starting URL for the crawl
start_url = "http://www.example.com/"
# Initialize set to track scraped URLs
# (each visited page is added here so it is never fetched twice)
scraped_urls = set()
# Initialize set to track emails found
# (a set, so duplicate addresses seen on multiple pages are stored once)
emails_found = set()
# Define function to extract emails from a given string
def extract_emails(text):
    """Return all email-address substrings found in *text*, in order.

    Parameters
    ----------
    text : str
        Arbitrary text to search (e.g. a page's HTML).

    Returns
    -------
    list[str]
        Every match of a simple email pattern; may contain duplicates.
    """
    # TLD class is [A-Za-z], not [A-Z|a-z]: inside a character class '|' is a
    # literal pipe, so the original pattern also accepted '|' characters there.
    pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    emails = re.findall(pattern, text)
    return emails
# Define function to spider a given URL
def spider(url):
driver.get(url)
scraped_urls.add(url)
# Find all links on the page
links = driver.find_elements_by_xpath("//a[@href]")
for link in links:
href = link.get_attribute("href")
# If the link is not already scraped, spider it
if href not in scraped_urls:
if start_url