# BeautifulSoup4: find vs find_all
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
# Sample HTML used by every find/find_all example below. The markup provides
# exactly what those examples query: a <title>, two <h2> section headers with
# distinguishing classes, a nav list of <li><a href="#..."> links, and one
# <li> per race containing an <h3> name anchor followed by sibling
# <p class="race-date"> and <p class="race-location"> tags.
html = """
<html>
<head><title>Ultra Running Events</title></head>
<body>
<h1>Ultra Running Events</h1>
<ul>
<li><a href="#races-50">50 Mile Races</a></li>
<li><a href="#races-100">100 Mile Races</a></li>
</ul>
<h2 class="section-title-50" id="races-50">50 Mile Races</h2>
<ul>
<li>
<h3><a href="/races/rocky-mountain-50">Rocky Mountain 50</a></h3>
<p class="race-date">Date: August 10, 2025</p>
<p class="race-location">Location: Boulder, Colorado</p>
</li>
<li>
<h3><a href="/races/desert-dash-50">Desert Dash 50</a></h3>
<p class="race-date">Date: September 14, 2025</p>
<p class="race-location">Location: Moab, Utah</p>
</li>
</ul>
<h2 class="section-title-100" id="races-100">100 Mile Races</h2>
<ul>
<li>
<h3><a href="/races/mountain-madness-100">Mountain Madness 100</a></h3>
<p class="race-date">Date: July 5, 2025</p>
<p class="race-location">Location: Lake Tahoe, California</p>
</li>
<li>
<h3><a href="/races/endurance-beast-100">Endurance Beast 100</a></h3>
<p class="race-date">Date: October 3, 2025</p>
<p class="race-location">Location: Asheville, North Carolina</p>
</li>
</ul>
</body>
</html>
"""
# Base URL of the demo scraping site; relative book links are joined onto
# this later when building per-book URLs.
URL = 'https://books.toscrape.com/'

# Part 1: fetch and parse the HTML from the first page (a snapshot of the
# site at the time of writing).
response = requests.get(URL)
if response.status_code == 200:
    print("Page fetched successfully!")
else:
    print("Failed to retrieve page:", response.status_code)

# Parse both the live page and the inline sample markup defined above.
soup = BeautifulSoup(response.text, 'html.parser')
soup_html = BeautifulSoup(html, 'html.parser')
soup  # bare expression: displays the parsed tree in a notebook; no-op in a script

# Example 3: soup.prettify() renders the parse tree with indentation.
print(soup.prettify())

# Example 4: grab the page <title> tag.
soup_html.title

# Example 5: grab a page <h2> (attribute access only returns the FIRST one).
soup_html.h2

# Example 6: grab only the text inside the <title> tag.
soup_html.title.get_text()

# Example 7: the class attribute of the first <h2> (a list of class names).
soup_html.h2['class']

# Next two examples take a look at find vs find_all:
# | Method       | Returns                        | Use When                                |
# | ------------ | ------------------------------ | --------------------------------------- |
# | `find()`     | The **first** matching element | You want a single element               |
# | `find_all()` | A **list** of all matches      | You want to loop through multiple items |

# Example 8: find() only returns the first match — it doesn't let you
# directly access the second, third, etc.
soup_html.find('h2')

# Example 10: find() with a class filter selects a specific section header.
soup_html.find('h2', class_ = 'section-title-50').get_text()

soup_html.find('h2', class_ = 'section-title-100').get_text()

# Example 11: chained find() requests.
# Finds the first <li> element in the document, then finds the first <a>
# tag nested inside that <li>.
soup_html.find('li').find('a')

# Example 12: the same chain, separated into named intermediate steps.
list_item = soup_html.find('li')
list_item_a = list_item.find('a')
list_item_a

# Example 13: find_all() returns every matching tag (both race <h2>s).
soup_html.find_all('h2')

# Example 14: index into the result list for the first or second race.
soup_html.find_all('h2')[0]

soup_html.find_all('h2')[0].get_text()

soup_html.find_all('h2')[1]

soup_html.find_all('h2')[1].get_text()

# Example 15: find_all(), then loop over the results and print each text.
race_types = soup_html.find_all('h2')
for race in race_types:
    print(race.get_text())  # indentation restored: this line belongs inside the loop

# Example 16: find_all() with a class filter — every race-date paragraph.
soup_html.find_all('p', class_ = 'race-date')

# Example 17: find_all() matching EITHER class.
soup_html.find_all("p", class_=["race-date", "race-location"])

# Example 18: the same filters spelled with attrs= (works for any attribute:
# href, title, id, class, src, alt, type, ...).
soup_html.find_all("p", attrs={"class": ["race-date", "race-location"]})

soup_html.find_all("a", attrs={"href": ["#races-50", "#races-100"]})

# Example 19: search by exact string content.
soup_html.find_all("a", string='Mountain Madness 100')

# Example 20: search string content with a regex (substring match).
soup_html.find_all("a", string=re.compile('Madness'))

# Example 21: parent / sibling / child navigation.
h3_races = soup_html.find_all("h3")
h3_races

for h3 in h3_races:
    print("Race Name:", h3.get_text())
    # Get the following sibling <p> tags (the race's date and location).
    for sibling in h3.find_next_siblings('p'):
        print(" ", sibling.get_text())

# Working with a real site now instead of the inline sample HTML:
# https://books.toscrape.com/
print(soup.prettify())

# Example 22: find all books on the page.
# Searches for all <article> elements in the HTML with the class "product_pod".
books = soup.find_all("article", class_="product_pod")

# Example 23: grab several fields per book at once.
for book in books:
    title = book.h3.a['title']
    price = book.find('p', class_='price_color').get_text()
    relative_url = book.h3.a['href']
    book_url = URL + relative_url
    # typo fixed: "ULR" -> "URL" in the printed label
    print(f"Title: {title} | Price: {price} | URL {book_url}")

# Example 24: save the same fields into a DataFrame.
data = []
for book in books:
    title = book.h3.a['title']
    price = book.find('p', class_='price_color').text
    relative_url = book.h3.a['href']
    book_url = URL + relative_url
    data.append({
        'Title': title,
        'Price': price,
        'URL': book_url
    })
df = pd.DataFrame(data)
df

# Example 25: clean the DataFrame's Price column.
# Strip the pound sign, then convert the remaining digits to float.
df['price_clean'] = df['Price'].str.replace('£', '', regex=False).astype(float)
# Convert GBP to USD (example rate: 1 GBP = 1.0737 USD — check the current rate).
exchange_rate = 1.0737
df['price_usd'] = df['price_clean'] * exchange_rate
# BUG FIX: the column created above is 'price_usd' (lowercase); selecting
# 'Price_usd' raised a KeyError.
df_final = df[['Title', 'price_usd', 'URL']]
df_final

# Example 26: export the results as CSV and Excel files.
# index=False keeps the pandas row index out of the output (otherwise the
# CSV gains a spurious unnamed first column on re-import).
df_final.to_csv('scrapped_book_data.csv', index=False)
df_final.to_excel('scrapped_book_data.xlsx', index=False)
# Author: Ryan — Data Scientist at a fintech company focusing on fraud prevention in underwriting and risk; previously a Data Analyst at a tax software company. Holds a degree in Electrical Engineering from UCF.