BeautifulSoup4 find vs find_all
# Third-party: requests (HTTP), BeautifulSoup (HTML parsing), pandas (tables).
# Stdlib: re (regex matching of tag text).
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Static HTML snapshot used for the Part 1 parsing examples.
# NOTE(review): the original markup was stripped during export; the tags below
# were reconstructed to satisfy every selector used later in this file
# (h2 classes section-title-50/-100, p classes race-date/race-location,
# anchors with href #races-50/#races-100, race names inside <h3><a>) —
# confirm against the original notebook.
html = """
<html>
<head><title>Ultra Running Events</title></head>
<body>
<h1>Ultra Running Events</h1>
<nav>
  <a href="#races-50">50 Mile Races</a>
  <a href="#races-100">100 Mile Races</a>
</nav>
<h2 id="races-50" class="section-title-50">50 Mile Races</h2>
<ul>
  <li>
    <h3><a href="#">Rocky Mountain 50</a></h3>
    <p class="race-date">Date: August 10, 2025</p>
    <p class="race-location">Location: Boulder, Colorado</p>
  </li>
  <li>
    <h3><a href="#">Desert Dash 50</a></h3>
    <p class="race-date">Date: September 14, 2025</p>
    <p class="race-location">Location: Moab, Utah</p>
  </li>
</ul>
<h2 id="races-100" class="section-title-100">100 Mile Races</h2>
<ul>
  <li>
    <h3><a href="#">Mountain Madness 100</a></h3>
    <p class="race-date">Date: July 5, 2025</p>
    <p class="race-location">Location: Lake Tahoe, California</p>
  </li>
  <li>
    <h3><a href="#">Endurance Beast 100</a></h3>
    <p class="race-date">Date: October 3, 2025</p>
    <p class="race-location">Location: Asheville, North Carolina</p>
  </li>
</ul>
</body>
</html>
"""
# Base URL of the live site scraped in Part 2 (also used to build book links).
URL = 'https://books.toscrape.com/'
Part 1: Parsing the HTML from your first page — a snapshot of the HTML at that time.
# Fetch the live page. A timeout keeps the request from hanging forever
# (requests has no default timeout).
response = requests.get(URL, timeout=10)
if response.status_code == 200:
    print("Page fetched successfully!")
else:
    print("Failed to retrieve page:", response.status_code)

# Parse the live page and the static snapshot into BeautifulSoup trees.
soup = BeautifulSoup(response.text, 'html.parser')
soup_html = BeautifulSoup(html, 'html.parser')
# Notebook-style bare expression: displays the parsed live page.
soup

Example 3 use soup.prettify()
# Pretty-print the parsed live page with indentation.
print(soup.prettify())

Example 4 - Grab Page title
# Attribute-style access returns the first <title> tag (tag, not text).
soup_html.title

Example 5 - Grab Page H2 (This only grabs the first one...)
# Attribute-style access returns only the FIRST matching <h2>.
soup_html.h2

Example 6 - Grab Page title text
# .get_text() strips the tags and returns just the text content.
soup_html.title.get_text()

# Tag attributes are accessed dict-style; 'class' returns a list of classes.
soup_html.h2['class']

#Next two examples take a look at find vs find_all
#| Method | Returns | Use When |
#| --- | --- | --- |
#| `find()` | The **first** matching element | You want a single element |
#| `find_all()` | A **list** of all matches | You want to loop through multiple items |
Example 8 Find
find() only returns the first match — it doesn’t let you directly access the second, third, etc.
# find() returns only the first matching <h2> element.
soup_html.find('h2')

Example 10 Find with Class
# find() narrowed by CSS class; class_ avoids clashing with the 'class' keyword.
soup_html.find('h2', class_ = 'section-title-50').get_text()

soup_html.find('h2', class_ = 'section-title-100').get_text()

Example 11 Find Chain Requests
# Finds the first <li> (list item) element in the document,
# then finds the first <a> (anchor) tag inside that element.
soup_html.find('li').find('a')

Example 12 Separating out Chain Requests
# Same chain as Example 11, split into separate steps.
list_item = soup_html.find('li')  # first <li> in the document
list_item_a = list_item.find('a')  # first <a> inside that <li>
list_item_a

# Example 13: find ALL race-section headers (<h2>), returned as a list.
soup_html.find_all('h2')

Example 14 Find the first or second race
# find_all() returns a list, so individual matches are indexable.
soup_html.find_all('h2')[0]

soup_html.find_all('h2')[0].get_text()

soup_html.find_all('h2')[1]

soup_html.find_all('h2')[1].get_text()

Example 15 find_all and print out the text
# Loop over every <h2> section header and print its text.
race_types = soup_html.find_all('h2')
for race in race_types:
    print(race.get_text())

Example 16 find all with a class race dates
# All <p> tags carrying the race-date class.
soup_html.find_all('p', class_ = 'race-date')

Example 17 find all with Either class
# Passing a list of classes matches elements having EITHER class.
soup_html.find_all("p", class_=["race-date", "race-location"])

Example 18 Find OR Attributes: href, title, id, class, src, alt, type
# attrs= dict form: equivalent to the class_= list version above.
soup_html.find_all("p", attrs={"class": ["race-date", "race-location"]})

# attrs= works for any attribute, e.g. matching specific href values.
soup_html.find_all("a", attrs={"href": ["#races-50", "#races-100"]})

# Example 19: search anchors by exact string match of their text.
soup_html.find_all("a", string='Mountain Madness 100')

# Example 20: search anchor text with a regular expression (substring match).
soup_html.find_all("a", string=re.compile('Madness'))

Example 21 Parent Sibling Child
# Example 21: walk from each race name (<h3>) to its sibling <p> details.
h3_races = soup_html.find_all("h3")
h3_races

for h3 in h3_races:
    print("Race Name:", h3.get_text())
    # Get next siblings that are <p> tags (date and location).
    for sibling in h3.find_next_siblings('p'):
        print(" ", sibling.get_text())

Working with a real site now instead of basic HTML:
https://books.toscrape.com/
# Pretty-print the live page fetched from books.toscrape.com.
print(soup.prettify())

Example 22 Find all books on a page and print them out
# Searches for all elements in the HTML that have the class "product_pod"
# (one <article> per book on the page).
books = soup.find_all("article", class_="product_pod")

Example 23 Grab multiple things at once
# Pull three fields from each book card: title attribute of the <a>,
# the price text, and the link (made absolute with the base URL).
for book in books:
    title = book.h3.a['title']
    price = book.find('p', class_='price_color').get_text()
    relative_url = book.h3.a['href']
    book_url = URL + relative_url
    # Fixed typo in the output label: "ULR" -> "URL".
    print(f"Title: {title} | Price: {price} | URL: {book_url}")

Example 24 Save Multiple Things to a Dataframe
# Collect the same three fields as Example 23, but into a list of dicts
# so pandas can build a DataFrame from it.
data = []
for book in books:
    title = book.h3.a['title']
    price = book.find('p', class_='price_color').text
    relative_url = book.h3.a['href']
    book_url = URL + relative_url
    data.append({'Title': title, 'Price': price, 'URL': book_url})
df = pd.DataFrame(data)
df

Example 25 Clean Data Frame Price Column
# Strip the pound sign and convert the price to a float for arithmetic.
df['price_clean'] = df['Price'].str.replace('£', '', regex=False).astype(float)
# Convert GBP to USD (example rate: 1 GBP = 1.0737 USD) (CHECK THIS)
exchange_rate = 1.0737
df['price_usd'] = df['price_clean'] * exchange_rate
# BUG FIX: the column is named 'price_usd' (lowercase); selecting
# 'Price_usd' raised a KeyError.
df_final = df[['Title', 'price_usd', 'URL']]
df_final

Example 26 Export as a CSV File
# Export the cleaned table; index=False drops pandas' row index so the files
# contain only the Title/price_usd/URL columns.
# NOTE: to_excel requires the openpyxl package to be installed.
df_final.to_csv('scrapped_book_data.csv', index=False)
df_final.to_excel('scrapped_book_data.xlsx', index=False)
Ryan is a Data Scientist at a fintech company, where he focuses on fraud prevention in underwriting and risk. Before that, he worked as a Data Analyst at a tax software company. He holds a degree in Electrical Engineering from UCF.