BeautifulSoup4 find vs find_all

  # Standard library
  import re
  # Third-party
  import pandas as pd
  import requests
  from bs4 import BeautifulSoup
  html = """ <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Ultra Running Events</title></head> <body class="site-body"> <header class="site-header"> <h1 class="site-title">Ultra Running Events</h1> <nav class="main-nav"> <ul class="nav-list"> <li><a class="nav-link" href="#races-50">50 Mile Races</a></li> <li><a class="nav-link" href="#races-100">100 Mile Races</a></li></ul></nav></header> <section id="races-50" class="race-section race-50"> <h2 class="section-title-50">50 Mile Races</h2> <ul class="race-list-50"> <li class="race-item"> <h3 class="race-name"><a href="https://www.ryandataraces.com/rocky-mountain-50">Rocky Mountain 50</a></h3> <p class="race-date highlighted">Date: August 10, 2025</p> <p class="race-location">Location: Boulder, Colorado</p></li> <li class="race-item"> <h3 class="race-name"><a href="https://www.ryandataraces.com/desert-dash-50">Desert Dash 50</a></h3> <p class="race-date">Date: September 14, 2025</p> <p class="race-location">Location: Moab, Utah</p></li></ul></section> <section id="races-100" class="race-section race-100"> <h2 class="section-title-100">100 Mile Races</h2> <ul class="race-list-100"> <li class="race-item"> <h3 class="race-name"><a href="https://www.ryandataraces.com/mountain-madness-100">Mountain Madness 100</a></h3> <p class="race-date">Date: July 5, 2025</p> <p class="race-location">Location: Lake Tahoe, California</p></li> <li class="race-item"> <h3 class="race-name"><a href="https://www.ryandataraces.com/endurance-beast-100">Endurance Beast 100</a></h3> <p class="race-date">Date: October 3, 2025</p> <p class="race-location">Location: Asheville, North Carolina</p></li></ul></section> <footer class="site-footer"> <p>&copy; 2025 Ultra Running Events</p></footer><script id="wc-add-to-cart-js-extra">var wc_add_to_cart_params={"ajax_url":"\/wp-admin\/admin-ajax.php","wc_ajax_url":"\/?wc-ajax=%%endpoint%%","i18n_view_cart":"View 
cart","cart_url":"https:\/\/ryanandmattdatascience.com\/cart\/","is_cart":"","cart_redirect_after_add":"no"};</script>
<script id="woocommerce-js-extra">var woocommerce_params={"ajax_url":"\/wp-admin\/admin-ajax.php","wc_ajax_url":"\/?wc-ajax=%%endpoint%%","i18n_password_show":"Show password","i18n_password_hide":"Hide password"};</script>
<script id="WCPAY_ASSETS-js-extra">var wcpayAssets={"url":"https:\/\/ryanandmattdatascience.com\/wp-content\/plugins\/woocommerce-payments\/dist\/"};</script>
<script>var stm_wpcfto_ajaxurl='https://ryanandmattdatascience.com/wp-admin/admin-ajax.php';</script>
<script>var stm_wpcfto_nonces={"wpcfto_save_settings":"3040dd9fd9","get_image_url":"c8ef0ed5d1","wpcfto_upload_file":"eaefb35f0a","wpcfto_search_posts":"21e926921b"};</script>
<script>var elementskit_module_parallax_url="https://ryanandmattdatascience.com/wp-content/plugins/elementskit/modules/parallax/";</script>
<script>(function (){
var c=document.body.className;
c=c.replace(/woocommerce-no-js/, 'woocommerce-js');
document.body.className=c;
})();</script>
<script id="contact-form-7-js-before">var wpcf7={
"api": {
"root": "https:\/\/ryanandmattdatascience.com\/wp-json\/",
"namespace": "contact-form-7\/v1"
},
"cached": 1
};</script>
<script id="elementor-frontend-js-before">var elementorFrontendConfig={"environmentMode":{"edit":false,"wpPreview":false,"isScriptDebug":false},"i18n":{"shareOnFacebook":"Share on Facebook","shareOnTwitter":"Share on Twitter","pinIt":"Pin it","download":"Download","downloadImage":"Download image","fullscreen":"Fullscreen","zoom":"Zoom","share":"Share","playVideo":"Play Video","previous":"Previous","next":"Next","close":"Close","a11yCarouselPrevSlideMessage":"Previous slide","a11yCarouselNextSlideMessage":"Next slide","a11yCarouselFirstSlideMessage":"This is the first slide","a11yCarouselLastSlideMessage":"This is the last slide","a11yCarouselPaginationBulletMessage":"Go to slide"},"is_rtl":false,"breakpoints":{"xs":0,"sm":480,"md":768,"lg":1025,"xl":1440,"xxl":1600},"responsive":{"breakpoints":{"mobile":{"label":"Mobile Portrait","value":767,"default_value":767,"direction":"max","is_enabled":true},"mobile_extra":{"label":"Mobile Landscape","value":880,"default_value":880,"direction":"max","is_enabled":false},"tablet":{"label":"Tablet Portrait","value":1024,"default_value":1024,"direction":"max","is_enabled":true},"tablet_extra":{"label":"Tablet 
Landscape","value":1200,"default_value":1200,"direction":"max","is_enabled":false},"laptop":{"label":"Laptop","value":1366,"default_value":1366,"direction":"max","is_enabled":false},"widescreen":{"label":"Widescreen","value":2400,"default_value":2400,"direction":"min","is_enabled":false}},"hasCustomBreakpoints":false},"version":"3.29.2","is_static":false,"experimentalFeatures":{"additional_custom_breakpoints":true,"container":true,"e_local_google_fonts":true,"theme_builder_v2":true,"nested-elements":true,"editor_v2":true,"home_screen":true,"cloud-library":true,"e_opt_in_v4_page":true},"urls":{"assets":"https:\/\/ryanandmattdatascience.com\/wp-content\/plugins\/elementor\/assets\/","ajaxurl":"https:\/\/ryanandmattdatascience.com\/wp-admin\/admin-ajax.php","uploadUrl":"https:\/\/ryanandmattdatascience.com\/wp-content\/uploads"},"nonces":{"floatingButtonsClickTracking":"6001b37e8f"},"swiperClass":"swiper","settings":{"page":[],"editorPreferences":[]},"kit":{"active_breakpoints":["viewport_mobile","viewport_tablet"],"global_image_lightbox":"yes","lightbox_enable_counter":"yes","lightbox_enable_fullscreen":"yes","lightbox_enable_zoom":"yes","lightbox_enable_share":"yes","lightbox_title_src":"title","lightbox_description_src":"description","woocommerce_notices_elements":[]},"post":{"id":26654,"title":"BeautifulSoup4%3A%20find%20vs%20find_all%20Explained%20with%20Examples","excerpt":"","featuredImage":false}};</script>
<script id="wc-order-attribution-js-extra">var wc_order_attribution={"params":{"lifetime":1.0e-5,"session":30,"base64":false,"ajaxurl":"https:\/\/ryanandmattdatascience.com\/wp-admin\/admin-ajax.php","prefix":"wc_order_attribution_","allowTracking":true},"fields":{"source_type":"current.typ","referrer":"current_add.rf","utm_campaign":"current.cmp","utm_source":"current.src","utm_medium":"current.mdm","utm_content":"current.cnt","utm_id":"current.id","utm_term":"current.trm","utm_source_platform":"current.plt","utm_creative_format":"current.fmt","utm_marketing_tactic":"current.tct","session_entry":"current_add.ep","session_start_time":"current_add.fd","session_pages":"session.pgs","session_count":"udata.vst","user_agent":"udata.uag"}};</script>
<script id="wpcf7-recaptcha-js-before">var wpcf7_recaptcha={
"sitekey": "6LcKOJoqAAAAAK1qRzkn1Yfhv4Q3nU-7lRfcSY9g",
"actions": {
"homepage": "homepage",
"contactform": "contactform"
}};</script>
<script id="elementor-pro-frontend-js-before">var ElementorProFrontendConfig={"ajaxurl":"https:\/\/ryanandmattdatascience.com\/wp-admin\/admin-ajax.php","nonce":"0cb36e8935","urls":{"assets":"https:\/\/ryanandmattdatascience.com\/wp-content\/plugins\/elementor-pro\/assets\/","rest":"https:\/\/ryanandmattdatascience.com\/wp-json\/"},"settings":{"lazy_load_background_images":true},"popup":{"hasPopUps":false},"shareButtonsNetworks":{"facebook":{"title":"Facebook","has_counter":true},"twitter":{"title":"Twitter"},"linkedin":{"title":"LinkedIn","has_counter":true},"pinterest":{"title":"Pinterest","has_counter":true},"reddit":{"title":"Reddit","has_counter":true},"vk":{"title":"VK","has_counter":true},"odnoklassniki":{"title":"OK","has_counter":true},"tumblr":{"title":"Tumblr"},"digg":{"title":"Digg"},"skype":{"title":"Skype"},"stumbleupon":{"title":"StumbleUpon","has_counter":true},"mix":{"title":"Mix"},"telegram":{"title":"Telegram"},"pocket":{"title":"Pocket","has_counter":true},"xing":{"title":"XING","has_counter":true},"whatsapp":{"title":"WhatsApp"},"email":{"title":"Email"},"print":{"title":"Print"},"x-twitter":{"title":"X"},"threads":{"title":"Threads"}},"woocommerce":{"menu_cart":{"cart_page_url":"https:\/\/ryanandmattdatascience.com\/cart\/","checkout_page_url":"https:\/\/ryanandmattdatascience.com\/checkout\/","fragments_nonce":"832cf328e5"}},"facebook_sdk":{"lang":"en_US","app_id":""},"lottie":{"defaultAnimationUrl":"https:\/\/ryanandmattdatascience.com\/wp-content\/plugins\/elementor-pro\/modules\/lottie\/assets\/animations\/default.json"}};</script>
<script id="elementskit-elementor-js-extra">var ekit_config={"ajaxurl":"https:\/\/ryanandmattdatascience.com\/wp-admin\/admin-ajax.php","nonce":"f431a88994"};</script>
<script src="https://ryanandmattdatascience.com/wp-includes/js/jquery/jquery.min.js?ver=3.7.1" id="jquery-core-js"></script>
<script src="https://ryanandmattdatascience.com/wp-includes/js/jquery/jquery-migrate.min.js?ver=3.4.1" id="jquery-migrate-js"></script>
<script src="https://ryanandmattdatascience.com/wp-content/cache/speedycache/ryanandmattdatascience.com/assets/9f90f89ca1c2e91b-back-to-top.js" id="keydesign-go-top-js"></script>
<script src="https://ryanandmattdatascience.com/wp-content/plugins/woocommerce/assets/js/jquery-blockui/jquery.blockUI.min.js?ver=2.7.0-wc.9.9.4" id="jquery-blockui-js" defer data-wp-strategy="defer"></script>
<script src="https://ryanandmattdatascience.com/wp-content/plugins/woocommerce/assets/js/frontend/add-to-cart.min.js?ver=9.9.4" id="wc-add-to-cart-js" defer data-wp-strategy="defer"></script>
<script src="https://ryanandmattdatascience.com/wp-content/plugins/woocommerce/assets/js/js-cookie/js.cookie.min.js?ver=2.1.4-wc.9.9.4" id="js-cookie-js" defer data-wp-strategy="defer"></script>
<script src="https://ryanandmattdatascience.com/wp-content/plugins/woocommerce/assets/js/frontend/woocommerce.min.js?ver=9.9.4" id="woocommerce-js" defer data-wp-strategy="defer"></script>
<script src="https://ryanandmattdatascience.com/wp-content/cache/speedycache/ryanandmattdatascience.com/assets/3774baff78af5230-reading-bar.js" id="keydesign-rebar-js"></script>
<script id="google_gtagjs-js-after">window.dataLayer=window.dataLayer||[];function gtag(){dataLayer.push(arguments);}
gtag("set","linker",{"domains":["ryanandmattdatascience.com"]});
gtag("js", new Date());
gtag("set", "developer_id.dZTNiMT", true);
gtag("config", "GT-MR8Z58JX");</script>
<script>document.getElementById("ak_js_1").setAttribute("value",(new Date()).getTime());</script>
<script type="speculationrules">{"prefetch":[{"source":"document","where":{"and":[{"href_matches":"\/*"},{"not":{"href_matches":["\/wp-*.php","\/wp-admin\/*","\/wp-content\/uploads\/*","\/wp-content\/*","\/wp-content\/plugins\/*","\/wp-content\/themes\/sierra\/*","\/*\\?(.+)"]}},{"not":{"selector_matches":"a[rel~=\"nofollow\"]"}},{"not":{"selector_matches":".no-prefetch, .no-prefetch a"}}]},"eagerness":"conservative"}]}</script>
<script>const lazyloadRunObserver=()=> {
const lazyloadBackgrounds=document.querySelectorAll(`.e-con.e-parent:not(.e-lazyloaded)`);
const lazyloadBackgroundObserver=new IntersectionObserver(( entries)=> {
entries.forEach(( entry)=> {
if(entry.isIntersecting){
let lazyloadBackground=entry.target;
if(lazyloadBackground){
lazyloadBackground.classList.add('e-lazyloaded');
}
lazyloadBackgroundObserver.unobserve(entry.target);
}});
}, { rootMargin: '200px 0px 200px 0px' });
lazyloadBackgrounds.forEach(( lazyloadBackground)=> {
lazyloadBackgroundObserver.observe(lazyloadBackground);
});
};
const events=[
'DOMContentLoaded',
'elementor/lazyload/observe',
];
events.forEach(( event)=> {
document.addEventListener(event, lazyloadRunObserver);
});</script>
<script src="https://ryanandmattdatascience.com/wp-includes/js/dist/hooks.min.js?ver=4d63a3d491d11ffd8ac6" id="wp-hooks-js"></script>
<script src="https://ryanandmattdatascience.com/wp-includes/js/dist/i18n.min.js?ver=5e580eb46a90c2b997e6" id="wp-i18n-js"></script>
<script id="wp-i18n-js-after">wp.i18n.setLocaleData({ 'text direction\u0004ltr': [ 'ltr' ] });</script>
<script src="https://ryanandmattdatascience.com/wp-content/cache/speedycache/ryanandmattdatascience.com/assets/96e7dc3f0e8559e4-index.js" id="swv-js"></script>
<script src="https://ryanandmattdatascience.com/wp-content/cache/speedycache/ryanandmattdatascience.com/assets/2912c657d0592cc5-index.js" id="contact-form-7-js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.23.0/components/prism-core.min.js?ver=1.23.0" id="prismjs_core-js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.23.0/plugins/autoloader/prism-autoloader.min.js?ver=1.23.0" id="prismjs_loader-js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.23.0/plugins/normalize-whitespace/prism-normalize-whitespace.min.js?ver=1.23.0" id="prismjs_normalize-js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.23.0/plugins/line-numbers/prism-line-numbers.min.js?ver=1.23.0" id="prismjs_line_numbers-js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.23.0/plugins/toolbar/prism-toolbar.min.js?ver=1.23.0" id="prismjs_toolbar-js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.23.0/plugins/copy-to-clipboard/prism-copy-to-clipboard.min.js?ver=1.23.0" id="prismjs_copy_to_clipboard-js"></script>
<script src="https://ryanandmattdatascience.com/wp-content/plugins/elementor/assets/js/webpack.runtime.min.js?ver=3.29.2" id="elementor-webpack-runtime-js"></script>
<script src="https://ryanandmattdatascience.com/wp-content/plugins/elementor/assets/js/frontend-modules.min.js?ver=3.29.2" id="elementor-frontend-modules-js"></script>
<script src="https://ryanandmattdatascience.com/wp-includes/js/jquery/ui/core.min.js?ver=1.13.3" id="jquery-ui-core-js"></script>
<script src="https://ryanandmattdatascience.com/wp-content/plugins/elementor/assets/js/frontend.min.js?ver=3.29.2" id="elementor-frontend-js"></script>
<script src="https://ryanandmattdatascience.com/wp-content/cache/speedycache/ryanandmattdatascience.com/assets/6c2b63649cebbd01-front-end.js" id="sierra-scripts-js"></script>
<script src="https://ryanandmattdatascience.com/wp-includes/js/comment-reply.min.js?ver=6.8.1" id="comment-reply-js" async data-wp-strategy="async"></script>
<script src="https://ryanandmattdatascience.com/wp-content/cache/speedycache/ryanandmattdatascience.com/assets/94d041d462db321c-frontend-script.js" id="elementskit-framework-js-frontend-js"></script>
<script id="elementskit-framework-js-frontend-js-after">var elementskit={
resturl: 'https://ryanandmattdatascience.com/wp-json/elementskit/v1/',
}</script>
<script defer src="https://ryanandmattdatascience.com/wp-content/cache/speedycache/ryanandmattdatascience.com/assets/7f83f9f56851a309-widget-scripts.js" id="ekit-widget-scripts-js"></script>
<script defer src="https://ryanandmattdatascience.com/wp-content/plugins/woocommerce/assets/js/sourcebuster/sourcebuster.min.js?ver=9.9.4" id="sourcebuster-js-js"></script>
<script defer src="https://ryanandmattdatascience.com/wp-content/plugins/woocommerce/assets/js/frontend/order-attribution.min.js?ver=9.9.4" id="wc-order-attribution-js"></script>
<script defer src="https://ryanandmattdatascience.com/wp-includes/js/dist/vendor/wp-polyfill.min.js?ver=3.15.0" id="wp-polyfill-js"></script>
<script defer src="https://ryanandmattdatascience.com/wp-content/cache/speedycache/ryanandmattdatascience.com/assets/ec0187677793456f-index.js" id="wpcf7-recaptcha-js"></script>
<script defer src="https://ryanandmattdatascience.com/wp-content/cache/speedycache/ryanandmattdatascience.com/assets/092b9b120c6f0b41-keydesign-framework.js" id="keydesign-scripts-js"></script>
<script defer src="https://ryanandmattdatascience.com/wp-content/cache/speedycache/ryanandmattdatascience.com/assets/91954b488a9bfcad-akismet-frontend.js" id="akismet-frontend-js"></script>
<script defer src="https://ryanandmattdatascience.com/wp-content/plugins/elementor-pro/assets/js/webpack-pro.runtime.min.js?ver=3.28.3" id="elementor-pro-webpack-runtime-js"></script>
<script defer src="https://ryanandmattdatascience.com/wp-content/plugins/elementor-pro/assets/js/frontend.min.js?ver=3.28.3" id="elementor-pro-frontend-js"></script>
<script defer src="https://ryanandmattdatascience.com/wp-content/plugins/elementor-pro/assets/js/elements-handlers.min.js?ver=3.28.3" id="pro-elements-handlers-js"></script>
<script defer src="https://ryanandmattdatascience.com/wp-content/plugins/elementskit-lite/widgets/init/assets/js/animate-circle.min.js?ver=3.5.3" id="animate-circle-js"></script>
<script defer src="https://ryanandmattdatascience.com/wp-content/cache/speedycache/ryanandmattdatascience.com/assets/d1fe3f49c432e65a-elementor.js" id="elementskit-elementor-js"></script>
<script defer src="https://ryanandmattdatascience.com/wp-content/cache/speedycache/ryanandmattdatascience.com/assets/3572f383338e5760-elementor.js" id="elementskit-elementor-pro-js"></script>
<script>window._wpemojiSettings={"baseUrl":"https:\/\/s.w.org\/images\/core\/emoji\/15.1.0\/72x72\/","ext":".png","svgUrl":"https:\/\/s.w.org\/images\/core\/emoji\/15.1.0\/svg\/","svgExt":".svg","source":{"concatemoji":"https:\/\/ryanandmattdatascience.com\/wp-includes\/js\/wp-emoji-release.min.js?ver=6.8.1"}};
!function(i,n){var o,s,e;function c(e){try{var t={supportTests:e,timestamp:(new Date).valueOf()};sessionStorage.setItem(o,JSON.stringify(t))}catch(e){}}function p(e,t,n){e.clearRect(0,0,e.canvas.width,e.canvas.height),e.fillText(t,0,0);var t=new Uint32Array(e.getImageData(0,0,e.canvas.width,e.canvas.height).data),r=(e.clearRect(0,0,e.canvas.width,e.canvas.height),e.fillText(n,0,0),new Uint32Array(e.getImageData(0,0,e.canvas.width,e.canvas.height).data));return t.every(function(e,t){return e===r[t]})}function u(e,t,n){switch(t){case"flag":return n(e,"\ud83c\udff3\ufe0f\u200d\u26a7\ufe0f","\ud83c\udff3\ufe0f\u200b\u26a7\ufe0f")?!1:!n(e,"\ud83c\uddfa\ud83c\uddf3","\ud83c\uddfa\u200b\ud83c\uddf3")&&!n(e,"\ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f","\ud83c\udff4\u200b\udb40\udc67\u200b\udb40\udc62\u200b\udb40\udc65\u200b\udb40\udc6e\u200b\udb40\udc67\u200b\udb40\udc7f");case"emoji":return!n(e,"\ud83d\udc26\u200d\ud83d\udd25","\ud83d\udc26\u200b\ud83d\udd25")}return!1}function f(e,t,n){var r="undefined"!=typeof WorkerGlobalScope&&self instanceof WorkerGlobalScope?new OffscreenCanvas(300,150):i.createElement("canvas"),a=r.getContext("2d",{willReadFrequently:!0}),o=(a.textBaseline="top",a.font="600 32px Arial",{});return e.forEach(function(e){o[e]=t(a,e,n)}),o}function t(e){var t=i.createElement("script");t.src=e,t.defer=!0,i.head.appendChild(t)}"undefined"!=typeof Promise&&(o="wpEmojiSettingsSupports",s=["flag","emoji"],n.supports={everything:!0,everythingExceptFlag:!0},e=new Promise(function(e){i.addEventListener("DOMContentLoaded",e,{once:!0})}),new Promise(function(t){var n=function(){try{var e=JSON.parse(sessionStorage.getItem(o));if("object"==typeof e&&"number"==typeof e.timestamp&&(new Date).valueOf()<e.timestamp+604800&&"object"==typeof e.supportTests)return e.supportTests}catch(e){}return null}();if(!n){if("undefined"!=typeof Worker&&"undefined"!=typeof OffscreenCanvas&&"undefined"!=typeof 
URL&&URL.createObjectURL&&"undefined"!=typeof Blob)try{var e="postMessage("+f.toString()+"("+[JSON.stringify(s),u.toString(),p.toString()].join(",")+"));",r=new Blob([e],{type:"text/javascript"}),a=new Worker(URL.createObjectURL(r),{name:"wpTestEmojiSupports"});return void(a.onmessage=function(e){c(n=e.data),a.terminate(),t(n)})}catch(e){}c(n=f(s,u,p))}t(n)}).then(function(e){for(var t in e)n.supports[t]=e[t],n.supports.everything=n.supports.everything&&n.supports[t],"flag"!==t&&(n.supports.everythingExceptFlag=n.supports.everythingExceptFlag&&n.supports[t]);n.supports.everythingExceptFlag=n.supports.everythingExceptFlag&&!n.supports.flag,n.DOMReady=!1,n.readyCallback=function(){n.DOMReady=!0}}).then(function(){return e}).then(function(){var e;n.supports.everything||(n.readyCallback(),(e=n.source||{}).concatemoji?t(e.concatemoji):e.wpemoji&&e.twemoji&&(t(e.twemoji),t(e.wpemoji)))}))}((window,document),window._wpemojiSettings);</script>
</body></html>"""
  # Root address of the live practice site: it is the page that gets downloaded,
  # and it is also prefixed to each book's href to build a full link.
  URL = 'https://books.toscrape.com/'

Part 1 - Parsing the HTML from your first page. The parsed result is a snapshot of the HTML at the time of the request.

  # Download the landing page. The timeout stops the call from blocking
  # indefinitely when the server does not answer.
  response = requests.get(URL, timeout=10)
  if response.status_code == 200:
      print("Page fetched successfully!")
  else:
      print("Failed to retrieve page:", response.status_code)
  # soup      -> parsed copy of the downloaded page (a snapshot taken at request time)
  # soup_html -> parsed copy of the hand-written `html` sample string
  soup = BeautifulSoup(response.text, 'html.parser')
  soup_html = BeautifulSoup(html, 'html.parser')
  soup

Example 3 use soup.prettify()

  # Print the downloaded page in prettified (indented) form so its structure is easier to read.
  print(soup.prettify())

Example 4 - Grab Page title

  # Tag-name shortcut: evaluates to the <title> tag of the sample document.
  soup_html.title

Example 5 - Grab Page H2 (This only grabs the first one...)

  # Tag-name shortcut: yields only the first <h2>, even though the sample contains two.
  soup_html.h2

Example 6 - Grab Page title text

  # get_text() gives the text inside the <title> tag without the surrounding markup.
  soup_html.title.get_text()
  # Subscripting a tag reads one of its attributes; here, the class of the first <h2>.
  soup_html.h2['class']
#The next two examples take a look at find vs find_all
#| Method | Returns | Use When |
#| --- | --- | --- |
#| `find()` | The **first** matching element | You want a single element |
#| `find_all()` | A **list** of all matches | You want to loop through multiple items |
Example 8 Find
find() only returns the first match — it doesn’t let you directly access the second, third, etc.
  # Returns the first <h2> in document order (the "50 Mile Races" heading in the sample).
  soup_html.find('h2')

Example 10 Find with Class

  # class_ (with a trailing underscore, since `class` is a Python keyword) narrows
  # the match to the heading of one specific section.
  soup_html.find('h2', class_ = 'section-title-50').get_text()
  soup_html.find('h2', class_ = 'section-title-100').get_text()

Example 11 Find Chain Requests

  # Finds the first <li> (list item) element in the document.
  # From that <li> element, it then finds the first <a> (anchor) tag inside that <li>.
  soup_html.find('li').find('a')

Example 12 Separating out Chain Requests

  # Two-step lookup: first <li> in the document, then the first <a> inside it.
  # In the sample the first <li> is a navigation entry, so this is the
  # "50 Mile Races" nav link.
  list_item = soup_html.find('li')
  list_item_a = list_item.find('a')
  list_item_a
  # Example 13 - Find All Races
  soup_html.find_all('h2')

Example 14 Find the First or Second Race

  # find_all() returns a list of matches, so a single heading is picked by index:
  # [0] is the first <h2>, [1] the second.
  soup_html.find_all('h2')[0]
  soup_html.find_all('h2')[0].get_text()
  soup_html.find_all('h2')[1]
  soup_html.find_all('h2')[1].get_text()

Example 15 find_all and print out the text

  race_types = soup_html.find_all('h2')
  # Print the heading text of every race section, one per line.
  for race in race_types:
      print(race.get_text())

Example 16 find_all with a class (race dates)

  # Every <p> whose class list includes "race-date"; the sample's
  # "race-date highlighted" paragraph is matched as well.
  soup_html.find_all('p', class_ = 'race-date')

Example 17 find all with Either class

  # Passing a list of class names matches <p> tags that carry any one of them.
  soup_html.find_all("p", class_=["race-date", "race-location"])

Example 18 Find with OR on Attributes (href, title, id, class, src, alt, type)

  # attrs= takes a dict of attribute filters; a list value matches any of its items.
  soup_html.find_all("p", attrs={"class": ["race-date", "race-location"]})
  soup_html.find_all("a", attrs={"href": ["#races-50", "#races-100"]})
  # Example 19 - Search for Strings (the link text must equal the given string)
  soup_html.find_all("a", string='Mountain Madness 100')
  # Example 20 - Search for Strings with regex (the pattern may match part of the text)
  soup_html.find_all("a", string=re.compile('Madness'))

Example 21 Parent Sibling Child

  h3_races = soup_html.find_all("h3")
  h3_races
  # For each race heading, print its name followed by the details that sit
  # next to it in the same list item.
  for h3 in h3_races:
      print("Race Name:", h3.get_text())
      # Get next siblings that are <p> tags
      for sibling in h3.find_next_siblings('p'):
          print(" ", sibling.get_text())
Working with a real site now instead of basic HTML:
https://books.toscrape.com/
  # Dump the downloaded page's markup so the tags and classes to search for can be read off.
  print(soup.prettify())

Example 22 Find all books on a page and print them out

  # Searches for all <article> elements in the HTML that have the class "product_pod".
  books = soup.find_all("article", class_="product_pod")
  # .h3: accesses the <h3> tag inside the article.
  # .a: accesses the <a> tag inside the <h3>, which contains the link to the book's detail page.
  # ["title"]: extracts the title attribute of the <a> tag, which holds the title of the book.
  for book in books:
      print(book.h3.a["title"])

Example 23 Grab multiple things at once

  # Pull title, price and link out of every book card and print them on one line.
  for book in books:
      title = book.h3.a['title']
      price = book.find('p', class_='price_color').get_text()
      relative_url = book.h3.a['href']
      # The href is treated as relative to the site root, so prefix the base URL.
      book_url = URL + relative_url
      print(f"Title: {title} | Price: {price} | URL: {book_url}")

Example 24 Save Multiple Things to a Dataframe

  # Collect one dict per book, then let pandas turn the list of dicts into a table
  # with the columns Title, Price and URL.
  data = []
  for book in books:
      title = book.h3.a['title']
      price = book.find('p', class_='price_color').get_text()
      relative_url = book.h3.a['href']
      book_url = URL + relative_url
      data.append({
          'Title': title,
          'Price': price,
          'URL': book_url,
      })
  df = pd.DataFrame(data)
  df

Example 25 Clean Data Frame Price Column

  # Keep only digits and the decimal point so the price parses as a float even
  # when characters other than '£' precede the number (for example a stray 'Â'
  # left behind by a mis-decoded pound sign).
  df['price_clean'] = df['Price'].str.replace(r'[^\d.]', '', regex=True).astype(float)
  # Convert GBP to USD (example rate: 1 GBP = 1.0737 USD).
  # TODO: check this rate before relying on the converted figures.
  exchange_rate = 1.0737
  df['price_usd'] = df['price_clean'] * exchange_rate
  # Column labels are case-sensitive: the converted column is 'price_usd'.
  df_final = df[['Title', 'price_usd', 'URL']]
  df_final

Example 26 Export as a CSV File

  # Save the final table twice: once as CSV and once as an Excel workbook.
  # NOTE(review): to_excel presumably needs an Excel writer package such as
  # openpyxl installed — confirm in the target environment.
  df_final.to_csv('scrapped_book_data.csv')
  df_final.to_excel('scrapped_book_data.xlsx')

Ryan is a Data Scientist at a fintech company, where he focuses on fraud prevention in underwriting and risk. Before that, he worked as a Data Analyst at a tax software company. He holds a degree in Electrical Engineering from UCF.

Leave a Reply

Your email address will not be published. Required fields are marked *