beautifulsoup4 Selectors

  import re
  from urllib.parse import urljoin

  import pandas as pd
  import requests
  from bs4 import BeautifulSoup
  # Sample page used by the selector examples: a small "Ultra Running Events"
  # site with a nav bar, two race sections (50 and 100 mile) and a notes section.
  # The literal holds only the sample document. Unrelated <script> tags from the
  # hosting page had been pasted in before </body>; they added invalid "\/"
  # escapes and lone-surrogate "\ud83c..." escapes that cannot be encoded as
  # UTF-8, and none of the examples select anything from them.
  html = """ <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Ultra Running Events</title></head> <body class="site-body"> <header class="site-header"> <h1 class="site-title">Ultra Running Events</h1> <nav class="main-nav"> <ul class="nav-list"> <li><a class="nav-link" href="#races-50">50 Mile Races</a></li> <li><a class="nav-link" href="#races-100">100 Mile Races</a></li></ul></nav></header> <section id="races-50" class="race-section race-50"> <h2 class="section-title-50">50 Mile Races</h2> <ul class="race-list-50"> <li class="race-item"> <h3 class="race-name"><a href="https://www.ryandataraces.com/rocky-mountain-50">Rocky Mountain 50</a></h3> <p class="race-date">Date: August 10, 2025</p> <p class="race-location">Location: Boulder, Colorado</p></li> <li class="race-item"> <h3 class="race-name"><a href="https://www.ryandataraces.com/desert-dash-50">Desert Dash 50</a></h3> <p class="race-date">Date: September 14, 2025</p> <p class="race-location">Location: Moab, Utah</p></li></ul></section> <section id="races-100" class="race-section race-100"> <h2 class="section-title-100">100 Mile Races</h2> <ul class="race-list-100"> <li class="race-item"> <h3 class="race-name"><a href="https://www.ryandataraces.com/mountain-madness-100">Mountain Madness 100</a></h3> <p class="race-date">Date: July 5, 2025</p> <p class="race-location">Location: Lake Tahoe, California</p></li> <li class="race-item"> <h3 class="race-name"><a href="https://www.ryandataraces.com/endurance-beast-100">Endurance Beast 100</a></h3> <p class="race-date">Date: October 3, 2025</p> <p class="race-location">Location: Asheville, North Carolina</p></li></ul></section> <section id="important-notes"> <h2>Important Notes</h2> <p><strong>All races start at 6:00 AM sharp.</strong></p> <p><strong>Mandatory pre-race check-in the evening before.</strong></p></section> <footer class="site-footer"> <p>&copy; 2025 Ultra Running Events</p></footer> </body></html>"""
  # Parse the sample page with Python's built-in HTML parser.
  soup_html = BeautifulSoup(html, 'html.parser')

  # NOTE: bare expressions below only display a value in a notebook/REPL;
  # wrap them in print() when running this as a script.

  # select_one() returns the first element matching a CSS selector (or None);
  # select() returns a list of every match.
  soup_html.select_one('h2')
  soup_html.select_one('h2').get_text()
  soup_html.select_one('strong').get_text()

  soup_html.select('p')  # grabs all p tags
  soup_html.select('p')[0].get_text()
  soup_html.select('p')[1].get_text()

  all_p = soup_html.select('p')  # grabs all p tags
  for p in all_p:
      print(p.get_text())

  # Links: collect every <a> tag and read its href attribute.
  soup_html.select('a')
  a_element = soup_html.select('a')
  for link in a_element:
      print(link['href'])
  a_element[2]['href']

  # Select by id.
  soup_html.select('#races-100')

  # Example 10: descendant selector.
  # Finds any <a> descendant of an <h3> element with the class race-name.
  # "Descendant" means any level deep inside the h3, not just direct children.
  soup_html.select('h3.race-name a')

  # Example 11: direct descendants (child selector).
  # Extract the <a> tag directly under each <h3 class="race-name">, but only if
  # it is a direct child; links nested deeper are not matched.
  # Omit ">" when you want any level of nesting.
  soup_html.select('h3.race-name > a')

  # Example 12: siblings after an element.
  # Matches paragraphs that have the same parent as an <h3> and appear after
  # that <h3> in the HTML, regardless of how many elements are in between.
  # Useful for grabbing siblings after a specific element, but not necessarily
  # immediately after it.
  soup_html.select("h3 ~p")

  # Example 13: element in one of two classes (.race-50 OR .race-100).
  sections = soup_html.select('section.race-50, section.race-100')
  for section in sections:
      title = section.find('h2').text.strip()
      print(f"Section Title: {title}")

  # Example 14: element in both classes (.race-section AND .race-50).
  # The order of the classes in the selector does not matter.
  race50_section = soup_html.select('section.race-section.race-50')
  race50_section_v2 = soup_html.select('section.race-50.race-section')
  for section in race50_section:
      print(f"Found section: {section['id']}")
  for section in race50_section_v2:
      print(f"Found section: {section['id']}")
  # Scrape the books.toscrape.com front page into a DataFrame and export the
  # five-star titles with a USD price.
  URL = "http://books.toscrape.com/"

  # Fail fast on a hung connection or an HTTP error instead of parsing an
  # error page.
  response = requests.get(URL, timeout=10)
  response.raise_for_status()

  # Parse the raw bytes so BeautifulSoup detects the encoding itself; decoding
  # through response.text can turn '£' into 'Â£' when the server omits a
  # charset, which would break the float conversion further down.
  soup = BeautifulSoup(response.content, 'html.parser')

  # Example 15: select category links from the sidebar.
  category_links = soup.select("ul.nav-list ul a")
  for a in category_links:
      # hrefs are relative to the page, so resolve them against the base URL.
      print(a.text.strip(), urljoin(URL, a['href']))

  # Title links of every book on the page.
  books = soup.select("article.product_pod h3 a")
  books  # notebook display only
  for book in books:
      print(book.get_text())

  # One <article class="product_pod"> per book: pull title, price and rating.
  books = soup.select("article.product_pod")
  book_data = []
  for book in books:
      title = book.select_one("h3 a")["title"]
      price = book.select_one(".price_color").text
      # The rating is encoded as the last CSS class, e.g. 'Three'.
      rating = book.select_one("p.star-rating")["class"][-1]
      book_data.append({"title": title, "price": price, "rating": rating})

  df = pd.DataFrame(book_data)
  df.head(10)

  # Strip the currency symbol so the price can be used numerically.
  df['price_clean'] = df['price'].str.replace('£', '', regex=False).astype(float)

  # Convert GBP to USD (example rate: 1 GBP = 1.0737 USD).
  # TODO: check this rate against a current source before relying on it.
  exchange_rate = 1.0737
  df['price_usd'] = df['price_clean'] * exchange_rate
  # 'Price_usd' (capital P) is the display string; 'price_usd' is the float.
  df['Price_usd'] = df['price_usd'].apply(lambda x: f"${x:.2f}")

  # Keep only five-star books and the columns needed for the export.
  df_five_star = df.loc[df['rating'] == 'Five', ['title', 'Price_usd']]
  df_five_star

  df_five_star.to_csv('scrapped_book_data.csv')
  # Writing .xlsx requires an Excel writer engine (e.g. openpyxl) to be installed.
  df_five_star.to_excel('scrapped_book_data.xlsx')

Ryan is a Data Scientist at a fintech company, where he focuses on fraud prevention in underwriting and risk. Before that, he worked as a Data Analyst at a tax software company. He holds a degree in Electrical Engineering from UCF.

Leave a Reply

Your email address will not be published. Required fields are marked *