Scraping


Uncategorized

Updated Jul 13th, 2023

Scraping Snippets

To log the text content of every heading (h1–h6) on the page, grouped by heading level, followed by the total number of headings logged:

 // Collect the text of every heading on the page, one array per level (h1–h6).
 // All six levels are queried up front, before anything is logged.
 const headingTags = ["h1", "h2", "h3", "h4", "h5", "h6"];
 const textsByLevel = headingTags.map((tag) =>
   Array.from(document.querySelectorAll(tag), (node) => node.textContent)
 );
 const [h1s, h2s, h3s, h4s, h5s, h6s] = textsByLevel;

 // Log level by level (every h1, then every h2, ...), not in document order.
 headingTags.forEach((tag, level) => {
   textsByLevel[level].forEach((text) => {
     console.log(`${tag} textContent: `, text);
   });
 });

 // Total number of heading texts printed above, summed across all six levels.
 const numLoggedItems = textsByLevel.reduce((sum, texts) => sum + texts.length, 0);

 console.log("total # of logged items: ", numLoggedItems);