You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
44 lines
964 B
44 lines
964 B
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Small BeautifulSoup walkthrough: fetch one page and inspect its markup.

Downloads https://gentoo.org/ with ``requests``, parses the response with
BeautifulSoup (lxml backend) and prints, in order: the raw page, the
<title> tag and its text, the <meta> tags, a few facts about the <a> tags,
the visible text of the page, and finally every link target.

Every ``print`` call takes exactly one argument, so the script produces the
same output under Python 2 and Python 3.
"""

import requests
from bs4 import BeautifulSoup


# GET with requests, then parse with BeautifulSoup.
# The timeout keeps the script from hanging forever on a dead connection,
# and raise_for_status() stops us from parsing an HTTP error page.
r = requests.get("https://gentoo.org/", timeout=30)
r.raise_for_status()

# r.text is the decoded body; printing r.content would show a bytes repr
# (b'...') on Python 3.
print(r.text)

# It's recommended to use LXML, not the default html parser.
# The raw bytes are handed over so BeautifulSoup can detect the encoding.
bt = BeautifulSoup(r.content, "lxml")
print(bt.title)
# bt.title is None when the document has no <title>; guard the attribute
# access instead of crashing with AttributeError.
print(bt.title.string if bt.title is not None else None)


# Find all metatags (queried once and reused below).
allMetaTags = bt.find_all('meta')
print(allMetaTags)

# Show the third meta tag, if the page has that many.
if len(allMetaTags) > 2:
    print(allMetaTags[2])

# Indexing a tag with ['content'] raises KeyError when that tag has no
# "content" attribute -- presumably the first <meta> on this page is one of
# those (e.g. <meta charset="...">). Tag.get() returns None instead:
#print(allMetaTags[0].get('content'))


# Print all links from the site
allLinks = bt.find_all('a')
print(len(allLinks))  # How many links do we have?
#print(allLinks[1])
# Guard the index, and use .get() because an <a> without href is legal.
if len(allLinks) > 4:
    print(allLinks[4].get('href'))
#print(allLinks[1].string)


# Print all text output, could be great for password list generations
print(bt.get_text())


# Print all links, skipping anchors that carry no href attribute.
for link in allLinks:
    href = link.get('href')
    if href is not None:
        print(href)


# print(bt.meta.next.next.next.next.next.next)  # Don't.