Und hier mit Python3:
Code:
#!/usr/bin/env python3
import mechanicalsoup
import requests
import sys
import os
# Validate the command line before doing anything destructive, so that a
# call without a URL does not delete the images of a previous run.
if len(sys.argv) < 2:
    print("Parameter fehlt!")
    # sys.exit is used instead of the interactive helper exit(), which is
    # only injected by the site module and may be absent (e.g. python -S).
    sys.exit(1)
url = sys.argv[1]

# Remove the *.jpg files in the current directory; the image section of this
# script writes its downloads as 1.jpg, 2.jpg, ... into the same directory.
# Only regular files are removed, so a directory named "*.jpg" cannot make
# os.remove() fail.
filelist = [
    name
    for name in os.listdir(".")
    if name.endswith(".jpg") and os.path.isfile(os.path.join(".", name))
]
for name in filelist:
    os.remove(os.path.join(".", name))

browser = mechanicalsoup.Browser(
    soup_config={'features': 'lxml'},
    user_agent='MyBot/0.1: mysite.example.com/bot_info',
)
page = browser.get(url)

# A <div class="outcomemessage-warning"> on the page is treated as "the ad
# cannot be shown" (presumably deleted or expired -- confirm against the site).
if page.soup.find_all("div", class_="outcomemessage-warning"):
    print("Problem mit Anzeige")
    sys.exit(1)
print("----------------------------------------------------")
print("Titel:")
# find() returns None when nothing matches, and the tag may lack a "content"
# attribute; handle both instead of crashing with AttributeError/KeyError.
title_tag = page.soup.find(property="og:title")
title = title_tag.get("content") if title_tag is not None else None
if title is None:
    print("Kein Titel")
else:
    print(title)
print("----------------------------------------------------")
print("Preis:")
# The element with id "viewad-price" is optional; its absence is reported
# rather than treated as an error.
price_tag = page.soup.find(id="viewad-price")
if price_tag is None:
    print("Kein Preis")
else:
    print(price_tag.text.strip())
print("----------------------------------------------------")
print("Bilder:")
# Download every element with id "viewad-image" to "<n>.jpg" in the current
# directory, numbered from 1 in document order.
for i, image_tag in enumerate(page.soup.find_all(id="viewad-image"), start=1):
    lnk = image_tag.get("src")
    if not lnk:
        # Element without a usable src attribute: nothing to download.
        continue
    print(lnk)
    try:
        # The timeout keeps a stalled server from hanging the script, and
        # raise_for_status() prevents saving an HTTP error page as a .jpg.
        response = requests.get(lnk, timeout=30)
        response.raise_for_status()
    except requests.RequestException as err:
        print("Download fehlgeschlagen:", err)
        continue
    with open(str(i) + ".jpg", "wb") as f:
        f.write(response.content)
print("----------------------------------------------------")
print("Kategorie:")
# Print the stripped text of every element carrying itemprop="title",
# one per line, in document order.
category_tags = page.soup.find_all(itemprop="title")
for category_tag in category_tags:
    category_label = category_tag.text.strip()
    print(category_label)
print("----------------------------------------------------")
print("Text:")
# Rebuild the description from the direct children of the element with
# itemprop="description": text nodes are stripped and kept, every other
# child (a tag, e.g. a line break) becomes a newline.
description = page.soup.find(itemprop="description")
parts = []
# find() returns None when the page has no description element; in that
# case an empty text is printed instead of failing on ".contents".
if description is not None:
    for node in description.contents:
        parts.append(node.strip() if isinstance(node, str) else "\n")
s = "".join(parts)
print(s)
print("----------------------------------------------------")
print("Details:")
# Print the stripped text of every <span> with class
# "addetailslist--detail--value", one per line, then close the report
# with a final separator.
detail_spans = page.soup.find_all("span", class_="addetailslist--detail--value")
for detail_span in detail_spans:
    detail_value = detail_span.text.strip()
    print(detail_value)
print("----------------------------------------------------")
Man braucht MechanicalSoup:
https://mechanicalsoup.readthedocs.io/en/stable/introduction.html#installation
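Die Infos werden nicht in eine Datei geschrieben, sondern nur ausgegeben; lediglich die Bilder werden als 1.jpg, 2.jpg, … im aktuellen Verzeichnis gespeichert.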