In the following tutorial you will learn how to scrape specific data from a website and save the scraped data to a text file using the Python programming language.
from lxml import html
import requests
import re
# Download the product listing page and parse it into an lxml element tree.
# A timeout is passed because requests otherwise waits indefinitely when the
# server never answers.
page = requests.get('http://www.example-website.com/fashion/western-wear/tops.html?limit=96', timeout=30)
tree = html.fromstring(page.content)

# Each query below is scoped to the <ul class="products-grid"> element.
# XPath attribute predicates have the form [@class="..."] and match the
# attribute value exactly.
productName = tree.xpath('//ul[@class="products-grid"]//h2[@class="product-name"]//a[@href]/text()')
productBrand = tree.xpath('//ul[@class="products-grid"]//h2[@class="product-name"]//div[@class="cstm_brnd"]//span[1]/text()')
oldPrices = tree.xpath('//ul[@class="products-grid"]//p[@class="old-price"]//span[@class="price"]/text()')
newPrices = tree.xpath('//ul[@class="products-grid"]//p[@class="special-price"]//span[@class="price"]/text()')
discount = tree.xpath('//ul[@class="products-grid"]//span[@class="discount_Span"]/text()')
imagePath = tree.xpath('//ul[@class="products-grid"]//img[@class="b-lazy"]//@data-src')
# Broader alternative that collects the data-src of every image on the page:
# imagePath = tree.xpath('//img/@data-src')

# The number of product names found is the record count used by the loops
# that follow.
limit = len(productName)
def index_exists(ls, i: int) -> bool:
    """Return True when ``ls[i]`` can be read without raising IndexError.

    Both non-negative indices (``0 .. len(ls) - 1``) and negative indices
    (``-len(ls) .. -1``) count as existing. An empty sequence has no valid
    index at all.
    """
    # One chained comparison covers the positive and the negative range.
    return -len(ls) <= i < len(ls)
# Per-product fields. They are initialised here so the names exist even when
# no product was found and the loop body never runs.
pName = ""
pBrand = ""
pOldPrices = ""
pNewPrices = ""
pDiscount = ""
pImagePath = ""

# Print every product, one field per line. The result lists can differ in
# length (for example when a product has no old price), so any field that
# has no entry at this position is shown as "NA".
for indexNumber in range(limit):
    pName = productName[indexNumber] if index_exists(productName, indexNumber) else "NA"
    pBrand = productBrand[indexNumber] if index_exists(productBrand, indexNumber) else "NA"

    # Prices have all whitespace removed before they are displayed.
    if index_exists(oldPrices, indexNumber):
        pOldPrices = re.sub(r'\s+', '', oldPrices[indexNumber])
    else:
        pOldPrices = "NA"
    if index_exists(newPrices, indexNumber):
        pNewPrices = re.sub(r'\s+', '', newPrices[indexNumber])
    else:
        pNewPrices = "NA"

    pDiscount = discount[indexNumber] if index_exists(discount, indexNumber) else "NA"
    pImagePath = imagePath[indexNumber] if index_exists(imagePath, indexNumber) else "NA"

    print(pName)
    print(pBrand)
    print(pOldPrices)
    print(pNewPrices)
    print(pDiscount)
    # Prefix the site root only when an image path was actually scraped;
    # otherwise the placeholder would be turned into a URL ending in "/NA".
    if pImagePath == "NA":
        print(pImagePath)
    else:
        print("https://www.example-website.com/" + pImagePath)
    print("----------------")
# Append one record per product to the output file: six lines (name, brand,
# old price, new price, discount, image path) followed by a blank line.
# The with-block guarantees the file is flushed and closed, even if a write
# fails. UTF-8 is requested explicitly so non-ASCII characters in the scraped
# text do not depend on the platform's default encoding.
with open("C://1/products.txt", "a", encoding="utf-8") as pnf:
    for indexNumber in range(limit):
        record = []
        for values in (productName, productBrand, oldPrices, newPrices, discount, imagePath):
            # The lists may be shorter than productName; write "NA" instead
            # of raising IndexError for a field with no entry here.
            if index_exists(values, indexNumber):
                record.append(values[indexNumber])
            else:
                record.append("NA")
        # Fields are newline-separated; the extra newline separates records.
        pnf.write("\n".join(record) + "\n\n")
print("All scrap data is saved in products text file")
Note: The above tutorial was created for educational and learning purposes.