In the following tutorials you will learn how to scrape specific data from any website using the Python programming language.
from lxml import html
import requests
import re
# Scrape promo price, original price and the parent's data-time attribute
# for every price <span> on the first grocery listing page, then print one
# tuple per match.

# A timeout keeps the script from hanging forever on an unresponsive server;
# raise_for_status() stops us from parsing an HTTP error page as if it were
# the product listing.
page = requests.get('https://example-website.com/grocery?1=1&page=1', timeout=30)
page.raise_for_status()
tree = html.fromstring(page.content)

# Attribute tests in XPath need the "@" prefix: a bare `class` would select
# child *elements* named "class" and the query would never match anything.
parent_div_xpath = '//li[@class="productPreview"]//div[@class="productPriceContainer productPriceSmall"]//span'
title_filter = '//span[@class="productPromoPrice"]/text()'
copy_filter = '//span[@class="productOriginalPrice"]/text()'
filtered_html = tree.xpath(parent_div_xpath)

acc = []         # promo price text, one entry per matched node
twitch = []      # original price text, one entry per matched node
lastOnline = []  # value of the parent element's data-time attribute

# Reuse the node list computed above instead of running the query twice.
for node in filtered_html:
    # normalize-space() yields a whitespace-trimmed string, or '' when the
    # relative path matches nothing, so every list gets exactly one entry
    # per node and the lists stay the same length.
    acc.append(node.xpath("normalize-space(span[@class='productPromoPrice']/text())"))
    twitch.append(node.xpath("normalize-space(span[@class='productOriginalPrice']/text())"))
    # string() on an empty node-set is '', so a missing attribute is safe.
    lastOnline.append(node.xpath("string(../@data-time)"))

# ZIP EQUAL LENGTH LISTS
xpath_list = list(zip(acc, twitch, lastOnline))

# Each element is a (promo price, original price, data-time) tuple.
for row in xpath_list:
    print(row)
from lxml import html
import requests
import re
# Scrape name, old price, new price, discount text and image URL for every
# product card on the first grocery-and-staples listing page, then print the
# rows as a list of tuples.

# A timeout keeps the script from hanging forever on an unresponsive server;
# raise_for_status() stops us from parsing an HTTP error page as if it were
# the product listing.
page = requests.get('https://example-website.com/grocery-and-staples?p=1', timeout=30)
page.raise_for_status()
tree = html.fromstring(page.content)

# Attribute tests in XPath need the "@" prefix: a bare `class` would select
# child *elements* named "class" and the query would never match anything.
parent_div_xpath = '//li[@class="item product product-item"]//div[@class="product-item-info"]'
filtered_html = tree.xpath(parent_div_xpath)

# Paths below are relative to one product-item-info <div>. @class comparisons
# are exact string matches, so the class lists (including the trailing space
# in 'price-wrapper ') are kept exactly as written.
productName = (
    "div[@class='product details product-item-details']"
    "/strong[@class='product name product-item-name']"
    "/a[@class='product-item-link']/text()"
)
oldPrice = (
    "div[@class='product details product-item-details']"
    "/div[@class='price-box price-final_price']"
    "/span[@class='old-price']"
    "/span[@class='price-container price-final_price tax weee']"
    "/span[@class='price-wrapper ']"
    "/span[@class='price']/text()"
)
newPrice = (
    "div[@class='product details product-item-details']"
    "//div[@class='price-box price-final_price']"
    "//span[@class='price-container price-final_price tax weee']"
    "//span[@class='price-wrapper ']"
    "//span[@class='price']/text()"
)
discount = (
    "div[@class='product details product-item-details']"
    "/div[@class='price-box price-final_price']"
    "/span[@class='special-percent']/text()"
)
imageUrl = (
    "div[@class='product photo product-item-photo']"
    "/a/img[@class='product-image-photo default_image']/@src"
)

arrProductName = []
arrOldPrice = []
arrNewPrice = []
arrDiscount = []
arrImageUrl = []

for index in filtered_html:
    # normalize-space() yields a whitespace-trimmed string, or '' when the
    # path matches nothing, so every list gets exactly one entry per product
    # and the lists stay the same length for zip() below.
    arrProductName.append(index.xpath("normalize-space(" + productName + ")"))
    arrOldPrice.append(index.xpath("normalize-space(" + oldPrice + ")"))
    arrNewPrice.append(index.xpath("normalize-space(" + newPrice + ")"))
    arrDiscount.append(index.xpath("normalize-space(" + discount + ")"))
    arrImageUrl.append(index.xpath("normalize-space(" + imageUrl + ")"))

# ZIP EQUAL LENGTH LISTS
# Each element is a (name, old price, new price, discount, image URL) tuple;
# iterate over xpath_list to inspect products one at a time.
xpath_list = list(zip(arrProductName, arrOldPrice, arrNewPrice, arrDiscount, arrImageUrl))
print(xpath_list)
Note: The above tutorials were created for educational and learning purposes.