Scraping Data from a Website in Python

In the following tutorials you will learn how to scrape specific data from a website using the Python programming language.

Scraping Data from a Website

Program

from lxml import html
import requests
import re

# Download the listing page and parse the response body into an lxml tree.
# A timeout keeps the script from hanging forever on an unresponsive server.
page = requests.get('https://example-website.com/grocery?1=1&page=1', timeout=30)
tree = html.fromstring(page.content)

# XPath attribute tests need the "@" prefix: [@class="..."] compares the
# element's class attribute against the given (exact) string.
parent_div_xpath = '//li[@class="productPreview"]//div[@class="productPriceContainer productPriceSmall"]//span'
# Document-wide selectors for the two prices. They are not used below; the
# loop evaluates paths relative to each matched row instead.
title_filter = '//span[@class="productPromoPrice"]/text()'
copy_filter = '//span[@class="productOriginalPrice"]/text()'
filtered_html = tree.xpath(parent_div_xpath)

acc = []         # promo price text, one entry per matched row
twitch = []      # original price text, one entry per matched row
lastOnline = []  # data-time attribute of each row's parent element

# NOTE(review): each row is a <span> (parent_div_xpath ends in //span) and the
# price paths select child <span> elements of that row -- confirm this matches
# the page markup.
for row in filtered_html:
    # normalize-space()/concat() evaluate to a string, so a missing node gives
    # '' rather than an empty list and the three lists stay the same length.
    acc.append(row.xpath("concat(normalize-space(span[@class='productPromoPrice']/text()),'')"))
    twitch.append(row.xpath("concat(normalize-space(span[@class='productOriginalPrice']/text()),'')"))
    lastOnline.append(row.xpath("concat(../@data-time, '')"))

# ZIP EQUAL LENGTH LISTS
xpath_list = list(zip(acc, twitch, lastOnline))
# Each element is a (promo price, original price, data-time) tuple.
for row in xpath_list:
    print(row)
Output
199   280   3 hrs ago
499   710   5 hrs ago

Scraping Data from a Website: Another Example

Program

from lxml import html
import requests
import re

# Download the listing page and parse the response body into an lxml tree.
# A timeout keeps the script from hanging forever on an unresponsive server.
page = requests.get('https://example-website.com/grocery-and-staples?p=1', timeout=30)
tree = html.fromstring(page.content)

# XPath attribute tests need the "@" prefix: [@class="..."] compares the
# element's class attribute against the given (exact) string.
parent_div_xpath = '//li[@class="item product product-item"]//div[@class="product-item-info"]'
filtered_html = tree.xpath(parent_div_xpath)

# Paths below are relative to one product-item-info <div>.
productName = "div[@class='product details product-item-details']/strong[@class='product name product-item-name']/a[@class='product-item-link']/text()"
oldPrice = "div[@class='product details product-item-details']/div[@class='price-box price-final_price']/span[@class='old-price']/span[@class='price-container price-final_price tax weee']/span[@class='price-wrapper ']/span[@class='price']/text()"
newPrice = "div[@class='product details product-item-details']//div[@class='price-box price-final_price']//span[@class='price-container price-final_price tax weee']//span[@class='price-wrapper ']//span[@class='price']/text()"
discount = "div[@class='product details product-item-details']/div[@class='price-box price-final_price']/span[@class='special-percent']/text()"
imageUrl = "div[@class='product photo product-item-photo']/a/img[@class='product-image-photo default_image']/@src"


def _first_text(node, path):
    """Return the whitespace-normalized string value of *path* under *node*.

    The XPath expression evaluates to a string, so a path that matches
    nothing yields '' instead of an empty list. That keeps every result list
    the same length as ``filtered_html``.
    """
    return node.xpath("concat(normalize-space(" + path + "),'')")


arrProductName = [_first_text(item, productName) for item in filtered_html]
arrOldPrice = [_first_text(item, oldPrice) for item in filtered_html]
arrNewPrice = [_first_text(item, newPrice) for item in filtered_html]
arrDiscount = [_first_text(item, discount) for item in filtered_html]
arrImageUrl = [_first_text(item, imageUrl) for item in filtered_html]

# ZIP EQUAL LENGTH LISTS
# Each element is a (name, old price, new price, discount, image URL) tuple.
xpath_list = list(zip(arrProductName, arrOldPrice, arrNewPrice, arrDiscount, arrImageUrl))
print(xpath_list)
Output
product name ...   product old price ...   product new price ...   product discount ...   product image URL ...
product name ...   product old price ...   product new price ...   product discount ...   product image URL ...
product name ...   product old price ...   product new price ...   product discount ...   product image URL ...

Note: The above tutorials were created for educational and learning purposes.