Scraping Data from AJAX Based Website in Python

In the following tutorial you will learn how to scrape specific data from an AJAX-based dynamic website using the Python programming language.

Scraping Data from AJAX Based Website

Note: Load the website in your browser, copy the entire HTML source using the browser's page-source view, and save it to a text file.

Program

from lxml import html
import requests
import re
import mysql.connector

# Open the MySQL connection that the insert step further down writes to.
cnx = mysql.connector.connect(user='root', password='',
                              host='127.0.0.1',
                              database='python_db')

# Parse the page served at localhost.  To work from a saved copy such as
# C://scraping/page.txt instead, read that file and pass its text to
# html.fromstring().
page = requests.get('http://localhost/scraping/page.php')
tree = html.fromstring(page.content)

# Attribute tests in XPath are written [@class="..."]; each query returns a
# list of strings, one entry per matching node.
productName = tree.xpath('//li[@class="product"]//a[@href]//div[@class="pro-con"]//h3/text()')
newPrices = tree.xpath('//li[@class="product"]//a[@href]//div[@class="pro-con"]//h4/text()')
imagePath = tree.xpath('//li[@class="product"]//a[@href]//img[@class="lazz"]//@data-src')

# Brand, old price and discount are not extracted for this page, but the
# insert loop below still reads these lists.  Keep them defined (and empty)
# so those columns are stored as "NA" instead of raising NameError.
# Queries for a "products-grid" style layout, kept for reference:
#   productBrand: '//ul[@class="products-grid"]//h2[@class="product-name"]//div[@class="cstm_brnd"]//span[1]/text()'
#   oldPrices:    '//ul[@class="products-grid"]//p[@class="old-price"]//span[@class="price"]/text()'
#   discount:     '//ul[@class="products-grid"]//span[@class="discount_Span"]/text()'
#   imagePath:    '//img/@data-src'  (every lazily loaded image on the page)
productBrand = []
oldPrices = []
discount = []

# One database row is written per scraped product name.
limit = len(productName)

def index_exists(ls, i):
    """Return True when ``i`` is a valid index into ``ls``.

    Both non-negative indexes (``0 .. len(ls) - 1``) and negative indexes
    (``-len(ls) .. -1``) count as valid; anything else returns False.
    """
    size = len(ls)
    # The valid positive and negative ranges are adjacent, so a single
    # chained comparison covers both.
    return -size <= i < size

def _value_at(values, position, strip_whitespace=False):
    """Return ``values[position]`` as a plain ``str``, or "NA" if it is missing.

    values: list of scraped strings (may be shorter than the product list).
    position: index of the product being processed.
    strip_whitespace: when True, remove every run of whitespace from the
        value; this is how the price columns are normalised before storage.
    """
    if not index_exists(values, position):
        return "NA"
    # lxml returns XPath string results as str subclasses ("smart strings");
    # str() yields a plain string that is safe to bind as a query parameter.
    text = str(values[position])
    if strip_whitespace:
        text = re.sub(r'\s+', '', text)
    return text


# The brand, old-price and discount queries are commented out in the
# extraction step, so those lists may be undefined.  Treat a missing list as
# empty so its column is stored as "NA" instead of raising NameError.
_scraped = globals()
_brands = _scraped.get("productBrand", [])
_oldPrices = _scraped.get("oldPrices", [])
_discounts = _scraped.get("discount", [])

# One row per scraped product, in the column order of the INSERT below.
rows = [
    (
        _value_at(productName, position),
        _value_at(_brands, position),
        _value_at(_oldPrices, position, strip_whitespace=True),
        _value_at(newPrices, position, strip_whitespace=True),
        _value_at(_discounts, position),
        _value_at(imagePath, position),
    )
    for position in range(limit)
]

# Placeholders let the driver quote the scraped text itself, so quotes or
# other special characters in a product name cannot break (or inject into)
# the statement.
insertSql = (
    "INSERT INTO data(product_name, product_brand, old_price, new_price, discount, image_path) "
    "VALUES (%s, %s, %s, %s, %s, %s)"
)

cursor = cnx.cursor()
try:
    if rows:
        cursor.executemany(insertSql, rows)
    # Without an explicit commit the inserts are discarded when the
    # connection closes, because autocommit is off by default.
    cnx.commit()
finally:
    # Release the cursor and connection even if an insert fails.
    cursor.close()
    cnx.close()

# Echo what was scraped; missing prices or images are shown as "NA" rather
# than aborting the report with an IndexError.
for position in range(limit):
    print(_value_at(productName, position))
    print(_value_at(newPrices, position))
    print(_value_at(imagePath, position) + "\n")

totalProducts = len(rows)

print("Total Products is: ", totalProducts)
print("--------All Products are saved in MySQL Database--------")
Output
Total Products is: 31
--------All Products are saved in MySQL Database--------

Note: The above tutorial was created for educational and learning purposes.