In this tutorial you will learn how to scrape specific data from an AJAX-based dynamic website using the Python programming language.
Note: Load the website in a browser, copy the complete HTML from the browser's page-source view, and save it to a text file.
Program
from lxml import html
import requests
import re
import mysql.connector
# Open the MySQL connection that the insert loop further down writes through.
cnx = mysql.connector.connect(user='root', password='',
                              host='127.0.0.1',
                              database='python_db')

# Source 1: parse the HTML that was saved by hand from the browser's page source.
page = ""
with open('C://scraping/page.txt', 'r') as myfile:
    page = myfile.read()
tree = html.fromstring(page)

# Source 2: fetch the markup over HTTP. This rebinds `page` and `tree`, so the
# selectors below run against this response, not the file parsed above.
page = requests.get('http://localhost/scraping/page.php')
tree = html.fromstring(page.content)

# Attribute tests in XPath need the `@` prefix (`[@class="..."]`); without it the
# predicate compares a child *element* named `class` and never matches.
productName = tree.xpath('//li[@class="product"]//a[@href]//div[@class="pro-con"]//h3/text()')
newPrices = tree.xpath('//li[@class="product"]//a[@href]//div[@class="pro-con"]//h4/text()')
imagePath = tree.xpath('//li[@class="product"]//a[@href]//img[@class="lazz"]//@data-src')
#imagePath = tree.xpath('//img/@data-src')

# The brand / old-price / discount selectors are disabled, but the insert loop
# further down still reads these names. Bind them to empty lists so every row
# falls back to "NA" instead of raising NameError.
productBrand = []
oldPrices = []
discount = []
#productBrand = tree.xpath('//ul[@class="products-grid"]//h2[@class="product-name"]//div[@class="cstm_brnd"]//span[1]/text()')
#oldPrices = tree.xpath('//ul[@class="products-grid"]//p[@class="old-price"]//span[@class="price"]/text()')
#discount = tree.xpath('//ul[@class="products-grid"]//span[@class="discount_Span"]/text()')

# One database row is written per scraped product name.
limit = len(productName)
def index_exists(ls, i):
    """Return True when ``ls[i]`` is a valid lookup.

    Both non-negative and negative (from-the-end) positions are accepted,
    i.e. any ``i`` in the half-open range ``[-len(ls), len(ls))``.
    """
    size = len(ls)
    return -size <= i < size
# Per-row fields; each is overwritten on every pass of the insert loop.
pName = pBrand = pOldPrices = pNewPrices = pDiscount = pImagePath = ""

# Placeholders keep scraped text out of the SQL statement itself, so quotes or
# other special characters in a product name cannot break or alter the query.
insertSql = ("INSERT INTO data(product_name, product_brand, old_price, new_price, discount, image_path) "
             "VALUES (%s, %s, %s, %s, %s, %s)")

cursor = cnx.cursor()
try:
    for indexNumber in range(limit):
        # Any list shorter than productName contributes "NA" for the missing rows.
        # str(...) normalises lxml's XPath results (str subclasses) to plain str;
        # NOTE(review): the connector converts parameters by type and may reject
        # subclasses - confirm against the installed connector version.
        pName = str(productName[indexNumber]) if index_exists(productName, indexNumber) else "NA"
        pBrand = str(productBrand[indexNumber]) if index_exists(productBrand, indexNumber) else "NA"
        # Prices are stored with all whitespace removed.
        pOldPrices = str(re.sub(r'\s+', '', oldPrices[indexNumber])) if index_exists(oldPrices, indexNumber) else "NA"
        pNewPrices = str(re.sub(r'\s+', '', newPrices[indexNumber])) if index_exists(newPrices, indexNumber) else "NA"
        pDiscount = str(discount[indexNumber]) if index_exists(discount, indexNumber) else "NA"
        pImagePath = str(imagePath[indexNumber]) if index_exists(imagePath, indexNumber) else "NA"
        cursor.execute(insertSql, (pName, pBrand, pOldPrices, pNewPrices, pDiscount, pImagePath))
    # Without an explicit commit the inserts are discarded when the connection
    # closes (autocommit is off by default for transactional tables).
    cnx.commit()
finally:
    # Release the cursor and connection even if an insert fails.
    cursor.close()
    cnx.close()

# Echo what was scraped; fields missing for a product are shown as "NA"
# rather than raising IndexError.
totalProducts = 0
for indexNumber in range(limit):
    print(productName[indexNumber])
    print(newPrices[indexNumber] if index_exists(newPrices, indexNumber) else "NA")
    print((imagePath[indexNumber] if index_exists(imagePath, indexNumber) else "NA") + "\n")
    totalProducts = totalProducts + 1
print("Total Products is: ", totalProducts)
print("--------All Products are saved in MySQL Database--------")
Note: The above tutorial was created for educational and learning purposes.