In the following tutorial you will learn how to create a website crawler in the Python programming language that crawls a website's question URLs and counts how often each tag appears.
import operator
import os
import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Maps each tag name to the number of times it has been seen so far.
Tag_Rank = {}
def tag_crawler(url):
    # Download a single question page and count every tag it lists.
    source_code = requests.get(url).text
    soup = BeautifulSoup(source_code, 'html.parser')
    # The 'tags' class is a placeholder: inspect the target site's HTML
    # and replace it with the container that actually holds the tag links.
    for tag_div in soup.find_all('div', {'class': 'tags'}):
        for tag_link in tag_div.find_all('a'):
            tag = tag_link.string
            if tag in Tag_Rank:
                Tag_Rank[tag] += 1
            else:
                Tag_Rank[tag] = 1
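
To sanity-check the counting logic without hitting a live site, you can run the same parsing over an inline HTML snippet. A quick sketch, with the placeholder 'tags' class from above standing in for whatever the real site uses:

from bs4 import BeautifulSoup

# Inline HTML standing in for a real question page.
html = '<div class="tags"><a>python</a><a>web</a><a>python</a></div>'
soup = BeautifulSoup(html, 'html.parser')
counts = {}
for tag_div in soup.find_all('div', {'class': 'tags'}):
    for tag_link in tag_div.find_all('a'):
        counts[tag_link.string] = counts.get(tag_link.string, 0) + 1
print(counts)  # {'python': 2, 'web': 1}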
def ques_links_crawler(base_url, page_limit):
    page_no = 1
    while page_no <= page_limit:
        # Query-string pagination is an assumption; adjust the URL pattern
        # to match how the target site numbers its pages.
        page_url = base_url + '?page=' + str(page_no)
        source_code = requests.get(page_url).text
        soup = BeautifulSoup(source_code, 'html.parser')
        if page_no == 1:
            os.system('clear')
        print('crawling page ' + str(page_no) + ': [', end='')
        prev_len = 0
        q_no = 1
        # 'question-link' is a placeholder for the class the target site
        # puts on its question anchors.
        ques_links = soup.find_all('a', {'class': 'question-link'})
        for ques_link in ques_links:
            # hrefs may be relative, so resolve them against the base URL.
            url = urljoin(base_url, ques_link.get('href'))
            tag_crawler(url)
            # Redraw the progress bar: backspace over the old percentage,
            # print one '#' for the link just crawled, then the new one.
            for _ in range(prev_len):
                print('\b', end='')
            print('#', end='')
            p_cent = q_no * 100 // len(ques_links)
            percent = '] (' + str(p_cent) + '%) '
            prev_len = len(percent)
            print(percent, end='')
            sys.stdout.flush()
            q_no += 1
        # Move to a new line before the next page's progress bar.
        print()
        page_no += 1
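
One note on the urljoin call above: the href values scraped from the anchors may be relative paths, and urljoin resolves them against the base URL correctly, which naive string concatenation does not. A quick demonstration:

from urllib.parse import urljoin

# A relative href is resolved against the base URL...
print(urljoin('https://example-website.com/', '/questions/123'))
# https://example-website.com/questions/123

# ...while an absolute href is left untouched.
print(urljoin('https://example-website.com/', 'https://example-website.com/q/456'))
# https://example-website.com/q/456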
def start():
    page_limit = int(input('Enter number of pages to crawl : '))
    # 'clear' works on Linux/macOS; on Windows the command is 'cls'.
    os.system('clear')
    print('starting crawling...')
    ques_links_crawler('https://example-website.com/', page_limit)
    # Write the tags to a file, most frequent first.
    with open('tags_frequency.txt', 'w') as fw:
        for key, value in sorted(Tag_Rank.items(),
                                 key=operator.itemgetter(1), reverse=True):
            try:
                fw.write(key + " : " + str(value) + "\n")
            except TypeError:
                # tag_link.string is None for anchors with nested markup;
                # skip those entries.
                continue
    print('\nResult saved to file tags_frequency.txt')

start()
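
Because the 'tags' and 'question-link' classes used above are only placeholders, it is worth inspecting the target site's markup before running the crawler. A minimal sketch, reusing the same requests/BeautifulSoup setup, that lists the class and href of every anchor on the front page so you can pick the right selectors:

import requests
from bs4 import BeautifulSoup

source_code = requests.get('https://example-website.com/').text
soup = BeautifulSoup(source_code, 'html.parser')
# Print each anchor's class list and href to find the selector that
# matches the site's question links.
for link in soup.find_all('a'):
    print(link.get('class'), link.get('href'))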
Note: The above tutorial is created for educational and learning purposes.