# IMDb Top 250 scraper: fetches the top-rated-movie search pages with
# requests + BeautifulSoup and saves the extracted rows to hrmds.csv.
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
# Collect one row per movie from the IMDb "Top 250" search results
# (50 titles per page, pages 1-5) into `result`.
result = []
for page in range(1, 6):  # pages 1..5 inclusive
    url = "https://www.imdb.com/search/title?groups=top_250&my_ratings=exclude&sort=user_rating&page={}".format(page)
    r = requests.get(url)
    # Name the parser explicitly so bs4 does not warn and does not pick an
    # environment-dependent default.
    soup = BeautifulSoup(r.content, "html.parser")
    items = soup.find_all("div", {"class": "lister-item-content"})
    print(type(items))
    # Renamed from `i`, which shadowed the outer page counter.
    for item in items:
        position = item.find('span', {'class': 'lister-item-index unbold text-primary'}).text
        name = item.find('a').text
        year = item.find('span', {'class': 'lister-item-year text-muted unbold'}).text
        genre = item.find('span', {'class': 'genre'}).text
        rate = item.find('strong').text
        lengthofmovie = item.find('span', {'class': 'runtime'}).text
        anchor_texts = [a.text for a in item.find_all('a')]
        # NOTE(review): index 12 is where the director link happens to fall in
        # this markup, and the cast links follow it (so `stars` currently also
        # contains the director). Fragile -- confirm against the live page.
        director = anchor_texts[12]
        stars = ','.join(anchor_texts[12:])
        # <span name="nv"> holds the vote count and, when present, the gross.
        # Extract the text once instead of storing raw Tag lists in the CSV row.
        nv = item.find_all('span', {'name': 'nv'})
        votes = nv[0].text if nv else ''
        gross = nv[1].text if len(nv) > 1 else ''
        #certificate = item.find('span',{'class':'certificate'}).text
        res = [position, name, year, rate, genre, director, stars, votes, gross, lengthofmovie]
        result.append(res)
        print(res)
    print("================================================================================================")
print(result[2])
import csv

# Persist the scraped rows to hrmds.csv.
# 'w' (not 'r+'): 'r+' raises FileNotFoundError on the first run and, on
# reruns, leaves a stale tail when the new data is shorter than the old file.
# newline='' is the csv-module convention (avoids blank rows on Windows).
with open('hrmds.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Rank", "Movie Name", "Release Year", "Rating", "Genre", "Director", "Stars", "Votes", "Gross", "Length of Movie"])
    for row in result:
        writer.writerow(row)

# Read the file back to verify; ISO-8859-1 accepts any byte values that may
# appear in the scraped text.
df = pd.read_csv("hrmds.csv", encoding='ISO-8859-1')
df
# The code above was written in a Jupyter notebook, so run it in a Jupyter
# notebook with Python 3. First, import the packages needed for web scraping.
# This scraper uses BeautifulSoup; if the site is complex and you want to
# automate the browser interaction, try Selenium instead.
COMMENTS