# IMDb Top 250 scraper: fetches the top-rated-movie search pages with
# requests + BeautifulSoup and saves the extracted rows to hrmds.csv.
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
# Collect one row per movie from the IMDb "Top 250" search results
# (50 titles per page, pages 1-5) into `result`.
result = []
for page in range(1, 6):  # pages 1..5 inclusive
    url = "https://www.imdb.com/search/title?groups=top_250&my_ratings=exclude&sort=user_rating&page={}".format(page)
    r = requests.get(url)
    # Name the parser explicitly so bs4 does not warn and does not pick an
    # environment-dependent default.
    soup = BeautifulSoup(r.content, "html.parser")
    items = soup.find_all("div", {"class": "lister-item-content"})
    print(type(items))
    # Renamed from `i`, which shadowed the outer page counter.
    for item in items:
        position = item.find('span', {'class': 'lister-item-index unbold text-primary'}).text
        name = item.find('a').text
        year = item.find('span', {'class': 'lister-item-year text-muted unbold'}).text
        genre = item.find('span', {'class': 'genre'}).text
        rate = item.find('strong').text
        lengthofmovie = item.find('span', {'class': 'runtime'}).text
        anchor_texts = [a.text for a in item.find_all('a')]
        # NOTE(review): index 12 is where the director link happens to fall in
        # this markup, and the cast links follow it (so `stars` currently also
        # contains the director). Fragile -- confirm against the live page.
        director = anchor_texts[12]
        stars = ','.join(anchor_texts[12:])
        # <span name="nv"> holds the vote count and, when present, the gross.
        # Extract the text once instead of storing raw Tag lists in the CSV row.
        nv = item.find_all('span', {'name': 'nv'})
        votes = nv[0].text if nv else ''
        gross = nv[1].text if len(nv) > 1 else ''
        #certificate = item.find('span',{'class':'certificate'}).text
        res = [position, name, year, rate, genre, director, stars, votes, gross, lengthofmovie]
        result.append(res)
        print(res)
    print("================================================================================================")
print(result[2])
import csv

# Persist the scraped rows to hrmds.csv.
# 'w' (not 'r+'): 'r+' raises FileNotFoundError on the first run and, on
# reruns, leaves a stale tail when the new data is shorter than the old file.
# newline='' is the csv-module convention (avoids blank rows on Windows).
with open('hrmds.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Rank", "Movie Name", "Release Year", "Rating", "Genre", "Director", "Stars", "Votes", "Gross", "Length of Movie"])
    for row in result:
        writer.writerow(row)

# Read the file back to verify; ISO-8859-1 accepts any byte values that may
# appear in the scraped text.
df = pd.read_csv("hrmds.csv", encoding='ISO-8859-1')
df
# The code above was written in a Jupyter notebook, so run it in a Jupyter
# notebook with Python 3. First, import the packages needed for web scraping.
# This scraper uses BeautifulSoup; if the site is complex and you want to
# automate the browser interaction, try Selenium instead.
COMMENTS