1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
|
import csv

import bs4
import pandas as pd
import requests

# Scrape the Douban "Top 250" movie listing page by page, print each entry
# (rank, title, rating, one-line quote) and save everything to an Excel file.

# Browser-like user agent sent with every request.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 "
                  "Safari/537.36"
}

# Column order of the exported spreadsheet.
COLUMNS = ['排名', '电影标题', '评分', '评价']

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

print("豆瓣评分top250的电影:")

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the whole frame
# on every call.
rows = []

# The listing is requested with start offsets 0, 25, ..., 225.
for start_num in range(0, 250, 25):
    response = requests.get(
        f"https://movie.douban.com/top250?start={start_num}",
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on an HTTP error rather than parsing an error page into
    # zero rows and silently writing an incomplete spreadsheet.
    response.raise_for_status()

    soup = bs4.BeautifulSoup(response.text, "lxml")
    all_movies = soup.find_all("div", {"class": "item"})

    # Ranks continue from the page offset; this assumes every page lists
    # 25 movies -- TODO confirm against the live page.
    for idx, movie in enumerate(all_movies, start=start_num + 1):
        title_tag = movie.find("span", {"class": "title"})
        star_tag = movie.find("span", {"class": "rating_num"})
        appraise_tag = movie.find("span", {"class": "inq"})

        # Any of the tags may be missing for an entry; fall back to a
        # placeholder instead of failing on None.
        title = title_tag.get_text(strip=True) if title_tag else "未知电影"
        star = star_tag.get_text(strip=True) if star_tag else "未知评分"
        appraise = appraise_tag.get_text(strip=True) if appraise_tag else "未知评价"

        rows.append({
            '排名': idx,
            '电影标题': title,
            '评分': star,
            '评价': appraise,
        })
        print(f"{idx}:{title}\n打分:{star}\n评价:{appraise}")

# Passing `columns` keeps the header row even when nothing was scraped.
movies_df = pd.DataFrame(rows, columns=COLUMNS)

# `encoding` is intentionally not passed: the keyword was removed from
# DataFrame.to_excel in pandas 2.0 and never affected xlsx output.
movies_df.to_excel('douban_top250_movies.xlsx', index=False, engine='openpyxl')
print("数据已保存到douban_top250_movies.xlsx文件中。")
|