神码ai火车头采集器伪原创【php源码】( 五 )


5.3 豆瓣电影 Top 250
下面的示例爬取豆瓣电影 Top 250,并将结果保存到 Excel 文件中:
import requests
from bs4 import BeautifulSoup
import xlwt

# Listing pages look like: https://movie.douban.com/top250?start=25&filter=
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 Edg/106.0.1370.42'
}


def _fetch_soup(url):
    """Download *url* with the shared headers and parse it with html.parser."""
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url=url, headers=headers, timeout=10)
    html = response.content.decode('utf-8')
    return BeautifulSoup(html, 'html.parser')


def _join_names(value):
    """Turn a list/tuple of strings into one ' / '-separated string."""
    if isinstance(value, (list, tuple)):
        return ' / '.join(value)
    return value


def top250Urls(pages=1):
    """Collect the detail-page URL of every movie found on the listing pages.

    Args:
        pages: Number of listing pages to walk. Pages are addressed by a
            ``start`` offset that advances in steps of 25. The default of 1
            reproduces the original script, which only read the first page.

    Returns:
        A one-element tuple ``(urls,)``; callers index ``[0]`` to get the list.
    """
    urls = []
    for start in range(0, pages * 25, 25):
        page_url = 'https://movie.douban.com/top250?start=' + str(start) + '&filter='
        soup = _fetch_soup(page_url)
        for movie in soup.find_all('div', class_='item'):
            pic = movie.find('div', class_='pic')
            link = pic.find('a') if pic is not None else None
            href = link.attrs.get('href') if link is not None else None
            # Entries without a usable link are skipped instead of queuing None.
            if href:
                urls.append(href)
    # Kept as a tuple because the original returned ``urls,``.
    return (urls,)


def Top250(pages=1):
    """Scrape each movie's detail page and write the results to an Excel file.

    For every URL returned by ``top250Urls`` this reads the title, the
    starring actors, the genres and the summary, then hands the collected
    records to ``WriteExcle``.

    Args:
        pages: Number of listing pages to process (forwarded to
            ``top250Urls``); defaults to 1 like the original script.
    """
    moviesTop = []
    urls = top250Urls(pages)[0]
    for url in urls:
        soup = _fetch_soup(url)
        title_span = soup.find('span', property="v:itemreviewed")
        info_div = soup.find('div', id="info")
        if title_span is None or info_div is None:
            # A page without the expected markup should not abort the whole
            # run before anything has been saved.
            print('跳过(未找到电影信息):', url)
            continue
        actors = [a.text for a in info_div.find_all('a', rel="v:starring")]
        genres = [g.text for g in info_div.find_all('span', property="v:genre")]
        summary_span = soup.find('span', property="v:summary")
        content = summary_span.text.strip('\n') if summary_span is not None else ''
        moviesTop.append({
            'title': title_span.text,
            'performer': actors,
            'type': genres,
            'content': content,
        })
    WriteExcle(moviesTop)


def WriteExcle(movies):
    """Write the movie records to the workbook "小电影.xls".

    Args:
        movies: Sequence of dicts with the keys ``title``, ``performer``,
            ``type`` and ``content``. ``performer`` and ``type`` may be lists
            of strings; they are joined into a single cell value.

    Prints a success message after saving, or a failure message followed by
    the error if building or saving the workbook fails.
    """
    try:
        # 1. Create the Workbook object, i.e. the Excel file.
        work_book = xlwt.Workbook(encoding='utf-8')
        # 2. Create the sheet; 'formName' is the sheet name.
        sheet = work_book.add_sheet('formName')
        # 3. Write the header row; write() takes (row, column, value).
        header = ['电影名', '演员', '类型', '电影简介']
        for col, caption in enumerate(header):
            sheet.write(0, col, caption)
        # Write one row per movie below the header.
        for row, movie in enumerate(movies, start=1):
            sheet.write(row, 0, movie['title'])
            # Join explicitly so each cell holds one readable string rather
            # than relying on how xlwt treats a list value.
            sheet.write(row, 1, _join_names(movie['performer']))
            sheet.write(row, 2, _join_names(movie['type']))
            sheet.write(row, 3, movie['content'])
        # Save the file.
        work_book.save("小电影.xls")
    except Exception as exc:
        # Report the cause instead of hiding it; a bare except would also
        # swallow KeyboardInterrupt and SystemExit.
        print('写入失败!')
        print(exc)
    else:
        print('写入成功!')


Top250()