Python爬取620首虾米歌曲,揭秘五月天为什么狂吸粉?!( 二 )


xiami(专辑信息).py
# Download the album list returned by the Xiami getArtistAlbums endpoint for
# artistId 3110 and save one row per album to '五月天专辑信息.xlsx'.
import csv
import datetime
import json
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

# One list per output column; every album appends exactly one value to each.
albumCategory = []
albumId = []
albumLogo = []
albumName = []
artistName = []
collects = []
language = []
playCount = []
recommends = []
songCount = []
albumStringId = []
albumStatus = []
gmtPublish = []
grade = []
gradeCount = []

# Page 1 with pageSize 60 for artistId 3110 (values are URL-encoded in _q).
# NOTE(review): `_s` looks like a request signature tied to `_q` — confirm
# before changing any query parameter.
url = 'https://www.xiami.com/api/album/getArtistAlbums?_q=%7B%22pagingVO%22:%7B%22page%22:1,%22pageSize%22:60%7D,%22artistId%22:3110,%22category%22:0%7D&_s=dd6d0ef72dda69944fc2fbaa33c5bc6c'
headers = {
    'Connection': 'keep-alive',
    'Cookie': '',
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
}

res = requests.get(url, headers=headers)
# `json` was used without being imported, and the `encoding` keyword of
# json.loads no longer exists (removed in Python 3.9); res.text is already str.
content = json.loads(res.text)

for album in content['result']['data']['albums']:
    albumCategory.append(album['albumCategory'])
    albumId.append(album['albumId'])
    albumLogo.append(album['albumLogo'])
    albumName.append(album['albumName'])
    artistName.append(album['artistName'])
    collects.append(album['collects'])
    language.append(album['language'])
    playCount.append(album['playCount'])
    recommends.append(album['recommends'])
    songCount.append(album['songCount'])
    albumStringId.append(album['albumStringId'])
    albumStatus.append(album['albumStatus'])
    # The value is divided by 1000 before conversion — presumably the API
    # reports epoch milliseconds; the result is a naive local datetime.
    gmtPublish.append(
        datetime.datetime.fromtimestamp(int(album['gmtPublish'] / 1000))
    )
    grade.append(album['grade'])
    gradeCount.append(album['gradeCount'])

# Key order here fixes the column order of the spreadsheet.
result = {
    '专辑种类': albumCategory,
    '专辑id': albumId,
    '专辑封面': albumLogo,
    '专辑名字': albumName,
    '艺术家': artistName,
    '收藏': collects,
    '语言': language,
    '播放数': playCount,
    '推荐': recommends,
    '歌曲数量': songCount,
    '专辑字符': albumStringId,
    '状态': albumStatus,
    '评分': grade,
    '评分人数': gradeCount,
    '发布时间': gmtPublish,
}
results = pd.DataFrame(result)
results.info()
results.to_excel('五月天专辑信息.xlsx')
xiami(歌曲详情).py
# Visit the Xiami page of every song id listed in '五月天歌曲清单.xlsx', scrape
# its details, and save one row per song to '五月天歌曲详情1500.xlsx'.
import datetime
import random
import re
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup


def _text_at(nodes, index, default=''):
    """Return the text of nodes[index], or default when that index is absent."""
    return nodes[index].text if len(nodes) > index else default


# One list per output column; every song appends exactly one value to each.
songName = []
songIds = []
favCount = []
commentCount = []
lyrics = []
newSubName = []
songwriters = []  # lyricist
composer = []  # composer
arrangement = []  # arranger
albumId = []
albumName = []
hotComment1 = []
commentLike1 = []
playCount = []

df = pd.read_excel('五月天歌曲清单.xlsx')
albumString = df['歌曲ID']
url = 'https://www.xiami.com/song/{}'

# The cookie must be refreshed regularly. The headers do not change between
# requests, so they are built once instead of once per song.
headers = {'Connection': 'keep-alive', 'Cookie': '', 'User-Agent': ''}

# NOTE(review): the empty first group and the class name in this pattern look
# damaged by the page this script was copied from — confirm against the
# original script before trusting the '收藏数量' column.
fav_pattern = re.compile(
    '()(.*?)span class="a86e-8b06-0852-b692 ripple" style="height', re.S
)

for songid in albumString:
    print('正在爬取{}'.format(songid))
    full_url = url.format(songid)
    res = requests.get(full_url, headers=headers)
    soup = BeautifulSoup(res.text, 'html.parser')

    songName.append(_text_at(soup.select('.song-name'), 0))
    songIds.append(songid)

    search_data = fav_pattern.findall(res.text)
    if len(search_data) > 1:
        # NOTE(review): 'n' may originally have been a newline escape whose
        # backslash was lost; kept as found.
        favCount.append(
            str(search_data[1])[-10:-3]
            .replace('n', '')
            .replace('>', '')
            .replace('n', '')
        )
    else:
        # Fewer than two matches used to raise IndexError and abort the whole
        # run; nothing is written until the end, so record a blank instead.
        favCount.append('')

    lyrics.append(_text_at(soup.select('.lyric-content'), 0, '无'))
    newSubName.append(_text_at(soup.select('.song-subname'), 0))

    # Positional fields of the info panel; pages with fewer than four entries
    # get blanks instead of raising IndexError.
    creatInfo = soup.select('.info-value')
    albumName.append(_text_at(creatInfo, 0))
    songwriters.append(_text_at(creatInfo, 1))  # lyricist
    composer.append(_text_at(creatInfo, 2))  # composer
    arrangement.append(_text_at(creatInfo, 3))  # arranger

    count_nodes = soup.select('.count')
    # The first character of the play-count text is dropped.
    playCount.append(_text_at(count_nodes, 0)[1:])
    # NOTE(review): this stores the whole list of '.count' elements rather than
    # one element's text — probably an index and `.text` are missing; the
    # intended index cannot be determined from this script.
    commentCount.append(count_nodes)

    # First hot comment.
    hotComment1.append(_text_at(soup.select('.comment-text'), 0))
    # Like count of the first hot comment (the data is actually not very
    # accurate).
    commentLike1.append(_text_at(count_nodes, 8, '无'))

    # Random pause of up to five seconds between requests.
    time.sleep(random.random() * 5)

# Key order here fixes the column order of the spreadsheet.
result = {
    '歌曲名字': songName,
    '别名': newSubName,
    '歌曲Id': songIds,
    '收藏数量': favCount,
    '播放数量': playCount,
    '评论数量': commentCount,
    '作词': songwriters,
    '作曲': composer,
    '编曲': arrangement,
    '专辑名字': albumName,
    '歌词': lyrics,
    '热门评论1': hotComment1,
    '热门评论赞1': commentLike1,
}
results = pd.DataFrame(result)
results.info()
results.to_excel('五月天歌曲详情1500.xlsx')