Python实现的下载8000首儿歌的代码分享

作者:junjie 时间:2021-02-03 05:41:51 

下载8000首儿歌的Python代码:


#-*- coding: UTF-8 -*-

from pyquery import PyQuery as py
from lxml import etree
import urllib
import re
import os
import sys
import logging

def format(filename):
    """Return *filename* with spaces and quote characters replaced by "_".

    The name shadows the ``format`` builtin; it is kept because
    ``download_all_mp3`` calls the helper under this name.

    NOTE(review): in the published listing the middle entry of the
    character tuple was garbled into a bare triple quote, which made the
    whole literal a syntax error.  It is assumed to have been a typographic
    apostrophe, so both curly single quotes are handled here -- confirm
    against the original script.
    """
    # Unicode escapes keep the source ASCII-only, so the literals mean the
    # same thing regardless of the encoding the file is saved in.
    unsafe_chars = (u' ', u'\u2018', u'\u2019', u'\'')
    for char in unsafe_chars:
        # str.replace is a no-op when the character is absent, so no
        # separate find() check is needed.
        filename = filename.replace(char, u'_')
    return filename

def download_mp3(mp3_url, filename,dir):
    """Download *mp3_url* and store it as *filename* inside directory *dir*.

    Nothing is downloaded when the target file already exists.  Failures are
    logged rather than raised so that a batch run can carry on with the next
    song.  Messages go to the "simple" logger, the same logger the script's
    entry point configures.

    Args:
        mp3_url: URL of the file to fetch.
        filename: Name of the file to create inside *dir*.
        dir: Existing directory that receives the file.
    """
    # Imported locally so the function runs on both Python 3 and Python 2
    # without touching the file-level imports.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    # Look the logger up by name instead of relying on a module global that
    # only exists when the file is executed as a script.
    log = logging.getLogger("simple")

    path = os.path.join(dir, filename)
    if os.path.exists(path):
        log.debug(path + " is existed.")
        return

    try:
        # Fetch first and open the output file afterwards: a failed download
        # must not leave an empty file behind, because the exists() check
        # above would then skip the song on every later run.
        response = urlopen(mp3_url)
        try:
            data = response.read()
        finally:
            response.close()
        with open(path, 'wb') as out:
            out.write(data)
    except Exception:
        # Drop a partially written file for the same reason as above.
        if os.path.exists(path):
            try:
                os.remove(path)
            except OSError:
                pass
        log.debug(filename + ' is not downloaded.')
    else:
        log.debug(filename + ' is downloaded.')

       
def download_all_mp3(start,end,dir,logger):
    """Download the songs published on pages ``start`` .. ``end - 1``.

    For every number ``x`` in ``range(start, end)`` the page
    ``http://www.youban.com/mp3-d<x>.html`` is loaded with PyQuery, the
    ``.mp3downloadbox`` element is selected, and a regular expression pulls
    the song title (the ``<h1>`` text) and the first quoted attribute value
    of the first ``<a>`` after ``downloadboxlist`` (used as the download
    link) out of its markup.  The song is saved through ``download_mp3`` as
    ``<x>_<title>.mp3`` after the name has been passed through ``format``.

    Args:
        start: First page number (inclusive).
        end: Last page number (exclusive).
        dir: Directory handed to ``download_mp3``.
        logger: Logger that receives the progress messages.
    """
    # Text type used to serialise the PyQuery selection on either Python line.
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3

    # Compiled once instead of once per page.  Same pattern as before, written
    # without the Python-2-only ur"" prefix (a plain '"' needs no escaping).
    regex = re.compile(
        u'.*<h1>(.*)</h1>.*downloadboxlist.*?<a.*?"(.*?)"',
        re.UNICODE | re.S)

    for x in range(start, end):
        url = "http://www.youban.com/mp3-d" + str(x) + ".html"
        logger.debug(str(x) + ": " + url)
        try:
            box = py(url=url)('.mp3downloadbox')
            # A selector that matches nothing yields an empty selection, not
            # None or ''.  Skip this page only: one missing page must not end
            # the whole run.
            if len(box) == 0:
                logger.debug(url + " is not existed.")
                continue

            m = regex.search(text_type(box))
            if m is None:
                continue

            title = m.group(1).strip()
            link = m.group(2)
            if link == '' or title == '':
                logger.debug(url + " is not useful")
                continue

            logger.debug(str(x) + ": " + link)
            download_mp3(link, format(str(x) + "_" + title + ".mp3"), dir)
        except Exception:
            # Best effort per page; KeyboardInterrupt/SystemExit are not
            # caught, so the run can still be stopped with Ctrl-C.
            logger.debug(url + " met exception.")
     

     
if __name__ == "__main__":
    # Usage: script.py [start end [dir_root]]
    # Every argument is optional; the previous code indexed sys.argv
    # unconditionally and raised IndexError when any of them was omitted.
    args = sys.argv[1:]

    start, end = 1, 8000
    if len(args) >= 2:
        # Convert before using the values: comparing the raw argv strings
        # with 0 is meaningless on Python 2 and a TypeError on Python 3.
        start, end = int(args[0]), int(args[1])
        print("Download from %s to %s.\n" % (start, end))

    dir_root = "e:\\song"
    if len(args) >= 3 and args[2] != '':
        dir_root = args[2]

    # Songs of one run are collected in "<dir_root>/<start>-<end>".
    target_dir = os.path.join(dir_root, str(start) + "-" + str(end))
    if not os.path.exists(target_dir):
        # makedirs also creates dir_root when it does not exist yet.
        os.makedirs(target_dir)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Download to " + target_dir + ".\n")

    # Must stay a module-level name: download_mp3 may read the global
    # `logger`.  Messages go both to the console and to download.log.
    logger = logging.getLogger("simple")
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(os.path.join(target_dir, "download.log"))
    ch = logging.StreamHandler()
    formatter = logging.Formatter("%(message)s")
    ch.setFormatter(formatter)
    fh.setFormatter(formatter)
    logger.addHandler(ch)
    logger.addHandler(fh)
    download_all_mp3(start, end, target_dir, logger)

有需要的可以参考继续修改。

标签:Python,下载儿歌
0
投稿

猜你喜欢

  • antd form表单使用setFieldsValue 赋值失效的解决

    2024-04-10 13:50:19
  • TCP关闭问题详细介绍

    2022-07-26 16:58:02
  • Python人工智能深度学习CNN

    2023-11-27 06:19:15
  • 详解Python的collections模块中的deque双端队列结构

    2021-08-15 19:35:57
  • 浅析vue中的nextTick

    2024-04-26 17:42:21
  • Pytest 自动化测试框架的使用

    2022-11-12 16:47:46
  • PHP根据key删除数组中指定的元素

    2023-07-17 19:46:27
  • Python文件读取的3种方法及路径转义

    2021-12-27 03:48:28
  • ASP中Session技巧 默认过期时间为20分钟

    2012-12-04 20:28:26
  • YOLOv5构建安全帽检测和识别系统使用详解

    2023-11-30 14:45:00
  • Python中self用法实例详解

    2022-12-18 08:27:48
  • python manim实现排序算法动画示例

    2021-11-10 10:41:58
  • JavaScript判断各种浏览器类型及版本

    2008-09-29 15:17:00
  • Python人工智能学习PyTorch实现WGAN示例详解

    2022-10-20 18:49:32
  • 用JavaScript脚本实现的图灵机

    2009-01-21 18:10:00
  • 详解python3中的真值测试

    2022-03-10 13:56:59
  • 一文秒懂pandas中iloc()函数

    2023-07-31 18:20:42
  • Go语言将string解析为time.Time时两种常见报错

    2024-05-22 17:45:48
  • 走中国特色的网站重构道路

    2010-04-08 16:10:00
  • python实现数组求和与平均值

    2021-09-27 07:15:49
  • asp之家 网络编程 m.aspxhome.com