python进阶-04课程源码

后端

74.21KB

37 需要积分: 1

立即下载

资源介绍:

python进阶-04课程源码

# Scrapy settings for tutorial project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

import os
import random
from datetime import datetime

BOT_NAME = 'tutorial'

SPIDER_MODULES = ['tutorial.spiders']
NEWSPIDER_MODULE = 'tutorial.spiders'

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'tutorial (+http://www.yourdomain.com)'

# Obey robots.txt rules
ROBOTSTXT_OBEY = False

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
DOWNLOAD_DELAY = 0  # wait 0 seconds between consecutive requests
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#   'Accept-Language': 'en',
#}

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'tutorial.middlewares.TutorialSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
#    'tutorial.middlewares.TutorialDownloaderMiddleware': 543,
#}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# Only the text-download pipeline is active; the others are kept for reference.
ITEM_PIPELINES = {
    # 'tutorial.pipelines.TutorialPipeline': 300,
    # 'tutorial.save_Image_pipeline.SaveImagePipeline': 300,
    # 'tutorial.video_download_pipeline.VideoDownloadPipeline': 500,
    'tutorial.text_download_pipeline.TextDownloadPipeline': 300,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

#######################################
# Image handling
#######################################
# Number of days after which a downloaded image is considered expired.
# (Dedicated image servers such as FastDFS or TFS are an alternative.)
IMAGES_EXPIRES = 90

# Thumbnail name -> (width, height).
IMAGES_THUMBS = {
    'small': (50, 50),
    'big': (270, 270),
}

# Directory where downloaded images are stored (Windows development path).
IMAGES_STORE = 'F:\\zhaoxi_project\\base_project\\part2_spider\\day04\\toscrape\\images'
# exist_ok=True avoids the check-then-create race of a separate exists() test.
os.makedirs(IMAGES_STORE, exist_ok=True)
# Linux alternative:
#IMAGES_STORE = '/opt/images'
# NOTE(review): the source this file was recovered from does not make clear
# whether the Linux override above was active or commented out; it is kept
# disabled here because the directory is created for the Windows path only.
# Confirm against the deployed configuration.

################################################
# Logging
# Levels from lowest to highest: DEBUG, INFO, WARNING, ERROR, CRITICAL.
# LOG_LEVEL is the threshold: a message is written only if its level is
# equal to or higher than LOG_LEVEL (e.g. with INFO, DEBUG is dropped).
# Production deployments typically use ERROR.
LOG_LEVEL = "INFO"

LOG_DIR = "log"
# The log directory is relative to the working directory of the process.
os.makedirs(LOG_DIR, exist_ok=True)

# One log file per calendar day; month and day are not zero-padded,
# e.g. log/scrapy_2024_11_25.log
today = datetime.now()
LOG_FILE = f"{LOG_DIR}/scrapy_{today.year}_{today.month}_{today.day}.log"

###################################################
# Pool of browser User-Agent strings to pick from.
USER_AGENT_LIST = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
]

# Chosen once, when this module is imported, so the value stays fixed for the
# lifetime of the process rather than changing per request.
USER_AGENT = random.choice(USER_AGENT_LIST)

资源文件列表:

python进阶-04课程源码.zip 大约有82个文件

__pycache__/
__pycache__/main.cpython-36.pyc 351B
api/
api/__init__.py
api/__pycache__/
api/__pycache__/__init__.cpython-36.pyc 100B
api/crawler/
api/crawler/__pycache__/
api/crawler/__pycache__/fanyi.cpython-36.pyc 2.97KB
api/crawler/__pycache__/params.cpython-36.pyc 372B
api/crawler/fanyi.py 5.19KB
api/crawler/params.py 200B
api/crawler/text.html 12.97KB
api/system/
api/system/__pycache__/
api/system/__pycache__/user.cpython-36.pyc 3.54KB
api/system/user/
api/system/user/__pycache__/
api/system/user/__pycache__/params.cpython-36.pyc 650B
api/system/user/__pycache__/user.cpython-36.pyc 3.1KB
api/system/user/params.py 310B
api/system/user/user.py 3.96KB
common/
common/common/
common/commonclass/
config/
config/db_config.ini 114B
main.py 4.67KB
scrapy_project.py 185B
tutorial/
tutorial/DownLoadText/
tutorial/geckodriver.log 34.63KB
tutorial/images/
tutorial/log/
tutorial/log/scrapy_2024_11_25.log 98.5KB
tutorial/log/scrapy_2024_11_26.log 49.63KB
tutorial/quotes-1.html 10.8KB
tutorial/quotes-2.html 13.42KB
tutorial/quotes.jsonlines 10.84KB
tutorial/scrapy.cfg 259B
tutorial/tutorial/
tutorial/tutorial/__init__.py
tutorial/tutorial/__pycache__/
tutorial/tutorial/__pycache__/__init__.cpython-36.pyc 146B
tutorial/tutorial/__pycache__/items.cpython-36.pyc 732B
tutorial/tutorial/__pycache__/pipelines.cpython-36.pyc 612B
tutorial/tutorial/__pycache__/save_Image_pipeline.cpython-36.pyc 1.01KB
tutorial/tutorial/__pycache__/settings.cpython-36.pyc 2.87KB
tutorial/tutorial/__pycache__/text_download_pipeline.cpython-36.pyc 1.09KB
tutorial/tutorial/__pycache__/video_download_pipeline.cpython-36.pyc 1.58KB
tutorial/tutorial/items.py 754B
tutorial/tutorial/middlewares.py 3.57KB
tutorial/tutorial/pipelines.py 362B
tutorial/tutorial/save_Image_pipeline.py 1.24KB
tutorial/tutorial/settings.py 6.4KB
tutorial/tutorial/spiders/
tutorial/tutorial/spiders/__init__.py 161B
tutorial/tutorial/spiders/__pycache__/
tutorial/tutorial/spiders/__pycache__/__init__.cpython-36.pyc 154B
tutorial/tutorial/spiders/__pycache__/quotes_spider.cpython-36.pyc 2.38KB
tutorial/tutorial/spiders/__pycache__/xbiqugu.cpython-36.pyc 1.7KB
tutorial/tutorial/spiders/dload_files.py 1.05KB
tutorial/tutorial/spiders/quotes_spider.py 3.58KB
tutorial/tutorial/spiders/xbiqugu.py 1.54KB
tutorial/tutorial/text_download_pipeline.py 1.27KB
tutorial/tutorial/video_download_pipeline.py 1.36KB
tutorial/videos/
utils/
utils/__init__.py
utils/__pycache__/
utils/__pycache__/__init__.cpython-312.pyc 148B
utils/__pycache__/__init__.cpython-36.pyc 134B
utils/__pycache__/config_helper.cpython-36.pyc 862B
utils/__pycache__/module1.cpython-312.pyc 270B
utils/__pycache__/module1.cpython-36.pyc 250B
utils/__pycache__/module2.cpython-312.pyc 270B
utils/__pycache__/module2.cpython-36.pyc 250B
utils/__pycache__/orm_helper.cpython-36.pyc 6.75KB
utils/__pycache__/pymysql_helper.cpython-36.pyc 2.37KB
utils/config_helper.py 867B
utils/orm_helper.py 5.83KB
utils/pymysql_helper.py 2.86KB