# Python
## 1. Introduction to Python
### Choosing a Development Tool
- VS Code
- PyCharm
### Batch Installing and Uninstalling Packages with pip
- List the packages that are already installed: `pip freeze`
- Export the installed packages to a file: `pip freeze > packages.txt`
- Uninstall in bulk: `pip uninstall -r packages.txt`
- Install in bulk: `pip install -r packages.txt`
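The file written by `pip freeze` is just a plain-text list with one pinned `package==version` entry per line; the names and versions below are only illustrative:

```
requests==2.31.0
beautifulsoup4==4.12.2
lxml==4.9.3
```

`pip uninstall -r packages.txt` asks for confirmation for every package; adding `-y` (i.e. `pip uninstall -y -r packages.txt`) makes the bulk uninstall non-interactive.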
### Basic Syntax
## 2. Common Libraries
### Web Scraping
- Dynamic User-Agent: keep a pool of UA strings in `ua_info.py` and pick one at random for each request, so that not every request carries the same `User-Agent` header.

```python
# ua_info.py -- pool of User-Agent strings for random rotation
ua_list = [
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
    'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
    'Baiduspider',
]
```
- Simple usage: crawl the Douban search results page for 西游记 (Journey to the West)
```python
import random
import requests

import ua_info

# Build request headers with a randomly chosen User-Agent
headers = {
    'User-Agent': random.choice(ua_info.ua_list)
}
# Query parameters for the Douban search
keywords = {
    "q": "西游记"
}
# Search Douban for 西游记 and fetch the page HTML
r = requests.get("https://www.douban.com/search", params=keywords, headers=headers)
print(r.text)
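```

A slightly more defensive variant of the same request (a sketch, not part of the original note) adds a timeout, checks the HTTP status, and guesses the response encoding, using only standard `requests` features:

```python
import random
import requests

import ua_info

headers = {'User-Agent': random.choice(ua_info.ua_list)}
keywords = {'q': '西游记'}

try:
    # timeout keeps the request from hanging forever
    r = requests.get(
        "https://www.douban.com/search",
        params=keywords,
        headers=headers,
        timeout=10,
    )
    r.raise_for_status()              # raise requests.HTTPError on 4xx/5xx
    r.encoding = r.apparent_encoding  # guess the encoding from the body
    print(r.text[:500])               # print only the first 500 characters
except requests.RequestException as exc:
    print(f"request failed: {exc}")
```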