Python 爬虫脚本小解
import requests

# Target URL to scrape.
url = 'https://www.example.com'
# Request headers that mimic a desktop browser, so naive bot filters that
# reject the default `python-requests` User-Agent do not block the request.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
# Optional query-string parameters appended to the URL.
params = {'key1': 'value1', 'key2': 'value2'}
# Issue the GET request with a timeout so an unresponsive server
# cannot hang the script forever.
try:
    response = requests.get(url, headers=headers, params=params, timeout=10)
except requests.exceptions.RequestException as e:
    # Raising SystemExit (instead of the interactive-only exit() helper)
    # prints the message to stderr and terminates with exit status 1,
    # so callers/shell scripts can detect the failure.
    raise SystemExit(f'Request failed: {e}')
# Anything other than HTTP 200 is treated as a failure.
if response.status_code != 200:
    raise SystemExit(f'Error: Status code: {response.status_code}')
# Persist the HTML body; utf-8 keeps non-ASCII pages intact on disk.
html_content = response.text
with open('output.html', 'w', encoding='utf-8') as f:
    f.write(html_content)
# Report success on stdout.
print('Success: HTML content saved to output.html')
# NOTE(review): this entire section is a verbatim duplicate of the block
# above — it re-fetches the same URL and silently overwrites the same
# output.html. Consider deleting it, or parameterizing the URL/output path.

# Target URL to scrape.
url = 'https://www.example.com'
# Browser-like headers so simple bot filters do not reject the request.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
# Optional query-string parameters appended to the URL.
params = {'key1': 'value1', 'key2': 'value2'}
# Issue the GET request; the timeout prevents an indefinite hang.
try:
    response = requests.get(url, headers=headers, params=params, timeout=10)
except requests.exceptions.RequestException as e:
    # SystemExit (rather than the interactive-only exit() helper) writes
    # the message to stderr and exits with status 1 so failure is visible
    # to the shell.
    raise SystemExit(f'Request failed: {e}')
# Treat any non-200 response as a failure.
if response.status_code != 200:
    raise SystemExit(f'Error: Status code: {response.status_code}')
# Save the HTML body; utf-8 preserves non-ASCII content.
html_content = response.text
with open('output.html', 'w', encoding='utf-8') as f:
    f.write(html_content)
# Report success on stdout.
print('Success: HTML content saved to output.html')
上一技术:没有了
下一技术:3389爆破监听工具