欧美极品高清xxxxhd,国产日产欧美最新,无码AV国产东京热AV无码,国产精品人与动性XXX,国产传媒亚洲综合一区二区,四库影院永久国产精品,毛片免费免费高清视频,福利所导航夜趣136
標題:
關於用socket實現一個超簡單的爬蟲
[打印本頁]
作者:
19981998
時間:
2018-11-29 20:42
標題:
關於用socket實現一個超簡單的爬蟲
# For reference only.
import socket
import ssl
import re
import os
# Page to crawl.  The pieces derived below (protocol, host, path, file_path)
# are module-level values read by the functions defined further down.
url = "http://csse.xjtlu.edu.cn/classes/CSE205/"

# Split "<protocol>://<rest>"; fail with a clear message when no scheme is given.
protocol, _scheme_sep, u = url.partition('://')
if not _scheme_sep:
    raise ValueError('URL must start with a scheme such as http://: {!r}'.format(url))

# str.find returns -1 when the URL has no path part; treat that as
# "the host runs to the end of the string" instead of slicing with -1.
i = u.find('/')
if i == -1:
    i = len(u)
host = u[:i]
path = u[i:] or '/'  # an empty path is requested as "/"

# Directory (on drive D:) that downloaded images are written into.
file_path = 'D:\\{}'.format(host)
def get_html(url):
    """Fetch *url* with a hand-written HTTP GET over a plain or TLS socket.

    The request text and the decoded response are both printed to stdout.

    Args:
        url: Absolute ``http://`` or ``https://`` URL of the page to fetch.

    Returns:
        The complete response (status line, headers and body) as text.
        Bytes that are not valid UTF-8 are replaced rather than raising.
        NOTE: the body is returned exactly as received; a chunked
        Transfer-Encoding is not decoded here.

    Raises:
        ValueError: If *url* contains no host name.
        OSError: If connecting, the TLS handshake, or socket I/O fails.
    """
    # Local import so this function does not depend on file-level imports
    # beyond socket/ssl.
    from urllib.parse import urlsplit

    parts = urlsplit(url)
    target_host = parts.hostname
    if not target_host:
        raise ValueError('URL has no host: {!r}'.format(url))

    use_tls = parts.scheme == 'https'
    port = parts.port or (443 if use_tls else 80)

    # Request target = path plus query string; an empty path means "/".
    target = parts.path or '/'
    if parts.query:
        target += '?' + parts.query

    # Only mention the port in the host header when the URL gave one.
    if parts.port is None:
        host_header = target_host
    else:
        host_header = '{}:{}'.format(target_host, parts.port)

    sock = socket.socket()
    if use_tls:
        # ssl.wrap_socket() is gone from current Python versions; a default
        # context also verifies the certificate and sends SNI.
        sock = ssl.create_default_context().wrap_socket(
            sock, server_hostname=target_host)

    with sock:  # always close the socket, even on errors
        sock.connect((target_host, port))
        # "Connection: close" makes the server end the stream after the
        # response, so reading until EOF terminates.
        request = 'GET {} HTTP/1.1\r\nhost:{}\r\nConnection: close\r\n\r\n'.format(
            target, host_header)
        print(request)
        sock.sendall(request.encode())

        chunks = []
        buffer_size = 1024
        while True:
            r = sock.recv(buffer_size)
            # A short read does not mean the response is finished; only an
            # empty read (peer closed the connection) does.
            if not r:
                break
            chunks.append(r)

    response = b''.join(chunks).decode(errors='replace')
    print(response)
    return response
def _fetch_image_bytes(image_url):
    """Download *image_url* over a raw socket and return the response body.

    Returns the bytes after the first blank line of the HTTP response, or
    ``b''`` when the response contains no header/body separator.
    """
    from urllib.parse import urlsplit

    parts = urlsplit(image_url)
    use_tls = parts.scheme == 'https'
    port = parts.port or (443 if use_tls else 80)

    # Request target = path plus query string; an empty path means "/".
    target = parts.path or '/'
    if parts.query:
        target += '?' + parts.query

    if parts.port is None:
        host_header = parts.hostname
    else:
        host_header = '{}:{}'.format(parts.hostname, parts.port)

    sock2 = socket.socket()
    if use_tls:
        # ssl.wrap_socket() is gone from current Python versions; a default
        # context also verifies the certificate and sends SNI.
        sock2 = ssl.create_default_context().wrap_socket(
            sock2, server_hostname=parts.hostname)

    with sock2:  # always close the socket, even on errors
        sock2.connect((parts.hostname, port))
        # "Connection: close" makes the server end the stream after the
        # response, so reading until EOF terminates.
        request2 = 'GET {} HTTP/1.1\r\nhost:{}\r\nConnection: close\r\n\r\n'.format(
            target, host_header)
        print(request2)
        sock2.sendall(request2.encode())

        chunks = []
        buffer_size = 1024
        while True:
            r = sock2.recv(buffer_size)
            # Only an empty read marks the end; short reads happen mid-stream.
            if not r:
                break
            chunks.append(r)

    # Split at the FIRST blank line only: binary image data may itself
    # contain the byte sequence CRLF CRLF.
    _headers, _sep, body = b''.join(chunks).partition(b'\r\n\r\n')
    return body


def get_img(response):
    """Download every ``<img src=...>`` found in *response* to drive D:.

    Each ``src`` is resolved against the crawled page (module-level
    ``protocol``, ``host`` and ``path``), fetched, and written to
    ``D:\\<host>\\<name>``, where ``<name>`` is the ``src`` text with path
    separators and other characters not allowed in Windows file names
    replaced by dots.  The target directory must already exist.

    Args:
        response: HTML text (or a full HTTP response) to scan for images.

    Raises:
        OSError: If a download or a file write fails.
    """
    from urllib.parse import urljoin, urlsplit

    imgre = re.compile(r"""<img\s.*?\s?src\s*=\s*['|"]?([^\s'"]+).*?>""", re.I)
    page_url = '{}://{}{}'.format(protocol, host, path)

    for src in imgre.findall(response):
        # urljoin handles relative, root-relative ("/x.png"),
        # protocol-relative ("//cdn/x.png") and absolute sources alike.
        image_url = urljoin(page_url, src)
        resolved = urlsplit(image_url)
        if resolved.scheme not in ('http', 'https') or not resolved.hostname:
            # e.g. "data:" URIs cannot be fetched over a socket.
            continue

        data = _fetch_image_bytes(image_url)

        # Flatten the source into a single file name that is valid on Windows.
        name = re.sub(r'[\\/:*?"<>|]', '.', src)
        file_path = 'D:\\{}\\{}'.format(host, name)
        with open(file_path, 'wb') as f:
            f.write(data)
def mkdir(file_path):
    """Create the directory *file_path*, including any missing parents.

    Args:
        file_path: Path of the directory to create.

    Returns:
        True if this call created the directory, False if the path already
        existed.

    Raises:
        OSError: If creation fails and the path does not exist afterwards.
    """
    try:
        # Create first and inspect afterwards: checking os.path.exists()
        # before creating leaves a window in which another process can
        # create the same path.
        os.makedirs(file_path)
    except OSError:
        # An existing path is the "nothing to do" case; anything else is a
        # real failure and is passed on to the caller.
        if not os.path.exists(file_path):
            raise
        return False
    return True
# Run the crawl only when executed as a script, so that importing this module
# does not open network connections or write files as a side effect.
if __name__ == '__main__':
    mkdir(file_path)            # make sure the output directory exists
    response = get_html(url)    # fetch the page
    get_img(response)           # save every image the page references
歡迎光臨 (http://m.raoushi.com/bbs/)
Powered by Discuz! X3.1