Cyrus Flag

flag{S0_bangbang_7ha7_u_f1nd_h3r3}

爬虫入门 0x00 单页面抓取文件

抓取一个页面上指定类型的文件并保存。

这是我用 Python 2.7 写的最后一版代码,之后就开始用 3.6 了。

这也是我还不会用 re(正则表达式)时写的最后一版代码,之后……之后大概也没怎么学吧。

代码如下(Python 2.7):

# Author = Cyrus
# Py Edition = 2.7
import os
import sys

try:
    from urllib.parse import urljoin  # Python 3
except ImportError:
    from urlparse import urljoin  # Python 2.7

import requests

# Clear the console before any output. "cls" only exists on Windows, so fall
# back to "clear" on every other platform.
os.system("cls" if os.name == "nt" else "clear")

# Running totals updated by download() and reported at the end of the script.
# A ``global`` statement is a no-op at module level, so plain assignments
# are all that is needed here.
success = 0
error = 0

def download(url, filename):
    """Fetch ``url`` and save the response body to ``filename``.

    The module-level ``success`` counter is incremented when the server
    answers with HTTP 200 and the body has been written to disk; otherwise
    the module-level ``error`` counter is incremented. A network failure or
    an unwritable file is counted as an error instead of aborting the run.

    Args:
        url: Address of the file to fetch.
        filename: Path of the local file to create (opened in binary mode).
    """
    global success
    global error

    saved = False
    try:
        # A timeout keeps one unresponsive server from hanging the script.
        response = requests.get(url, stream=True, timeout=30)
        try:
            if response.status_code == 200:
                with open(filename, 'wb') as f:
                    # Explicit chunk size: iterating the response object
                    # directly yields tiny 128-byte pieces.
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
                saved = True
        finally:
            # With stream=True the connection stays open until released.
            response.close()
    except (requests.RequestException, IOError):
        # NOTE: a partially written file may be left behind in this case.
        saved = False

    if saved:
        print(' - Sucess')
        success += 1
    else:
        print(' - Error')
        error += 1

def find(url, a, path):
    """Download every link on the page at ``url`` that ends with ``a``.

    The page is scanned for each occurrence of the extension ``a``. The link
    is taken to be the text between the closest preceding double quote and
    the end of the extension. Each link is resolved against ``url`` and
    handed to ``download``; the local file name is ``path`` followed by the
    last path segment of the link.

    Args:
        url: Address of the page to scan; also the base for relative links.
        a: File extension to look for, including the dot (e.g. ".jpg").
        path: Prefix prepended verbatim to each local file name.

    Raises:
        ValueError: If ``a`` is empty.
        requests.RequestException: If the page itself cannot be fetched.
    """
    if not a:
        # str.partition raised the same error for an empty separator; the
        # index-based scan below would otherwise never advance.
        raise ValueError("empty separator")

    print("******************************************")
    print("URL =  {0}".format(url))
    print("FILETYPE =  {0}".format(a))

    # Reading a page is a GET; a POST may be rejected or answered differently.
    response = requests.get(url, timeout=30)
    page = response.content
    if not isinstance(page, str):
        # Python 3 returns bytes here; decode so the str searches below work.
        page = page.decode(response.encoding or "utf-8", "replace")

    start = 0
    while True:
        hit = page.find(a, start)
        if hit == -1:
            break
        # Only look for the opening quote between the previous match and
        # this one.
        stem = page[start:hit].rpartition('"')[2]
        filename = path + stem.rpartition("/")[2] + a
        # urljoin leaves absolute links (http, https, ...) untouched and
        # resolves relative ones against the page address without doubling
        # or dropping slashes.
        target = urljoin(url, stem + a)
        # Keep the cursor on the same line so download() can append its
        # status; flush so the URL shows up before the transfer starts.
        sys.stdout.write(target + " ")
        sys.stdout.flush()
        download(target, filename)
        start = hit + len(a)
    print("")

# Page to scan and the file types to pull from it. The empty path prefix
# means files are saved under their bare names, relative to the current
# working directory.
url = "http://www.uestc.edu.cn/"
for ext in (".jpg", ".png", ".html"):
    find(url, ext, "")

# Final tally of the module-level counters.
print("******************************************")
print(" ".join(["Success download ", str(success), " file(s),"]))
print(" ".join(["Failed download ", str(error), " file(s)."]))

【运行】