下载HDUOJ所有AC题目代码

通过观察杭电OJ(HDUOJ)的页面,很容易找到规律,然后根据这些规律编写相应的代码。
用 Requests 实现模拟登录。
用 BeautifulSoup 处理 status 页面的表格,获得 runid、problemid 和代码链接;其实代码链接就是由 runid 决定的。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python3
#coding=utf-8

import getpass
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

def saveFile(data, fname="temp.html"):
    """Write text to a file and report success on stdout.

    Args:
        data: The text to store.
        fname: Destination path. The file is created, or truncated if it
            already exists.
    """
    # Always write UTF-8: the platform default codec (for example a legacy
    # Windows code page) can raise UnicodeEncodeError on characters it cannot
    # represent, such as non-ASCII comments inside downloaded source code.
    with open(fname, "w", encoding="utf-8") as f:
        f.write(data)
    print(fname + '保存成功')

def getCode(codeurl, proid):
    """Download the source code of one submission and save it locally.

    The page at ``codeurl`` is fetched through the shared ``session``; the
    text of its first ``<textarea>`` is written to ``HDU<proid>.cpp`` in the
    current directory (e.g. ``HDU1001.cpp``).

    Args:
        codeurl: The href of the code page as found in the status table,
            resolved against ``hdu_url``.
        proid: Problem id string used to build the output file name.
    """
    # urljoin copes with both "viewcode.php?..." and "/viewcode.php?..."
    # without producing a doubled slash after the host name.
    url = urljoin(hdu_url, codeurl)
    code_html = session.get(url, cookies=cookieJar)
    code_html.encoding = 'gb2312'

    # Name the parser explicitly so the parse tree does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(code_html.text, 'html.parser')

    textarea = soup.textarea  # the tag that holds the submitted code
    if textarea is None:
        # No code area on the page (e.g. the login did not succeed): skip this
        # submission instead of aborting the whole run with AttributeError.
        print('未找到代码区域, 跳过: ' + url)
        return

    saveFile(textarea.text, 'HDU' + proid + '.cpp')

def _iterStatusRows(soup):
    """Yield ``(runid, proid, code_cell)`` for each submission row in ``soup``.

    A submission row is a ``<tr>`` that does not wrap another table, has at
    least seven cells, and whose first cell is an all-digit run id. The
    problem id is read from cell 3 and the code link lives in cell 6.
    """
    for row in soup.find_all('tr'):
        # Rows that only wrap a nested table are page layout, not submissions.
        if row.find('table') is not None:
            continue
        cells = row.find_all('td')
        if len(cells) < 7:
            continue
        runid = cells[0].get_text(strip=True)
        # Identify the run id by position and content rather than by length,
        # so ids of any width (and ids without a particular digit) are found.
        if not runid.isdecimal():
            continue
        yield runid, cells[3].get_text(strip=True), cells[6]


def getStatus():
    """Walk the user's status pages and download one solution per problem.

    Each page is requested from ``status_url`` for user ``uname`` with
    ``status=5`` (presumably the "Accepted" filter -- confirm against the
    site). For every submission row, the first submission encountered for a
    problem is downloaded through ``getCode``; later rows for the same problem
    are skipped. Paging continues by requesting ``first = <last run id on the
    page> - 1`` and stops when a page contains no run id.
    """
    first = ''
    vis = set()  # problem ids that have already been saved
    while True:
        payload = {'first': first, "user": uname, "pid": "", "lang": "", "status": 5}
        status_html = session.get(status_url, cookies=cookieJar, params=payload)
        print(status_html.url)
        status_html.encoding = 'gb2312'
        # Explicit parser keeps the result independent of installed extras.
        soup = BeautifulSoup(status_html.text, 'html.parser')

        runid = ""  # stays empty when the page lists no submissions
        for runid, proid, code_cell in _iterStatusRows(soup):
            if proid in vis:
                continue  # already saved a solution for this problem
            link = code_cell.a
            href = link.get('href') if link is not None else None
            if not href:
                continue  # no code link in this row; nothing to download
            vis.add(proid)
            getCode(href, proid)
            time.sleep(1)  # be gentle with the server between downloads

        if runid == '':  # no run id on this page: it was the last one
            break
        # The next page starts just below the last run id of this page.
        first = str(int(runid) - 1)


# Script entry: ask for credentials, log in to HDU OJ, then download the code.
uname = input("请输入用户名: ")
# Read the password without echoing it to the terminal.
upass = getpass.getpass("请输入密码: ")

hdu_url = "http://acm.hdu.edu.cn/"
login_url = hdu_url + "userloginex.php?action=login"
status_url = hdu_url + "status.php"

login_data = {'username': uname, "userpass": upass,
              "login": "Sign In"}

# Keep one session so the login carries over to the later requests.
cookieJar = requests.cookies.RequestsCookieJar()
session = requests.Session()

# Log in
login_resp = session.request("POST", login_url,
                             cookies=cookieJar, data=login_data)
# Stop early on an HTTP-level failure instead of crawling while logged out.
login_resp.raise_for_status()

getStatus()