爬取实习僧信息并在微信公众号平台展示出来

 

<?php

// Token mixed into the request signature check; change it to your own value.
define('TOKEN', 'weixin');

// Entry point: a request carrying "echostr" goes to valid();
// every other request is treated as an incoming message.
$wechatObj = new wechatCallbackapiTest();
if (isset($_GET['echostr'])) {
    $wechatObj->valid();
} else {
    $wechatObj->responseMsg();
}

/**
 * WeChat callback handler that answers "实习<keyword>[@<city>]" text messages
 * with internship postings scraped from www.shixiseng.com.
 *
 * Public entry points:
 *  - valid():       answers a request carrying "echostr" when the signature matches.
 *  - responseMsg(): reads the pushed XML message and echoes the reply XML.
 *
 * NOTE(review): responseMsg() does not verify the request signature, so any
 * client can POST a message to this endpoint — confirm whether that is intended.
 */
class wechatCallbackapiTest
{
    /**
     * Echoes back "echostr" and stops the script when the signature is valid.
     * Outputs nothing when the signature check fails.
     */
    public function valid()
    {
        $echoStr = isset($_GET["echostr"]) ? $_GET["echostr"] : '';
        if ($this->checkSignature()) {
            echo $echoStr;
            exit;
        }
    }

    /**
     * Checks that sha1(sorted(TOKEN, timestamp, nonce)) equals the "signature"
     * query parameter.
     *
     * @return bool true when the signature matches, false otherwise
     *              (including when a parameter is missing or not a string).
     */
    private function checkSignature()
    {
        foreach (array("signature", "timestamp", "nonce") as $key) {
            if (!isset($_GET[$key]) || !is_string($_GET[$key])) {
                return false;
            }
        }

        $tmpArr = array(TOKEN, $_GET["timestamp"], $_GET["nonce"]);
        // SORT_STRING forces a lexicographic sort. With the default flags the
        // numeric-looking timestamp and nonce are ordered by numeric value,
        // which yields a different concatenation and rejects valid requests.
        sort($tmpArr, SORT_STRING);
        $expected = sha1(implode('', $tmpArr));

        // Constant-time comparison where available; strict comparison otherwise.
        if (function_exists('hash_equals')) {
            return hash_equals($expected, $_GET["signature"]);
        }
        return $expected === $_GET["signature"];
    }

    /**
     * Reads the raw POST body, parses it as XML and echoes the reply.
     *
     * Text messages are handled by receiveText(); every other message type is
     * answered with an "unknow msg type" text reply. An empty body ends the
     * script with empty output; an unparsable body produces empty output.
     */
    public function responseMsg()
    {
        // $HTTP_RAW_POST_DATA no longer exists in PHP 7+; php://input works everywhere.
        $postStr = file_get_contents('php://input');
        if (empty($postStr)) {
            echo "";
            exit;
        }

        // Older libxml builds resolve external entities unless told not to.
        // The function is deprecated from PHP 8.0, hence the version guard.
        if (PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader')) {
            libxml_disable_entity_loader(true);
        }
        $previous = libxml_use_internal_errors(true);
        $postObj = simplexml_load_string($postStr, 'SimpleXMLElement', LIBXML_NOCDATA);
        libxml_use_internal_errors($previous);
        if ($postObj === false) {
            echo "";
            return;
        }

        // Dispatch on the type of message the user sent.
        $RX_TYPE = trim((string) $postObj->MsgType);
        if ($RX_TYPE === "text") {
            $result = $this->receiveText($postObj);
        } else {
            // Only text is implemented; reply with a well-formed text message
            // instead of calling handlers that do not exist.
            $result = $this->transmitText($postObj, "unknow msg type: " . $RX_TYPE);
        }
        echo $result;
    }

    /**
     * Handles a text message: extracts the search term after "实习", queries
     * shixiseng.com and returns a news reply with up to 8 postings.
     *
     * @param SimpleXMLElement $object parsed incoming message
     * @return string reply XML (postings, or a usage hint when nothing matched)
     */
    private function receiveText($object)
    {
        $keyword = (string) $object->Content;

        $term = '';
        if (preg_match('/实习(.*)/', $keyword, $match) === 1) {
            $term = trim($match[1]);
        }
        if ($term === '') {
            return $this->helpNews($object, '深圳');
        }

        $page = $this->fetchPage($this->buildSearchUrl($term));
        $blocks = $this->extractJobBlocks($page);
        if (empty($blocks)) {
            return $this->helpNews($object, '绵阳');
        }

        $content = array();
        $content[] = array("Title" => "?亲,实习信息如下",
                           "Description" => "",
                           "PicUrl" => "",
                           "Url" => "");

        // At most 8 postings, so header + postings + footer stays at 10 items.
        foreach (array_slice($blocks, 0, 8) as $block) {
            $content[] = $this->parseJob($block);
        }

        $content[] = array("Title" => "?? 欢迎推荐给你的好友哦",
                           "Description" => "",
                           "PicUrl" => "",
                           "Url" => "");

        return $this->transmitNews($object, $content);
    }

    /**
     * Builds the search URL for "<keyword>" or "<keyword>@<city>".
     *
     * @param string $term text that followed "实习" in the user's message
     * @return string search page URL
     */
    private function buildSearchUrl($term)
    {
        // "!== false" so that an "@" in first position still counts as a separator.
        if (strpos($term, '@') !== false) {
            $info = explode('@', $term);
            $k = urlencode($info[0]);
            $c = urlencode($info[1]);
            return 'http://www.shixiseng.com/interns?k=' . $k . '&c=' . $c . '&s=0,0&d=&m=&x=&t=zh&ch=&p=1';
        }

        return "http://www.shixiseng.com/interns?k=" . urlencode($term) . "&p=1";
    }

    /**
     * Downloads a page with cURL.
     *
     * @param string $url address to fetch
     * @return string response body, or '' when the request fails or times out
     */
    private function fetchPage($url)
    {
        $curlobj = curl_init();
        if ($curlobj === false) {
            return '';
        }
        curl_setopt($curlobj, CURLOPT_URL, $url);
        curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, true);
        // Bound the wait so a slow upstream cannot hang the reply indefinitely.
        // NOTE(review): 2s/4s chosen to stay under WeChat's reply window — confirm.
        curl_setopt($curlobj, CURLOPT_CONNECTTIMEOUT, 2);
        curl_setopt($curlobj, CURLOPT_TIMEOUT, 4);
        $body = curl_exec($curlobj);
        curl_close($curlobj);

        return is_string($body) ? $body : '';
    }

    /**
     * Normalises the fetched HTML and returns the per-posting HTML fragments.
     *
     * @param string $html raw search result page
     * @return array list of matched "job_inf_inf" fragments (possibly empty)
     */
    private function extractJobBlocks($html)
    {
        // Drop tabs and line breaks so each posting sits on a single line.
        $html = (string) preg_replace("/[\t\n\r]+/", "", $html);

        // Strip the icon glyphs for address, salary, days and time.
        $html = str_replace(array(
            '<i class="addr">&#xe60b;</i>',
            '<i class="money">&#xe61b;</i>',
            '<i class="days">&#xe61a;</i>',
            '<i class="job_time">&#xe60c;</i>',
        ), '', $html);

        // "/" caused trouble while matching, so it is swapped for "#" here and
        // restored in parseJob().
        $html = str_replace(array('/天', '/周'), array('#天', '#周'), $html);

        $matches = array();
        preg_match_all('/<div class=\"job_inf_inf\"><div>.*?<\/div><\/div>/i', $html, $matches);

        return isset($matches[0]) ? $matches[0] : array();
    }

    /**
     * Turns one posting fragment into a news item.
     *
     * @param string $block HTML fragment of a single posting
     * @return array item with "Title", "Description", "PicUrl" and "Url" keys;
     *               fields that cannot be matched are left empty
     */
    private function parseJob($block)
    {
        // Posting link and title.
        $mtitle = $this->capture('/<a href=\"(.*?)\" title=\"(.*?)\" target=\"_blank\"><h3>(.*?)<\/h3><\/a>/iUs', $block, 3);
        $uu = "http://www.shixiseng.com" . $mtitle[1];
        $title = $mtitle[3];

        // Company name and job direction.
        $com = $this->capture('/<p class=""><a href=\"(.*?)\"class=\"company_name\" target=\"_blank\" title="(.*?)">(.*?)<\/a><span>(.*?)<\/span><\/p>/i', $block, 4);
        $company = $com[3];
        $goal = str_replace('-', '', $com[4]);

        // Work location.
        $ct = $this->capture('/<span class=\"addr_box\" title=\"(.*?)\"><span>(.*?)<\/span><\/span>/i', $block, 2);
        $city = $ct[2];

        // Salary and required working time; restore the "/" replaced earlier.
        $mt = $this->capture('/<span class=\"money_box\">(.*?)<\/span><span class=\"day_box\">(.*?)<\/span>/i', $block, 2);
        $wkmoney = str_replace('#', '/', $mt[1]);
        $wktime = str_replace('#', '/', $mt[2]);

        // Last update time.
        $ut = $this->capture('/<span class=\"time_box\">(.*?)<\/span>/i', $block, 1);
        $uptime = $ut[1];

        // Image URL: text from the first "http" up to the closing '"/></a>'.
        // Only cut when both markers exist in the right order.
        $img = '';
        $wz1 = strpos($block, 'http');
        $wz2 = strpos($block, '"/></a>');
        if ($wz1 !== false && $wz2 !== false && $wz2 > $wz1) {
            $img = substr($block, $wz1, $wz2 - $wz1);
        }

        $xs = "职位:" . $title . "\n方向:" . $goal . "\n" . "公司:" . $company . " 地点:" . $city . "\n" . "薪酬:" . $wkmoney . " 时间:" . $wktime . "\n" . "更新时间:" . $uptime;

        return array("Title" => $xs,   // displayed text, adjust as needed
                     "Description" => "",
                     "PicUrl" => $img,
                     "Url" => $uu);
    }

    /**
     * Runs preg_match and returns groups 0..$groups, using '' for any group
     * that did not match, so callers never index a missing key.
     *
     * @param string $pattern regular expression
     * @param string $subject text to search
     * @param int    $groups  highest group index the caller reads
     * @return array groups indexed 0..$groups
     */
    private function capture($pattern, $subject, $groups)
    {
        $found = array();
        if (preg_match($pattern, $subject, $found) !== 1) {
            $found = array();
        }

        $out = array();
        for ($i = 0; $i <= $groups; $i++) {
            $out[$i] = isset($found[$i]) ? $found[$i] : '';
        }
        return $out;
    }

    /**
     * Builds the "nothing found" reply together with the usage hint.
     *
     * @param SimpleXMLElement $object parsed incoming message
     * @param string           $city   city name used in the usage example
     * @return string reply XML
     */
    private function helpNews($object, $city)
    {
        $content = array();

        $content[] = array("Title" => "⚠亲,没有查到具体信息!",
                           "Description" => "",
                           "PicUrl" => "",
                           "Url" => "");

        $content[] = array("Title" => "??发送实习+工作名称\n如:实习网络\n 或者实习+工作名称@地点\n如:实习网络@" . $city,  // displayed text, adjust as needed
                           "Description" => "",
                           "PicUrl" => "",
                           "Url" => "");

        return $this->transmitNews($object, $content);
    }

    /**
     * Builds a text reply.
     *
     * @param SimpleXMLElement $object  parsed incoming message
     * @param string           $content reply text
     * @return string reply XML
     */
    private function transmitText($object, $content)
    {
        $textTpl = "<xml>
<ToUserName><![CDATA[%s]]></ToUserName>
<FromUserName><![CDATA[%s]]></FromUserName>
<CreateTime>%s</CreateTime>
<MsgType><![CDATA[text]]></MsgType>
<Content><![CDATA[%s]]></Content>
</xml>";
        $result = sprintf(
            $textTpl,
            $this->cdata($object->FromUserName),
            $this->cdata($object->ToUserName),
            time(),
            $this->cdata($content)
        );
        return $result;
    }

    /**
     * Builds a news (article list) reply.
     *
     * @param SimpleXMLElement $object   parsed incoming message
     * @param array            $arr_item items with "Title", "Description",
     *                                   "PicUrl" and "Url" keys
     * @return string|null reply XML, or null when $arr_item is not an array
     */
    private function transmitNews($object, $arr_item)
    {
        if (!is_array($arr_item)) {
            return;
        }

        $itemTpl = "    <item>
        <Title><![CDATA[%s]]></Title>
        <Description><![CDATA[%s]]></Description>
        <PicUrl><![CDATA[%s]]></PicUrl>
        <Url><![CDATA[%s]]></Url>
    </item>
";
        $item_str = "";
        foreach ($arr_item as $item) {
            $item_str .= sprintf(
                $itemTpl,
                $this->cdata($item['Title']),
                $this->cdata($item['Description']),
                $this->cdata($item['PicUrl']),
                $this->cdata($item['Url'])
            );
        }

        // The rendered items are passed as an argument, not spliced into the
        // format string: a "%" in a title or URL would otherwise be read as a
        // conversion specifier and break the whole reply.
        $newsTpl = "<xml>
<ToUserName><![CDATA[%s]]></ToUserName>
<FromUserName><![CDATA[%s]]></FromUserName>
<CreateTime>%s</CreateTime>
<MsgType><![CDATA[news]]></MsgType>
<Content><![CDATA[]]></Content>
<ArticleCount>%s</ArticleCount>
<Articles>
%s</Articles>
</xml>";

        $result = sprintf(
            $newsTpl,
            $this->cdata($object->FromUserName),
            $this->cdata($object->ToUserName),
            time(),
            count($arr_item),
            $item_str
        );
        return $result;
    }

    /**
     * Makes a value safe inside a CDATA section by splitting any "]]>" so it
     * cannot close the section early.
     *
     * @param mixed $text value to embed (cast to string)
     * @return string text safe to place between "<![CDATA[" and "]]>"
     */
    private function cdata($text)
    {
        return str_replace(']]>', ']]]]><![CDATA[>', (string) $text);
    }

}
?>

使用方法

图片[1]-爬取实习僧信息并在微信公众号平台展示出来-夏末浅笑

© 版权声明
THE END
喜欢就支持一下吧
点赞8 分享
评论 抢沙发
头像
欢迎您留下宝贵的见解!
提交
头像

昵称

取消
昵称表情代码图片

    暂无评论内容