This article shows how to use Microsoft.XMLHTTP in ASP to fetch web page content (without garbled characters) and extract only the required part.
Using Microsoft.XMLHTTP in ASP to fetch web page content (without garbled characters) and filter out the required content
Sample source code:
The code is as follows:
<%
' Page entry point.
' Fetches the page named by the "u" query-string parameter, extracts the
' article body between two markers, removes boilerplate phrases and anchor
' tags, and writes the result to the response.
Dim xmlUrl,http,strHTML,strBody
xmlUrl = Trim(Request.QueryString("u"))

' Reject a missing parameter or a non-HTTP(S) URL before handing it to XMLHTTP.
' NOTE(review): fetching a caller-supplied URL from the server is an SSRF risk;
' the scheme check below does not stop requests to internal hosts. Consider
' restricting "u" to an allow-list of hosts.
If LCase(Left(xmlUrl,7)) <> "http://" And LCase(Left(xmlUrl,8)) <> "https://" Then
    Response.Status = "400 Bad Request"
    Response.Write "Missing or invalid ""u"" parameter."
Else
    ' The third argument of Open is False, so the request is synchronous:
    ' Send does not return until the response has arrived.
    ' NOTE(review): the request is a POST with no body; a GET would be the
    ' usual verb for reading a page - confirm what the target site expects.
    Set http = server.CreateObject("Microsoft.XMLHTTP")
    http.Open "POST",xmlUrl,false
    http.setrequestheader "User-Agent", "Mozilla/4.0"
    http.setrequestheader "Connection", "Keep-Alive"
    http.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
    http.Send()

    If http.Status <> 200 Then
        ' Do not try to parse an error page as if it were the article.
        Response.Status = "502 Bad Gateway"
        Response.Write "Remote server returned HTTP status " & http.Status & "."
        Set http = Nothing
    Else
        ' Decode the raw bytes explicitly to avoid garbled characters.
        strHTML = BytesToBstr(http.ResponseBody)
        Set http = Nothing

        ' Capture the main content between the opening marker and the next "</div>".
        ' NOTE(review): the start marker below looks garbled (it is not a valid
        ' opening <div ...> tag) - confirm it against the target page's markup.
        strBody = GetBody(strHTML,"<divDiv_newsContentc""cnt"">","</div>",0,0)

        If strBody = "$False$" Then
            ' GetBody signals "markers not found" with this sentinel; do not echo it.
            Response.Write "Content not found."
        Else
            ' Strip the site's attribution boilerplate.
            strBody =Replace(strBody,"(This article was first published in ","")
            strBody =Replace(strBody,"Wealth Power Network</a>, please indicate the source for reprinting.)","")
            strBody =Replace(strBody,"This article was first published in, please indicate the source when reprinting.)","")
            strBody =Replace(strBody,"Wealth Power Network</a>:","")
            strBody =Replace(strBody,"This article was first published in","")
            Response.Write RegRemoveHref(strBody)
        End If
    End If
End If
' Decodes a raw byte array (e.g. XMLHTTP.ResponseBody) into a string.
' The bytes are written to an ADODB.Stream in binary mode, then the stream
' is switched to text mode and read back using the UTF-8 charset. Decoding
' explicitly like this avoids the garbled characters seen when the response
' text is used directly for pages containing non-ASCII (e.g. Chinese) text.
'   body    - byte array to decode
'   returns - the decoded text
Function BytesToBstr(body)
    Const STREAM_TYPE_BINARY = 1
    Const STREAM_TYPE_TEXT = 2
    Const STREAM_MODE_READWRITE = 3
    Dim stream
    Set stream = Server.CreateObject("adodb.stream")
    With stream
        ' Load the bytes in binary mode.
        .Type = STREAM_TYPE_BINARY
        .Mode = STREAM_MODE_READWRITE
        .Open
        .Write body
        ' Rewind, then reinterpret the same bytes as text.
        .Position = 0
        .Type = STREAM_TYPE_TEXT
        .Charset = "UTF-8"
        BytesToBstr = .ReadText
        .Close
    End With
    Set stream = Nothing
End Function
' Returns the part of ConStr that lies between the first occurrence of
' StartStr and the next occurrence of OverStr. Matching is a plain,
' case-insensitive substring search (no regular expressions); the returned
' text keeps the original casing of ConStr.
'   ConStr   - text to search
'   StartStr - opening marker
'   OverStr  - closing marker
'   IncluL   - when equal to False the opening marker is excluded from the
'              result, otherwise it is included
'   IncluR   - when equal to True the closing marker is included in the result
'   returns  - the extracted text, or the sentinel "$False$" when any argument
'              is empty/Null, a marker is not found, or nothing lies between
'              the computed start and the closing marker
Function GetBody(ConStr,StartStr,OverStr,IncluL,IncluR)
    Const NOT_FOUND = "$False$"
    Dim haystack, startMark, overMark
    Dim Start, Over

    ' Null checks come first so the string comparisons below never see Null.
    If IsNull(ConStr) Or IsNull(StartStr) Or IsNull(OverStr) Then
        GetBody = NOT_FOUND
        Exit Function
    End If
    If ConStr = NOT_FOUND Or ConStr = "" Or StartStr = "" Or OverStr = "" Then
        GetBody = NOT_FOUND
        Exit Function
    End If

    ' Work on lower-cased copies. Arguments are passed ByRef in VBScript, so
    ' the parameters themselves must not be reassigned.
    haystack = LCase(ConStr)
    startMark = LCase(StartStr)
    overMark = LCase(OverStr)

    ' Character-based InStr/Len/Mid are used instead of the byte-based
    ' InStrB/LenB/MidB, which can match at a misaligned byte offset inside a
    ' two-byte character.
    Start = InStr(1, haystack, startMark, vbBinaryCompare)
    If Start <= 0 Then
        GetBody = NOT_FOUND
        Exit Function
    End If
    If IncluL = False Then
        Start = Start + Len(startMark)
    End If

    Over = InStr(Start, haystack, overMark, vbBinaryCompare)
    If Over <= 0 Or Over <= Start Then
        GetBody = NOT_FOUND
        Exit Function
    End If
    If IncluR = True Then
        Over = Over + Len(overMark)
    End If

    GetBody = Mid(ConStr, Start, Over - Start)
End Function
' Strips anchor tags from an HTML fragment while keeping their inner content,
' then removes any leftover href="http://www.927953.com" attribute text.
'   HTMLstr - HTML fragment to clean
'   returns - the fragment with every <a ...>...</a> replaced by its inner content
Function RegRemoveHref(HTMLstr)
    Dim ra
    Set ra = New RegExp
    ra.IgnoreCase = True
    ra.Global = True
    ' \b keeps tags such as <abbr> or <address> from being treated as anchors.
    ' [\s\S] is used instead of "." so anchors spanning several lines match.
    ' The closing tag is "</a>" with a single slash.
    ra.Pattern = "<a\b[^>]*>([\s\S]*?)<\/a>"
    RegRemoveHref = Replace(ra.Replace(HTMLstr,"$1"),"href=""http://www.927953.com""","")
    Set ra = Nothing
End Function
%>
The rendered result is as follows: