2018-02-16 GetSameTypeQuestion
阿新 • • 發佈:2018-02-27
pca 下載圖片 jpg tar ext index documents 循環 ati
‘目前存在的BUG ‘圖片補丁存在多個URL Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long Private Declare Function DeleteUrlCacheEntry Lib "wininet" Alias "DeleteUrlCacheEntryA" (ByVal lpszUrlName As String) As Long Sub DownloadImageName(ByVal ImageURL As String, ByVal ImagePath As String) Dim lngRetVal As Long lngRetVal = URLDownloadToFile(0, ImageURL, ImagePath, 0, 0) If lngRetVal = 0 Then DeleteUrlCacheEntry ImageURL ‘清除緩存 ‘MsgBox "成功" Else ‘MsgBox "失敗" End If End Sub Sub LoopGetSubject() Dim StartTime As Variant Dim UsedTime As Variant StartTime = VBA.Timer Dim Sht As Worksheet Set Sht = ThisWorkbook.ActiveSheet With Sht EndRow = .Cells(.Cells.Rows.Count, 1).End(xlUp).Row For i = 2 To EndRow SetFontRed .Cells(i, 1).Resize(1, 3) FindText = Mid(.Cells(i, 3).Text, 4, Len(.Cells(i, 3).Text) - 8) ExamUrl = .Cells(i, 2).Text Call GetExamTextByUrl(ExamUrl, FindText) Next i End With Set Sht = Nothing UsedTime = VBA.Timer - StartTime Debug.Print "UsedTime :" & Format(UsedTime, "#0.0000 Seconds") MsgBox "UsedTime :" & Format(UsedTime, "#0.0000 Seconds") End Sub Sub GetSubject() SetFontRed Application.ActiveCell FindText = Mid(Application.ActiveCell.Text, 4, Len(Application.ActiveCell.Text) - 8) ExamUrl = Application.ActiveCell.Offset(0, -1).Text Call GetExamTextByUrl(ExamUrl, FindText) End Sub Sub GetExamTextByUrl(ByVal ExamUrl As String, ByVal FindText As String) Dim Subject As String Dim Question As String Dim ImageURL As String Dim Answer As String Dim HasGetContent As Boolean Dim docName As String Dim docPath As String Dim Independent As Boolean Dim IsQuestion As Boolean Dim IsAnswer As Boolean Dim oneP As Object Dim nextTag As Object ‘send request With CreateObject("MSXML2.XMLHTTP") .Open "GET", ExamUrl, False .Send WebText = .responsetext ‘Debug.Print WebText End With With CreateObject("htmlfile") .write WebText Set examdiv = .getElementById("sina_keyword_ad_area2") ‘獲取試卷文本內容 ExamText = examdiv.innerText ‘判斷試卷是否含有獨立答案 Independent = ExamText Like "*參考答案*" ‘Debug.Print " Independent "; Independent ‘設定搜集題目Word文檔名稱和路徑 docName = Application.ActiveSheet.Name & "_題目搜集.doc" docPath = ThisWorkbook.Path & "\" & docName ‘判斷某個段落是否為題目/答案的開始 IsQuestion = False IsAnswer = False ‘判斷是否已經提取到內容 HasGetContent = False ‘循環所有段落 For Each oneP In .getElementsByTagName("p") If HasGetContent = False Then ‘判斷某段內容是否為題號行 If oneP.innerText Like "##.*" Or oneP.innerText Like "##.*" Then Subject = "" Question = "" ImageURL = "" Answer = "" ‘開始記錄題幹內容 Subject = oneP.innerText ‘Debug.Print OneP.innerText Else If InStr(oneP.innerText, FindText) = 0 Then ‘過濾不相幹的問題,僅保留符合條件的問題 If Not RegTest(oneP.innerText, "([\((]\d[\))]).*") Then ‘繼續記錄問題內容 Subject = Subject & oneP.innerText End If End If End If ‘提取題目圖片的地址 Set nextTag = oneP.NextSibling If Not nextTag Is Nothing Then If UCase(nextTag.tagName) = "A" Then If nextTag.HasChildNodes Then If nextTag.href Like "http://photo.blog.sina.com.cn/showpic.html*" Then ImageURL = ImageURL & "|" & nextTag.FirstChild.getAttribute("real_src") ‘Debug.Print ImageURL End If End If End If End If ‘提取題目的序號和問題的序號 If InStr(oneP.innerText, FindText) > 0 Then SubjectIndex = RegGet(Subject, "(\d{1,2})[..].*") Question = oneP.innerText questionIndex = RegGet(Question, "[\((](\d)[\))].*") ‘Debug.Print "題序:"; SubjectIndex; " 問序: "; questionIndex HasGetContent = True End If Else ‘提取內容後 開始找答案 ‘試卷不含獨立答案,答案就附在每道題後面 If Independent = False Then If IsAnswer = False Then If RegTest(oneP.innerText, "[\((](" & questionIndex & ")[\))].*") Then Answer = oneP.innerText IsAnswer = True ‘Exit For End If Else Debug.Print oneP.innerText If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then Exit For Else Answer = Answer & oneP.innerText End If End If Else ‘試卷還有獨立參考答案 ‘判斷某段內容的題號是否符合條件 If RegTest(oneP.innerText, "(" & SubjectIndex & ")[\..].*") Then IsQuestion = True ‘Debug.Print isQuestion End If If IsQuestion = True Then ‘判斷某段內容的問題序號是否符合條件 If IsAnswer = False Then If RegTest(oneP.innerText, "([\((]" & questionIndex & "[\))]).*") Then ‘記錄問題答案 Answer = oneP.innerText IsAnswer = True ‘Exit For End If Else Debug.Print oneP.innerText If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then Exit For Else Answer = Answer & oneP.innerText End If End If End If End If End If Next oneP ‘圖片地址處理 ImageURL = Mid(ImageURL, 2) ‘測試 Debug.Print Subject Debug.Print ImageURL Debug.Print Question Debug.Print Answer End With ‘【補丁,有待改進】2017年下半年部分圖片提取不到的問題修正 If Len(ImageURL) = 0 Then hasimagetext = Split(WebText, FindText)(0) hasimagetext = Split(hasimagetext, "real_src")(UBound(Split(hasimagetext, "real_src"))) ImageURL = Split(hasimagetext, """")(1) End If ‘輸出題目內容到Word文檔 Dim wdApp As Object Dim Doc As Object On Error Resume Next Set wdApp = GetObject(, "Word.Application") On Error GoTo 0 If Not wdApp Is Nothing Then wdApp.Visible = True On Error Resume Next Set Doc = wdApp.documents(docName) On Error GoTo 0 If Doc Is Nothing Then Set Doc = wdApp.documents.Add() Doc.SaveAs docPath End If Else Set wdApp = CreateObject("Word.Application") wdApp.Visible = True Set Doc = wdApp.documents.Add() Doc.SaveAs docPath End If Doc.Activate wdApp.Selection.EndKey 6 wdApp.Selection.TypeParagraph wdApp.Selection.InsertBreak 7 ‘輸出題幹內容 wdApp.Selection.TypeText Text:=Subject wdApp.Selection.TypeParagraph ‘下載圖片並插入WORD文檔 If ImageURL <> "" Then If InStr(ImageURL, "|") = 0 Then ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg" DownloadImageName ImageURL, ImagePath wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True wdApp.Selection.TypeParagraph Kill ImagePath ‘Stop Else ImageURLs = Split(ImageURL, "|") For n = LBound(ImageURLs) To UBound(ImageURLs) Step 1 ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg" DownloadImageName ImageURL, ImagePath wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True wdApp.Selection.TypeParagraph Kill ImagePath Next n End If End If ‘輸出問題內容 wdApp.Selection.TypeText Text:=Question wdApp.Selection.TypeParagraph ‘輸出答案內容 wdApp.Selection.TypeText Text:="【答案】" & Answer wdApp.Selection.TypeParagraph Set wdApp = Nothing Set Doc = Nothing Set oneP = Nothing End Sub Private Function RegTest(ByVal OrgText As String, ByVal Pattern As String) As Boolean ‘傳遞參數 :原字符串, 匹配模式 Dim Regex As Object Set Regex = CreateObject("VBScript.RegExp") With Regex .Global = True .Pattern = Pattern End With RegTest = Regex.test(OrgText) Set Regex = Nothing End Function Public Function RegGet(ByVal OrgText As String, ByVal Pattern As String) As String ‘傳遞參數 :原字符串, 匹配模式 Dim Regex As Object Dim Mh As Object Set Regex = CreateObject("VBScript.RegExp") With Regex .Global = True .Pattern = Pattern End With If Regex.test(OrgText) Then Set Mh = Regex.Execute(OrgText) RegGet = Mh.Item(0).submatches(0) Else RegGet = "" End If Set Regex = Nothing End Function Sub SetFontRed(ByVal Rng As Range) With Rng.Font .Color = -16776961 .TintAndShade = 0 End With End Sub
2018-02-16 GetSameTypeQuestion