1. 程式人生 > >2018-02-16 GetSameTypeQuestion

2018-02-16 GetSameTypeQuestion

pca 下載圖片 jpg tar ext index documents 循環 ati

‘目前存在的BUG
‘圖片補丁存在多個URL
Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
Private Declare Function DeleteUrlCacheEntry Lib "wininet" Alias "DeleteUrlCacheEntryA" (ByVal lpszUrlName As String) As Long
Sub DownloadImageName(ByVal ImageURL As String, ByVal ImagePath As String)
    Dim lngRetVal As Long
    lngRetVal = URLDownloadToFile(0, ImageURL, ImagePath, 0, 0)
    If lngRetVal = 0 Then
        DeleteUrlCacheEntry ImageURL  ‘清除緩存
        ‘MsgBox "成功"
    Else
        ‘MsgBox "失敗"
    End If
End Sub
Sub LoopGetSubject()
    Dim StartTime As Variant
    Dim UsedTime As Variant
    StartTime = VBA.Timer
    Dim Sht As Worksheet
    Set Sht = ThisWorkbook.ActiveSheet
    With Sht
        EndRow = .Cells(.Cells.Rows.Count, 1).End(xlUp).Row
        For i = 2 To EndRow
            SetFontRed .Cells(i, 1).Resize(1, 3)
            FindText = Mid(.Cells(i, 3).Text, 4, Len(.Cells(i, 3).Text) - 8)
            ExamUrl = .Cells(i, 2).Text
            Call GetExamTextByUrl(ExamUrl, FindText)
        Next i
    End With
    Set Sht = Nothing
    UsedTime = VBA.Timer - StartTime
    Debug.Print "UsedTime :" & Format(UsedTime, "#0.0000 Seconds")
    MsgBox "UsedTime :" & Format(UsedTime, "#0.0000 Seconds")
End Sub
Sub GetSubject()
    SetFontRed Application.ActiveCell
    FindText = Mid(Application.ActiveCell.Text, 4, Len(Application.ActiveCell.Text) - 8)
    ExamUrl = Application.ActiveCell.Offset(0, -1).Text
    Call GetExamTextByUrl(ExamUrl, FindText)
End Sub
Sub GetExamTextByUrl(ByVal ExamUrl As String, ByVal FindText As String)
    Dim Subject As String
    Dim Question As String
    Dim ImageURL As String
    Dim Answer As String
    Dim HasGetContent As Boolean
    Dim docName As String
    Dim docPath As String
    Dim Independent As Boolean
    Dim IsQuestion As Boolean
    Dim IsAnswer As Boolean
    Dim oneP As Object
    Dim nextTag As Object
    
    ‘send request
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", ExamUrl, False
        .Send
        WebText = .responsetext
        ‘Debug.Print WebText
    End With
    With CreateObject("htmlfile")
        .write WebText
        Set examdiv = .getElementById("sina_keyword_ad_area2")
        ‘獲取試卷文本內容
        ExamText = examdiv.innerText
        ‘判斷試卷是否含有獨立答案
        Independent = ExamText Like "*參考答案*"
        ‘Debug.Print "  Independent "; Independent
        ‘設定搜集題目Word文檔名稱和路徑
        docName = Application.ActiveSheet.Name & "_題目搜集.doc"
        docPath = ThisWorkbook.Path & "\" & docName
        ‘判斷某個段落是否為題目/答案的開始
        IsQuestion = False
        IsAnswer = False
        ‘判斷是否已經提取到內容
        HasGetContent = False
        ‘循環所有段落
        For Each oneP In .getElementsByTagName("p")
            If HasGetContent = False Then
                ‘判斷某段內容是否為題號行
                If oneP.innerText Like "##.*" Or oneP.innerText Like "##.*" Then
                    Subject = ""
                    Question = ""
                    ImageURL = ""
                    Answer = ""
                    ‘開始記錄題幹內容
                    Subject = oneP.innerText
                    ‘Debug.Print OneP.innerText
                Else
                    If InStr(oneP.innerText, FindText) = 0 Then
                        ‘過濾不相幹的問題,僅保留符合條件的問題
                        If Not RegTest(oneP.innerText, "([\((]\d[\))]).*") Then
                            ‘繼續記錄問題內容
                            Subject = Subject & oneP.innerText
                        End If
                    End If
                End If
                ‘提取題目圖片的地址
                Set nextTag = oneP.NextSibling
                If Not nextTag Is Nothing Then
                    If UCase(nextTag.tagName) = "A" Then
                        If nextTag.HasChildNodes Then
                            If nextTag.href Like "http://photo.blog.sina.com.cn/showpic.html*" Then
                                ImageURL = ImageURL & "|" & nextTag.FirstChild.getAttribute("real_src")
                                ‘Debug.Print ImageURL
                            End If
                        End If
                    End If
                End If
                
                ‘提取題目的序號和問題的序號
                If InStr(oneP.innerText, FindText) > 0 Then
                    SubjectIndex = RegGet(Subject, "(\d{1,2})[..].*")
                    Question = oneP.innerText
                    questionIndex = RegGet(Question, "[\((](\d)[\))].*")
                    ‘Debug.Print "題序:"; SubjectIndex; "   問序: "; questionIndex
                    HasGetContent = True
                End If
                
            Else
                ‘提取內容後 開始找答案
                ‘試卷不含獨立答案,答案就附在每道題後面
                If Independent = False Then
                    
                    If IsAnswer = False Then
                        If RegTest(oneP.innerText, "[\((](" & questionIndex & ")[\))].*") Then
                            Answer = oneP.innerText
                            IsAnswer = True
                            ‘Exit For
                        End If
                    Else
                        Debug.Print oneP.innerText
                        If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then
                            Exit For
                        Else
                            Answer = Answer & oneP.innerText
                        End If
                    End If
                    
                    
                    
                    
                Else
                    ‘試卷還有獨立參考答案
                    ‘判斷某段內容的題號是否符合條件
                    If RegTest(oneP.innerText, "(" & SubjectIndex & ")[\..].*") Then
                        IsQuestion = True
                        ‘Debug.Print isQuestion
                    End If
                    If IsQuestion = True Then
                        ‘判斷某段內容的問題序號是否符合條件
                        If IsAnswer = False Then
                            If RegTest(oneP.innerText, "([\((]" & questionIndex & "[\))]).*") Then
                                ‘記錄問題答案
                                Answer = oneP.innerText
                                IsAnswer = True
                                ‘Exit For
                            End If
                        Else
                            Debug.Print oneP.innerText
                            If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then
                                Exit For
                            Else
                                Answer = Answer & oneP.innerText
                            End If
                        End If
                    End If
                End If
            End If
        Next oneP
        ‘圖片地址處理
        ImageURL = Mid(ImageURL, 2)
        ‘測試
        Debug.Print Subject
        Debug.Print ImageURL
        Debug.Print Question
        Debug.Print Answer
    End With
    
    ‘【補丁,有待改進】2017年下半年部分圖片提取不到的問題修正
    If Len(ImageURL) = 0 Then
        hasimagetext = Split(WebText, FindText)(0)
        hasimagetext = Split(hasimagetext, "real_src")(UBound(Split(hasimagetext, "real_src")))
        ImageURL = Split(hasimagetext, """")(1)
    End If
    
    ‘輸出題目內容到Word文檔
    Dim wdApp As Object
    Dim Doc As Object
    
    On Error Resume Next
    Set wdApp = GetObject(, "Word.Application")
    On Error GoTo 0
    If Not wdApp Is Nothing Then
        wdApp.Visible = True
        On Error Resume Next
        Set Doc = wdApp.documents(docName)
        On Error GoTo 0
        If Doc Is Nothing Then
            Set Doc = wdApp.documents.Add()
            Doc.SaveAs docPath
        End If
    Else
        Set wdApp = CreateObject("Word.Application")
        wdApp.Visible = True
        Set Doc = wdApp.documents.Add()
        Doc.SaveAs docPath
    End If
    
    Doc.Activate
    wdApp.Selection.EndKey 6
    wdApp.Selection.TypeParagraph
    wdApp.Selection.InsertBreak 7
    ‘輸出題幹內容
    wdApp.Selection.TypeText Text:=Subject
    wdApp.Selection.TypeParagraph
    
    ‘下載圖片並插入WORD文檔
    If ImageURL <> "" Then
        If InStr(ImageURL, "|") = 0 Then
            ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg"
            DownloadImageName ImageURL, ImagePath
            wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
            wdApp.Selection.TypeParagraph
            Kill ImagePath
            ‘Stop
        Else
            ImageURLs = Split(ImageURL, "|")
            For n = LBound(ImageURLs) To UBound(ImageURLs) Step 1
                ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg"
                DownloadImageName ImageURL, ImagePath
                wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
                wdApp.Selection.TypeParagraph
                Kill ImagePath
            Next n
        End If
    End If
    ‘輸出問題內容
    wdApp.Selection.TypeText Text:=Question
    wdApp.Selection.TypeParagraph
    ‘輸出答案內容
    wdApp.Selection.TypeText Text:="【答案】" & Answer
    wdApp.Selection.TypeParagraph
    Set wdApp = Nothing
    Set Doc = Nothing
    Set oneP = Nothing
End Sub
Private Function RegTest(ByVal OrgText As String, ByVal Pattern As String) As Boolean
‘傳遞參數 :原字符串, 匹配模式
    Dim Regex As Object
    Set Regex = CreateObject("VBScript.RegExp")
    With Regex
        .Global = True
        .Pattern = Pattern
    End With
    RegTest = Regex.test(OrgText)
    Set Regex = Nothing
End Function
Public Function RegGet(ByVal OrgText As String, ByVal Pattern As String) As String
‘傳遞參數 :原字符串, 匹配模式
    Dim Regex As Object
    Dim Mh As Object
    Set Regex = CreateObject("VBScript.RegExp")
    With Regex
        .Global = True
        .Pattern = Pattern
    End With
    If Regex.test(OrgText) Then
        Set Mh = Regex.Execute(OrgText)
        RegGet = Mh.Item(0).submatches(0)
    Else
        RegGet = ""
    End If
    Set Regex = Nothing
End Function
Sub SetFontRed(ByVal Rng As Range)
    With Rng.Font
        .Color = -16776961
        .TintAndShade = 0
    End With
End Sub

  

2018-02-16 GetSameTypeQuestion