text scraperwiki数据

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了text scraperwiki数据相关的知识,希望对你有一定的参考价值。

Option Explicit
Public Sub ousefulMashup()
    ' thanks to tony hirst for the data and method.
    ' http://blog.ouseful.info/2013/05/05/questioning-election-data-to-see-if-it-has-a-story-to-tell/
    Dim ds As cDataSet, dr As cDataRow, a As Variant, _
        worksheetName As String, scraperName As String, _
        job As cJobject, joc As cJobject, inWard As String, _
        n As Long
    worksheetName = "questionElection"
    scraperName = "iw_poll_notices_scrape"
    
    ' get data from Tony's scraperwiki and populate sheet
    With scraperWikiStuff(scraperName, worksheetName)
        Set ds = New cDataSet
        ds.load worksheetName
    End With
    
    ' add extra columns
    With lastCell(ds.headingRow.where)
        .Offset(, 1).value = "postcode"
        .Offset(, 2).value = "in ward"
    End With
    ds.tearDown
    
    ' repopulate with new columns
    Set ds = New cDataSet
    With ds.load(worksheetName)
        ' extract post code
        For Each dr In ds.rows
            a = Split(dr.toString("address"), ",")
            If arrayLength(a) > 0 Then
                dr.cell("postcode").value = Trim(CStr(a(UBound(a))))
            End If
        Next dr
        .bigCommit
        .tearDown
    End With
    
    ' use mysociety api to get ward info
    ' these options will not bother trying to populate
    
    With restQuery(worksheetName, "my society", , "postcode", _
        , , , False, False)
        ' check for jobjects of type UTE
        n = 0
        For Each job In .jObjects
            n = n + 1
            inWard = "out"
            If Not job.childExists("areas") Is Nothing Then
                For Each joc In job.child("areas").children
                    If Not joc.childExists("type") Is Nothing Then
                        If joc.child("type").value = "UTE" Then
                        ' we have the right type, check name matches
                            If makeKey(joc.child("name").value) = _
                                makeKey(.dSet.value(n, "ward")) Then
                                inWard = "in"
                                Exit For
                            End If
                        End If
                    End If
                Next joc
                ' mark whether its in our out
                .dSet.cell(n, "in ward").value = inWard
            End If
        Next job
        .dSet.bigCommit
        .tearDown
    End With

End Sub
Public Sub swSeewhatworks()
    Dim ds As New cDataSet, dr As cDataRow
    ds.populateData wholeSheet("scraperwiki"), , , , , , True
    Application.Calculation = xlCalculationManual
    For Each dr In ds.rows
        dr.where.Resize(, 1).Offset(, dr.columns.count).value = _
            swGetDefaultTableSql(dr.toString("short_name"), False)
    Next dr
    Application.Calculation = xlCalculationAutomatic
    Set ds = Nothing
End Sub
Public Sub testScraperWikiInput()
    testScraperWikiData InputBox("shortname?")
End Sub
Public Sub testScraperWikiData(shortName As String)
    scraperWikiStuff shortName, "scraperwikidata"
End Sub

Private Function swGetTables(shortName As String) As cRest

    Const tableDirectory = "SELECT name FROM sqlite_master " & _
        "WHERE type IN ('table','view') AND name NOT LIKE 'sqlite_%' " & _
        "Union all " & _
        "SELECT name FROM sqlite_temp_master " & _
        "WHERE type IN ('table','view') " & _
        "ORDER BY 1"
    ' lets see if we can get the tables that exist in this shaperwiki
    Set swGetTables = restQuery(, "scraperwikidata", _
       shortName & "&query=" & tableDirectory, , , , , False)
    
    
End Function
Private Function swGetDefaultTableSql(shortName As String, Optional complain As Boolean = True) As String
    ' this will look up to see what tables are defined in a given scraperwiki
    Dim s As String, cr As cRest
    Set cr = swGetTables(shortName)
    If cr Is Nothing Then
        MsgBox ("could get info on " & shortName)
    Else
      
      If cr.jObject.hasChildren Then
        ' this is hokey - for the moment just take from the first table found
        swGetDefaultTableSql = "select * from '" & _
                cr.jObject.children(1).child("name").toString & "'"
      Else
        If complain Then MsgBox ("could not find any valid tables for " & _
            shortName & "(" & cr.jObject.serialize & ")")
      End If
    End If
End Function
Private Function scraperWikiStuff(shortName As String, ws As String, _
            Optional optSql As String = vbNullString, Optional optLimit As Long = 0) As cDataSet
    Dim cr As cRest, ds As cDataSet, job As cJobject, r As Range, _
        cj As cJobject, headJob As cJobject, sql As String, limit As String

    sql = optSql
    If sql = vbNullString Then
        sql = swGetDefaultTableSql(shortName)
    End If
    If optLimit <> 0 Then
        limit = "&limit=" & CStr(optLimit)
    End If
   
    ' get the data
    Set cr = restQuery(, "scraperwikidata", _
       shortName & "&query=" & sql & limit, , , , , False)
    
    ' now organize it
    If Not cr Is Nothing Then
       ' get the unique headers and put them to a clean data set
       Set headJob = swGetHeaders(cr.jObject)
       If headJob Is Nothing Then
            MsgBox ("didnt work at all " & cr.jObject.serialize)
       
       Else
            Set ds = swCleanSheet(headJob, ws)
            If ds Is Nothing Then
                MsgBox ("failed to get the expected data " & cr.jObject.serialize)
            Else
                Application.Calculation = xlCalculationManual
                With ds
                    Set r = firstCell(.headingRow.where)
                    ' this is the data returned - each array member is a row
                    For Each cj In cr.jObject.children
                        Set r = r.Offset(1)
                        ' each child is a column
                        For Each job In cj.children
                         r.Offset(, .headingRow.exists(job.key).column - 1).value = job.value
                        Next job
                    Next cj
                     ' repopulate
                    Set scraperWikiStuff = .rePopulate
                End With
                Application.Calculation = xlCalculationAutomatic
            End If
        End If
    End If
End Function
Private Function swCleanSheet(job As cJobject, ws As String) As cDataSet
    ' put headers to a clean sheet
    Dim ds As New cDataSet, cj As cJobject, r As Range
    Set r = firstCell(wholeSheet(ws))
    r.Worksheet.Cells.ClearContents
    ' these are the headings
    
    If job.children.count > 0 Then
        For Each cj In job.children
            r.Offset(, cj.childIndex - 1).value = cj.key
        Next cj
        ' create a data set
        
        Set swCleanSheet = ds.populateData(r.Resize(1, job.children.count))
    End If
End Function
Private Function swGetHeaders(job As cJobject) As cJobject
    ' take scraper wiki data and generate an organized dataset using the headers found
    Dim cj As cJobject, jo As cJobject, cjKeys As New cJobject
    With cjKeys.init(Nothing)
        For Each cj In job.children
            For Each jo In cj.children
            ' we can use a cjobject as a collection
                .add jo.key
            Next jo
        Next cj
    End With
    Set swGetHeaders = cjKeys
End Function

以上是关于text scraperwiki数据的主要内容,如果未能解决你的问题,请参考以下文章

如何在 Windows 上安装 Poppler?

通过 Python 中的下载按钮执行下载

mssql sqlserver text数据类型专题说明

MySQL 中的 BLOB 和 TEXT 数据类型有啥区别?

MAX(text) 返回操作数数据类型 text 对于 max 运算符无效。在 sql server 2008 中

数据量过大,导致数据库text存不下