Datenscraping mittels PowerQuery und automatisierung mit VBA
12.08.2024 14:08:54
MoritzBernhard
Hier der aktuelle VBA-Code:
Sub CreateSheetsForBranchenandBL()
Dim ws As Worksheet
Dim searchTermsWs As Worksheet
Dim newWs As Worksheet
Dim lastRow As Long
Dim i As Long
Dim searchTerm As String
Dim bundesland As String
Dim queryName As String
Dim Workbook As Workbook
Dim powerQueryCode As String
Dim powerQueryCodePart1 As String
Dim powerQueryCodePart2 As String
Dim powerQueryCodePart3 As String
Dim existingSheet As Worksheet
Dim newQueryName As String
' Set the workbook and search terms worksheet
Set Workbook = ThisWorkbook
Set searchTermsWs = Workbook.Sheets("Bundeslandbranchen") ' Name des Blatts mit den Suchbegriffen
' Find the last row with data in column A
lastRow = searchTermsWs.Cells(searchTermsWs.Rows.Count, 1).End(xlUp).Row
' Loop through each cell in column A
For i = 2 To 3
' Get the search term
searchTerm = searchTermsWs.Cells(i, 1).Value
' Check if the worksheet for the current search term already exists
On Error Resume Next
Set existingSheet = Workbook.Sheets(searchTerm)
On Error GoTo 0
' If the sheet does not exist, create it
If existingSheet Is Nothing Then
Set newWs = Workbook.Sheets.Add(After:=Workbook.Sheets(Workbook.Sheets.Count))
newWs.Name = searchTerm
Else
' If the sheet exists, clear previous data
existingSheet.Cells.Clear
Set newWs = existingSheet
End If
' Loop through each cell in column B to get bundesland values
Dim j As Long
Dim startCol As Long
Dim lastBundeslandRow As Long
startCol = 1 ' Start column for the first table
For j = 2 To 3
' Get bundesland and ensure it matches the current searchTerm
bundesland = searchTermsWs.Cells(j, 2).Value
If searchTermsWs.Cells(j, 1).Value = searchTerm Then
' Generate a unique query name for each bundesland
newQueryName = searchTerm & "_" & bundesland
' Define the Power Query M code parts
powerQueryCodePart1 = _
"let" & vbCrLf & _
" LoadData = (searchTerm as text, bundesland as text) =>" & vbCrLf & _
" let" & vbCrLf & _
" LoadPage = (page as number) =>" & vbCrLf & _
" let" & vbCrLf & _
" Url = ""https://firmen.wko.at/"" & searchTerm & ""/"" & bundesland & ""?page="" & Text.From(page)," & vbCrLf & _
" Source = Web.Contents(Url)," & vbCrLf & _
" HtmlContent = Text.FromBinary(Source)," & vbCrLf & _
" ParsedHtml = Html.Table(HtmlContent, {{" & vbCrLf & _
" ""Firmenlink"", "".title-link"", each [Attributes][href]" & vbCrLf & _
" }})," & vbCrLf & _
" RenamedColumns = Table.RenameColumns(ParsedHtml, {{" & vbCrLf & _
" ""Firmenlink"", ""Firmenlink""" & vbCrLf & _
" }})"
powerQueryCodePart2 = _
" in" & vbCrLf & _
" RenamedColumns," & vbCrLf & _
" LoadPagesInBlocks = (startPage as number, endPage as number) as table =>" & vbCrLf & _
" let" & vbCrLf & _
" PageNumbers = {startPage..endPage}," & vbCrLf & _
" LoadPages = List.Transform(PageNumbers, each LoadPage(_))," & vbCrLf & _
" CombinedTable = Table.Combine(LoadPages)" & vbCrLf & _
" in" & vbCrLf & _
" CombinedTable," & vbCrLf & _
" BlockSize = 1," & vbCrLf & _
" PageBlocks = List.Transform({1..2}, each _ * BlockSize - (BlockSize - 1))," & vbCrLf & _
" LoadAllBlocks = List.Transform(PageBlocks, each LoadPagesInBlocks(_, _ + (BlockSize - 1)))," & vbCrLf & _
" CombinedAllBlocks = Table.Combine(LoadAllBlocks)," & vbCrLf & _
" TransformedTable = Table.TransformColumnTypes(CombinedAllBlocks, {{" & vbCrLf & _
" ""Firmenlink"", type text" & vbCrLf & _
" }})"
powerQueryCodePart3 = _
" in" & vbCrLf & _
" TransformedTable" & vbCrLf & _
"in" & vbCrLf & _
" LoadData(""" & searchTerm & """, """ & bundesland & """)"
' Combine the parts into one complete M code string
powerQueryCode = powerQueryCodePart1 & vbCrLf & powerQueryCodePart2 & vbCrLf & powerQueryCodePart3
' Delete the query if it already exists
On Error Resume Next
Workbook.Queries(newQueryName).Delete
On Error GoTo 0
' Add a new query
Workbook.Queries.Add newQueryName, powerQueryCode
' Add the query table to the worksheet in the current column
With newWs.ListObjects.Add(SourceType:=xlSrcExternal, _
Source:="OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=" & newQueryName & ";Extended Properties=''", _
Destination:=newWs.Cells(1, startCol)).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [" & newQueryName & "]")
.Refresh BackgroundQuery:=False
End With
' Adjust start column for the next table
startCol = newWs.Cells(1, newWs.Columns.Count).End(xlToLeft).Column + 2
End If
Next j
Next i
Exit Sub
ErrorHandler:
MsgBox "Fehler bei der Verarbeitung von Suchbegriff: " & searchTerm & vbCrLf & _
"Fehlermeldung: " & Err.Description
Resume Next
End Sub
Anzeige