Automate Invoice Data Entry – Part 2

Prompts

Raw Prompt to Send to Gemini

Extract ALL invoice details across ALL pages.
Return ONLY valid JSON in this structure: 
{
  "InvoiceNumber": "",
  "Date": "",
  "Supplier": "",
  "Total": 0,
  "LineItems": [
    {
      "Description": "",
      "Quantity": 0,
      "UnitPrice": 0,
      "Amount": 0
    }
  ]
}
Combine line items from all pages. JSON only.

File Picker

You are a Senior VBA developer.
Write a sub-procedure to choose one PDF file from a Folder on my computer.
The value of the folder will be stored within a public constant: INVOICE_FOLDER
Capture the full file name within a variable: filename
Follow the below guidelines when building the code:
Assign meaningful names to any variables.
Use camel case to name variables.
Don't add error handling unless explicitly stated.
Don't add message boxes unless explicitly stated.

Convert PDF to PNG Using Ghostscript

You are a Senior VBA developer. 
Write a VBA subroutine named ConvertPdfToPng_MultiPage that uses Ghostscript to convert a PDF into one or more PNG files. 
The path to the PDF folder path will be stored in a constant: INVOICE_FOLDER
The path to the Ghostscript executable file will be stored in a constant: GHOSTSCRIPT_PATH
Follow the below guidelines when building the code:
Assign meaningful names to any variables. 
Use camel case to name variables.
Don't add error handling unless explicitly stated.
Don't add message boxes unless explicitly stated.

Loop over VBA Collection

You are a Senior VBA developer. 
Write a VBA function named GetAllGeneratedPages that loops over every PNG file in folder and stores the full file path in a collection.
The folder path will be stored in a constant: INVOICE_FOLDER
Follow the below guidelines when building the code:
Don't create any constants.
Assign meaningful names to any variables.
Use camel case to name variables.
Don't add error handling unless explicitly stated.
Don't add message boxes unless explicitly stated.

Convert PNG to Base64

Generate two VBA functions that convert an image file into a Base64 string. 
The first function must accept a file path, use ADODB.Stream to read the file in binary mode, load the bytes into a byte array, and pass that array into a second helper function. 
The helper function must take a byte array and return a Base64-encoded string using an MSXML DOM element with the bin.base64 data type. 
Use late binding for all objects and ensure the Base64 output contains no line breaks. 
Keep all function names and parameters exactly as follows: 
EncodeImageToBase64(imagePath As String) As String 
EncodeBase64FromByteArray(bytes() As Byte) As String 
Output the complete VBA code for both functions.

FULL CODE

Option Explicit

Const INVOICE_FOLDER As String = "C:\Youtube\Current\Archive\VBA_Masterclass\Course\LLM\Sample\Invoices\"
Const GEMINI_API_URL As String = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key="
Const GHOSTSCRIPT_PATH As String = """C:\Program Files\gs\gs10.05.1\bin\gswin64c.exe"""

Sub MultiLineInvoiceReader()

    Dim fileDialog As fileDialog
    Dim selectedFile As String
    
    PrepareInvoiceSheet
    ' Create a File Picker dialog
    Set fileDialog = Application.fileDialog(msoFileDialogFilePicker)
    
    ' Configure dialog properties
    With fileDialog
        .Title = "Select a PDF File"
        .InitialFileName = INVOICE_FOLDER
        .AllowMultiSelect = False
        .Filters.Clear
        .Filters.Add "PDF Files", "*.pdf"
        
        If .Show = -1 Then
            selectedFile = .SelectedItems(1)
            Else
            MsgBox "No file selected."
            Exit Sub
        End If
    End With
    
    ' Store the selected file path in variable: filename
    Dim fileName As String
    fileName = selectedFile
    fileName = Dir(fileName)
    
    ConvertPdfToPng_MultiPage fileName
    
    Dim pageList As Collection
    Set pageList = GetAllGeneratedPages()
    
    Dim PNGAttachments As String
    PNGAttachments = TransformPNGtoBase64(pageList)
    DeleteTemopraryImages pageList
    
    Dim rawJsonResponse As String
    rawJsonResponse = CallGemini_MultiPage(PNGAttachments)
    
    Dim cleanJsonResponse As String
    cleanJsonResponse = ExtractCleanJSONFromGeminiResponse(rawJsonResponse)
    
    WriteInvoiceHeaderandLines cleanJsonResponse
    
    ThisWorkbook.Sheets("Invoices").Activate
End Sub

Sub PrepareInvoiceSheet()
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets("Invoices")
    ws.Cells.Clear
    ws.Range("A1:H1").Value = Array("InvoiceNumber", "Date", "Supplier", "Total", _
                                            "Description", "Qty", "UnitPrice", "Amount")
    ws.Rows(1).Font.Bold = True
End Sub

Public Sub ConvertPdfToPng_MultiPage(ByVal pdfFileName As String)
    Dim ghostscriptExePath As String
    ghostscriptExePath = GHOSTSCRIPT_PATH
    
    Dim invoiceFolderPath As String
    invoiceFolderPath = INVOICE_FOLDER
    
    Dim inputPdfPath As String
    inputPdfPath = invoiceFolderPath & "\" & pdfFileName
    
    Dim outputPattern As String
    outputPattern = invoiceFolderPath & "\page_%03d.png"
    
    Dim ghostscriptArgs As String
    ghostscriptArgs = " -dNOPAUSE -dBATCH -sDEVICE=png16m" & _
                      " -r300" & _
                      " -sOutputFile=" & Chr(34) & outputPattern & Chr(34) & _
                      " " & Chr(34) & inputPdfPath & Chr(34)
    
    Dim shellCommand As String
    shellCommand = ghostscriptExePath & ghostscriptArgs
    
    Dim shellObj As Object
    Set shellObj = CreateObject("WScript.Shell")
    
    shellObj.Run shellCommand, 0, True
End Sub

Public Function GetAllGeneratedPages() As Collection
    Dim pngFiles As New Collection
    Dim fileName As String
    Dim fullFilePath As String
    
    fileName = Dir(INVOICE_FOLDER & "\*.png")
    
    Do While fileName <> ""
        fullFilePath = INVOICE_FOLDER & "\" & fileName
        pngFiles.Add fullFilePath
        fileName = Dir
    Loop
    
    Set GetAllGeneratedPages = pngFiles
End Function

Function EncodeImageToBase64(imagePath As String) As String
    Dim fileStream As Object
    Dim binaryBytes() As Byte

    Set fileStream = CreateObject("ADODB.Stream")
    fileStream.Type = 1                     'Binary
    fileStream.Open
    fileStream.LoadFromFile imagePath
    binaryBytes = fileStream.Read
    fileStream.Close

    EncodeImageToBase64 = EncodeBase64FromByteArray(binaryBytes)
End Function


Function EncodeBase64FromByteArray(bytes() As Byte) As String
    Dim xmlObj As Object
    Dim xmlNode As Object

    Set xmlObj = CreateObject("MSXML2.DOMDocument")
    Set xmlNode = xmlObj.createElement("Base64Data")

    xmlNode.DataType = "bin.base64"
    xmlNode.NodeTypedValue = bytes

    'Remove any line breaks from Base64 output
    EncodeBase64FromByteArray = Replace(xmlNode.Text, vbLf, "")
    EncodeBase64FromByteArray = Replace(EncodeBase64FromByteArray, vbCr, "")
End Function

Function TransformPNGtoBase64(pageList As Collection) As String

    Dim imgBase64 As String
    Dim parts As String
    Dim i As Long
    
    parts = ""
    For i = 1 To pageList.Count
        imgBase64 = EncodeImageToBase64(pageList(i))
        parts = parts & "{""inline_data"":{""mime_type"":""image/png"",""data"":""" & imgBase64 & """}},"
    Next i
    
    TransformPNGtoBase64 = parts

End Function


Sub DeleteTemopraryImages(pageList As Collection)
    Dim i As Long
    On Error Resume Next
    For i = 1 To pageList.Count
        Kill pageList(i)
    Next i

End Sub

Function CallGemini_MultiPage(PNGAttachments As String) As String

    Dim http As Object
    Dim requestJson As String
    Dim parts As String
    
    parts = PNGAttachments
    
    parts = parts & "{""text"":""" & _
    "Extract ALL invoice details across ALL pages. " & _
    "Return ONLY valid JSON in this structure: " & _
    "{ 'InvoiceNumber':'','Date':'','Supplier':'','Total':0," & _
    "'LineItems':[{'Description':'','Quantity':0,'UnitPrice':0,'Amount':0}] }. " & _
    "Combine line items from all pages. JSON only.""}"
    
    requestJson = "{""contents"":[{""parts"":[" & parts & "]}]}"
    
    Set http = CreateObject("WinHttp.WinHttpRequest.5.1")
    http.Open "POST", GEMINI_API_URL & GEMINI_API_KEY, False
    http.setRequestHeader "Content-Type", "application/json"
    http.Send requestJson
    CallGemini_MultiPage = http.responseText
    
End Function

Function ExtractCleanJSONFromGeminiResponse(jsonResponse As String) As String
    Dim parsed As Object
    Dim rawText As String
    Set parsed = JsonConverter.ParseJson(jsonResponse)
    rawText = parsed("candidates")(1)("content")("parts")(1)("text")
    rawText = Replace(rawText, "```json", "")
    rawText = Replace(rawText, "```", "")
    ExtractCleanJSONFromGeminiResponse = rawText
End Function

Sub WriteInvoiceHeaderandLines(cleanJSON As String)

    Dim ws As Worksheet
    Dim json As Object
    Dim rowHeader As Long
    Dim item As Object
    Set ws = ThisWorkbook.Sheets("Invoices")
    
    Set json = JsonConverter.ParseJson(cleanJSON)
    Dim r As Long
    r = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row + 1
    For Each item In json("LineItems")
        ws.Cells(r, 1).Value = json("InvoiceNumber")
        ws.Cells(r, 2).Value = json("Date")
        ws.Cells(r, 3).Value = json("Supplier")
        ws.Cells(r, 4).Value = json("Total")
        
        ws.Cells(r, 5).Value = item("Description")
        ws.Cells(r, 6).Value = item("Quantity")
        ws.Cells(r, 7).Value = item("UnitPrice")
        ws.Cells(r, 8).Value = item("Amount")
        r = r + 1
    Next item

End Sub