Visit my Youtube Channel
Automate Invoice Data Entry – Part 2
Prompts
Raw Prompt to Send to Gemini
Extract ALL invoice details across ALL pages.
Return ONLY valid JSON in this structure:
{
"InvoiceNumber": "",
"Date": "",
"Supplier": "",
"Total": 0,
"LineItems": [
{
"Description": "",
"Quantity": 0,
"UnitPrice": 0,
"Amount": 0
}
]
}
Combine line items from all pages. JSON only.
File Picker
You are a Senior VBA developer.
Write a sub-procedure to choose one PDF file from a Folder on my computer.
The value of the folder will be stored within a public constant: INVOICE_FOLDER
Capture the full file name within a variable: filename
Follow the below guidelines when building the code:
Assign meaningful names to any variables.
Use camel case to name variables.
Don't add error handling unless explicitly stated.
Don't add message boxes unless explicitly stated.
Convert PDF to PNG Using Ghostscript
You are a Senior VBA developer.
Write a VBA subroutine named ConvertPdfToPng_MultiPage that uses Ghostscript to convert a PDF into one or more PNG files.
The path to the PDF folder path will be stored in a constant: INVOICE_FOLDER
The path to the Ghostscript executable file will be stored in a constant: GHOSTSCRIPT_PATH
Follow the below guidelines when building the code:
Assign meaningful names to any variables.
Use camel case to name variables.
Don't add error handling unless explicitly stated.
Don't add message boxes unless explicitly stated.
Loop over VBA Collection
You are a Senior VBA developer.
Write a VBA function named GetAllGeneratedPages that loops over every PNG file in folder and stores the full file path in a collection.
The folder path will be stored in a constant: INVOICE_FOLDER
Follow the below guidelines when building the code:
Don't create any constants.
Assign meaningful names to any variables.
Use camel case to name variables.
Don't add error handling unless explicitly stated.
Don't add message boxes unless explicitly stated.
Convert PNG to Base64
Generate two VBA functions that convert an image file into a Base64 string.
The first function must accept a file path, use ADODB.Stream to read the file in binary mode, load the bytes into a byte array, and pass that array into a second helper function.
The helper function must take a byte array and return a Base64-encoded string using an MSXML DOM element with the bin.base64 data type.
Use late binding for all objects and ensure the Base64 output contains no line breaks.
Keep all function names and parameters exactly as follows:
EncodeImageToBase64(imagePath As String) As String
EncodeBase64FromByteArray(bytes() As Byte) As String
Output the complete VBA code for both functions.
FULL CODE
Option Explicit
Const INVOICE_FOLDER As String = "C:\Youtube\Current\Archive\VBA_Masterclass\Course\LLM\Sample\Invoices\"
Const GEMINI_API_URL As String = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key="
Const GHOSTSCRIPT_PATH As String = """C:\Program Files\gs\gs10.05.1\bin\gswin64c.exe"""
Sub MultiLineInvoiceReader()
Dim fileDialog As fileDialog
Dim selectedFile As String
PrepareInvoiceSheet
' Create a File Picker dialog
Set fileDialog = Application.fileDialog(msoFileDialogFilePicker)
' Configure dialog properties
With fileDialog
.Title = "Select a PDF File"
.InitialFileName = INVOICE_FOLDER
.AllowMultiSelect = False
.Filters.Clear
.Filters.Add "PDF Files", "*.pdf"
If .Show = -1 Then
selectedFile = .SelectedItems(1)
Else
MsgBox "No file selected."
Exit Sub
End If
End With
' Store the selected file path in variable: filename
Dim fileName As String
fileName = selectedFile
fileName = Dir(fileName)
ConvertPdfToPng_MultiPage fileName
Dim pageList As Collection
Set pageList = GetAllGeneratedPages()
Dim PNGAttachments As String
PNGAttachments = TransformPNGtoBase64(pageList)
DeleteTemopraryImages pageList
Dim rawJsonResponse As String
rawJsonResponse = CallGemini_MultiPage(PNGAttachments)
Dim cleanJsonResponse As String
cleanJsonResponse = ExtractCleanJSONFromGeminiResponse(rawJsonResponse)
WriteInvoiceHeaderandLines cleanJsonResponse
ThisWorkbook.Sheets("Invoices").Activate
End Sub
Sub PrepareInvoiceSheet()
Dim ws As Worksheet
Set ws = ThisWorkbook.Sheets("Invoices")
ws.Cells.Clear
ws.Range("A1:H1").Value = Array("InvoiceNumber", "Date", "Supplier", "Total", _
"Description", "Qty", "UnitPrice", "Amount")
ws.Rows(1).Font.Bold = True
End Sub
Public Sub ConvertPdfToPng_MultiPage(ByVal pdfFileName As String)
Dim ghostscriptExePath As String
ghostscriptExePath = GHOSTSCRIPT_PATH
Dim invoiceFolderPath As String
invoiceFolderPath = INVOICE_FOLDER
Dim inputPdfPath As String
inputPdfPath = invoiceFolderPath & "\" & pdfFileName
Dim outputPattern As String
outputPattern = invoiceFolderPath & "\page_%03d.png"
Dim ghostscriptArgs As String
ghostscriptArgs = " -dNOPAUSE -dBATCH -sDEVICE=png16m" & _
" -r300" & _
" -sOutputFile=" & Chr(34) & outputPattern & Chr(34) & _
" " & Chr(34) & inputPdfPath & Chr(34)
Dim shellCommand As String
shellCommand = ghostscriptExePath & ghostscriptArgs
Dim shellObj As Object
Set shellObj = CreateObject("WScript.Shell")
shellObj.Run shellCommand, 0, True
End Sub
Public Function GetAllGeneratedPages() As Collection
Dim pngFiles As New Collection
Dim fileName As String
Dim fullFilePath As String
fileName = Dir(INVOICE_FOLDER & "\*.png")
Do While fileName <> ""
fullFilePath = INVOICE_FOLDER & "\" & fileName
pngFiles.Add fullFilePath
fileName = Dir
Loop
Set GetAllGeneratedPages = pngFiles
End Function
Function EncodeImageToBase64(imagePath As String) As String
Dim fileStream As Object
Dim binaryBytes() As Byte
Set fileStream = CreateObject("ADODB.Stream")
fileStream.Type = 1 'Binary
fileStream.Open
fileStream.LoadFromFile imagePath
binaryBytes = fileStream.Read
fileStream.Close
EncodeImageToBase64 = EncodeBase64FromByteArray(binaryBytes)
End Function
Function EncodeBase64FromByteArray(bytes() As Byte) As String
Dim xmlObj As Object
Dim xmlNode As Object
Set xmlObj = CreateObject("MSXML2.DOMDocument")
Set xmlNode = xmlObj.createElement("Base64Data")
xmlNode.DataType = "bin.base64"
xmlNode.NodeTypedValue = bytes
'Remove any line breaks from Base64 output
EncodeBase64FromByteArray = Replace(xmlNode.Text, vbLf, "")
EncodeBase64FromByteArray = Replace(EncodeBase64FromByteArray, vbCr, "")
End Function
Function TransformPNGtoBase64(pageList As Collection) As String
Dim imgBase64 As String
Dim parts As String
Dim i As Long
parts = ""
For i = 1 To pageList.Count
imgBase64 = EncodeImageToBase64(pageList(i))
parts = parts & "{""inline_data"":{""mime_type"":""image/png"",""data"":""" & imgBase64 & """}},"
Next i
TransformPNGtoBase64 = parts
End Function
Sub DeleteTemopraryImages(pageList As Collection)
Dim i As Long
On Error Resume Next
For i = 1 To pageList.Count
Kill pageList(i)
Next i
End Sub
Function CallGemini_MultiPage(PNGAttachments As String) As String
Dim http As Object
Dim requestJson As String
Dim parts As String
parts = PNGAttachments
parts = parts & "{""text"":""" & _
"Extract ALL invoice details across ALL pages. " & _
"Return ONLY valid JSON in this structure: " & _
"{ 'InvoiceNumber':'','Date':'','Supplier':'','Total':0," & _
"'LineItems':[{'Description':'','Quantity':0,'UnitPrice':0,'Amount':0}] }. " & _
"Combine line items from all pages. JSON only.""}"
requestJson = "{""contents"":[{""parts"":[" & parts & "]}]}"
Set http = CreateObject("WinHttp.WinHttpRequest.5.1")
http.Open "POST", GEMINI_API_URL & GEMINI_API_KEY, False
http.setRequestHeader "Content-Type", "application/json"
http.Send requestJson
CallGemini_MultiPage = http.responseText
End Function
Function ExtractCleanJSONFromGeminiResponse(jsonResponse As String) As String
Dim parsed As Object
Dim rawText As String
Set parsed = JsonConverter.ParseJson(jsonResponse)
rawText = parsed("candidates")(1)("content")("parts")(1)("text")
rawText = Replace(rawText, "```json", "")
rawText = Replace(rawText, "```", "")
ExtractCleanJSONFromGeminiResponse = rawText
End Function
Sub WriteInvoiceHeaderandLines(cleanJSON As String)
Dim ws As Worksheet
Dim json As Object
Dim rowHeader As Long
Dim item As Object
Set ws = ThisWorkbook.Sheets("Invoices")
Set json = JsonConverter.ParseJson(cleanJSON)
Dim r As Long
r = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row + 1
For Each item In json("LineItems")
ws.Cells(r, 1).Value = json("InvoiceNumber")
ws.Cells(r, 2).Value = json("Date")
ws.Cells(r, 3).Value = json("Supplier")
ws.Cells(r, 4).Value = json("Total")
ws.Cells(r, 5).Value = item("Description")
ws.Cells(r, 6).Value = item("Quantity")
ws.Cells(r, 7).Value = item("UnitPrice")
ws.Cells(r, 8).Value = item("Amount")
r = r + 1
Next item
End Sub
