Visit my Youtube Channel
Automate Invoice Data Entry
Note: This webpage is designed to accompany the Youtube video posted above. The video offers detailed explanation on all topics; while this webpage will serve as a repository for the code presented in the video. Source code will be presented in the order of the sections within the video.
For the below to code to work, you will need to create a constant for the API Key.
Public Const GEMINI_API_KEY As String = "AIzXXXX"
Public Const GEMINI_API_URL As String = "https://generativelanguage.googleapis.com/v1/models/gemini-2.5-flash:generateContent?key="
Main Subs
Option Explicit
Const DONE_FOLDER As String = "C:\Youtube\Current\Archive\VBA_Masterclass\Course\LLM\Sample\Done\"
Const INVOICE_FOLDER As String = "C:\Youtube\Current\Archive\VBA_Masterclass\Course\LLM\Sample\Invoices\"
Const GHOSTSCRIPT_PATH As String = """C:\Program Files\gs\gs10.05.1\bin\gswin64c.exe"""
Sub ExtractInvoicesFromFiles()
Dim fileName As String
Dim fileExt As String
Dim filePath As String
Dim imagePath As String
Dim jsonResponse As String
Dim cleanJson As String
PrepareInvoiceSheet
fileName = Dir(INVOICE_FOLDER & "*.*")
Do While fileName <> ""
fileExt = LCase(Right(fileName, Len(fileName) - InStrRev(fileName, ".")))
filePath = INVOICE_FOLDER & fileName
Select Case fileExt
Case "pdf"
imagePath = INVOICE_FOLDER & Replace(fileName, ".pdf", ".png")
ConvertPdfToPng_Ghostscript filePath, imagePath
jsonResponse = CallGeminiAPI(imagePath)
cleanJson = ExtractCleanJsonFromGeminiResponse(jsonResponse)
If cleanJson <> "" Then
WriteInvoiceHeader (cleanJson)
Name filePath As DONE_FOLDER & fileName
Kill imagePath
End If
Case "png", "jpg", "jpeg"
imagePath = filePath
jsonResponse = CallGeminiAPI(imagePath)
cleanJson = ExtractCleanJsonFromGeminiResponse(jsonResponse)
If cleanJson <> "" Then
WriteInvoiceHeader (cleanJson)
Name filePath As DONE_FOLDER & fileName
End If
Case Else
GoTo ContinueLoop
End Select
ContinueLoop:
fileName = Dir()
Loop
End Sub
Sub PrepareInvoiceSheet()
Dim ws As Worksheet
Set ws = ThisWorkbook.Sheets("Invoices")
ws.Cells.Clear
ws.Range("A1:D1").Value = Array("InvoiceNumber", "Date", "Supplier", "Total")
ws.Rows(1).Font.Bold = True
With ws
.Columns("A").NumberFormat = "@"
.Columns("B").NumberFormat = "dd-MMM-yy"
.Columns("C").NumberFormat = "#,##0.00"
.Columns("D").NumberFormat = "@"
End With
End Sub
Sub ConvertPdfToPng_Ghostscript(pdfPath As String, outputPath As String)
Dim cmd As String
cmd = GHOSTSCRIPT_PATH & " -dNOPAUSE -dBATCH -sDEVICE=png16m -r200 " & _
"-sOutputFile=""" & outputPath & """ """ & pdfPath & """"
Dim shell As Object
Set shell = CreateObject("WScript.Shell")
shell.Run cmd, 0, True
End Sub
Sub Test()
Dim raw As String
Dim clean As String
raw = CallGeminiAPI("C:\Youtube\Current\Archive\VBA_Masterclass\Course\LLM\Sample\Done\Handwritten.jpg")
clean = ExtractCleanJsonFromGeminiResponse(raw)
Debug.Print clean
End Sub
Function ExtractCleanJsonFromGeminiResponse(jsonResponse As String) As String
Dim jsonParsed As Object
Dim rawText As String
Set jsonParsed = JsonConverter.ParseJson(jsonResponse)
rawText = jsonParsed("candidates")(1)("content")("parts")(1)("text")
rawText = Replace(rawText, "```json", "")
rawText = Replace(rawText, "```", "")
ExtractCleanJsonFromGeminiResponse = Trim(rawText)
End Function
Sub WriteInvoiceHeader(cleanJson As String)
Dim ws As Worksheet
Dim jsonParsed As Object
Dim nextRow As Long
Set jsonParsed = JsonConverter.ParseJson(cleanJson)
Set ws = ThisWorkbook.Sheets("Invoices")
nextRow = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row + 1
ws.Cells(nextRow, 1).Value = jsonParsed("InvoiceNumber")
ws.Cells(nextRow, 2).Value = jsonParsed("Date")
ws.Cells(nextRow, 3).Value = jsonParsed("Total")
ws.Cells(nextRow, 4).Value = jsonParsed("Supplier")
End Sub
Function CallGeminiAPI(imagePath As String) As String
Dim http As Object
Dim imgBase64 As String
Dim requestJson As String
imgBase64 = EncodeImageToBase64(imagePath)
requestJson = "{""contents"":[{""parts"":[{""inline_data"":{""mime_type"":""image/png"",""data"":""" & imgBase64 & """}}," & _
"{""text"":""Extract invoice details in JSON format with fields: InvoiceNumber, Date, Total, Supplier. " & _
"Give date in dd-MMM-yy format. Only return valid JSON.""}]}]}"
Set http = CreateObject("WinHttp.WinHttpRequest.5.1")
http.Open "POST", GEMINI_API_URL & GEMINI_API_KEY, False
http.setRequestHeader "Content-Type", "application/json"
http.Send requestJson
CallGeminiAPI = http.responseText
End Function
Function EncodeImageToBase64(imagePath As String) As String
Dim stream As Object
Dim bytes() As Byte
Dim base64Str As String
Set stream = CreateObject("ADODB.Stream")
stream.Type = 1
stream.Open
stream.LoadFromFile imagePath
bytes = stream.Read
stream.Close
base64Str = EncodeBase64FromByteArray(bytes)
EncodeImageToBase64 = base64Str
End Function
Function EncodeBase64FromByteArray(bytes() As Byte) As String
Dim xml As Object
Set xml = CreateObject("MSXML2.DOMDocument").createElement("b64")
xml.DataType = "bin.base64"
xml.nodeTypedValue = bytes
EncodeBase64FromByteArray = Replace(xml.Text, vbLf, "")
End Function