Automate Invoice Data Entry

Note: This webpage is designed to accompany the Youtube video posted above. The video offers detailed explanation on all topics; while this webpage will serve as a repository for the code presented in the video. Source code will be presented in the order of the sections within the video.

For the below to code to work, you will need to create a constant for the API Key.

Public Const GEMINI_API_KEY As String = "AIzXXXX"
Public Const GEMINI_API_URL As String = "https://generativelanguage.googleapis.com/v1/models/gemini-2.5-flash:generateContent?key="

Main Subs

Option Explicit

Const DONE_FOLDER As String = "C:\Youtube\Current\Archive\VBA_Masterclass\Course\LLM\Sample\Done\"
Const INVOICE_FOLDER As String = "C:\Youtube\Current\Archive\VBA_Masterclass\Course\LLM\Sample\Invoices\"
Const GHOSTSCRIPT_PATH As String = """C:\Program Files\gs\gs10.05.1\bin\gswin64c.exe"""

Sub ExtractInvoicesFromFiles()
    Dim fileName As String
    Dim fileExt As String
    Dim filePath As String
    Dim imagePath As String
    Dim jsonResponse As String
    Dim cleanJson As String
    
    PrepareInvoiceSheet
    fileName = Dir(INVOICE_FOLDER & "*.*")
    
    Do While fileName <> ""
        fileExt = LCase(Right(fileName, Len(fileName) - InStrRev(fileName, ".")))
        filePath = INVOICE_FOLDER & fileName
        Select Case fileExt
            Case "pdf"
                imagePath = INVOICE_FOLDER & Replace(fileName, ".pdf", ".png")
                ConvertPdfToPng_Ghostscript filePath, imagePath
                jsonResponse = CallGeminiAPI(imagePath)
                cleanJson = ExtractCleanJsonFromGeminiResponse(jsonResponse)
                If cleanJson <> "" Then
                    WriteInvoiceHeader (cleanJson)
                    Name filePath As DONE_FOLDER & fileName
                    Kill imagePath
                End If
            Case "png", "jpg", "jpeg"
                imagePath = filePath
                jsonResponse = CallGeminiAPI(imagePath)
                cleanJson = ExtractCleanJsonFromGeminiResponse(jsonResponse)
                If cleanJson <> "" Then
                    WriteInvoiceHeader (cleanJson)
                    Name filePath As DONE_FOLDER & fileName
                End If
            Case Else
                GoTo ContinueLoop
        End Select
        
ContinueLoop:
        fileName = Dir()
    Loop
    
End Sub

Sub PrepareInvoiceSheet()
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets("Invoices")
    ws.Cells.Clear
    ws.Range("A1:D1").Value = Array("InvoiceNumber", "Date", "Supplier", "Total")
    ws.Rows(1).Font.Bold = True
    With ws
        .Columns("A").NumberFormat = "@"
        .Columns("B").NumberFormat = "dd-MMM-yy"
        .Columns("C").NumberFormat = "#,##0.00"
        .Columns("D").NumberFormat = "@"
    End With
End Sub

Sub ConvertPdfToPng_Ghostscript(pdfPath As String, outputPath As String)
    Dim cmd As String
    cmd = GHOSTSCRIPT_PATH & " -dNOPAUSE -dBATCH -sDEVICE=png16m -r200 " & _
            "-sOutputFile=""" & outputPath & """ """ & pdfPath & """"
    Dim shell As Object
    Set shell = CreateObject("WScript.Shell")
    shell.Run cmd, 0, True

End Sub

Sub Test()

Dim raw As String
Dim clean As String
raw = CallGeminiAPI("C:\Youtube\Current\Archive\VBA_Masterclass\Course\LLM\Sample\Done\Handwritten.jpg")
clean = ExtractCleanJsonFromGeminiResponse(raw)
Debug.Print clean
End Sub

Function ExtractCleanJsonFromGeminiResponse(jsonResponse As String) As String
    Dim jsonParsed As Object
    Dim rawText As String
    
    Set jsonParsed = JsonConverter.ParseJson(jsonResponse)
    rawText = jsonParsed("candidates")(1)("content")("parts")(1)("text")
    rawText = Replace(rawText, "```json", "")
    rawText = Replace(rawText, "```", "")
    ExtractCleanJsonFromGeminiResponse = Trim(rawText)
    
End Function

Sub WriteInvoiceHeader(cleanJson As String)

    Dim ws As Worksheet
    Dim jsonParsed As Object
    Dim nextRow As Long
    Set jsonParsed = JsonConverter.ParseJson(cleanJson)
    Set ws = ThisWorkbook.Sheets("Invoices")
    nextRow = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row + 1
    ws.Cells(nextRow, 1).Value = jsonParsed("InvoiceNumber")
    ws.Cells(nextRow, 2).Value = jsonParsed("Date")
    ws.Cells(nextRow, 3).Value = jsonParsed("Total")
    ws.Cells(nextRow, 4).Value = jsonParsed("Supplier")

End Sub


Function CallGeminiAPI(imagePath As String) As String
    Dim http As Object
    Dim imgBase64 As String
    Dim requestJson As String
    
    imgBase64 = EncodeImageToBase64(imagePath)
    
    requestJson = "{""contents"":[{""parts"":[{""inline_data"":{""mime_type"":""image/png"",""data"":""" & imgBase64 & """}}," & _
                  "{""text"":""Extract invoice details in JSON format with fields: InvoiceNumber, Date, Total, Supplier. " & _
                  "Give date in dd-MMM-yy format. Only return valid JSON.""}]}]}"
    
    Set http = CreateObject("WinHttp.WinHttpRequest.5.1")
    http.Open "POST", GEMINI_API_URL & GEMINI_API_KEY, False
    http.setRequestHeader "Content-Type", "application/json"
    http.Send requestJson
    
    CallGeminiAPI = http.responseText
    
End Function

Function EncodeImageToBase64(imagePath As String) As String
    Dim stream As Object
    Dim bytes() As Byte
    Dim base64Str As String
    Set stream = CreateObject("ADODB.Stream")
    stream.Type = 1
    stream.Open
    stream.LoadFromFile imagePath
    bytes = stream.Read
    stream.Close
    base64Str = EncodeBase64FromByteArray(bytes)
    EncodeImageToBase64 = base64Str
End Function

Function EncodeBase64FromByteArray(bytes() As Byte) As String
    Dim xml As Object
    Set xml = CreateObject("MSXML2.DOMDocument").createElement("b64")
    xml.DataType = "bin.base64"
    xml.nodeTypedValue = bytes
    EncodeBase64FromByteArray = Replace(xml.Text, vbLf, "")
End Function