Skip to main content

Python SDK Examples

Basic Workflow

Extract data from invoices:

from structurify import Structurify
import os

# Build the API client. The key comes from the environment so it never has to
# be committed; a missing variable raises KeyError right here.
client = Structurify(api_key=os.environ['STRUCTURIFY_API_KEY'])

# Create project with invoice template
project = client.projects.create(
    name="Invoice Processing",
    template_id="tpl_invoice",
)

# Upload document
doc = client.documents.upload(
    project_id=project["id"],
    file_path="invoice.pdf",
)
print(f"Uploaded: {doc['name']}")

# Run extraction, then wait for the job to complete
job = client.extraction.run(project_id=project["id"])
completed = client.extraction.wait_for_completion(job["id"])
print(f"Completed: {completed['completedTasks']} tasks")

# Export to CSV
export = client.exports.create(
    project_id=project["id"],
    format="csv",
)
csv_data = client.exports.download(export["export"]["id"])

# Save to file.
# - encoding="utf-8": do not depend on the platform's default encoding.
# - newline="": write line endings exactly as exported instead of letting
#   text mode translate "\n" (which would turn "\r\n" into "\r\r\n" on Windows).
# NOTE(review): assumes download() returns text (str), as the text-mode write
# implies - confirm against the SDK.
with open("results.csv", "w", encoding="utf-8", newline="") as f:
    f.write(csv_data)

Batch Processing

Process multiple documents:

import os
from pathlib import Path
from structurify import Structurify

# Build the API client from the environment (KeyError if the variable is unset).
client = Structurify(api_key=os.environ['STRUCTURIFY_API_KEY'])

# Create project
project = client.projects.create(
    name="Batch Processing",
    template_id="tpl_invoice",
)

# Upload all PDFs in a folder.
# Path.glob() yields entries in no guaranteed order, so sort them to make the
# upload order (and the log output) reproducible between runs.
pdf_folder = Path("./invoices")
for pdf_file in sorted(pdf_folder.glob("*.pdf")):
    doc = client.documents.upload(
        project_id=project["id"],
        file_path=str(pdf_file),
    )
    print(f"Uploaded: {doc['name']}")

# Run extraction on all documents
job = client.extraction.run(project_id=project["id"])
completed = client.extraction.wait_for_completion(job["id"])

print(f"Processed {completed['completedTasks']} documents")
print(f"Failed: {completed['failedTasks']}")

# Export all results
export = client.exports.create(
    project_id=project["id"],
    format="csv",
)
csv_data = client.exports.download(export["export"]["id"])

Export to DataFrame

Use pandas for data analysis:

import pandas as pd
from io import StringIO
from structurify import Structurify

import os

# Read the API key from the environment, as the other examples do, instead of
# embedding a live key in source (KeyError if the variable is unset).
client = Structurify(api_key=os.environ['STRUCTURIFY_API_KEY'])

# ... (create project, upload, extract)

# Export and load into DataFrame
export = client.exports.create(
    project_id=project["id"],
    format="csv",
)
csv_data = client.exports.download(export["export"]["id"])

# Wrap the CSV text in a file-like object so pandas can parse it in memory.
df = pd.read_csv(StringIO(csv_data))
print(df.head())

# Analyze data
# NOTE(review): assumes the export contains a numeric "Amount" column - the
# column name depends on the template; confirm before relying on it.
print(f"Total invoices: {len(df)}")
print(f"Total amount: ${df['Amount'].sum():,.2f}")

Error Handling

Handle API errors explicitly and retry when the request is rate limited:

from structurify import (
Structurify,
AuthenticationError,
NotFoundError,
InsufficientCreditsError,
RateLimitError,
)
import time

import os

# Read the API key from the environment, as the earlier examples do, instead
# of embedding a live key in source (KeyError if the variable is unset).
client = Structurify(api_key=os.environ['STRUCTURIFY_API_KEY'])

def process_with_retry(project_id, max_retries=3):
    """Run an extraction for ``project_id``, retrying while rate limited.

    Args:
        project_id: ID of the project whose documents should be extracted.
        max_retries: Maximum number of attempts; must be at least 1.

    Returns:
        The value returned by ``client.extraction.wait_for_completion`` for
        the job that was started.

    Raises:
        ValueError: If ``max_retries`` is less than 1 (previously this case
            silently returned ``None``).
        RateLimitError: If the last allowed attempt is still rate limited.
        InsufficientCreditsError: Re-raised immediately after printing a hint.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    for attempt in range(1, max_retries + 1):
        try:
            job = client.extraction.run(project_id=project_id)
            return client.extraction.wait_for_completion(job["id"])
        except RateLimitError as e:
            # Out of attempts: let the caller see the rate-limit error.
            if attempt == max_retries:
                raise
            # Use the wait time carried by the error when there is one;
            # otherwise fall back to exponential backoff (1s, 2s, 4s, ...)
            # so a missing or None value cannot crash time.sleep().
            delay = getattr(e, "retry_after", None)
            if delay is None:
                delay = 2 ** (attempt - 1)
            print(f"Rate limited. Waiting {delay}s...")
            time.sleep(delay)
        except InsufficientCreditsError:
            # Not retried here: report it and propagate to the caller.
            print("Not enough credits. Please top up.")
            raise

# Run the retrying helper and report the outcome. Only the two errors handled
# below are turned into messages; anything else propagates.
try:
    result = process_with_retry("proj_xxx")
except AuthenticationError:
    print("Invalid API key")
except NotFoundError:
    print("Project not found")
else:
    print(f"Success: {result['status']}")

Webhook Handler (Flask)

Handle extraction completion webhooks:

from flask import Flask, request, jsonify
from structurify.webhooks import verify_signature
import json

# Flask application object; the /webhook route below is registered on it.
app = Flask(__name__)
# Shared secret handed to verify_signature() when checking incoming requests.
# The literal is a placeholder.
# NOTE(review): load the real value from the environment or a secret store
# rather than committing it.
WEBHOOK_SECRET = "your_webhook_secret"

@app.route("/webhook", methods=["POST"])
def handle_webhook():
    """Authenticate an incoming webhook POST and acknowledge it.

    Returns:
        A Flask JSON response:
        - 401 if the ``X-Structurify-Signature`` header is missing or does
          not verify against ``WEBHOOK_SECRET``;
        - 400 if the body is not a JSON object, or an
          ``extraction.completed`` event lacks ``data.job_id`` /
          ``data.project_id``;
        - 200 with ``{"status": "ok"}`` otherwise (including event types
          this handler does not act on).
    """
    # Read the body once so verification and parsing see the same bytes.
    payload = request.data

    # Verify signature. A missing header is rejected up front instead of
    # passing None into verify_signature().
    signature = request.headers.get("X-Structurify-Signature")
    if not signature or not verify_signature(payload, signature, WEBHOOK_SECRET):
        return jsonify({"error": "Invalid signature"}), 401

    # Parse event. JSONDecodeError and UnicodeDecodeError are both ValueError
    # subclasses, so a malformed body yields 400 rather than an unhandled 500.
    try:
        event = json.loads(payload)
    except ValueError:
        return jsonify({"error": "Invalid JSON payload"}), 400
    if not isinstance(event, dict):
        return jsonify({"error": "Invalid JSON payload"}), 400

    if event.get("event") == "extraction.completed":
        try:
            job_id = event["data"]["job_id"]
            # Not used by the print below; kept for the processing step.
            project_id = event["data"]["project_id"]
        except (KeyError, TypeError):
            return jsonify({"error": "Malformed event payload"}), 400
        print(f"Extraction completed: {job_id}")

        # Process results...

    return jsonify({"status": "ok"})

# Start the Flask server on port 5000, but only when this file is executed
# directly (not when it is imported as a module).
if __name__ == "__main__":
    app.run(port=5000)

Google Colab

Try our interactive notebooks:

Open In Colab

We have 169 document-specific notebooks for different use cases.