Python SDK Examples

Basic Workflow

Extract data from invoices:

from structurify import Structurify
import os

# Read the API key from the environment so it never lands in source control
client = Structurify(api_key=os.environ['STRUCTURIFY_API_KEY'])

# Create project with invoice template
project = client.projects.create(
    name="Invoice Processing",
    template_id="tpl_invoice"
)

# Upload document
doc = client.documents.upload(
    project_id=project["id"],
    file_path="invoice.pdf"
)
print(f"Uploaded: {doc['name']}")

# Run extraction
job = client.extraction.run(project_id=project["id"])
completed = client.extraction.wait_for_completion(job["id"])
print(f"Completed: {completed['completedTasks']} tasks")

# Export to CSV
export = client.exports.create(
    project_id=project["id"],
    format="csv"
)
csv_data = client.exports.download(export["export"]["id"])

# Save to file.
# newline="" writes the CSV text exactly as received: without it, text mode
# on Windows translates "\n" to "\r\n", which would turn any existing "\r\n"
# line endings into "\r\r\n". The explicit encoding keeps the output
# independent of the machine's locale.
with open("results.csv", "w", newline="", encoding="utf-8") as f:
    f.write(csv_data)

Batch Processing

Process multiple documents:

import os
from pathlib import Path
from structurify import Structurify

api_key = os.environ['STRUCTURIFY_API_KEY']
client = Structurify(api_key=api_key)

# Create the project that every uploaded invoice will belong to
project = client.projects.create(name="Batch Processing", template_id="tpl_invoice")
project_id = project["id"]

# Upload each PDF found directly inside ./invoices (the glob is not recursive)
for pdf_path in Path("./invoices").glob("*.pdf"):
    uploaded = client.documents.upload(
        project_id=project_id,
        file_path=str(pdf_path),
    )
    print(f"Uploaded: {uploaded['name']}")

# Run a single extraction job for the whole project
job = client.extraction.run(project_id=project_id)
completed = client.extraction.wait_for_completion(job["id"])

# Report the task counters from the completed job
processed = completed['completedTasks']
print(f"Processed {processed} documents")
failed = completed['failedTasks']
print(f"Failed: {failed}")

# Export all results as CSV
export = client.exports.create(project_id=project_id, format="csv")
export_id = export["export"]["id"]
csv_data = client.exports.download(export_id)

Export to DataFrame

Use pandas for data analysis:

import os
from io import StringIO

import pandas as pd
from structurify import Structurify

# Read the key from the environment (STRUCTURIFY_API_KEY) instead of
# hard-coding an "sk_live_..." secret in the script.
client = Structurify(api_key=os.environ['STRUCTURIFY_API_KEY'])

# ... (create project, upload, extract)

# Export and load into DataFrame
export = client.exports.create(
    project_id=project["id"],
    format="csv"
)
csv_data = client.exports.download(export["export"]["id"])

# Wrap the CSV text in a file-like object so pandas can parse it
df = pd.read_csv(StringIO(csv_data))
print(df.head())

# Analyze data
# NOTE(review): 'Amount' assumes the export contains a column with that
# name — confirm against your template's fields.
print(f"Total invoices: {len(df)}")
print(f"Total amount: ${df['Amount'].sum():,.2f}")

Error Handling

Retry on rate limits and handle common API errors:

import os
import time

from structurify import (
    Structurify,
    AuthenticationError,
    NotFoundError,
    InsufficientCreditsError,
    RateLimitError,
)

# Read the key from the environment (STRUCTURIFY_API_KEY) instead of
# hard-coding an "sk_live_..." secret in the script.
client = Structurify(api_key=os.environ['STRUCTURIFY_API_KEY'])

def process_with_retry(project_id, max_retries=3):
    """Run an extraction for ``project_id``, retrying when rate limited.

    Args:
        project_id: ID of the project to run the extraction on.
        max_retries: Total number of attempts to make; must be at least 1.

    Returns:
        The value returned by ``client.extraction.wait_for_completion``
        for the started job.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        RateLimitError: If the last allowed attempt is still rate limited.
        InsufficientCreditsError: Re-raised immediately after printing a
            hint; it is not retried.
    """
    if max_retries < 1:
        # Without this guard the loop body never runs and the function
        # would silently return None.
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        try:
            job = client.extraction.run(project_id=project_id)
            return client.extraction.wait_for_completion(job["id"])
        except RateLimitError as e:
            # Out of attempts: surface the rate-limit error to the caller.
            if attempt == max_retries - 1:
                raise
            print(f"Rate limited. Waiting {e.retry_after}s...")
            time.sleep(e.retry_after)
        except InsufficientCreditsError:
            print("Not enough credits. Please top up.")
            raise

# Run the retrying helper and report the outcome; authentication and
# not-found errors are printed instead of propagating.
try:
    outcome = process_with_retry("proj_xxx")
    status = outcome['status']
    print(f"Success: {status}")
except AuthenticationError:
    print("Invalid API key")
except NotFoundError:
    print("Project not found")

Webhook Handler (Flask)

Handle extraction completion webhooks:

from flask import Flask, request, jsonify
from structurify.webhooks import verify_signature
import json

# Flask application that serves the /webhook endpoint
app = Flask(__name__)
# Placeholder value — replace with your real webhook secret.
# NOTE(review): consider loading it from the environment or a secret store
# rather than committing it to source.
WEBHOOK_SECRET = "your_webhook_secret"

@app.route("/webhook", methods=["POST"])
def handle_webhook():
    """Verify an incoming webhook request and acknowledge its event.

    Returns:
        A JSON ``{"status": "ok"}`` response once the event is accepted,
        a 401 JSON error when the signature header is missing or does not
        verify, or a 400 JSON error when the body is not valid JSON.
    """
    # Verify signature. A request without the header is rejected up front
    # so verify_signature is never called with None.
    signature = request.headers.get("X-Structurify-Signature")
    if not signature or not verify_signature(request.data, signature, WEBHOOK_SECRET):
        return jsonify({"error": "Invalid signature"}), 401

    # Parse event. A malformed body is answered with 400 rather than
    # letting the exception surface as a 500.
    try:
        event = json.loads(request.data)
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both subclass ValueError
        return jsonify({"error": "Invalid JSON"}), 400

    if event["event"] == "extraction.completed":
        job_id = event["data"]["job_id"]
        # Kept for the result-processing step below
        project_id = event["data"]["project_id"]
        print(f"Extraction completed: {job_id}")

        # Process results...

    return jsonify({"status": "ok"})

# Start the server on port 5000 only when this file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    app.run(port=5000)

Google Colab

Try our interactive notebooks:

We have 169 document-specific notebooks for different use cases.

Basic Workflow

Batch Processing

Export to DataFrame

Error Handling

Webhook Handler (Flask)

Google Colab