Python SDK Examples
Basic Workflow
Extract data from invoices:
from structurify import Structurify
import os
# Read the API key from the environment rather than embedding it in source.
client = Structurify(api_key=os.environ['STRUCTURIFY_API_KEY'])

# Create project with invoice template
project = client.projects.create(
    name="Invoice Processing",
    template_id="tpl_invoice",
)

# Upload document
doc = client.documents.upload(
    project_id=project["id"],
    file_path="invoice.pdf",
)
print(f"Uploaded: {doc['name']}")

# Run extraction, then wait on the job it returned
job = client.extraction.run(project_id=project["id"])
completed = client.extraction.wait_for_completion(job["id"])
print(f"Completed: {completed['completedTasks']} tasks")

# Export to CSV
export = client.exports.create(
    project_id=project["id"],
    format="csv",
)
csv_data = client.exports.download(export["export"]["id"])

# Save to file.
# encoding="utf-8" avoids depending on the platform's locale encoding, and
# newline="" turns off newline translation so the line endings already in
# the CSV text are written unchanged (otherwise "\r\n" can become "\r\r\n"
# on Windows).
with open("results.csv", "w", encoding="utf-8", newline="") as f:
    f.write(csv_data)
Batch Processing
Process multiple documents:
import os
from pathlib import Path
from structurify import Structurify
# Build the client from the API key stored in the environment.
client = Structurify(api_key=os.environ['STRUCTURIFY_API_KEY'])

# Project that all uploaded invoices are attached to.
project = client.projects.create(name="Batch Processing", template_id="tpl_invoice")

# Upload every PDF found directly inside ./invoices
# (the "*.pdf" pattern does not descend into subfolders).
invoice_dir = Path("./invoices")
for pdf_path in invoice_dir.glob("*.pdf"):
    doc = client.documents.upload(project_id=project["id"], file_path=str(pdf_path))
    print(f"Uploaded: {doc['name']}")

# Start one extraction job for the project and wait on it.
job = client.extraction.run(project_id=project["id"])
completed = client.extraction.wait_for_completion(job["id"])
print(f"Processed {completed['completedTasks']} documents")
print(f"Failed: {completed['failedTasks']}")

# Request a CSV export for the project and download its contents.
export = client.exports.create(project_id=project["id"], format="csv")
csv_data = client.exports.download(export["export"]["id"])
Export to DataFrame
Use pandas for data analysis:
import pandas as pd
from io import StringIO
from structurify import Structurify
import os

# Read the API key from the environment instead of hard-coding it in source.
client = Structurify(api_key=os.environ['STRUCTURIFY_API_KEY'])

# ... (create project, upload, extract)

# Export and load into DataFrame
export = client.exports.create(
    project_id=project["id"],
    format="csv",
)
csv_data = client.exports.download(export["export"]["id"])
# The downloaded CSV is text, so wrap it in StringIO to give pandas a file-like object.
df = pd.read_csv(StringIO(csv_data))
print(df.head())

# Analyze data
# NOTE: assumes the export contains a numeric "Amount" column; adjust the
# column name to match the fields of your template.
print(f"Total invoices: {len(df)}")
print(f"Total amount: ${df['Amount'].sum():,.2f}")
Error Handling
Robust error handling:
from structurify import (
Structurify,
AuthenticationError,
NotFoundError,
InsufficientCreditsError,
RateLimitError,
)
import time
import os

# Read the API key from the environment instead of hard-coding it in source.
client = Structurify(api_key=os.environ['STRUCTURIFY_API_KEY'])
def process_with_retry(project_id: str, max_retries: int = 3):
    """Run an extraction for a project, retrying when the request is rate limited.

    Args:
        project_id: ID of the project to run the extraction on.
        max_retries: Total number of attempts to make (not the number of
            additional retries). Must be at least 1.

    Returns:
        The value returned by ``client.extraction.wait_for_completion`` for
        the started job.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        RateLimitError: If the last allowed attempt is still rate limited.
        InsufficientCreditsError: Re-raised immediately after printing a hint;
            this error is never retried.
    """
    if max_retries < 1:
        # Without this guard the loop body never runs and the function falls
        # through returning None, which callers then fail to subscript.
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    for attempt in range(max_retries):
        try:
            job = client.extraction.run(project_id=project_id)
            return client.extraction.wait_for_completion(job["id"])
        except RateLimitError as e:
            # Out of attempts: let the caller see the rate-limit error.
            if attempt >= max_retries - 1:
                raise
            # Otherwise wait for the delay carried on the exception, then retry.
            print(f"Rate limited. Waiting {e.retry_after}s...")
            time.sleep(e.retry_after)
        except InsufficientCreditsError:
            print("Not enough credits. Please top up.")
            raise
# Run the retrying helper. Authentication and missing-project errors are
# reported here; any other exception propagates to the caller.
try:
    outcome = process_with_retry("proj_xxx")
    print(f"Success: {outcome['status']}")
except AuthenticationError:
    print("Invalid API key")
except NotFoundError:
    print("Project not found")
Webhook Handler (Flask)
Handle extraction completion webhooks:
from flask import Flask, request, jsonify
from structurify.webhooks import verify_signature
import json
# Shared secret used to check the signature on incoming webhook requests.
# Placeholder value: replace it with your real secret, ideally loaded from
# configuration rather than committed to source.
WEBHOOK_SECRET = "your_webhook_secret"

# Flask application that serves the webhook endpoint.
app = Flask(__name__)
@app.route("/webhook", methods=["POST"])
def handle_webhook():
    """Receive a webhook POST, verify its signature, and handle the event.

    Returns:
        A JSON response: 401 when the signature header is missing or does not
        verify, 400 when the body is not a JSON object, and ``{"status": "ok"}``
        otherwise.
    """
    # Verify signature against the raw request body before trusting any of it.
    signature = request.headers.get("X-Structurify-Signature")
    # headers.get() yields None when the header is absent; reject that up
    # front instead of passing None into verify_signature.
    if not signature or not verify_signature(request.data, signature, WEBHOOK_SECRET):
        return jsonify({"error": "Invalid signature"}), 401

    # Parse event. A malformed body gets a 400 instead of an unhandled
    # exception (which would surface as an HTTP 500).
    try:
        event = json.loads(request.data)
    except ValueError:  # covers json.JSONDecodeError and UnicodeDecodeError
        return jsonify({"error": "Invalid JSON payload"}), 400
    if not isinstance(event, dict):
        return jsonify({"error": "Invalid JSON payload"}), 400

    if event.get("event") == "extraction.completed":
        job_id = event["data"]["job_id"]
        # Not used yet; kept available for the result processing below.
        project_id = event["data"]["project_id"]
        print(f"Extraction completed: {job_id}")
        # Process results...

    # Acknowledge every verified event, including types not handled above.
    return jsonify({"status": "ok"})
# Start the Flask app on port 5000 only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    app.run(port=5000)
Google Colab
Try our interactive notebooks:
We have 169 document-specific notebooks for different use cases.