Advanced Usage

This guide covers advanced OpenConvert features and techniques for power users.

Advanced Command-Line Usage

Complex Conversion Scenarios

Multi-step conversions with different prompts:

# Create draft version
openconvert -i report.txt -o draft.pdf --prompt "Quick draft layout"

# Create final version with enhanced formatting
# (a single trailing backslash continues the command on the next line)
openconvert -i report.txt -o final.pdf \
  --prompt "Executive presentation with charts, tables, and professional layout"

# Create presentation version
openconvert -i report.txt -o slides.pdf \
  --prompt "Convert to presentation slides with bullet points"

Conditional conversions with shell scripting:

#!/bin/bash
# Smart conversion script: chooses the conversion prompt from the input
# file's size, then converts the file to a PDF next to it.
# Usage: <script> <input-file>

input_file="$1"

# Fail early with a clear message rather than letting stat/openconvert
# trip over a missing argument or a path that is not a regular file.
if [ -z "$input_file" ] || [ ! -f "$input_file" ]; then
    echo "Usage: $0 <input-file>" >&2
    exit 1
fi

# Check file size (first form is tried, its errors are hidden, and the
# second form is the fallback when the first one fails)
file_size=$(stat -f%z "$input_file" 2>/dev/null || stat -c%s "$input_file")

if [ "$file_size" -gt 10485760 ]; then  # 10MB
    echo "Large file detected, using compression prompt"
    prompt="Optimize for size, compress images and reduce quality if needed"
else
    echo "Normal file size, using quality prompt"
    prompt="High quality conversion with best formatting"
fi

# Convert with appropriate prompt; ${input_file%.*} strips the last extension
openconvert -i "$input_file" -o "${input_file%.*}.pdf" --prompt "$prompt"

Format chain processing:

# Fan one CSV out into several deliverables, one openconvert call each
source_csv="data.csv"

# Chart image
openconvert -i "$source_csv" -o charts.png --prompt "Create bar and line charts"

# Formatted table as PDF
openconvert -i "$source_csv" -o table.pdf --prompt "Professional data table"

# Excel workbook with formulas
openconvert -i "$source_csv" -o analysis.xlsx --prompt "Add formulas and pivot tables"

Advanced Python Integration

Custom Client Configuration

from openconvert.client import OpenConvertClient
import asyncio
from pathlib import Path

class CustomOpenConvertClient:
    """Wrapper around ``OpenConvertClient`` adding retries and host failover.

    Attributes:
        hosts: List of ``(host, port)`` tuples to try.
        retry_count: Number of connection rounds / conversion attempts.
        timeout: Per-connection timeout passed to ``asyncio.wait_for``.
        current_host_index: Index in ``hosts`` that the next connection
            round starts from (and of the host currently connected).
        client: The connected ``OpenConvertClient``, or ``None``.
    """

    def __init__(self, hosts=None, retry_count=3, timeout=30):
        """Initialize with multiple hosts and retry logic.

        Args:
            hosts: Optional list of ``(host, port)`` tuples; defaults to
                ``[("localhost", 8765)]`` when omitted or empty.
            retry_count: How many rounds/attempts to make before giving up.
            timeout: Timeout applied to each individual connect call.
        """
        self.hosts = hosts or [("localhost", 8765)]
        self.retry_count = retry_count
        self.timeout = timeout
        self.current_host_index = 0
        self.client = None

    async def connect_with_failover(self):
        """Connect with automatic failover to backup hosts.

        Each round tries every host once, starting at
        ``current_host_index`` and wrapping around, so a caller that has
        advanced the index after a failure really does get the next host
        first. Rounds are separated by an exponential backoff.

        Returns:
            True once a connection succeeds.

        Raises:
            ConnectionError: If no host accepted a connection in any round.
        """
        host_count = len(self.hosts)
        first_index = self.current_host_index % host_count

        for attempt in range(self.retry_count):
            for offset in range(host_count):
                index = (first_index + offset) % host_count
                host, port = self.hosts[index]
                try:
                    candidate = OpenConvertClient()
                    await asyncio.wait_for(
                        candidate.connect(host, port),
                        timeout=self.timeout
                    )
                except Exception as e:
                    print(f"Failed to connect to {host}:{port}: {e}")
                    continue

                # Only publish the client once it is actually connected.
                self.client = candidate
                self.current_host_index = index
                print(f"Connected to {host}:{port}")
                return True

            if attempt < self.retry_count - 1:
                await asyncio.sleep(2 ** attempt)  # Exponential backoff

        # Never leave a half-initialised client behind for later calls.
        self.client = None
        raise ConnectionError("Failed to connect to any host")

    async def convert_with_retry(self, input_file, output_file, **kwargs):
        """Convert with automatic retry and failover.

        Args:
            input_file: Path (or string) of the file to convert.
            output_file: Path (or string) of the file to write.
            **kwargs: Forwarded unchanged to ``client.convert_file``.

        Returns:
            Whatever ``client.convert_file`` returns on the first
            successful attempt.

        Raises:
            Exception: The error from the final attempt is re-raised once
                ``retry_count`` attempts have failed.
        """
        for attempt in range(self.retry_count):
            try:
                if not self.client:
                    await self.connect_with_failover()

                result = await self.client.convert_file(
                    input_file=Path(input_file),
                    output_file=Path(output_file),
                    **kwargs
                )
                return result

            except Exception as e:
                print(f"Conversion attempt {attempt + 1} failed: {e}")
                if attempt < self.retry_count - 1:
                    # Try next host on failure: advance the start index and
                    # drop the client so the next attempt reconnects.
                    self.current_host_index = (self.current_host_index + 1) % len(self.hosts)
                    self.client = None
                    await asyncio.sleep(1)
                else:
                    raise

# Usage
async def main():
    """Convert one document using a primary host and a backup host."""
    failover_hosts = [
        ("primary.example.com", 8765),
        ("backup.example.com", 8765),
    ]
    client = CustomOpenConvertClient(hosts=failover_hosts, retry_count=3)

    outcome = await client.convert_with_retry(
        "document.txt", "document.pdf", prompt="High-quality conversion"
    )
    print(f"Conversion result: {outcome}")

asyncio.run(main())

Intelligent Format Selection

import mimetypes
from pathlib import Path
from openconvert import convert_file

class SmartConverter:
    """Intelligent converter that selects optimal formats and prompts."""

    # Extensions for the output formats this class can suggest itself, used
    # when ``mimetypes.guess_extension`` has no mapping for the MIME type.
    _FALLBACK_EXTENSIONS = {
        'application/pdf': '.pdf',
        'text/html': '.html',
        'image/webp': '.webp',
        'image/png': '.png',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
    }

    def __init__(self):
        """Set up the per-input-format table of preferred outputs and prompts."""
        self.format_mappings = {
            'text/plain': {
                'best_outputs': ['application/pdf', 'text/html'],
                'prompts': {
                    'application/pdf': 'Professional document formatting',
                    'text/html': 'Clean web-readable format'
                }
            },
            'image/jpeg': {
                'best_outputs': ['image/webp', 'image/png'],
                'prompts': {
                    'image/webp': 'Optimize for web with quality preservation',
                    'image/png': 'Lossless conversion'
                }
            },
            'text/csv': {
                'best_outputs': ['application/pdf', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
                'prompts': {
                    'application/pdf': 'Create formatted report with charts',
                    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'Structured spreadsheet with formulas'
                }
            }
        }

    def detect_input_format(self, file_path):
        """Detect a file's MIME type from its name.

        Returns:
            The MIME type guessed by ``mimetypes.guess_type``, or ``None``
            when the name gives no hint.
        """
        mime_type, _ = mimetypes.guess_type(file_path)
        return mime_type

    def suggest_output_format(self, input_format, purpose='general'):
        """Suggest optimal output format based on input and purpose.

        Args:
            input_format: MIME type of the input.
            purpose: ``'web'``, ``'print'`` or anything else for the
                general default.

        Returns:
            A MIME type; ``'application/pdf'`` when nothing better is known.
        """
        mapping = self.format_mappings.get(input_format, {})
        best_outputs = mapping.get('best_outputs', ['application/pdf'])

        # Purpose-specific logic
        if purpose == 'web':
            # Checked in this priority order, not in best_outputs order.
            web_formats = ['image/webp', 'text/html', 'application/pdf']
            for fmt in web_formats:
                if fmt in best_outputs:
                    return fmt
        elif purpose == 'print':
            if 'application/pdf' in best_outputs:
                return 'application/pdf'

        return best_outputs[0] if best_outputs else 'application/pdf'

    def get_smart_prompt(self, input_format, output_format, file_size=None):
        """Generate intelligent prompt based on formats and file properties.

        Args:
            input_format: MIME type of the input.
            output_format: MIME type of the output.
            file_size: Optional input size in bytes; sizes above 10 MB or
                below 1 KB add a size/quality hint.

        Returns:
            The prompt text. It is empty when no base prompt is known and
            no size hint applies, and never starts with a stray ". ".
        """
        mapping = self.format_mappings.get(input_format, {})
        base_prompt = mapping.get('prompts', {}).get(output_format, '')

        parts = [base_prompt] if base_prompt else []

        # Enhance prompt based on file size
        if file_size:
            if file_size > 10 * 1024 * 1024:  # 10MB
                parts.append("Optimize for file size.")
            elif file_size < 1024:  # 1KB
                parts.append("Maintain maximum quality.")

        # Joining (instead of appending ". ...") keeps the hint well-formed
        # even when there is no base prompt to attach it to.
        return ". ".join(parts)

    def smart_convert(self, input_file, output_file=None, purpose='general'):
        """Perform intelligent conversion with optimal settings.

        Args:
            input_file: Path of the file to convert; it must exist because
                its size is read.
            output_file: Optional explicit output path. When omitted, the
                output sits next to the input with an extension matching
                the suggested format.
            purpose: Forwarded to ``suggest_output_format``.

        Returns:
            Whatever ``convert_file`` returns.

        Raises:
            ValueError: If the input format cannot be detected.
        """
        input_path = Path(input_file)

        # Detect input format
        input_format = self.detect_input_format(str(input_path))
        if not input_format:
            raise ValueError(f"Cannot detect format for {input_file}")

        # Determine output format and file
        if output_file:
            output_path = Path(output_file)
            # May be None for an unrecognised extension; passed on as-is.
            output_format = self.detect_input_format(str(output_path))
        else:
            output_format = self.suggest_output_format(input_format, purpose)
            # Prefer the platform mapping, then the built-in table, so a
            # non-PDF format is not written to a ".pdf" file by accident.
            ext = (
                mimetypes.guess_extension(output_format)
                or self._FALLBACK_EXTENSIONS.get(output_format, '.pdf')
            )
            output_path = input_path.with_suffix(ext)

        # Get file size
        file_size = input_path.stat().st_size

        # Generate smart prompt
        prompt = self.get_smart_prompt(input_format, output_format, file_size)

        print(f"Converting {input_file}")
        print(f"  Input format: {input_format}")
        print(f"  Output format: {output_format}")
        print(f"  Output file: {output_path}")
        print(f"  Prompt: {prompt}")

        # Perform conversion
        return convert_file(
            str(input_path),
            str(output_path),
            from_format=input_format,
            to_format=output_format,
            prompt=prompt
        )

# Usage
converter = SmartConverter()

# Let the converter pick a web-friendly output for an image
converter.smart_convert(input_file="photo.jpg", purpose="web")

# Let the converter pick a print-friendly output for a text document
converter.smart_convert(input_file="document.txt", purpose="print")

# Name the output file explicitly instead of having one suggested
converter.smart_convert("data.csv", output_file="report.pdf")

Advanced Prompt Engineering

Context-Aware Prompts

import os
import re
from datetime import datetime
from pathlib import Path

class PromptBuilder:
    """Build intelligent, context-aware prompts."""

    def __init__(self):
        """Set up prompt templates, grouped by category and template type."""
        self.templates = {
            'document': {
                'academic': "Format as academic paper with: title page, abstract, sections with numbered headings, bibliography, proper margins, Times New Roman font",
                'business': "Professional business document with: company header, executive summary, clear sections, bullet points, charts where appropriate",
                'casual': "Clean, readable format with: clear headings, good spacing, easy-to-read font",
                'presentation': "Convert to presentation format with: slide titles, bullet points, large readable text, minimal text per slide"
            },
            'image': {
                'thumbnail': "Create {size} thumbnail: crop to center, maintain aspect ratio, optimize for fast loading",
                'web_optimized': "Optimize for web: compress to {quality}% quality, convert to {format}, reduce file size",
                'print_ready': "Prepare for print: high DPI, CMYK color space if possible, preserve quality",
                'social_media': "Optimize for social media: crop to {aspect_ratio}, enhance colors, compress appropriately"
            },
            'data': {
                'report': "Create professional data report with: formatted tables, charts for trends, summary statistics, conclusions",
                'dashboard': "Build executive dashboard with: key metrics highlighted, visual charts, clear labels, trend indicators",
                'analysis': "Perform data analysis with: statistical summaries, correlation analysis, trend identification, insights"
            }
        }

    def analyze_content(self, file_path):
        """Derive a context dict from keywords in the file name.

        Only the lower-cased file stem is inspected (substring matches);
        the file itself is never opened.

        Returns:
            A dict with keys ``type``, ``domain``, ``urgency`` and
            ``audience``. ``audience`` is always ``'general'`` here.
        """
        path = Path(file_path)

        # Analyze filename for clues
        filename = path.stem.lower()

        context = {
            'type': 'general',
            'domain': 'general',
            'urgency': 'normal',
            'audience': 'general'
        }

        # Detect document type from filename
        if any(word in filename for word in ['report', 'summary', 'analysis']):
            context['type'] = 'report'
        elif any(word in filename for word in ['presentation', 'slides', 'deck']):
            context['type'] = 'presentation'
        elif any(word in filename for word in ['academic', 'paper', 'thesis', 'research']):
            context['type'] = 'academic'
        elif any(word in filename for word in ['business', 'proposal', 'contract']):
            context['type'] = 'business'

        # Detect domain
        if any(word in filename for word in ['financial', 'finance', 'budget', 'accounting']):
            context['domain'] = 'finance'
        elif any(word in filename for word in ['technical', 'engineering', 'spec', 'design']):
            context['domain'] = 'technical'
        elif any(word in filename for word in ['marketing', 'sales', 'campaign']):
            context['domain'] = 'marketing'

        # Detect urgency
        if any(word in filename for word in ['urgent', 'priority', 'asap']):
            context['urgency'] = 'high'
        elif any(word in filename for word in ['draft', 'preliminary', 'temp']):
            context['urgency'] = 'low'

        return context

    def _find_template(self, preferred_category, template_type):
        """Return the template for a type, looking in the preferred category first.

        Other categories are searched next, so a type such as ``'report'``
        (defined only under ``'data'``) is still found. Falls back to
        ``"Professional formatting"`` when no category defines the type.
        """
        preferred = self.templates.get(preferred_category, {})
        if template_type in preferred:
            return preferred[template_type]
        for group in self.templates.values():
            if template_type in group:
                return group[template_type]
        return "Professional formatting"

    @staticmethod
    def _most_common(values, default):
        """Return the most frequent value; ties go to the earliest one seen.

        ``dict.fromkeys`` keeps first-seen order, unlike a ``set``, so the
        result does not change between interpreter runs.
        """
        if not values:
            return default
        return max(dict.fromkeys(values), key=values.count)

    def build_prompt(self, file_path, output_format, context_override=None):
        """Build context-aware prompt.

        Args:
            file_path: File whose name is analysed when no override is given.
            output_format: Target MIME type; formats containing ``text`` or
                ``pdf`` prefer document templates, all others prefer image
                templates.
            context_override: Optional context dict used instead of
                analysing ``file_path``.

        Returns:
            The base template followed by any domain and urgency sentences.
        """
        context = context_override or self.analyze_content(file_path)

        # Base prompt from template
        file_type = 'document' if 'text' in output_format or 'pdf' in output_format else 'image'
        template_type = context.get('type', 'general')

        base_prompt = self._find_template(file_type, template_type)

        # Add domain-specific enhancements
        domain_enhancements = {
            'finance': "Include financial formatting: currency symbols, percentage formatting, aligned numbers",
            'technical': "Use technical formatting: code blocks, diagrams, precise terminology",
            'marketing': "Use engaging formatting: attractive layout, emphasis on key points, visual appeal"
        }

        domain = context.get('domain')
        if domain in domain_enhancements:
            base_prompt += f". {domain_enhancements[domain]}"

        # Add urgency considerations
        if context.get('urgency') == 'high':
            base_prompt += ". Prioritize clarity and quick readability."
        elif context.get('urgency') == 'low':
            base_prompt += ". Focus on detailed formatting and visual appeal."

        return base_prompt

    def build_batch_prompts(self, file_list, output_format):
        """Build prompts for batch processing with consistency.

        The most common ``type`` and ``domain`` across all files override
        each file's own values; per-file ``urgency`` is kept.

        Returns:
            A dict mapping each entry of ``file_list`` to its prompt.
        """
        file_list = list(file_list)

        # Analyze every file once and reuse the result below.
        contexts = [self.analyze_content(f) for f in file_list]

        # Find most common type and domain
        types = [c.get('type') for c in contexts]
        domains = [c.get('domain') for c in contexts]

        base_context = {
            'type': self._most_common(types, 'general'),
            'domain': self._most_common(domains, 'general'),
        }

        prompts = {}
        for file_path, file_context in zip(file_list, contexts):
            # Override with common context for consistency
            merged_context = {**file_context, **base_context}
            prompts[file_path] = self.build_prompt(file_path, output_format, merged_context)

        return prompts

# Usage
from openconvert import convert_file  # needed by the batch loop below

prompt_builder = PromptBuilder()

# Single file with automatic context detection
prompt = prompt_builder.build_prompt("financial_report_Q3.txt", "application/pdf")
print(f"Generated prompt: {prompt}")

# Batch processing with consistent prompts
files = ["report1.txt", "report2.txt", "report3.txt"]
prompts = prompt_builder.build_batch_prompts(files, "application/pdf")

for file_path, prompt in prompts.items():
    # Replace the extension (report1.txt -> report1.pdf) rather than
    # appending to it, which would produce report1.txt.pdf.
    output_path = str(Path(file_path).with_suffix(".pdf"))
    convert_file(file_path, output_path, prompt=prompt)

Dynamic Prompt Adjustment

import json
from openconvert import convert_file

class AdaptiveConverter:
    """Converter that learns from conversion results and adjusts prompts.

    Every conversion attempt is stored in a JSON feedback file; later
    calls score candidate prompts against that history.
    """

    def __init__(self, feedback_file="conversion_feedback.json"):
        """Remember the feedback file path and load any existing history."""
        self.feedback_file = feedback_file
        self.feedback_data = self.load_feedback()

    def load_feedback(self):
        """Load previous conversion feedback.

        Returns:
            The parsed JSON content, or an empty dict when the feedback
            file does not exist yet.
        """
        try:
            with open(self.feedback_file, 'r') as f:
                return json.load(f)
        except FileNotFoundError:
            return {}

    def save_feedback(self):
        """Write the in-memory feedback data to the feedback file as JSON."""
        with open(self.feedback_file, 'w') as f:
            json.dump(self.feedback_data, f, indent=2)

    def get_success_rate(self, prompt_pattern):
        """Get success rate for similar prompts.

        A recorded conversion matches when ``prompt_pattern`` occurs in its
        prompt, ignoring case.

        Returns:
            The fraction of matching conversions marked successful, or
            ``0.5`` when nothing matches.
        """
        matching_conversions = [
            conv for conv in self.feedback_data.values()
            if prompt_pattern.lower() in conv.get('prompt', '').lower()
        ]

        if not matching_conversions:
            return 0.5  # Default success rate

        successful = sum(1 for conv in matching_conversions if conv.get('success', False))
        return successful / len(matching_conversions)

    def optimize_prompt(self, base_prompt, input_format, output_format):
        """Optimize prompt based on historical success rates.

        Args:
            base_prompt: Prompt to start from.
            input_format: Accepted for interface symmetry; not used in scoring.
            output_format: Accepted for interface symmetry; not used in scoring.

        Returns:
            The highest-scoring variation; on ties the earliest one in the
            list wins, so ``base_prompt`` is returned when there is no history.
        """

        # Try variations of the prompt
        variations = [
            base_prompt,
            f"{base_prompt}. Use high quality settings.",
            f"{base_prompt}. Optimize for readability.",
            f"{base_prompt}. Ensure professional appearance.",
            f"Professional formatting: {base_prompt.lower()}"
        ]

        # Score each variation
        scored_variations = [
            (variation, self.get_success_rate(variation))
            for variation in variations
        ]

        # Return best variation
        best_prompt, best_score = max(scored_variations, key=lambda x: x[1])

        print(f"Selected prompt (score: {best_score:.2f}): {best_prompt}")
        return best_prompt

    def _record_feedback(self, input_file, output_file, prompt, success, options):
        """Store one conversion outcome and persist the feedback file."""
        # Local imports keep this snippet self-contained.
        import hashlib
        from datetime import datetime

        # hash() of a str is salted per interpreter process, so it cannot
        # serve as a reproducible id; a content digest can.
        digest = hashlib.sha256(prompt.encode('utf-8')).hexdigest()[:16]
        conversion_id = f"{input_file}_{output_file}_{digest}"

        self.feedback_data[conversion_id] = {
            # str() keeps the record JSON-serialisable for Path arguments.
            'input_file': str(input_file),
            'output_file': str(output_file),
            'prompt': prompt,
            'success': success,
            'input_format': options.get('from_format'),
            'output_format': options.get('to_format'),
            'timestamp': datetime.now().isoformat()
        }

        self.save_feedback()

    def convert_with_learning(self, input_file, output_file, base_prompt, **kwargs):
        """Convert and learn from the result.

        Args:
            input_file: File to convert.
            output_file: File to write.
            base_prompt: Prompt that ``optimize_prompt`` starts from.
            **kwargs: Forwarded to ``convert_file``; ``from_format`` and
                ``to_format`` are also stored in the feedback record.

        Returns:
            Whatever ``convert_file`` returns.

        Raises:
            Exception: Any error from ``convert_file`` is re-raised after
                the attempt has been recorded as a failure.
        """

        # Optimize prompt
        optimized_prompt = self.optimize_prompt(
            base_prompt,
            kwargs.get('from_format'),
            kwargs.get('to_format')
        )

        # Perform conversion
        try:
            success = convert_file(
                input_file,
                output_file,
                prompt=optimized_prompt,
                **kwargs
            )
        except Exception:
            # A crash is a failed conversion too; record it so the history
            # is not biased towards attempts that returned normally.
            self._record_feedback(input_file, output_file, optimized_prompt, False, kwargs)
            raise

        # Record feedback
        self._record_feedback(input_file, output_file, optimized_prompt, success, kwargs)

        return success

# Usage
adaptive_converter = AdaptiveConverter()

# Run one conversion; its outcome is added to the feedback history
success = adaptive_converter.convert_with_learning(
    input_file="document.txt",
    output_file="document.pdf",
    base_prompt="Create professional document",
    from_format="text/plain",
    to_format="application/pdf",
)

Performance Optimization

Parallel Processing Strategies

import asyncio
import concurrent.futures
from pathlib import Path
from openconvert import convert_file
from openconvert.client import OpenConvertClient

class HighPerformanceConverter:
    """High-performance converter with multiple optimization strategies."""

    def __init__(self, max_workers=4, max_concurrent_agents=2):
        """Store the pool sizes.

        Args:
            max_workers: Size of the process/thread pools.
            max_concurrent_agents: Number of agent connections to open and
                the cap on conversions running at once in the agent pool.
        """
        self.max_workers = max_workers
        self.max_concurrent_agents = max_concurrent_agents

    async def convert_with_agent_pool(self, conversion_tasks):
        """Convert using multiple agent connections.

        Args:
            conversion_tasks: Sequence of dicts; each is expanded as keyword
                arguments to ``OpenConvertClient.convert_file``.

        Returns:
            One entry per task, in task order. Because results are gathered
            with ``return_exceptions=True``, a failed task contributes its
            exception object instead of raising.
        """
        agent_pool = []

        try:
            # Connect inside the try block so that agents already connected
            # are still disconnected if a later connect() fails.
            for i in range(self.max_concurrent_agents):
                client = OpenConvertClient(agent_id=f"batch-client-{i}")
                await client.connect()
                agent_pool.append(client)

            # Cap how many conversions are in flight at once
            semaphore = asyncio.Semaphore(self.max_concurrent_agents)

            async def convert_with_semaphore(task, agent):
                async with semaphore:
                    return await agent.convert_file(**task)

            # Assign tasks to agents round-robin
            tasks = [
                convert_with_semaphore(conversion_task, agent_pool[i % len(agent_pool)])
                for i, conversion_task in enumerate(conversion_tasks)
            ]

            # Execute all tasks
            results = await asyncio.gather(*tasks, return_exceptions=True)
            return results

        finally:
            # Cleanup agent connections
            for agent in agent_pool:
                await agent.disconnect()

    def convert_cpu_bound_parallel(self, file_pairs):
        """Use process pool for CPU-bound pre/post-processing.

        Args:
            file_pairs: Iterable of ``(input_file, output_file, prompt)``.

        Returns:
            A list of ``(pair, result)`` tuples in completion order; for a
            failed conversion ``result`` is the string ``"Error: <message>"``.
        """
        with concurrent.futures.ProcessPoolExecutor(max_workers=self.max_workers) as executor:
            future_to_pair = {}
            for pair in file_pairs:
                input_file, output_file, prompt = pair
                # Submit the module-level convert_file directly: a function
                # defined inside this method cannot be pickled for a worker
                # process, which made every task fail.
                future = executor.submit(convert_file, input_file, output_file, prompt=prompt)
                future_to_pair[future] = pair

            results = []
            for future in concurrent.futures.as_completed(future_to_pair):
                pair = future_to_pair[future]
                try:
                    result = future.result()
                    results.append((pair, result))
                except Exception as e:
                    results.append((pair, f"Error: {e}"))

            return results

    def convert_io_bound_parallel(self, file_pairs):
        """Use thread pool for I/O-bound operations.

        Args:
            file_pairs: Iterable of ``(input_file, output_file, prompt)``.

        Returns:
            A list of results in completion order (not input order); a
            failed conversion contributes the string ``"Error: <message>"``.
        """

        def convert_single(pair):
            input_file, output_file, prompt = pair
            return convert_file(input_file, output_file, prompt=prompt)

        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [executor.submit(convert_single, pair) for pair in file_pairs]
            results = []

            for future in concurrent.futures.as_completed(futures):
                try:
                    result = future.result()
                    results.append(result)
                except Exception as e:
                    results.append(f"Error: {e}")

            return results

# Usage
converter = HighPerformanceConverter(max_concurrent_agents=4, max_workers=8)

# Many small files: run them through the thread pool (I/O bound)
file_pairs = [
    ("file1.txt", "file1.pdf", "Quick conversion"),
    ("file2.txt", "file2.pdf", "Quick conversion"),
    # ... many more files
]
results = converter.convert_io_bound_parallel(file_pairs)

# A few large files: spread them over the agent connections instead
conversion_tasks = [
    {
        'input_file': Path(f"{stem}.txt"),
        'output_file': Path(f"{stem}.pdf"),
        'prompt': "Detailed formatting",
    }
    for stem in ("large1", "large2")
]
results = asyncio.run(converter.convert_with_agent_pool(conversion_tasks))

Monitoring and Profiling

import time
import psutil
import logging
from contextlib import contextmanager
from openconvert import convert_file

class ConversionProfiler:
    """Profile conversion performance and resource usage.

    Each profiled block appends one dict to ``self.metrics`` with the keys
    ``name``, ``duration``, ``memory_used``, ``cpu_avg`` and ``timestamp``.
    """

    def __init__(self):
        """Start with an empty metrics list."""
        self.metrics = []

    @contextmanager
    def profile_conversion(self, conversion_name):
        """Context manager to profile a conversion.

        Metrics are recorded even when the wrapped block raises.
        ``memory_used`` is the change in system-wide used memory over the
        block (not per-process), so it can be negative.

        Args:
            conversion_name: Label stored with the metrics and logged.
        """

        # Record start metrics. perf_counter() is monotonic, so the measured
        # duration is not affected by wall-clock adjustments.
        start_time = time.perf_counter()
        start_memory = psutil.virtual_memory().used
        # With interval=None, cpu_percent() reports utilisation since the
        # previous call. Calling it here resets that baseline so the reading
        # taken at the end covers exactly the profiled block.
        psutil.cpu_percent(interval=None)

        try:
            yield
        finally:
            # Record end metrics
            duration = time.perf_counter() - start_time
            end_memory = psutil.virtual_memory().used
            cpu_during_block = psutil.cpu_percent(interval=None)

            metrics = {
                'name': conversion_name,
                'duration': duration,
                'memory_used': end_memory - start_memory,
                'cpu_avg': cpu_during_block,
                'timestamp': time.time()
            }

            self.metrics.append(metrics)

            logging.info(
                "Conversion '%s': %.2fs, Memory: %.1fMB, CPU: %.1f%%",
                conversion_name,
                metrics['duration'],
                metrics['memory_used'] / 1024 / 1024,
                metrics['cpu_avg'],
            )

    def get_performance_report(self):
        """Generate performance report.

        Returns:
            ``"No conversions recorded"`` when nothing was profiled,
            otherwise a multi-line summary followed by one line per
            conversion.
        """
        if not self.metrics:
            return "No conversions recorded"

        count = len(self.metrics)
        total_time = sum(m['duration'] for m in self.metrics)
        avg_time = total_time / count
        # Largest memory delta seen across the profiled conversions.
        max_memory = max(m['memory_used'] for m in self.metrics)
        avg_cpu = sum(m['cpu_avg'] for m in self.metrics) / count

        lines = [
            "",
            "Performance Report:",
            "==================",
            f"Total conversions: {count}",
            f"Total time: {total_time:.2f}s",
            f"Average time per conversion: {avg_time:.2f}s",
            f"Peak memory usage: {max_memory/1024/1024:.1f}MB",
            f"Average CPU usage: {avg_cpu:.1f}%",
            "",
            "Individual conversions:",
        ]
        lines.extend(f"  {m['name']}: {m['duration']:.2f}s" for m in self.metrics)

        # The report ends with a newline after the last conversion line.
        return "\n".join(lines) + "\n"

# Usage
profiler = ConversionProfiler()

# (input, output) pairs: doc1.txt -> doc1.pdf, and so on
files_to_convert = [
    (f"doc{number}.txt", f"doc{number}.pdf")
    for number in (1, 2, 3)
]

for input_file, output_file in files_to_convert:
    label = f"{input_file} -> {output_file}"
    # Everything inside the with-block is timed and measured
    with profiler.profile_conversion(label):
        convert_file(input_file, output_file)

print(profiler.get_performance_report())

See Also