# Source code for langgraph_compare.create_html

import json
from pathlib import Path
import base64
import jinja2
from typing import Any, Dict, List, Optional, Union
import webbrowser
from dataclasses import dataclass



# [docs]
@dataclass
class InfrastructureDirs:
    """Directory paths holding the data of a single infrastructure."""

    # Directory that contains the JSON report files (e.g. metrics_report.json).
    reports_dir: str
    # Directory that contains the PNG images; None when no images are provided.
    images_dir: Optional[str] = None

    @classmethod
    def from_default_structure(cls, base_path: str) -> 'InfrastructureDirs':
        """Build the paths from the default layout under ``base_path``.

        The default layout is ``<base_path>/reports`` for the JSON reports
        and ``<base_path>/img`` for the images.

        Args:
            base_path: Root directory of one infrastructure.

        Returns:
            An instance whose two directories are located under ``base_path``.
        """
        base = Path(base_path)
        return cls(
            reports_dir=str(base / "reports"),
            images_dir=str(base / "img"),
        )




class _MetricsFormatter:
    """Turn raw report metrics into display-ready strings and dictionaries.

    Every formatter is a static method. The only state is the class-level
    ``_context`` (registered through :meth:`set_context`), which
    :meth:`format_rework_counts` reads to look up per-activity totals.
    """

    HTML_ARROW = " &rarr; "

    # Metric keys whose numeric values are formatted as time
    TIME_METRICS = {
        "avg_case_duration",
        "avg_graph_duration",
        "total_time",
        "processing_time"
    }

    # Report most recently passed to set_context(); shared by the whole class
    _context: Dict[str, Any] = {}

    @classmethod
    def set_context(cls, report_data: Dict):
        """Set the context (the full report) used while formatting metrics."""
        cls._context = report_data

    @staticmethod
    def format_time(seconds: float) -> str:
        """Format time in seconds to a readable string.

        Values below 0.1 s are shown in milliseconds, everything else in
        seconds; both use two decimal places.
        """
        if seconds < 0.1:
            return f"{seconds * 1000:.2f} ms"
        return f"{seconds:.2f} s"

    @staticmethod
    def format_count(value: int) -> str:
        """Format a count with thousands separators."""
        return f"{value:,}"

    @staticmethod
    def _format_durations(durations: Dict[str, float]) -> Dict[str, str]:
        """Apply :meth:`format_time` to every value of a mapping."""
        return {
            name: _MetricsFormatter.format_time(duration)
            for name, duration in durations.items()
        }

    @staticmethod
    def format_sequences(sequences: Dict[str, list]) -> Dict[str, Any]:
        """Format sequences into a readable structure.

        Each sequence becomes its step count plus a path string in which the
        steps are connected with HTML arrows.
        """
        return {
            f"Sequence {seq_id}": {
                "steps": len(steps),
                "path": _MetricsFormatter.HTML_ARROW.join(steps)
            }
            for seq_id, steps in sequences.items()
        }

    @staticmethod
    def format_sequences_with_probabilities(sequences: List) -> Dict[str, Any]:
        """Format sequences with probabilities into a readable structure.

        Each item of ``sequences`` is unpacked as
        ``(seq_id, sequence, probability)``; the probability is rendered as a
        percentage with one decimal place.
        """
        return {
            f"Sequence {seq_id}": {
                "steps": len(sequence),
                "probability": f"{probability * 100:.1f}%",
                "path": _MetricsFormatter.HTML_ARROW.join(sequence)
            }
            for seq_id, sequence, probability in sequences
        }

    @staticmethod
    def format_self_distances(distances: Dict[str, Dict[str, int]]) -> Dict[str, Any]:
        """Format self distances (steps needed to reach the same activity again)."""
        return {
            f"Sequence {seq_id}": {
                activity: f"{distance} steps"
                for activity, distance in activities.items()
            }
            for seq_id, activities in distances.items()
        }

    @staticmethod
    def format_activities_count(counts: Dict[str, int]) -> Dict[str, str]:
        """Format activity counts together with their percentage of the total.

        When the total is zero every share is reported as ``0.0%`` instead of
        dividing by zero.
        """
        total = sum(counts.values())
        formatted = {}
        for activity, count in counts.items():
            # A zero total means all counts are zero, so each share is zero
            share = (count / total) * 100 if total else 0.0
            formatted[activity] = f"{count:,} ({share:.1f}%)"
        return formatted

    @staticmethod
    def format_rework_counts(counts: Dict[str, int]) -> Union[str, Dict[str, str]]:
        """Format rework counts with their percentage of the activity's executions.

        The per-activity totals come from the ``activities_count`` entry of
        the context registered with :meth:`set_context`. Activities without a
        positive total are shown as a plain count.

        Returns:
            ``"No reworks"`` when ``counts`` is empty, otherwise a mapping of
            activity name to formatted text.
        """
        if not counts:
            return "No reworks"

        # Get activities count from context
        activities_count = _MetricsFormatter._context.get('activities_count', {})

        result = {}
        for activity, rework_count in counts.items():
            rework_text = _MetricsFormatter.format_count(rework_count)
            total_activity_count = activities_count.get(activity, 0)
            if total_activity_count > 0:
                percentage = (rework_count / total_activity_count) * 100
                result[activity] = f"{rework_text} ({percentage:.1f}% of activity)"
            else:
                result[activity] = rework_text

        return result

    @staticmethod
    def format_metric(key: str, value: Any) -> Any:
        """Format a metric based on its key and value type.

        Numeric time metrics go through :meth:`format_time`, known dictionary
        metrics and the ``sequences_with_probabilities`` list go through their
        dedicated formatter, and anything else is returned unchanged.
        """
        # First check if it's a time metric
        if key in _MetricsFormatter.TIME_METRICS and isinstance(value, (int, float)):
            return _MetricsFormatter.format_time(value)

        if isinstance(value, dict):
            formatters = {
                "sequences": _MetricsFormatter.format_sequences,
                "minimum_self_distances": _MetricsFormatter.format_self_distances,
                "activities_count": _MetricsFormatter.format_activities_count,
                "cases_durations": _MetricsFormatter._format_durations,
                "activities_mean_service_time": _MetricsFormatter._format_durations,
                "rework_counts": _MetricsFormatter.format_rework_counts
            }
            formatter = formatters.get(key)
            # Dictionaries stored under an unknown key pass through untouched
            return formatter(value) if formatter else value

        if isinstance(value, list) and key == "sequences_with_probabilities":
            return _MetricsFormatter.format_sequences_with_probabilities(value)

        return value


class _ArchitectureComparisonReport:
    """Load the reports of several infrastructures and render one HTML comparison."""

    DEFAULT_EXPERIMENTS_DIR = "experiments"
    DEFAULT_REPORTS_DIR = "comparison_reports"

    def __init__(
            self,
            infrastructures: Dict[str, Union[str, InfrastructureDirs]],
            base_dir: Optional[str] = None,
            output_dir: Optional[str] = None
    ):
        """
        Initialize the report generator.

        The output directory is created immediately if it does not exist.

        Args:
            infrastructures: Dict mapping infrastructure names to either:
                           - Directory path string (uses default structure)
                           - InfrastructureDirs object (custom directories)
            base_dir: Optional base directory for relative paths
            output_dir: Optional output directory for reports
        """
        self.base_dir = base_dir
        self.infrastructures = infrastructures
        # Allow custom output directory or use default
        self.report_dir = output_dir if output_dir else self.DEFAULT_REPORTS_DIR
        # Ensure the report directory exists
        Path(self.report_dir).mkdir(parents=True, exist_ok=True)

        # Resolved directories for every infrastructure
        self.infra_dirs = {
            infra_name: self._resolve_dirs(dir_info)
            for infra_name, dir_info in infrastructures.items()
        }

        # Storage for data from reports (JSON files)
        self.infrastructures_data = {}
        # Storage for images data (base64 encoded)
        self.images_data = {}
        # Formatter for metrics
        self.formatter = _MetricsFormatter()

    def _resolve_dirs(self, dir_info: Union[str, InfrastructureDirs]) -> InfrastructureDirs:
        """Turn one ``infrastructures`` value into an InfrastructureDirs object."""
        if isinstance(dir_info, InfrastructureDirs):
            return dir_info

        if self.base_dir:
            # An explicit base directory always wins
            base_path = Path(self.base_dir) / dir_info
        elif not Path(dir_info).parent.name:
            # Just a name (no parent directory): assume it lives under the
            # experiments directory
            base_path = Path(self.DEFAULT_EXPERIMENTS_DIR) / dir_info
        else:
            # Otherwise, use the path as-is
            base_path = Path(dir_info)
        return InfrastructureDirs.from_default_structure(str(base_path))

    def generate_report_filename(self) -> str:
        """Generate a filename for the report based on compared infrastructures."""
        # Get infrastructure names without path
        infra_names = [Path(infra).name for infra in self.infrastructures]
        # Join with 'vs' and add .html extension
        return f"{'_vs_'.join(infra_names)}.html"

    def load_data(self):
        """Read reports and images of every infrastructure into memory.

        For each infrastructure this loads the mandatory
        ``metrics_report.json``, the optional ``sequences_report.json`` and
        every ``*.png`` file of the images directory (stored base64 encoded).

        Raises:
            FileNotFoundError: If a ``metrics_report.json`` file is missing.
        """
        for infra_name, dirs in self.infra_dirs.items():
            infra_data = self.infrastructures_data[infra_name] = {}
            infra_images = self.images_data[infra_name] = {}
            reports_dir = Path(dirs.reports_dir)

            # Load metrics report data
            metrics_path = reports_dir / "metrics_report.json"
            try:
                with open(metrics_path, encoding="utf-8") as f:
                    report_data = json.load(f)
            except FileNotFoundError as err:
                raise FileNotFoundError(f"Metrics report not found at {metrics_path}") from err

            # The formatter needs the whole report to compute rework percentages
            _MetricsFormatter.set_context(report_data)
            infra_data['main_report'] = {
                key: self.formatter.format_metric(key, value)
                for key, value in report_data.items()
            }

            # Load sequences report data if available
            sequences_path = reports_dir / "sequences_report.json"
            if sequences_path.exists():
                with open(sequences_path, encoding="utf-8") as f:
                    sequences_data = json.load(f)
                # Sort by probability (third element) in descending order
                sorted_sequences = sorted(
                    sequences_data.get('sequence_probabilities', []),
                    key=lambda x: x[2],
                    reverse=True
                )
                infra_data['sequences_report'] = {
                    'start_activities': sequences_data.get('start_activities', {}),
                    'end_activities': sequences_data.get('end_activities', {}),
                    'sequence_probabilities': sorted_sequences
                }

            # Load images if directory is provided
            if dirs.images_dir and Path(dirs.images_dir).exists():
                # Sorted so the image order does not depend on the filesystem
                for img_file in sorted(Path(dirs.images_dir).glob("*.png")):
                    with open(img_file, 'rb') as f:
                        # Encode image as base64 and store in dictionary
                        infra_images[img_file.stem] = base64.b64encode(f.read()).decode('utf-8')

    def generate_report(self, open_browser: bool = True):
        """Render the HTML report, write it to disk and optionally open it.

        Args:
            open_browser: Open the generated file in the default web browser.

        Raises:
            ValueError: If no infrastructure data has been loaded.
        """
        if not self.infrastructures_data:
            raise ValueError(
                "No infrastructure data available; call load_data() with at "
                "least one infrastructure before generating a report."
            )

        # Generate the report path using the configured directory and automatic filename
        report_path = Path(self.report_dir) / self.generate_report_filename()

        # The metrics of the first infrastructure define the rows of the
        # comparison; each row lists the value of every infrastructure
        first_infra = next(iter(self.infrastructures_data))
        metrics_comparison = {
            metric: [self.infrastructures_data[infra]['main_report'].get(metric)
                     for infra in self.infrastructures_data]
            for metric in self.infrastructures_data[first_infra]['main_report']
        }

        # Create a dictionary with sequences data for each infrastructure
        sequences_data = {
            infra: data.get('sequences_report', {})
            for infra, data in self.infrastructures_data.items()
        }

        template = self.get_template()
        # Render the template with the data
        html_content = template.render(
            infrastructures_data=self.infrastructures_data,
            images_data=self.images_data,
            metrics_comparison=metrics_comparison,
            sequences_data=sequences_data
        )

        report_path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding so the output does not depend on the platform default
        with open(report_path, 'w', encoding="utf-8") as f:
            f.write(html_content)

        print(f"Report generated at {report_path}")

        if open_browser:
            # as_uri() builds a valid, percent-encoded file URI on every platform
            webbrowser.open(report_path.resolve().as_uri())

    @staticmethod
    def get_template():
        """Load the HTML template from the ``templates`` directory next to this module."""
        template_dir = Path(__file__).parent / "templates"

        # Create Jinja environment with the template directory
        env = jinja2.Environment(
            loader=jinja2.FileSystemLoader(template_dir),
            autoescape=True
        )

        return env.get_template("comparison_report.html")



# [docs]
def compare(
        infrastructures: Union[List[str], Dict[str, Union[str, InfrastructureDirs]]],
        base_dir: Optional[str] = None,
        output_dir: Optional[str] = None,
        open_browser: bool = True
) -> None:
    """
    Generate an HTML report comparing multi-agent infrastructures and, by default, open it.

    :param infrastructures: List of infrastructure names or dictionary mapping names to paths. Can be:
                          - List (or tuple) of infrastructure names for default structure
                          - Dict mapping names to directory paths or InfrastructureDirs objects
    :type infrastructures: Union[List[str], Dict[str, Union[str, InfrastructureDirs]]]
    :param base_dir: Base directory that string paths are resolved against. When omitted, bare names
                     are looked up under "experiments" and paths with a parent directory are used as-is
    :type base_dir: Optional[str]
    :param output_dir: Directory where generated reports will be saved, defaults to "comparison_reports"
    :type output_dir: Optional[str]
    :param open_browser: Whether to open the generated report in the default web browser, defaults to True
    :type open_browser: bool

    **Examples:**

    Basic usage with default directory structure::

        compare(["test_1", "test_2"])

    Using custom paths with default subdirectory structure::

        # Will use path/to/test1/reports/ and path/to/test1/img/
        compare({
            "test_1": "path/to/test1",
            "test_2": "path/to/test2"
        })

    Using fully custom directory paths::

        compare({
            "test_1": InfrastructureDirs(
                reports_dir="custom/path1/my_reports",
                images_dir="custom/path1/my_images"
            ),
            "test_2": InfrastructureDirs(
                reports_dir="custom/path2/my_reports"
            )
        })

    Save to specific output directory::

        compare(infrastructures, output_dir="my_reports")

    Generate the report without opening a browser::

        compare(["test_1", "test_2"], open_browser=False)
    """
    # A plain sequence of names maps every name to itself
    if isinstance(infrastructures, (list, tuple)):
        infrastructures = {infra: infra for infra in infrastructures}

    report_generator = _ArchitectureComparisonReport(infrastructures, base_dir, output_dir)
    report_generator.load_data()
    report_generator.generate_report(open_browser=open_browser)