Skip to content

Graph

create_truthfulness_graph

from truthfulness_evaluator.llm.workflows.graph import create_truthfulness_graph

graph = create_truthfulness_graph()

Returns a compiled LangGraph with checkpointing.

State

from truthfulness_evaluator.llm.workflows.state import WorkflowState

class WorkflowState(TypedDict):
    """Shared state passed between the graph's nodes.

    NOTE(review): this summary differs from the WorkflowState source
    excerpt in the API Reference section below (that version has an
    `extensions` field and no `root_path`/`config`) -- confirm which
    schema is current.
    """

    document: str  # raw text of the document under evaluation
    document_path: str  # path the document was loaded from
    root_path: str | None  # presumably the base dir for filesystem search -- confirm
    claims: list[Claim]  # claims extracted from the document
    current_claim_index: int  # cursor into `claims` for the verify loop
    verifications: list[VerificationResult]  # one result per verified claim
    evidence_cache: dict[str, list[Evidence]]  # gathered evidence, keyed by str
    config: dict  # serialized EvaluatorConfig (see usage example)
    final_report: TruthfulnessReport | None  # None until the report node runs

Nodes

| Node | Function | Description |
| --- | --- | --- |
| `extract_claims` | `extract_claims_node` | Extract claims from document |
| `search_evidence` | `search_evidence_node` | Search web + filesystem |
| `verify_claim` | `verify_claim_node` | Multi-model verification |
| `generate_report` | `generate_report_node` | Create final report |

Usage

Basic

# Invoke the compiled graph once with a fully-initialized state dict.
result = await graph.ainvoke({
    "document": "Python was created in 1991...",  # text to fact-check
    "document_path": "README.md",
    "root_path": None,
    "claims": [],  # filled in by the extract_claims node
    "current_claim_index": 0,
    "verifications": [],  # filled in by the verify_claim node
    "evidence_cache": {},
    "config": config.model_dump(),  # serialized EvaluatorConfig
    "final_report": None
})
# NOTE(review): this example passes "root_path" and "config" keys, while
# the WorkflowState source excerpt later on this page defines an
# "extensions" field instead -- confirm which state schema is current.

report = result["final_report"]

Note: config above refers to an EvaluatorConfig instance from truthfulness_evaluator.core.config.

With Checkpointing

# thread_id identifies the run whose checkpoints should be reused.
config = {"configurable": {"thread_id": "eval_1"}}

# Start a fresh run on this thread
result = await graph.ainvoke(input_state, config)

# Resume after interruption: passing None as input continues from the
# last checkpoint saved for this thread_id.
result = await graph.ainvoke(None, config)

Streaming

# Stream node-level updates as the graph runs: each event maps the name
# of the node that just finished to the state delta it produced.
async for event in graph.astream(
    input_state,
    config,
    stream_mode="updates"
):
    if "extract_claims" in event:
        print(f"Extracted {len(event['extract_claims']['claims'])} claims")
    elif "verify_claim" in event:
        print("Verified claim")

Human-in-the-Loop

from langgraph.types import interrupt, Command

# In verify_claim_node:
# interrupt() pauses the run at this point and surfaces the payload to
# the caller (requires a checkpointer so the run can be resumed).
human_input = interrupt({
    "claim": claim.text,
    "proposed_verdict": verification.verdict,
    "question": "Approve?"
})

# Resume:
# The resume payload becomes interrupt()'s return value inside the node.
result = await graph.ainvoke(
    Command(resume={"response": "approve"}),
    config
)

Custom Graph

Build your own workflow:

from langgraph.graph import StateGraph, START, END
from truthfulness_evaluator.llm.workflows.state import WorkflowState

# Minimal two-stage pipeline over the shared WorkflowState schema.
builder = StateGraph(WorkflowState)

# Add nodes (reusing the packaged node functions)
builder.add_node("extract", extract_claims_node)
builder.add_node("verify", verify_claim_node)

# Add edges: START -> extract -> verify -> END
builder.add_edge(START, "extract")
builder.add_edge("extract", "verify")
builder.add_edge("verify", END)

# Compile with checkpointing so runs can be interrupted and resumed
from langgraph.checkpoint.memory import MemorySaver
graph = builder.compile(checkpointer=MemorySaver())

Time Travel

# Get the state history. get_state_history() returns an iterator (newest
# checkpoint first), so materialize it: indexing the raw iterator would
# raise TypeError, and the for-loop below would exhaust it anyway.
history = list(graph.get_state_history(config))
for state in history:
    print(f"Step {state.step}: {state.values.keys()}")

# Branch from a specific point: each snapshot carries the config
# (thread_id + checkpoint_id) needed to address that checkpoint.
# NOTE(review): history is ordered newest-first, so history[3] is the
# fourth-most-recent checkpoint, not "step 3" -- confirm intended index.
past_state = history[3]
past_snapshot = graph.get_state(past_state.config)  # StateSnapshot at that checkpoint

API Reference

Graph Creation

truthfulness_evaluator.llm.workflows.graph.create_truthfulness_graph()

Create and compile the truthfulness evaluation graph.

Source code in src/truthfulness_evaluator/llm/workflows/graph.py
def create_truthfulness_graph():
    """Build the claim-verification workflow and compile it with an
    in-memory checkpointer.

    Pipeline: extract_claims -> search_evidence -> verify_claim, looping
    back to search_evidence until every claim is handled, then
    generate_report.
    """
    workflow = StateGraph(TruthfulnessState)

    # Register the four pipeline stages.
    stages = (
        ("extract_claims", extract_claims_node),
        ("search_evidence", search_evidence_node),
        ("verify_claim", verify_claim_node),
        ("generate_report", generate_report_node),
    )
    for name, node in stages:
        workflow.add_node(name, node)

    # Linear front half of the pipeline.
    workflow.add_edge(START, "extract_claims")
    workflow.add_edge("extract_claims", "search_evidence")
    workflow.add_edge("search_evidence", "verify_claim")

    # After each verification, either loop back for the next claim or
    # fall through to report generation.
    workflow.add_conditional_edges(
        "verify_claim",
        should_continue,
        {"search_evidence": "search_evidence", "generate_report": "generate_report"},
    )
    workflow.add_edge("generate_report", END)

    # In-memory checkpointing makes the run resumable within the process.
    return workflow.compile(checkpointer=MemorySaver())

State

truthfulness_evaluator.llm.workflows.state.WorkflowState

Bases: TypedDict

Unified state for all truthfulness evaluation workflows.

The core fields are used by every workflow. The extensions field provides a namespace for strategy-specific state (e.g., claim classifications for internal verification workflows).

Source code in src/truthfulness_evaluator/llm/workflows/state.py
class WorkflowState(TypedDict):
    """Unified state for all truthfulness evaluation workflows.

    The core fields are used by every workflow. The `extensions` field
    provides a namespace for strategy-specific state (e.g., claim
    classifications for internal verification workflows).
    """

    # Input
    document: str  # raw text of the document under evaluation
    document_path: str  # path the document was loaded from

    # Core pipeline state
    claims: list[Claim]  # claims extracted from the document
    current_claim_index: int  # cursor into `claims` for the verify loop
    evidence_cache: dict[str, list[Evidence]]  # gathered evidence, keyed by str (presumably claim text/id -- confirm)
    verifications: list[VerificationResult]  # one result per verified claim

    # Output
    final_report: TruthfulnessReport | None  # None until the report node runs

    # Strategy-specific state (open for extension, closed for modification)
    extensions: dict[str, Any]

Configuration

truthfulness_evaluator.core.config.EvaluatorConfig

Bases: BaseSettings

Configuration for the truthfulness evaluator.

Source code in src/truthfulness_evaluator/core/config.py
class EvaluatorConfig(BaseSettings):
    """Configuration for the truthfulness evaluator.

    Values are read from the environment (``TRUTH_``-prefixed variables
    or a ``.env`` file); the API keys additionally fall back to the
    conventional ``OPENAI_API_KEY`` / ``ANTHROPIC_API_KEY`` variables.
    """

    model_config = SettingsConfigDict(
        env_prefix="TRUTH_",
        env_file=".env",
        env_file_encoding="utf-8",
    )

    # --- Model selection --------------------------------------------------
    extraction_model: str = "gpt-4o-mini"
    verification_models: list[str] = ["gpt-4o", "claude-sonnet-4-5"]
    consensus_method: Literal["simple", "weighted", "ice"] = "weighted"
    confidence_threshold: float = 0.7

    # --- Evidence search --------------------------------------------------
    enable_web_search: bool = True
    enable_filesystem_search: bool = True
    max_evidence_items: int = 5

    # --- Iterative consensus (ICE) ----------------------------------------
    ice_max_rounds: int = 3

    # --- Report output ----------------------------------------------------
    output_format: Literal["json", "markdown"] = "json"
    include_explanations: bool = True
    include_model_votes: bool = True

    # --- Human-in-the-loop ------------------------------------------------
    enable_human_review: bool = False
    human_review_threshold: float = 0.6

    # --- Credentials (loaded from environment) ----------------------------
    openai_api_key: str = ""
    anthropic_api_key: str = ""

    def model_post_init(self, __context: Any) -> None:
        """Fallback to standard env vars if TRUTH_ prefix not used."""
        key_fallbacks = (
            ("openai_api_key", "OPENAI_API_KEY"),
            ("anthropic_api_key", "ANTHROPIC_API_KEY"),
        )
        for attr, env_var in key_fallbacks:
            # Only fill in a key the prefixed settings left empty.
            if not getattr(self, attr):
                setattr(self, attr, os.getenv(env_var, ""))

model_post_init(__context)

Fallback to standard env vars if TRUTH_ prefix not used.

Source code in src/truthfulness_evaluator/core/config.py
def model_post_init(self, __context: Any) -> None:
    """Fall back to the conventional env var names for any API key the
    TRUTH_-prefixed settings left empty."""
    key_fallbacks = (
        ("openai_api_key", "OPENAI_API_KEY"),
        ("anthropic_api_key", "ANTHROPIC_API_KEY"),
    )
    for attr, env_var in key_fallbacks:
        # Only fill in a key that was not already provided.
        if not getattr(self, attr):
            setattr(self, attr, os.getenv(env_var, ""))