Skip to content

dspy.experimental.Document

dspy.experimental.Document

Bases: Type

A document type for providing content that can be cited by language models.

This type represents documents that can be passed to language models for citation-enabled responses, particularly useful with Anthropic's Citations API. Documents include the content and metadata that helps the LM understand and reference the source material.

Attributes:

Name Type Description
data str

The text content of the document

title str | None

Optional title for the document (used in citations)

media_type Literal['text/plain', 'application/pdf']

MIME type of the document content (defaults to "text/plain")

context str | None

Optional context information about the document

Example
import dspy
from dspy.signatures import Signature
from dspy.experimental import Document, Citations

class AnswerWithSources(Signature):
    '''Answer questions using provided documents with citations.'''

    # Inputs: the citable documents and the question to answer.
    documents: list[Document] = dspy.InputField()
    question: str = dspy.InputField()

    # Outputs: the answer text and the citations attached to it.
    answer: str = dspy.OutputField()
    citations: Citations = dspy.OutputField()


# Build each citable document on its own so it can be referred to by name.
astronomy_doc = Document(
    data="The Earth orbits the Sun in an elliptical path.",
    title="Basic Astronomy Facts",
)
physics_doc = Document(
    data="Water boils at 100°C at standard atmospheric pressure.",
    title="Physics Fundamentals",
)

# Run the signature against a citation-supporting model, passed per call.
citation_lm = dspy.LM("anthropic/claude-opus-4-1-20250805")
answer_with_sources = dspy.Predict(AnswerWithSources)
prediction = answer_with_sources(
    documents=[astronomy_doc, physics_doc],
    question="What temperature does water boil?",
    lm=citation_lm,
)
print(prediction.citations)

Functions

description() -> str classmethod

Description of the document type for use in prompts.

Source code in dspy/adapters/types/document.py
@classmethod
def description(cls) -> str:
    """Return the text that describes this document type inside prompts."""
    # Kept as separate sentences and joined with a single space, so the
    # resulting prompt text is one continuous line.
    sentences = (
        "A document containing text content that can be referenced and cited.",
        "Include the full text content and optionally a title for proper referencing.",
    )
    return " ".join(sentences)

extract_custom_type_from_annotation(annotation) classmethod

Extract all custom types from the annotation.

This is used to extract all custom types from the annotation of a field, where the annotation can have an arbitrary level of nesting. For example, we detect that Tool is in list[dict[str, Tool]].

Source code in dspy/adapters/types/base_type.py
@classmethod
def extract_custom_type_from_annotation(cls, annotation):
    """Collect every custom type (subclass of `cls`) found in an annotation.

    The annotation may be nested to any depth; for instance `Tool` is found
    inside `list[dict[str, Tool]]`.

    Args:
        annotation: A type annotation, possibly a parameterized generic.

    Returns:
        A list of the matching types, in the order they are encountered.
        Empty when nothing matches.
    """
    # A parameterized generic such as `list[dict[str, Event]]` satisfies
    # `isinstance(annotation, type)` on python 3.10 (but not on 3.11), and
    # `issubclass` then raises TypeError. Treat that as "not a direct match"
    # so python 3.10 users are supported.
    try:
        is_direct_match = isinstance(annotation, type) and issubclass(annotation, cls)
    except TypeError:
        is_direct_match = False

    if is_direct_match:
        return [annotation]

    # Anything without an origin is a leaf with no type arguments to inspect.
    if get_origin(annotation) is None:
        return []

    # Flatten the matches found in each type argument, depth-first.
    return [
        match
        for type_arg in get_args(annotation)
        for match in cls.extract_custom_type_from_annotation(type_arg)
    ]

format() -> list[dict[str, Any]]

Format document for LM consumption.

Returns:

Type Description
list[dict[str, Any]]

A list containing the document block in the format expected by citation-enabled language models.

Source code in dspy/adapters/types/document.py
def format(self) -> list[dict[str, Any]]:
    """Build the document content block handed to the language model.

    Returns:
        A single-element list holding the document block, with citations
        enabled. `title` and `context` are included only when they are set
        to a non-empty value.
    """
    # NOTE(review): the source "type" is always "text", even though
    # `media_type` may be "application/pdf" — confirm the target API
    # accepts that combination.
    source = {
        "type": "text",
        "media_type": self.media_type,
        "data": self.data,
    }
    block = {
        "type": "document",
        "source": source,
        "citations": {"enabled": True},
    }

    # Optional metadata is copied over only when truthy, so None and ""
    # are both left out of the block.
    for optional_key in ("title", "context"):
        value = getattr(self, optional_key)
        if value:
            block[optional_key] = value

    return [block]

serialize_model()

Source code in dspy/adapters/types/base_type.py
@pydantic.model_serializer()
def serialize_model(self):
    """Serialize the model from the output of `self.format()`.

    A list result is rendered into a string wrapped between
    `CUSTOM_TYPE_START_IDENTIFIER` and `CUSTOM_TYPE_END_IDENTIFIER`; any
    other result is returned unchanged.
    """
    payload = self.format()
    if not isinstance(payload, list):
        return payload
    return f"{CUSTOM_TYPE_START_IDENTIFIER}{payload}{CUSTOM_TYPE_END_IDENTIFIER}"

validate_input(data: Any) classmethod

Source code in dspy/adapters/types/document.py
@pydantic.model_validator(mode="before")
@classmethod
def validate_input(cls, data: Any):
    """Normalize raw input before the model's fields are validated.

    Args:
        data: An existing instance of this class, a dict of field values,
            or a plain string that is used as the `data` field.

    Returns:
        The instance or dict unchanged, or `{"data": data}` for a string.

    Raises:
        ValueError: If `data` is none of the accepted kinds.
    """
    # Existing instances and dicts are already in a shape that can be
    # passed through as they are.
    if isinstance(data, (cls, dict)):
        return data

    # A bare string is shorthand for a document with only its content set.
    if isinstance(data, str):
        return {"data": data}

    raise ValueError(f"Received invalid value for `Document`: {data}")

:::