dspy.LM

dspy.LM(model: str, model_type: Literal['chat', 'text', 'responses'] = 'chat', temperature: float = 0.0, max_tokens: int = 4000, cache: bool = True, callbacks: list[BaseCallback] | None = None, num_retries: int = 3, provider: Provider | None = None, finetuning_model: str | None = None, launch_kwargs: dict[str, Any] | None = None, train_kwargs: dict[str, Any] | None = None, use_developer_role: bool = False, **kwargs)

Bases: BaseLM

A language model supporting chat or text completion requests for use with DSPy modules.

Create a new language model instance for use with DSPy modules and programs.

Parameters:

model (str, required)
    The model to use. This should be a string of the form "llm_provider/llm_name" supported by LiteLLM. For example, "openai/gpt-4o".

model_type (Literal['chat', 'text', 'responses'], default 'chat')
    The type of the model: "chat", "text", or "responses".

temperature (float, default 0.0)
    The sampling temperature to use when generating responses.

max_tokens (int, default 4000)
    The maximum number of tokens to generate per response.

cache (bool, default True)
    Whether to cache model responses for reuse, improving performance and reducing costs.

callbacks (list[BaseCallback] | None, default None)
    A list of callback functions to run before and after each request.

num_retries (int, default 3)
    The number of times to retry a request if it fails transiently due to network errors, rate limiting, etc. Requests are retried with exponential backoff.

provider (Provider | None, default None)
    The provider to use. If not specified, the provider is inferred from the model.

finetuning_model (str | None, default None)
    The model to finetune. For some providers, the models available for finetuning differ from the models available for inference.

launch_kwargs (dict[str, Any] | None, default None)
    Default keyword arguments used when launching the model through the provider (see launch() and kill()).

train_kwargs (dict[str, Any] | None, default None)
    Default training arguments used by finetune() when none are passed explicitly.

use_developer_role (bool, default False)
    Whether to send system messages with the "developer" role instead of "system" when model_type is "responses".

rollout_id (int | None, passed via **kwargs; default None)
    Optional integer used to differentiate cache entries for otherwise identical requests. Different values bypass DSPy's caches while still caching future calls with the same inputs and rollout ID. Note that rollout_id only affects generation when temperature is non-zero. This argument is stripped before sending requests to the provider.
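
For example, a minimal sketch of constructing an LM and making it the default for DSPy programs; the model names and settings below are illustrative, not prescriptive:

import dspy

# Any LiteLLM-supported "provider/model" string works here.
lm = dspy.LM("openai/gpt-4o-mini", temperature=0.7, max_tokens=2000, cache=True)
dspy.configure(lm=lm)  # make this LM the default for DSPy modules

# Reasoning models (o1/o3/o4/gpt-5 families) must be created with temperature=1.0
# and max_tokens >= 16000, per the check in __init__ below.
reasoning_lm = dspy.LM("openai/gpt-5", temperature=1.0, max_tokens=16000)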
Source code in dspy/clients/lm.py
def __init__(
    self,
    model: str,
    model_type: Literal["chat", "text", "responses"] = "chat",
    temperature: float = 0.0,
    max_tokens: int = 4000,
    cache: bool = True,
    callbacks: list[BaseCallback] | None = None,
    num_retries: int = 3,
    provider: Provider | None = None,
    finetuning_model: str | None = None,
    launch_kwargs: dict[str, Any] | None = None,
    train_kwargs: dict[str, Any] | None = None,
    use_developer_role: bool = False,
    **kwargs,
):
    """
    Create a new language model instance for use with DSPy modules and programs.

    Args:
        model: The model to use. This should be a string of the form ``"llm_provider/llm_name"``
               supported by LiteLLM. For example, ``"openai/gpt-4o"``.
        model_type: The type of the model, either ``"chat"`` or ``"text"``.
        temperature: The sampling temperature to use when generating responses.
        max_tokens: The maximum number of tokens to generate per response.
        cache: Whether to cache the model responses for reuse to improve performance
               and reduce costs.
        callbacks: A list of callback functions to run before and after each request.
        num_retries: The number of times to retry a request if it fails transiently due to
                     network error, rate limiting, etc. Requests are retried with exponential
                     backoff.
        provider: The provider to use. If not specified, the provider will be inferred from the model.
        finetuning_model: The model to finetune. In some providers, the models available for finetuning is different
            from the models available for inference.
        rollout_id: Optional integer used to differentiate cache entries for otherwise
            identical requests. Different values bypass DSPy's caches while still caching
            future calls with the same inputs and rollout ID. Note that `rollout_id`
            only affects generation when `temperature` is non-zero. This argument is
            stripped before sending requests to the provider.
    """
    # Remember to update LM.copy() if you modify the constructor!
    self.model = model
    self.model_type = model_type
    self.cache = cache
    self.provider = provider or self.infer_provider()
    self.callbacks = callbacks or []
    self.history = []
    self.num_retries = num_retries
    self.finetuning_model = finetuning_model
    self.launch_kwargs = launch_kwargs or {}
    self.train_kwargs = train_kwargs or {}
    self.use_developer_role = use_developer_role
    self._warned_zero_temp_rollout = False

    # Handle model-specific configuration for different model families
    model_family = model.split("/")[-1].lower() if "/" in model else model.lower()

    # Recognize OpenAI reasoning models (o1, o3, o4, gpt-5 family)
    model_pattern = re.match(r"^(?:o[1345]|gpt-5)(?:-(?:mini|nano))?", model_family)

    if model_pattern:
        if max_tokens < 16000 or temperature != 1.0:
            raise ValueError(
                "OpenAI's reasoning models require passing temperature=1.0 and max_tokens >= 16000 to "
                "`dspy.LM(...)`, e.g., dspy.LM('openai/gpt-5', temperature=1.0, max_tokens=16000)"
            )
        self.kwargs = dict(temperature=temperature, max_completion_tokens=max_tokens, **kwargs)
        if self.kwargs.get("rollout_id") is None:
            self.kwargs.pop("rollout_id", None)
    else:
        self.kwargs = dict(temperature=temperature, max_tokens=max_tokens, **kwargs)
        if self.kwargs.get("rollout_id") is None:
            self.kwargs.pop("rollout_id", None)

    self._warn_zero_temp_rollout(self.kwargs.get("temperature"), self.kwargs.get("rollout_id"))

Functions

__call__(prompt=None, messages=None, **kwargs)

Source code in dspy/clients/base_lm.py
@with_callbacks
def __call__(self, prompt=None, messages=None, **kwargs):
    response = self.forward(prompt=prompt, messages=messages, **kwargs)
    outputs = self._process_lm_response(response, prompt, messages, **kwargs)

    return outputs
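
A brief usage sketch (the model name is illustrative): calling the LM directly accepts either a raw prompt string or a list of chat messages and returns a list of completion strings.

import dspy

lm = dspy.LM("openai/gpt-4o-mini")
outputs = lm("Write a one-sentence summary of DSPy.")            # prompt form
outputs = lm(messages=[{"role": "user", "content": "Hello!"}])   # messages form
print(outputs[0])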

acall(prompt=None, messages=None, **kwargs) async

Source code in dspy/clients/base_lm.py
@with_callbacks
async def acall(self, prompt=None, messages=None, **kwargs):
    response = await self.aforward(prompt=prompt, messages=messages, **kwargs)
    outputs = self._process_lm_response(response, prompt, messages, **kwargs)
    return outputs
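
An async sketch under the same assumptions: acall mirrors __call__ but awaits the underlying request, so it can be used inside async applications.

import asyncio
import dspy

async def main():
    lm = dspy.LM("openai/gpt-4o-mini")
    outputs = await lm.acall("Name three prime numbers.")
    print(outputs[0])

asyncio.run(main())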

aforward(prompt=None, messages=None, **kwargs) async

Source code in dspy/clients/lm.py
async def aforward(self, prompt=None, messages=None, **kwargs):
    # Build the request.
    kwargs = dict(kwargs)
    cache = kwargs.pop("cache", self.cache)

    messages = messages or [{"role": "user", "content": prompt}]
    if self.use_developer_role and self.model_type == "responses":
        messages = [
            {**m, "role": "developer"} if m.get("role") == "system" else m
            for m in messages
        ]
    kwargs = {**self.kwargs, **kwargs}
    self._warn_zero_temp_rollout(kwargs.get("temperature"), kwargs.get("rollout_id"))
    if kwargs.get("rollout_id") is None:
        kwargs.pop("rollout_id", None)

    if self.model_type == "chat":
        completion = alitellm_completion
    elif self.model_type == "text":
        completion = alitellm_text_completion
    elif self.model_type == "responses":
        completion = alitellm_responses_completion
    completion, litellm_cache_args = self._get_cached_completion_fn(completion, cache)

    results = await completion(
        request=dict(model=self.model, messages=messages, **kwargs),
        num_retries=self.num_retries,
        cache=litellm_cache_args,
    )

    self._check_truncation(results)

    if not getattr(results, "cache_hit", False) and dspy.settings.usage_tracker and hasattr(results, "usage"):
        settings.usage_tracker.add_usage(self.model, dict(results.usage))
    return results

copy(**kwargs)

Returns a copy of the language model with possibly updated parameters.

Any provided keyword arguments update the corresponding attributes or LM kwargs of the copy. For example, lm.copy(rollout_id=1, temperature=1.0) returns an LM whose requests use a different rollout ID at non-zero temperature to bypass cache collisions.

Source code in dspy/clients/base_lm.py
def copy(self, **kwargs):
    """Returns a copy of the language model with possibly updated parameters.

    Any provided keyword arguments update the corresponding attributes or LM kwargs of
    the copy. For example, ``lm.copy(rollout_id=1, temperature=1.0)`` returns an LM whose
    requests use a different rollout ID at non-zero temperature to bypass cache collisions.
    """

    import copy

    new_instance = copy.deepcopy(self)
    new_instance.history = []

    for key, value in kwargs.items():
        if hasattr(self, key):
            setattr(new_instance, key, value)
        if (key in self.kwargs) or (not hasattr(self, key)):
            if value is None:
                new_instance.kwargs.pop(key, None)
            else:
                new_instance.kwargs[key] = value
    if hasattr(new_instance, "_warned_zero_temp_rollout"):
        new_instance._warned_zero_temp_rollout = False

    return new_instance
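
A sketch of using copy with rollout_id to collect several diverse generations for the same input at non-zero temperature, so each call bypasses the others' cache entries (model name illustrative):

import dspy

base = dspy.LM("openai/gpt-4o-mini", temperature=1.0)
# Each rollout_id creates a distinct cache entry for the same prompt.
samples = [base.copy(rollout_id=i)("Give me a startup idea.")[0] for i in range(3)]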

dump_state()

Source code in dspy/clients/lm.py
def dump_state(self):
    state_keys = [
        "model",
        "model_type",
        "cache",
        "num_retries",
        "finetuning_model",
        "launch_kwargs",
        "train_kwargs",
    ]
    return {key: getattr(self, key) for key in state_keys} | self.kwargs

finetune(train_data: list[dict[str, Any]], train_data_format: TrainDataFormat | None, train_kwargs: dict[str, Any] | None = None) -> TrainingJob

Source code in dspy/clients/lm.py
def finetune(
    self,
    train_data: list[dict[str, Any]],
    train_data_format: TrainDataFormat | None,
    train_kwargs: dict[str, Any] | None = None,
) -> TrainingJob:
    from dspy import settings as settings

    if not self.provider.finetunable:
        raise ValueError(
            f"Provider {self.provider} does not support fine-tuning, please specify your provider by explicitly "
            "setting `provider` when creating the `dspy.LM` instance. For example, "
            "`dspy.LM('openai/gpt-4.1-mini-2025-04-14', provider=dspy.OpenAIProvider())`."
        )

    def thread_function_wrapper():
        return self._run_finetune_job(job)

    thread = threading.Thread(target=thread_function_wrapper)
    train_kwargs = train_kwargs or self.train_kwargs
    model_to_finetune = self.finetuning_model or self.model
    job = self.provider.TrainingJob(
        thread=thread,
        model=model_to_finetune,
        train_data=train_data,
        train_data_format=train_data_format,
        train_kwargs=train_kwargs,
    )
    thread.start()

    return job

forward(prompt=None, messages=None, **kwargs)

Source code in dspy/clients/lm.py
def forward(self, prompt=None, messages=None, **kwargs):
    # Build the request.
    kwargs = dict(kwargs)
    cache = kwargs.pop("cache", self.cache)

    messages = messages or [{"role": "user", "content": prompt}]
    if self.use_developer_role and self.model_type == "responses":
        messages = [
            {**m, "role": "developer"} if m.get("role") == "system" else m
            for m in messages
        ]
    kwargs = {**self.kwargs, **kwargs}
    self._warn_zero_temp_rollout(kwargs.get("temperature"), kwargs.get("rollout_id"))
    if kwargs.get("rollout_id") is None:
        kwargs.pop("rollout_id", None)

    if self.model_type == "chat":
        completion = litellm_completion
    elif self.model_type == "text":
        completion = litellm_text_completion
    elif self.model_type == "responses":
        completion = litellm_responses_completion
    completion, litellm_cache_args = self._get_cached_completion_fn(completion, cache)

    results = completion(
        request=dict(model=self.model, messages=messages, **kwargs),
        num_retries=self.num_retries,
        cache=litellm_cache_args,
    )

    self._check_truncation(results)

    if not getattr(results, "cache_hit", False) and dspy.settings.usage_tracker and hasattr(results, "usage"):
        settings.usage_tracker.add_usage(self.model, dict(results.usage))
    return results

infer_provider() -> Provider

Source code in dspy/clients/lm.py
def infer_provider(self) -> Provider:
    if OpenAIProvider.is_provider_model(self.model):
        return OpenAIProvider()
    return Provider()

inspect_history(n: int = 1)

Source code in dspy/clients/base_lm.py
def inspect_history(self, n: int = 1):
    return pretty_print_history(self.history, n)
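
For example, assuming some calls have already gone through this LM instance:

import dspy

lm = dspy.LM("openai/gpt-4o-mini")
lm("What is 2 + 2?")
lm.inspect_history(n=1)  # renders the most recent request/response pair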

kill(launch_kwargs: dict[str, Any] | None = None)

Source code in dspy/clients/lm.py
def kill(self, launch_kwargs: dict[str, Any] | None = None):
    self.provider.kill(self, launch_kwargs)

launch(launch_kwargs: dict[str, Any] | None = None)

Source code in dspy/clients/lm.py
def launch(self, launch_kwargs: dict[str, Any] | None = None):
    self.provider.launch(self, launch_kwargs)

reinforce(train_kwargs) -> ReinforceJob

Source code in dspy/clients/lm.py
def reinforce(self, train_kwargs) -> ReinforceJob:
    # TODO(GRPO Team): Should we return an initialized job here?
    from dspy import settings as settings

    err = f"Provider {self.provider} does not implement the reinforcement learning interface."
    assert self.provider.reinforceable, err

    job = self.provider.ReinforceJob(lm=self, train_kwargs=train_kwargs)
    job.initialize()
    return job

update_history(entry)

Source code in dspy/clients/base_lm.py
def update_history(self, entry):
    if settings.disable_history:
        return

    # Global LM history
    if len(GLOBAL_HISTORY) >= MAX_HISTORY_SIZE:
        GLOBAL_HISTORY.pop(0)

    GLOBAL_HISTORY.append(entry)

    if settings.max_history_size == 0:
        return

    # dspy.LM.history
    if len(self.history) >= settings.max_history_size:
        self.history.pop(0)

    self.history.append(entry)

    # Per-module history
    caller_modules = settings.caller_modules or []
    for module in caller_modules:
        if len(module.history) >= settings.max_history_size:
            module.history.pop(0)
        module.history.append(entry)
