dspy.LM

dspy.LM(model: str, model_type: Literal['chat', 'text'] = 'chat', temperature: float = 0.0, max_tokens: int = 1000, cache: bool = True, cache_in_memory: bool = True, callbacks: Optional[List[BaseCallback]] = None, num_retries: int = 8, provider=None, finetuning_model: Optional[str] = None, launch_kwargs: Optional[dict[str, Any]] = None, **kwargs)

Bases: BaseLM

A language model supporting chat or text completion requests for use with DSPy modules.

Create a new language model instance for use with DSPy modules and programs.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `model` | `str` | The model to use, given as a string of the form `"llm_provider/llm_name"` supported by LiteLLM. For example, `"openai/gpt-4o"`. | *required* |
| `model_type` | `Literal['chat', 'text']` | The type of the model, either `"chat"` or `"text"`. | `'chat'` |
| `temperature` | `float` | The sampling temperature to use when generating responses. | `0.0` |
| `max_tokens` | `int` | The maximum number of tokens to generate per response. | `1000` |
| `cache` | `bool` | Whether to cache model responses for reuse, improving performance and reducing costs. | `True` |
| `cache_in_memory` | `bool` | Whether to enable an additional in-memory LRU cache on top of the disk cache. | `True` |
| `callbacks` | `Optional[List[BaseCallback]]` | A list of callback functions to run before and after each request. | `None` |
| `num_retries` | `int` | The number of times to retry a request that fails transiently due to network errors, rate limiting, etc. Retries use exponential backoff. | `8` |
| `provider` | | The provider to use. If not specified, the provider is inferred from the model. | `None` |
| `finetuning_model` | `Optional[str]` | The model to finetune. For some providers, the models available for finetuning differ from the models available for inference. | `None` |
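For reference, a minimal construction sketch (the model name, keyword overrides, and credential setup are illustrative, not requirements):

```python
import dspy

# Build an LM from a LiteLLM-style "provider/model" string. Extra keyword arguments
# (e.g. api_key, api_base) are stored and forwarded to LiteLLM on every request.
lm = dspy.LM("openai/gpt-4o", temperature=0.0, max_tokens=1000, cache=True)

# Register it as the default LM for DSPy modules and programs.
dspy.configure(lm=lm)
```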
Source code in dspy/clients/lm.py
def __init__(
    self,
    model: str,
    model_type: Literal["chat", "text"] = "chat",
    temperature: float = 0.0,
    max_tokens: int = 1000,
    cache: bool = True,
    cache_in_memory: bool = True,
    callbacks: Optional[List[BaseCallback]] = None,
    num_retries: int = 8,
    provider=None,
    finetuning_model: Optional[str] = None,
    launch_kwargs: Optional[dict[str, Any]] = None,
    **kwargs,
):
    """
    Create a new language model instance for use with DSPy modules and programs.

    Args:
        model: The model to use. This should be a string of the form ``"llm_provider/llm_name"``
               supported by LiteLLM. For example, ``"openai/gpt-4o"``.
        model_type: The type of the model, either ``"chat"`` or ``"text"``.
        temperature: The sampling temperature to use when generating responses.
        max_tokens: The maximum number of tokens to generate per response.
        cache: Whether to cache the model responses for reuse to improve performance
               and reduce costs.
        cache_in_memory: Whether to enable an additional in-memory LRU cache on top of the disk cache.
        callbacks: A list of callback functions to run before and after each request.
        num_retries: The number of times to retry a request if it fails transiently due to
                     network error, rate limiting, etc. Requests are retried with exponential
                     backoff.
        provider: The provider to use. If not specified, the provider will be inferred from the model.
        finetuning_model: The model to finetune. For some providers, the models available for finetuning
            differ from the models available for inference.
    """
    # Remember to update LM.copy() if you modify the constructor!
    self.model = model
    self.model_type = model_type
    self.cache = cache
    self.cache_in_memory = cache_in_memory
    self.provider = provider or self.infer_provider()
    self.callbacks = callbacks or []
    self.history = []
    self.num_retries = num_retries
    self.finetuning_model = finetuning_model
    self.launch_kwargs = launch_kwargs

    # Handle model-specific configuration for different model families
    model_family = model.split("/")[-1].lower() if "/" in model else model.lower()

    # Match pattern: o[1,3] at the start, optionally followed by -mini and anything else
    model_pattern = re.match(r"^o([13])(?:-mini)?", model_family)

    if model_pattern:
        # Handle OpenAI reasoning models (o1, o3)
        assert (
            max_tokens >= 5000 and temperature == 1.0
        ), "OpenAI's reasoning models require passing temperature=1.0 and max_tokens >= 5000 to `dspy.LM(...)`"
        self.kwargs = dict(temperature=temperature, max_completion_tokens=max_tokens, **kwargs)
    else:
        self.kwargs = dict(temperature=temperature, max_tokens=max_tokens, **kwargs)
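As the constructor shows, model names matching the o1/o3 families are treated as OpenAI reasoning models: construction asserts `temperature=1.0` and `max_tokens >= 5000`, and the token budget is forwarded as `max_completion_tokens` instead of `max_tokens`. A short sketch of both paths (model names are examples only):

```python
# Standard chat model: the defaults (temperature=0.0, max_tokens=1000) are accepted.
gpt4o = dspy.LM("openai/gpt-4o")

# Reasoning model (o1/o3 family): the assertion above requires these settings.
o1_mini = dspy.LM("openai/o1-mini", temperature=1.0, max_tokens=5000)
```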

Functions

__call__(prompt=None, messages=None, **kwargs)

Source code in dspy/clients/lm.py
@with_callbacks
def __call__(self, prompt=None, messages=None, **kwargs):
    # Build the request.
    cache = kwargs.pop("cache", self.cache)
    # Disabling the cache also disables the in-memory cache.
    cache_in_memory = cache and kwargs.pop("cache_in_memory", self.cache_in_memory)
    messages = messages or [{"role": "user", "content": prompt}]
    kwargs = {**self.kwargs, **kwargs}

    # Make the request and handle LRU & disk caching.
    if cache_in_memory:
        completion = cached_litellm_completion if self.model_type == "chat" else cached_litellm_text_completion

        response = completion(
            request=dict(model=self.model, messages=messages, **kwargs),
            num_retries=self.num_retries,
        )
    else:
        completion = litellm_completion if self.model_type == "chat" else litellm_text_completion

        response = completion(
            request=dict(model=self.model, messages=messages, **kwargs),
            num_retries=self.num_retries,
            # only leverage LiteLLM cache in this case
            cache={"no-cache": not cache, "no-store": not cache},
        )

    if kwargs.get("logprobs"):
        outputs = [
            {
                "text": c.message.content if hasattr(c, "message") else c["text"],
                "logprobs": c.logprobs if hasattr(c, "logprobs") else c["logprobs"],
            }
            for c in response["choices"]
        ]
    else:
        outputs = [c.message.content if hasattr(c, "message") else c["text"] for c in response["choices"]]

    if dspy.settings.disable_history:
        return outputs

    # Log the request, with API keys removed; `cost` is None on cache hits.
    kwargs = {k: v for k, v in kwargs.items() if not k.startswith("api_")}
    entry = dict(prompt=prompt, messages=messages, kwargs=kwargs, response=response)
    entry = dict(**entry, outputs=outputs, usage=dict(response["usage"]))
    entry = dict(**entry, cost=response.get("_hidden_params", {}).get("response_cost"))
    entry = dict(
        **entry,
        timestamp=datetime.now().isoformat(),
        uuid=str(uuid.uuid4()),
        model=self.model,
        response_model=response["model"],
        model_type=self.model_type,
    )
    self.history.append(entry)
    self.update_global_history(entry)

    return outputs
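Calling the instance directly issues a request and returns a list of completion strings, or a list of dicts with `"text"` and `"logprobs"` when log probabilities are requested. A minimal sketch; the prompts and overrides are illustrative:

```python
lm = dspy.LM("openai/gpt-4o")

# A plain prompt is wrapped into a single user message.
outputs = lm("Say hello in one word.")  # -> list of completion strings

# Chat-style messages, with per-call overrides of the stored kwargs.
outputs = lm(
    messages=[{"role": "user", "content": "Say hello in one word."}],
    temperature=0.7,
    cache=False,  # bypass both the in-memory LRU cache and the LiteLLM cache for this call
)
```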

copy(**kwargs)

Returns a copy of the language model with possibly updated parameters.

Source code in dspy/clients/lm.py
def copy(self, **kwargs):
    """Returns a copy of the language model with possibly updated parameters."""

    import copy

    new_instance = copy.deepcopy(self)
    new_instance.history = []

    for key, value in kwargs.items():
        if hasattr(self, key):
            setattr(new_instance, key, value)
        if (key in self.kwargs) or (not hasattr(self, key)):
            new_instance.kwargs[key] = value

    return new_instance
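`copy` is useful for deriving variants of a configured LM: constructor attributes are updated on the copy, generation settings stored in `self.kwargs` (such as `temperature` or `max_tokens`) are updated there, and the copy starts with an empty history. A small sketch:

```python
base = dspy.LM("openai/gpt-4o", temperature=0.0)

# Same model and provider, new sampling settings, fresh history.
creative = base.copy(temperature=0.9, max_tokens=2000)
```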

finetune(train_data: List[Dict[str, Any]], train_kwargs: Optional[Dict[str, Any]] = None, data_format: Optional[DataFormat] = None) -> TrainingJob

Source code in dspy/clients/lm.py
def finetune(
    self,
    train_data: List[Dict[str, Any]],
    train_kwargs: Optional[Dict[str, Any]] = None,
    data_format: Optional[DataFormat] = None,
) -> TrainingJob:
    from dspy import settings as settings

    err = "Fine-tuning is an experimental feature."
    err += " Set `dspy.settings.experimental` to `True` to use it."
    assert settings.experimental, err

    err = f"Provider {self.provider} does not support fine-tuning."
    assert self.provider.finetunable, err

    # Perform data validation before starting the thread to fail early
    train_kwargs = train_kwargs or {}
    if not data_format:
        adapter = self.infer_adapter()
        data_format = infer_data_format(adapter)
    validate_data_format(data=train_data, data_format=data_format)

    # TODO(PR): We can quickly add caching, but doing so requires
    # adding functions that just call other functions as we had in the last
    # iteration, unless people have other ideas.
    def thread_function_wrapper():
        return self._run_finetune_job(job)

    thread = threading.Thread(target=thread_function_wrapper)
    model_to_finetune = self.finetuning_model or self.model
    job = self.provider.TrainingJob(
        thread=thread,
        model=model_to_finetune,
        train_data=train_data,
        train_kwargs=train_kwargs,
        data_format=data_format,
    )
    thread.start()

    return job
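Fine-tuning is gated behind the experimental flag and requires a provider with `finetunable=True`; training runs on a background thread owned by the returned `TrainingJob`. A hedged sketch, assuming chat-format training data and an OpenAI fine-tunable snapshot (both are illustrative, and how you wait on or consume the job depends on the provider):

```python
import dspy

dspy.settings.configure(experimental=True)  # fine-tuning asserts this flag is set

lm = dspy.LM("openai/gpt-4o-mini-2024-07-18")  # illustrative fine-tunable model

# Illustrative chat-format training data; the expected schema depends on the
# inferred (or explicitly passed) data format.
train_data = [
    {
        "messages": [
            {"role": "user", "content": "2 + 2 = ?"},
            {"role": "assistant", "content": "4"},
        ]
    },
]

job = lm.finetune(train_data=train_data)  # starts training in a background thread
```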

infer_adapter() -> Adapter

Source code in dspy/clients/lm.py
def infer_adapter(self) -> Adapter:
    import dspy

    if dspy.settings.adapter:
        return dspy.settings.adapter

    model_type_to_adapter = {
        "chat": dspy.ChatAdapter(),
    }
    model_type = self.model_type
    return model_type_to_adapter[model_type]
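Adapter inference honors a globally configured adapter first and otherwise maps the model type to a default; note that only `"chat"` is mapped here, so a `"text"` model type without `dspy.settings.adapter` set would raise a `KeyError`. For example:

```python
lm = dspy.LM("openai/gpt-4o")
adapter = lm.infer_adapter()  # a dspy.ChatAdapter() unless dspy.settings.adapter is set
```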

infer_provider() -> Provider

Source code in dspy/clients/lm.py
def infer_provider(self) -> Provider:
    if OpenAIProvider.is_provider_model(self.model):
        return OpenAIProvider()
    # TODO(PR): Keeping this function here will require us to import all
    # providers in this file. Is this okay?
    return Provider()

kill(launch_kwargs: Optional[Dict[str, Any]] = None)

Source code in dspy/clients/lm.py
def kill(self, launch_kwargs: Optional[Dict[str, Any]] = None):
    launch_kwargs = launch_kwargs or self.launch_kwargs
    self.provider.kill(self, launch_kwargs)

launch(launch_kwargs: Optional[Dict[str, Any]] = None)

Source code in dspy/clients/lm.py
def launch(self, launch_kwargs: Optional[Dict[str, Any]] = None):
    launch_kwargs = launch_kwargs or self.launch_kwargs
    self.provider.launch(self, launch_kwargs)
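Both `launch` and `kill` delegate to the provider, so they only have an effect for providers that actually manage a model deployment (for hosted APIs they are effectively no-ops); `launch_kwargs` passed at construction act as the defaults. A hedged sketch in which the model name and kwargs are assumptions for a locally served model:

```python
# Illustrative: a provider-managed local deployment; model name and port are assumptions.
lm = dspy.LM("hosted_vllm/meta-llama/Llama-3.1-8B-Instruct", launch_kwargs={"port": 7501})

lm.launch()  # ask the provider to start serving the model
try:
    print(lm("Hello!"))
finally:
    lm.kill()  # ask the provider to tear the deployment down
```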