Skip to content


dspy.LM(model: str, model_type: Literal['chat', 'text'] = 'chat', temperature: float = 0.0, max_tokens: int = 1000, cache: bool = True, cache_in_memory: bool = True, callbacks: Optional[List[BaseCallback]] = None, num_retries: int = 8, provider=None, finetuning_model: Optional[str] = None, launch_kwargs: Optional[dict[str, Any]] = None, train_kwargs: Optional[dict[str, Any]] = None, **kwargs)

Bases: BaseLM

A language model supporting chat or text completion requests for use with DSPy modules.

Create a new language model instance for use with DSPy modules and programs.


Name Type Description Default
model str

The model to use. This should be a string of the form "llm_provider/llm_name" supported by LiteLLM. For example, "openai/gpt-4o".

model_type Literal['chat', 'text']

The type of the model, either "chat" or "text".

temperature float

The sampling temperature to use when generating responses.

max_tokens int

The maximum number of tokens to generate per response.

cache bool

Whether to cache the model responses for reuse to improve performance and reduce costs.

cache_in_memory bool

Whether to additionally cache responses in an in-memory LRU cache. Has no effect when cache is disabled.

callbacks Optional[List[BaseCallback]]

A list of callback functions to run before and after each request.

num_retries int

The number of times to retry a request if it fails transiently due to network error, rate limiting, etc. Requests are retried with exponential backoff.


provider

The provider to use. If not specified, the provider will be inferred from the model.

finetuning_model Optional[str]

The model to finetune. For some providers, the models available for finetuning are different from the models available for inference.

Source code in dspy/clients/
def __init__(
    self,
    model: str,
    model_type: Literal["chat", "text"] = "chat",
    temperature: float = 0.0,
    max_tokens: int = 1000,
    cache: bool = True,
    cache_in_memory: bool = True,
    callbacks: Optional[List[BaseCallback]] = None,
    num_retries: int = 8,
    provider=None,
    finetuning_model: Optional[str] = None,
    launch_kwargs: Optional[dict[str, Any]] = None,
    train_kwargs: Optional[dict[str, Any]] = None,
    **kwargs,
):
    """
    Create a new language model instance for use with DSPy modules and programs.

    Args:
        model: The model to use. This should be a string of the form ``"llm_provider/llm_name"``
               supported by LiteLLM. For example, ``"openai/gpt-4o"``.
        model_type: The type of the model, either ``"chat"`` or ``"text"``.
        temperature: The sampling temperature to use when generating responses.
        max_tokens: The maximum number of tokens to generate per response.
        cache: Whether to cache the model responses for reuse to improve performance
               and reduce costs.
        cache_in_memory: Whether to additionally cache responses in an in-memory LRU cache.
        callbacks: A list of callback functions to run before and after each request.
        num_retries: The number of times to retry a request if it fails transiently due to
                     network error, rate limiting, etc. Requests are retried with exponential
                     backoff.
        provider: The provider to use. If not specified, the provider will be inferred from the model.
        finetuning_model: The model to finetune. For some providers, the models available for
            finetuning are different from the models available for inference.
        launch_kwargs: Stored as ``self.launch_kwargs``; defaults to an empty dict.
        train_kwargs: Stored as ``self.train_kwargs``; defaults to an empty dict.
        **kwargs: Extra request arguments, stored in ``self.kwargs`` together with the
            temperature and the token limit.

    Raises:
        AssertionError: If the model name (after the last ``/``) starts with ``o1`` or ``o3``
            and ``temperature != 1.0`` or ``max_tokens < 5000``.
    """
    # Remember to update LM.copy() if you modify the constructor!
    self.model = model
    self.model_type = model_type
    self.cache = cache
    self.cache_in_memory = cache_in_memory
    self.provider = provider or self.infer_provider()
    self.callbacks = callbacks or []
    self.history = []
    self.num_retries = num_retries
    self.finetuning_model = finetuning_model
    self.launch_kwargs = launch_kwargs or {}
    self.train_kwargs = train_kwargs or {}

    # Handle model-specific configuration for different model families.
    # `split("/")[-1]` already returns the whole string when there is no "/".
    model_family = model.split("/")[-1].lower()

    # Match pattern: o[1,3] at the start, optionally followed by -mini and anything else
    model_pattern = re.match(r"^o([13])(?:-mini)?", model_family)

    if model_pattern:
        # Handle OpenAI reasoning models (o1, o3)
        assert (
            max_tokens >= 5000 and temperature == 1.0
        ), "OpenAI's reasoning models require passing temperature=1.0 and max_tokens >= 5000 to `dspy.LM(...)`"
        # Reasoning models take the token limit as `max_completion_tokens`.
        self.kwargs = dict(temperature=temperature, max_completion_tokens=max_tokens, **kwargs)
    else:
        self.kwargs = dict(temperature=temperature, max_tokens=max_tokens, **kwargs)


__call__(prompt=None, messages=None, **kwargs)

Source code in dspy/clients/
def __call__(self, prompt=None, messages=None, **kwargs):
    """Send a request to the model and return the generated outputs.

    NOTE(review): this listing is incomplete as extracted. Several closing
    brackets, the ``else:`` branches and the tail of the history entry are
    missing; the gaps are marked inline. Restore them from the upstream
    source before relying on this code.

    Args:
        prompt: Prompt text. When ``messages`` is falsy it is wrapped into a
            single ``{"role": "user", ...}`` message.
        messages: Chat messages to send; takes precedence over ``prompt``.
        **kwargs: Per-call overrides merged on top of ``self.kwargs``. The
            keys ``cache`` and ``cache_in_memory`` are consumed here as
            per-call overrides of the instance settings.

    Returns:
        One output per entry in ``response["choices"]``. From the visible
        lines these are plain strings, or dicts with ``"text"`` and
        ``"logprobs"`` when ``logprobs`` is requested.
    """
    # Build the request.
    cache = kwargs.pop("cache", self.cache)
    # Disabling the cache also disables the in-memory cache.
    # NOTE(review): because of the short-circuit `and`, "cache_in_memory" is
    # NOT popped from kwargs when `cache` is falsy, so a caller-supplied value
    # stays in the kwargs that are merged into the request below.
    cache_in_memory = cache and kwargs.pop("cache_in_memory", self.cache_in_memory)
    messages = messages or [{"role": "user", "content": prompt}]
    # Per-call kwargs win over the defaults captured at construction time.
    kwargs = {**self.kwargs, **kwargs}

    # Make the request and handle LRU & disk caching.
    if cache_in_memory:
        completion = cached_litellm_completion if self.model_type == "chat" else cached_litellm_text_completion

        response = completion(
            request=dict(model=self.model, messages=messages, **kwargs),
        # NOTE(review): the end of the call above (any further arguments and
        # the closing ")") and the "else:" introducing the uncached path below
        # are missing from this listing.
        completion = litellm_completion if self.model_type == "chat" else litellm_text_completion

        response = completion(
            request=dict(model=self.model, messages=messages, **kwargs),
            # only leverage LiteLLM cache in this case
            cache={"no-cache": not cache, "no-store": not cache},
        # NOTE(review): the closing ")" of this call is missing from this listing.

    if kwargs.get("logprobs"):
        # NOTE(review): the "{" / "}" around the two dict entries, the closing
        # "]" and the "else:" before the plain-text comprehension are missing
        # from this listing.
        outputs = [
                "text": c.message.content if hasattr(c, "message") else c["text"],
                "logprobs": c.logprobs if hasattr(c, "logprobs") else c["logprobs"],
            for c in response["choices"]
        outputs = [c.message.content if hasattr(c, "message") else c["text"] for c in response["choices"]]

    if dspy.settings.disable_history:
        return outputs

    # Logging, with removed api key & where `cost` is None on cache hit.
    # Every kwarg whose name starts with "api_" is dropped from the logged entry.
    kwargs = {k: v for k, v in kwargs.items() if not k.startswith("api_")}
    entry = dict(prompt=prompt, messages=messages, kwargs=kwargs, response=response)
    entry = dict(**entry, outputs=outputs, usage=dict(response["usage"]))
    entry = dict(**entry, cost=response.get("_hidden_params", {}).get("response_cost"))
    # NOTE(review): the call below is truncated in this listing; its arguments
    # and whatever records `entry` afterwards are not visible here.
    entry = dict(

    return outputs


copy(**kwargs)

Returns a copy of the language model with possibly updated parameters.

Source code in dspy/clients/
def copy(self, **kwargs):
    """Return a deep copy of this language model with selected settings overridden.

    The copy starts with an empty ``history``. Each keyword is applied as follows:

    * if it names an existing attribute of this instance, that attribute is set on the copy;
    * if it is already a key of ``self.kwargs``, or is not an attribute at all,
      it is written into the copy's ``kwargs``.

    Both rules can apply to the same keyword. The original instance is not modified.
    """
    from copy import deepcopy

    clone = deepcopy(self)
    clone.history = []

    for name, new_value in kwargs.items():
        is_attribute = hasattr(self, name)
        if is_attribute:
            setattr(clone, name, new_value)
        if name in self.kwargs or not is_attribute:
            clone.kwargs[name] = new_value

    return clone


dump_state()

Source code in dspy/clients/
def dump_state(self):
    """Return this LM's configuration as a flat dict.

    The result holds the listed instance attributes, then every entry of
    ``self.kwargs``; on a key clash the ``self.kwargs`` value wins.
    """
    # NOTE(review): self.kwargs is merged unfiltered, so anything passed to the
    # constructor as an extra keyword (credentials included) ends up in the state.
    state = {}
    for attr_name in (
        "model",
        "model_type",
        "cache",
        "cache_in_memory",
        "num_retries",
        "finetuning_model",
        "launch_kwargs",
        "train_kwargs",
    ):
        state[attr_name] = getattr(self, attr_name)
    return state | self.kwargs

finetune(train_data: List[Dict[str, Any]], train_data_format: Optional[TrainDataFormat], train_kwargs: Optional[Dict[str, Any]] = None) -> TrainingJob

Source code in dspy/clients/
# NOTE(review): the body uses `self`, but no `self` parameter appears in this
# listing's signature; it appears to have been lost in extraction.
def finetune(
    train_data: List[Dict[str, Any]],
    train_data_format: Optional[TrainDataFormat],
    train_kwargs: Optional[Dict[str, Any]] = None,
) -> TrainingJob:
    """Create a fine-tuning job for this model through its provider.

    NOTE(review): this listing is incomplete as extracted; the arguments of
    the ``TrainingJob(...)`` call are missing and no ``thread.start()`` is
    visible. Restore from the upstream source before relying on this code.

    Args:
        train_data: Training examples as a list of dicts.
        train_data_format: Format of ``train_data``; how it is consumed is
            not visible in this listing.
        train_kwargs: Training options; falls back to ``self.train_kwargs``
            when falsy.

    Returns:
        The job object created via ``self.provider.TrainingJob``.

    Raises:
        AssertionError: If ``dspy.settings.experimental`` is falsy, or if
            ``self.provider.finetunable`` is falsy.
    """
    from dspy import settings as settings

    # Fine-tuning is gated behind the experimental flag.
    err = "Fine-tuning is an experimental feature."
    err += " Set `dspy.settings.experimental` to `True` to use it."
    assert settings.experimental, err

    err = f"Provider {self.provider} does not support fine-tuning."
    assert self.provider.finetunable, err

    # The closure reads `job` when the thread runs; `job` is only assigned
    # further down, after the thread object has been created.
    def thread_function_wrapper():
        return self._run_finetune_job(job)

    thread = threading.Thread(target=thread_function_wrapper)
    # Per-call options take precedence over the ones stored on the instance.
    train_kwargs = train_kwargs or self.train_kwargs
    # Prefer the dedicated fine-tuning model; otherwise use the inference model.
    model_to_finetune = self.finetuning_model or self.model 
    # NOTE(review): the call below is truncated in this listing; its arguments
    # and any statements that follow it (e.g. starting `thread`) are not visible.
    job = self.provider.TrainingJob(

    return job

infer_adapter() -> Adapter

Source code in dspy/clients/
def infer_adapter(self) -> Adapter:
    """Pick the adapter to use with this language model.

    Returns:
        ``dspy.settings.adapter`` when it is set (truthy); otherwise the
        adapter mapped to ``self.model_type``.

    Raises:
        KeyError: If no adapter is configured in ``dspy.settings`` and
            ``self.model_type`` has no entry in the mapping below.
    """
    import dspy

    # A globally configured adapter always wins.
    if dspy.settings.adapter:
        return dspy.settings.adapter

    # NOTE(review): only "chat" is visible in the extracted listing; confirm
    # against upstream whether other model types were mapped as well.
    model_type_to_adapter = {
        "chat": dspy.ChatAdapter(),
    }
    model_type = self.model_type
    return model_type_to_adapter[model_type]

infer_provider() -> Provider

Source code in dspy/clients/
def infer_provider(self) -> Provider:
    """Choose a provider instance based on ``self.model``.

    Returns:
        An ``OpenAIProvider`` when ``OpenAIProvider.is_provider_model``
        accepts the model name, otherwise a generic ``Provider``.
    """
    # TODO(PR): Keeping this function here will require us to import all
    # providers in this file. Is this okay?
    provider_cls = OpenAIProvider if OpenAIProvider.is_provider_model(self.model) else Provider
    return provider_cls()

kill(launch_kwargs: Optional[Dict[str, Any]] = None)

Source code in dspy/clients/
def kill(self, launch_kwargs: Optional[Dict[str, Any]] = None):
    """Ask this model's provider to kill it.

    Args:
        launch_kwargs: Passed to ``self.provider.kill`` unchanged; ``None``
            is forwarded as is rather than replaced by ``self.launch_kwargs``.
    """
    provider = self.provider
    provider.kill(self, launch_kwargs)

launch(launch_kwargs: Optional[Dict[str, Any]] = None)

Source code in dspy/clients/
def launch(self, launch_kwargs: Optional[Dict[str, Any]] = None):
    """Ask this model's provider to launch it.

    Args:
        launch_kwargs: Passed to ``self.provider.launch`` unchanged; ``None``
            is forwarded as is rather than replaced by ``self.launch_kwargs``.
    """
    provider = self.provider
    provider.launch(self, launch_kwargs)