Source code for langchain_core.output_parsers.pydantic
import json
from typing import Generic, List, Type
import pydantic # pydantic: ignore
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.outputs import Generation
from langchain_core.utils.pydantic import (
PYDANTIC_MAJOR_VERSION,
PydanticBaseModel,
TBaseModel,
)
[docs]
class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
"""Parse an output using a pydantic model."""
pydantic_object: Type[TBaseModel] # type: ignore
"""The pydantic model to parse."""
def _parse_obj(self, obj: dict) -> TBaseModel:
if PYDANTIC_MAJOR_VERSION == 2:
try:
if issubclass(self.pydantic_object, pydantic.BaseModel):
return self.pydantic_object.model_validate(obj)
elif issubclass(self.pydantic_object, pydantic.v1.BaseModel):
return self.pydantic_object.parse_obj(obj)
else:
raise OutputParserException(
f"Unsupported model version for PydanticOutputParser: \
{self.pydantic_object.__class__}"
)
except (pydantic.ValidationError, pydantic.v1.ValidationError) as e:
raise self._parser_exception(e, obj)
else: # pydantic v1
try:
return self.pydantic_object.parse_obj(obj)
except pydantic.ValidationError as e:
raise self._parser_exception(e, obj)
def _parser_exception(
self, e: Exception, json_object: dict
) -> OutputParserException:
json_string = json.dumps(json_object)
name = self.pydantic_object.__name__
msg = f"Failed to parse {name} from completion {json_string}. Got: {e}"
return OutputParserException(msg, llm_output=json_string)
[docs]
def parse_result(
self, result: List[Generation], *, partial: bool = False
) -> TBaseModel:
"""Parse the result of an LLM call to a pydantic object.
Args:
result: The result of the LLM call.
partial: Whether to parse partial JSON objects.
If True, the output will be a JSON object containing
all the keys that have been returned so far.
Defaults to False.
Returns:
The parsed pydantic object.
"""
json_object = super().parse_result(result)
return self._parse_obj(json_object)
[docs]
def parse(self, text: str) -> TBaseModel:
"""Parse the output of an LLM call to a pydantic object.
Args:
text: The output of the LLM call.
Returns:
The parsed pydantic object.
"""
return super().parse(text)
@property
def _type(self) -> str:
return "pydantic"
@property
def OutputType(self) -> Type[TBaseModel]:
"""Return the pydantic model."""
return self.pydantic_object
_PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
Here is the output schema:
```
{schema}
```""" # noqa: E501
# Re-exporting types for backwards compatibility
__all__ = [
"PydanticBaseModel",
"PydanticOutputParser",
"TBaseModel",
]