
Kickstart Snippets

Quickly start using Panel's chat components with popular LLM packages by copying and pasting one of these snippets. All of these examples support:

  • Streaming
  • Async
  • Memory
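
Each snippet follows the same skeleton: a callback (async for the hosted APIs) receives the new message, rebuilds the conversation with instance.serialize() for memory, and yields a growing string so the reply streams into the chat. Below is a minimal, provider-free sketch of that pattern; echo_llm is a made-up stand-in for a real LLM client.

import asyncio

import panel as pn

pn.extension()


async def echo_llm(messages):
    # Hypothetical stand-in for a real client: stream the last message back word by word
    for word in messages[-1]["content"].split():
        await asyncio.sleep(0.1)
        yield word + " "


async def callback(contents: str, user: str, instance: pn.chat.ChatInterface):
    # Memory: the whole conversation as a list of {"role", "content"} dicts
    messages = instance.serialize()

    # Streaming: yielding a growing string updates the same chat message in place
    message = ""
    async for part in echo_llm(messages):
        message += part
        yield message


pn.chat.ChatInterface(callback=callback).servable()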

MistralAI

Demonstrates how to use MistralAI's Mistral Small model with Panel's ChatInterface.

Highlights:

  • Uses PasswordInput to set the API key, or uses the MISTRAL_API_KEY environment variable.
  • Uses pn.bind to update the Mistral client when the api_key changes and pn.state.cache to store the client.
  • Uses serialize to get chat history from the ChatInterface.
  • Uses yield to continuously concatenate the parts of the response.
Source code for mistralai_.py
"""
Demonstrates how to use MistralAI's Mistral Small model with Panel's ChatInterface.

Highlights:

- Uses `PasswordInput` to set the API key, or uses the `MISTRAL_API_KEY` environment variable.
- Uses `pn.bind` to update the `Mistral` client when the `api_key` changes and `pn.state.cache` to store the client.
- Uses `serialize` to get chat history from the `ChatInterface`.
- Uses `yield` to continuously concatenate the parts of the response.
"""

import os

import panel as pn
from mistralai import Mistral

pn.extension()


def update_api_key(api_key):
    # Use the provided api_key or default to the environment variable
    pn.state.cache["aclient"] = (
        Mistral(api_key=api_key)
        if api_key
        else Mistral(api_key=os.getenv("MISTRAL_API_KEY", ""))
    )


async def callback(contents: str, user: str, instance: pn.chat.ChatInterface):
    # memory is a list of serialized messages
    messages = instance.serialize()

    response = await pn.state.cache["aclient"].chat.stream_async(
        model="mistral-small",
        messages=messages,  # pass the serialized role/content dicts directly, keeping roles intact
    )

    message = ""
    async for chunk in response:
        part = chunk.data.choices[0].delta.content
        if part is not None:
            message += part
            yield message


# Input widget for the API key
api_key_input = pn.widgets.PasswordInput(
    placeholder="Uses $MISTRAL_API_KEY if not set",
    sizing_mode="stretch_width",
    styles={"color": "black"},
)

# Bind the API key input to the update function
pn.bind(update_api_key, api_key_input, watch=True)
api_key_input.param.trigger("value")

# Define the Chat Interface with callback
chat_interface = pn.chat.ChatInterface(
    callback=callback,
    callback_user="MistralAI",
    help_text="Send a message to get a reply from MistralAI!",
    callback_exception="verbose",
)

# Template with the chat interface
template = pn.template.FastListTemplate(
    title="MistralAI Small",
    header_background="#FF7000",
    main=[chat_interface],
    header=[api_key_input],
)

# Serve the template
template.servable()
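
Every snippet relies on instance.serialize() for memory. By default it returns the conversation as OpenAI-style role/content dictionaries, so it can be handed to most chat APIs unchanged; after one exchange it looks roughly like this (values are illustrative):

# Illustrative result of instance.serialize() after one exchange
[
    {"role": "user", "content": "What is Mistral Small?"},
    {"role": "assistant", "content": "Mistral Small is a hosted chat model from Mistral AI."},
]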

LlamaIndex

Demonstrates how to use LlamaIndex to wrap OpenAI's GPT-3.5 API with Panel's ChatInterface.

Highlights:

  • Uses PasswordInput to set the API key, or uses the OPENAI_API_KEY environment variable.
  • Uses serialize to get chat history from the ChatInterface.
  • Uses yield to continuously concatenate the parts of the response.
Source code for llama_index_.py
"""
Demonstrates how to use LlamaIndex to wrap OpenAI's GPT-3.5 API with Panel's ChatInterface.

Highlights:

- Uses `PasswordInput` to set the API key, or uses the `OPENAI_API_KEY` environment variable.
- Uses `serialize` to get chat history from the `ChatInterface`.
- Uses `yield` to continuously concatenate the parts of the response.
"""

import panel as pn
from llama_index.core.agent import ReActAgent
from llama_index.core.llms import ChatMessage
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI

pn.extension()


def multiply(a: int, b: int) -> int:
    """Multiple two integers and returns the result integer"""
    return a * b


async def callback(contents: str, user: str, instance: pn.chat.ChatInterface):
    if api_key_input.value:
        # use api_key_input.value if set, otherwise use OPENAI_API_KEY
        llm.api_key = api_key_input.value

    # memory is a list of messages
    messages = [ChatMessage(**message) for message in instance.serialize()]

    response = await llm.astream_chat(
        model="gpt-3.5-turbo",
        messages=messages,
    )

    async for chunk in response:
        message = chunk.message.content
        yield str(message)


# Default LLM; the API key can be overridden from the PasswordInput in the header
llm = OpenAI(model="gpt-3.5-turbo")

# Expose the multiply function as a tool for a ReAct agent
multiply_tool = FunctionTool.from_defaults(fn=multiply)
agent = ReActAgent.from_tools([multiply_tool], llm=llm, verbose=True)

api_key_input = pn.widgets.PasswordInput(
    placeholder="sk-... uses $OPENAI_API_KEY if not set",
    sizing_mode="stretch_width",
    styles={"color": "black"},
)
chat_interface = pn.chat.ChatInterface(
    callback=callback,
    callback_user="GPT-3.5",
    help_text="Send a message to get a reply from GPT 3.5 Turbo!",
)
template = pn.template.FastListTemplate(
    title="LlamaIndex OpenAI GPT-3.5",
    header_background="#83CBF2",
    main=[chat_interface],
    header=[api_key_input],
)
template.servable()
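
Note that the snippet builds a ReActAgent with a multiply tool, but the callback above sends the chat history straight to llm; the agent is provided as a starting point. A hedged sketch of routing messages through the agent instead is shown below: agent_callback is a hypothetical name, achat is the agent's async chat method as exposed by llama-index's agent runners, and the reply arrives in one piece rather than as a stream.

async def agent_callback(contents: str, user: str, instance: pn.chat.ChatInterface):
    # Hand the message to the ReAct agent, which may decide to call the multiply tool
    response = await agent.achat(contents)
    yield str(response)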

OpenAI

Demonstrates how to use OpenAI's GPT-3.5 API with Panel's ChatInterface.

Highlights:

  • Uses PasswordInput to set the API key, or uses the OPENAI_API_KEY environment variable.
  • Uses serialize to get chat history from the ChatInterface.
  • Uses yield to continuously concatenate the parts of the response.
Source code for openai_.py
"""
Demonstrates how to use OpenAI's GPT-3.5 API with Panel's ChatInterface.

Highlights:

- Uses `PasswordInput` to set the API key, or uses the `OPENAI_API_KEY` environment variable.
- Uses `serialize` to get chat history from the `ChatInterface`.
- Uses `yield` to continuously concatenate the parts of the response.
"""

import panel as pn
from openai import AsyncOpenAI

pn.extension()


async def callback(contents: str, user: str, instance: pn.chat.ChatInterface):
    if api_key_input.value:
        # use api_key_input.value if set, otherwise use OPENAI_API_KEY
        aclient.api_key = api_key_input.value

    # memory is a list of messages
    messages = instance.serialize()

    response = await aclient.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        stream=True,
    )

    message = ""
    async for chunk in response:
        part = chunk.choices[0].delta.content
        if part is not None:
            message += part
            yield message


aclient = AsyncOpenAI()
api_key_input = pn.widgets.PasswordInput(
    placeholder="sk-... uses $OPENAI_API_KEY if not set",
    sizing_mode="stretch_width",
    styles={"color": "black"},
)
chat_interface = pn.chat.ChatInterface(
    callback=callback,
    callback_user="GPT-3.5",
    help_text="Send a message to get a reply from GPT-3.5 Turbo!",
)
template = pn.template.FastListTemplate(
    title="OpenAI GPT-3.5",
    header_background="#212121",
    main=[chat_interface],
    header=[api_key_input],
)
template.servable()
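
Because serialize() already produces OpenAI-style messages, shaping the conversation is plain list manipulation. For example, a system prompt could be prepended by replacing the messages = instance.serialize() line in the callback with the sketch below (the prompt wording is only illustrative):

    # Prepend a system prompt to the serialized chat history (wording is illustrative)
    messages = [
        {"role": "system", "content": "You are a concise, helpful assistant."},
        *instance.serialize(),
    ]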

LangChain

Demonstrates how to use LangChain to wrap OpenAI's GPT-3.5 API with Panel's ChatInterface.

Highlights:

  • Uses PasswordInput to set the API key, or uses the OPENAI_API_KEY environment variable.
  • Uses serialize to get chat history from the ChatInterface.
  • Uses yield to continuously concatenate the parts of the response.
Source code for langchain_.py
"""
Demonstrates how to use LangChain to wrap OpenAI's GPT-3.5 API with Panel's ChatInterface.

Highlights:

- Uses `PasswordInput` to set the API key, or uses the `OPENAI_API_KEY` environment variable.
- Uses `serialize` to get chat history from the `ChatInterface`.
- Uses `yield` to continuously concatenate the parts of the response.
"""

from operator import itemgetter

import panel as pn
from langchain.memory import ConversationTokenBufferMemory
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI

pn.extension()


async def callback(contents: str, user: str, instance: pn.chat.ChatInterface):
    if api_key_input.value:
        # use api_key_input.value if set, otherwise use OPENAI_API_KEY
        llm.api_key = api_key_input.value

    # Rebuild the LangChain memory from the chat history kept by the ChatInterface
    memory.clear()
    for message in instance.serialize():
        if message["role"] == "user":
            memory.chat_memory.add_user_message(HumanMessage(**message))
        else:
            memory.chat_memory.add_ai_message(AIMessage(**message))

    response = chain.astream({"user_input": contents})

    message = ""
    async for chunk in response:
        message += chunk
        yield message


llm = ChatOpenAI(model="gpt-3.5-turbo")
# Token-limited buffer that holds the chat history rebuilt on every turn
memory = ConversationTokenBufferMemory(
    return_messages=True,
    llm=llm,
    memory_key="chat_history",
    max_token_limit=8192 - 1024,
)
# Inject the stored chat history into the chain's input dict
memory_link = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables)
    | itemgetter("chat_history")
)
prompt_link = ChatPromptTemplate.from_template(
    "{chat_history}\n\nBe a helpful chat bot and answer: {user_input}",
)
output_parser = StrOutputParser()

# user_input -> add chat_history -> prompt -> llm -> plain string
chain = (
    {"user_input": itemgetter("user_input")}
    | memory_link
    | prompt_link
    | llm
    | output_parser
)

api_key_input = pn.widgets.PasswordInput(
    placeholder="sk-... uses $OPENAI_API_KEY if not set",
    sizing_mode="stretch_width",
    styles={"color": "black"},
)
chat_interface = pn.chat.ChatInterface(
    callback=callback,
    callback_user="GPT-3.5",
    help_text="Send a message to get a reply from GPT 3.5 Turbo!",
    callback_exception="verbose",
)
template = pn.template.FastListTemplate(
    title="LangChain OpenAI GPT-3.5",
    header_background="#E8B0E6",
    main=[chat_interface],
    header=[api_key_input],
)
template.servable()
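
The template above flattens the chat history into a single string before it reaches the model. A common variant, sketched here under the same setup, keeps the history as structured messages by building the prompt from message tuples and a MessagesPlaceholder (the system wording is illustrative); it drops into the chain in place of the prompt_link defined above:

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Variant of prompt_link that passes chat_history through as messages
# instead of interpolating it into one template string
prompt_link = ChatPromptTemplate.from_messages(
    [
        ("system", "Be a helpful chat bot."),
        MessagesPlaceholder("chat_history"),
        ("human", "{user_input}"),
    ]
)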

llama-cpp-python

Demonstrates how to use LlamaCpp with a local, quantized model, like TheBloke's Mistral Instruct v0.2, with Panel's ChatInterface.

Highlights:

  • Uses pn.state.onload to load the model from the Hugging Face Hub after the app loads, so the download does not block the app from rendering.
  • Uses pn.state.cache to store the Llama instance.
  • Uses serialize to get chat history from the ChatInterface.
  • Uses yield to continuously concatenate the parts of the response.
Source code for llama_cpp_python_.py
"""
Demonstrates how to use LlamaCpp with a local, quantized model, like TheBloke's Mistral Instruct v0.2,
with Panel's ChatInterface.

Highlights:

- Uses `pn.state.onload` to load the model from the Hugging Face Hub after the app loads, so the download does not block the app from rendering.
- Uses `pn.state.cache` to store the `Llama` instance.
- Uses `serialize` to get chat history from the `ChatInterface`.
- Uses `yield` to continuously concatenate the parts of the response.
"""

import panel as pn
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

REPO_ID = "TheBloke/Mistral-7B-Instruct-v0.2-code-ft-GGUF"
FILENAME = "mistral-7b-instruct-v0.2-code-ft.Q5_K_S.gguf"

pn.extension()


def load_model():
    # Download the GGUF weights from the Hugging Face Hub (cached locally after the first run)
    model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
    pn.state.cache["llama"] = Llama(
        model_path=model_path,
        chat_format="mistral-instruct",
        verbose=False,
        n_gpu_layers=-1,  # offload all layers to the GPU if one is available
    )
    # Enable the chat input only once the model is ready
    chat_interface.disabled = False


def callback(contents: str, user: str, instance: pn.chat.ChatInterface):
    # memory is a list of messages
    messages = instance.serialize()

    llama = pn.state.cache["llama"]
    response = llama.create_chat_completion_openai_v1(messages=messages, stream=True)

    message = ""
    for chunk in response:
        part = chunk.choices[0].delta.content
        if part is not None:
            message += part
            yield message


chat_interface = pn.chat.ChatInterface(
    callback=callback,
    callback_user="LlamaCpp",
    help_text="Send a message to get a reply from LlamaCpp!",
    disabled=True,
)
template = pn.template.FastListTemplate(
    title="LlamaCpp Mistral",
    header_background="#A0A0A0",
    main=[chat_interface],
)
pn.state.onload(load_model)
template.servable()
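
pn.state.onload runs for every new browser session, while pn.state.cache is shared across sessions, so the snippet above re-creates the Llama instance each time a tab connects. A small guard, sketched below as one possible refinement of load_model, reuses the cached instance instead:

def load_model():
    # Only download and initialize the model once; later sessions reuse the cached instance
    if "llama" not in pn.state.cache:
        model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
        pn.state.cache["llama"] = Llama(
            model_path=model_path,
            chat_format="mistral-instruct",
            verbose=False,
            n_gpu_layers=-1,
        )
    chat_interface.disabled = False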