GoogleLLMService provides integration with Google’s Gemini models, supporting streaming responses, function calling, and multimodal inputs. It includes specialized context handling for Google’s message format while maintaining compatibility with OpenAI-style contexts.
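A minimal setup looks like the sketch below. The model name and environment-variable lookup are illustrative; the OpenAI-style context is translated to Google's message format internally:

```python
import os

from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.google.llm import GoogleLLMService

# Instantiate the service; messages in the OpenAI-style context are
# converted to Google's format internally.
llm = GoogleLLMService(
    api_key=os.getenv("GOOGLE_API_KEY"),
    model="gemini-2.0-flash",
)

# OpenAI-style messages work unchanged.
context = OpenAILLMContext(
    messages=[{"role": "user", "content": "Say hello."}],
)
context_aggregator = llm.create_context_aggregator(context)
```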
Google Gemini's search grounding feature enables real-time web search integration, allowing the model to ground its responses in current web results and cite its sources. This is particularly valuable for applications that need up-to-date information.
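The example below puts these pieces together: it enables dynamic search grounding (with `MODE_DYNAMic` replaced by `MODE_DYNAMIC`, retrieval is triggered only when the model's relevance score for a query exceeds `dynamic_threshold`, so lower values make search more likely), registers standard function calls for weather lookup and image capture, and wires everything into a voice pipeline.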
```python
import os

from pipecat.adapters.schemas.function_schema import FunctionSchema
from pipecat.adapters.schemas.tools_schema import ToolsSchema
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.google.llm import GoogleLLMService

# Configure Gemini service with search grounding
search_tool = {
    "google_search_retrieval": {
        "dynamic_retrieval_config": {
            "mode": "MODE_DYNAMIC",
            "dynamic_threshold": 0.3,
        }
    }
}

llm = GoogleLLMService(
    api_key=os.getenv("GOOGLE_API_KEY"),
    model="gemini-2.0-flash",
    system_instruction="""You are a helpful assistant with access to current information.
    When users ask about recent events, use search to provide accurate,
    up-to-date information.""",
    tools=[search_tool],
    params=GoogleLLMService.InputParams(
        temperature=0.7,
        max_tokens=1000,
    ),
)

# Define function for tool calling
weather_function = FunctionSchema(
    name="get_weather",
    description="Get current weather information",
    properties={
        "location": {
            "type": "string",
            "description": "City and state, e.g. San Francisco, CA",
        }
    },
    required=["location"],
)

# Define image capture function for multimodal capabilities
image_function = FunctionSchema(
    name="get_image",
    description="Capture and analyze an image from the video stream",
    properties={
        "question": {
            "type": "string",
            "description": "Question about what to analyze in the image",
        }
    },
    required=["question"],
)

tools = ToolsSchema(standard_tools=[weather_function, image_function])

# Create context with multimodal system prompt
context = OpenAILLMContext(
    messages=[
        {
            "role": "system",
            "content": """You are a helpful assistant with access to current
            information and vision capabilities. You can answer questions about
            weather, analyze images from video streams, and search for current
            information. Keep responses concise for voice output.""",
        },
        {"role": "user", "content": "Hello! What can you help me with?"},
    ],
    tools=tools,
)

# Create context aggregators
context_aggregator = llm.create_context_aggregator(context)


# Register function handlers
async def get_weather(params):
    location = params.arguments["location"]
    await params.result_callback(f"Weather in {location}: 72°F and sunny")


async def get_image(params):
    question = params.arguments["question"]
    # Request an image from the video stream. `client_id` is assumed to be
    # captured elsewhere, e.g. from the transport's client-connected event.
    await params.llm.request_image_frame(
        user_id=client_id,
        function_name=params.function_name,
        tool_call_id=params.tool_call_id,
        text_content=question,
    )
    await params.result_callback(f"Analyzing image for: {question}")


llm.register_function("get_weather", get_weather)
llm.register_function("get_image", get_image)


# Optional: add spoken feedback while function calls run
@llm.event_handler("on_function_calls_started")
async def on_function_calls_started(service, function_calls):
    await tts.queue_frame(TTSSpeakFrame("Let me check on that."))


# Use in pipeline (stt, tts, and transport are assumed to be configured elsewhere)
pipeline = Pipeline([
    transport.input(),
    stt,
    context_aggregator.user(),
    llm,
    tts,
    transport.output(),
    context_aggregator.assistant(),
])
```
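To run the assembled pipeline, the usual Pipecat task/runner pattern applies. A minimal sketch, assuming this code runs inside an async entry point:

```python
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask

# Wrap the pipeline in a task and run it; allow_interruptions lets the
# user barge in over TTS output.
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
runner = PipelineRunner()
await runner.run(task)
```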