How tracking works with streaming
FluxGate captures token counts and latency after a stream is fully consumed. The stream object carries a fluxGateCostTrackingResponse property that is populated once the last chunk is processed. Your UI starts rendering immediately — tracking happens transparently in the background.
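For example, once the loop that consumes the stream has finished, the tracking data can be read from the same stream object (a minimal sketch; the withContext call and the full route handlers are shown in the sections below):
// Consume the stream, then read the tracking data
for await (const chunk of stream) {
  // ...forward the chunk to the client...
}
console.log(stream.fluxGateCostTrackingResponse); // populated after the last chunk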
1. OpenAI streaming in a route handler
// app/api/chat/stream/route.ts
import { auth } from "@/lib/auth";
import { openai } from "@/lib/openai";
import { NextRequest } from "next/server";

export async function POST(req: NextRequest) {
  const session = await auth();
  if (!session?.user) return new Response("Unauthorized", { status: 401 });

  const { messages, conversationId } = await req.json();

  const stream = await openai
    .withContext({
      feature: "streaming-chat",
      user: session.user.id,
      conversationId,
    })
    .chat.completions.create({
      model: "gpt-4o",
      messages,
      stream: true,
    });

  const encoder = new TextEncoder();
  const readable = new ReadableStream({
    async start(controller) {
      for await (const chunk of stream) {
        const text = chunk.choices[0]?.delta?.content ?? "";
        if (text) controller.enqueue(encoder.encode(text));
      }
      controller.close();
      // stream.fluxGateCostTrackingResponse is populated here
    },
  });

  return new Response(readable, {
    headers: { "Content-Type": "text/plain; charset=utf-8" },
  });
}
2. Anthropic streaming in a route handler
// app/api/claude/stream/route.ts
import { auth } from "@/lib/auth";
import { anthropic } from "@/lib/anthropic";
import { NextRequest } from "next/server";

export async function POST(req: NextRequest) {
  const session = await auth();
  if (!session?.user) return new Response("Unauthorized", { status: 401 });

  const { messages } = await req.json();

  const stream = await anthropic
    .withContext({ feature: "claude-streaming", user: session.user.id })
    .messages.create({
      model: "claude-sonnet-4-6",
      max_tokens: 2048,
      messages,
      stream: true,
    });

  const encoder = new TextEncoder();
  const readable = new ReadableStream({
    async start(controller) {
      for await (const event of stream) {
        if (
          event.type === "content_block_delta" &&
          event.delta.type === "text_delta"
        ) {
          controller.enqueue(encoder.encode(event.delta.text));
        }
      }
      controller.close();
      // stream.fluxGateCostTrackingResponse is populated here
    },
  });

  return new Response(readable, {
    headers: { "Content-Type": "text/plain; charset=utf-8" },
  });
}
3. Gemini streaming in a route handler
// app/api/gemini/stream/route.ts
import { auth } from "@/lib/auth";
import { geminiPro } from "@/lib/gemini";
import { NextRequest } from "next/server";

export async function POST(req: NextRequest) {
  const session = await auth();
  if (!session?.user) return new Response("Unauthorized", { status: 401 });

  const { prompt } = await req.json();

  const result = await geminiPro
    .withContext({ feature: "gemini-streaming", user: session.user.id })
    .generateContentStream(prompt);

  const encoder = new TextEncoder();
  const readable = new ReadableStream({
    async start(controller) {
      for await (const chunk of result.stream) {
        const text = chunk.text();
        if (text) controller.enqueue(encoder.encode(text));
      }
      // Await the full response so the tracking data is finalised
      await result.response;
      controller.close();
    },
  });

  return new Response(readable, {
    headers: { "Content-Type": "text/plain; charset=utf-8" },
  });
}
4. Client-side hook for consuming streams
A lightweight React hook to consume any text/plain streaming endpoint and accumulate the response in state.
// hooks/use-streaming-reply.ts
"use client";

import { useState, useCallback } from "react";

export function useStreamingReply(endpoint: string) {
  const [text, setText] = useState("");
  const [loading, setLoading] = useState(false);

  const stream = useCallback(
    async (body: object) => {
      setText("");
      setLoading(true);
      try {
        const res = await fetch(endpoint, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify(body),
        });
        if (!res.ok) throw new Error(await res.text());

        const reader = res.body!.getReader();
        const decoder = new TextDecoder();
        while (true) {
          const { done, value } = await reader.read();
          if (done) break;
          setText((prev) => prev + decoder.decode(value, { stream: true }));
        }
        // Flush any bytes the decoder is still buffering
        setText((prev) => prev + decoder.decode());
      } finally {
        setLoading(false);
      }
    },
    [endpoint],
  );

  return { text, loading, stream };
}
Usage:
// app/dashboard/chat/_components/chat-input.tsx
"use client";

import { useStreamingReply } from "@/hooks/use-streaming-reply";

type Message = { role: "user" | "assistant"; content: string };

export function ChatInput() {
  const { text, loading, stream } = useStreamingReply("/api/chat/stream");

  const handleSend = (messages: Message[]) => {
    stream({ messages, conversationId: "conv-123" });
  };

  return (
    <div>
      <div className="whitespace-pre-wrap">{text}</div>
      {loading && <span className="animate-pulse">▊</span>}
      {/* your input form; call handleSend on submit */}
    </div>
  );
}
5. Tracking streaming duration
The FluxGate wrapper records both latencyInMs (time to first token) and streamingDurationInMs (total stream duration) automatically. You can see both metrics in the Requests Explorer on the dashboard.
// No extra code needed — both metrics are captured automatically.
// After the stream closes:
console.log(stream.fluxGateCostTrackingResponse);
// {
// status: "SUCCESS",
// cost: 0.0024,
// trackingId: "evt_01...",
// createdAt: "2026-05-12T...",
// }