⚡ FastAPI for AI Services
FastAPI là framework Python tốt nhất cho building AI APIs - async support, automatic docs, và validation.
Tại sao FastAPI?
FastAPI Advantages
- Async/await - Handle nhiều concurrent requests
- Type hints - Automatic validation
- Auto docs - Swagger UI built-in
- Fast - Performance ngang Node.js
- Modern - Python 3.7+
Project Setup
Bash
# Create project
mkdir ai-api && cd ai-api

# Virtual environment
python -m venv venv
source venv/bin/activate   # Linux/Mac
.\venv\Scripts\activate    # Windows

# Install dependencies
pip install fastapi uvicorn openai pydantic python-dotenv
Project Structure
Text
ai-api/
├── app/
│   ├── __init__.py
│   ├── main.py
│   ├── config.py
│   ├── routers/
│   │   ├── __init__.py
│   │   ├── chat.py
│   │   └── images.py
│   ├── services/
│   │   ├── __init__.py
│   │   ├── llm.py
│   │   └── cache.py
│   └── models/
│       ├── __init__.py
│       └── schemas.py
├── requirements.txt
├── .env
└── Dockerfile
Basic API
main.py
Python
1from fastapi import FastAPI2from fastapi.middleware.cors import CORSMiddleware3from app.routers import chat, images45app = FastAPI(6 title="AI API",7 description="Production-ready AI services",8 version="1.0.0"9)1011# CORS12app.add_middleware(13 CORSMiddleware,14 allow_origins=["*"],15 allow_credentials=True,16 allow_methods=["*"],17 allow_headers=["*"],18)1920# Routers21app.include_router(chat.router, prefix="/api/chat", tags=["Chat"])22app.include_router(images.router, prefix="/api/images", tags=["Images"])2324@app.get("/health")25async def health_check():26 return {"status": "healthy"}schemas.py (Pydantic Models)
Python
1from pydantic import BaseModel, Field2from typing import List, Optional3from enum import Enum45class MessageRole(str, Enum):6 system = "system"7 user = "user"8 assistant = "assistant"910class Message(BaseModel):11 role: MessageRole12 content: str1314class ChatRequest(BaseModel):15 messages: List[Message]16 model: str = "gpt-4o-mini"17 temperature: float = Field(default=0.7, ge=0, le=2)18 max_tokens: Optional[int] = Field(default=None, ge=1, le=4096)19 stream: bool = False2021class ChatResponse(BaseModel):22 message: str23 model: str24 usage: dict2526class ImageRequest(BaseModel):27 prompt: str = Field(..., min_length=1, max_length=4000)28 size: str = "1024x1024"29 quality: str = "standard"30 n: int = Field(default=1, ge=1, le=4)3132class ImageResponse(BaseModel):33 images: List[str]34 revised_prompt: Optional[str]Chat Endpoint
routers/chat.py
Python
from fastapi import APIRouter, HTTPException
from fastapi.responses import StreamingResponse

from app.models.schemas import ChatRequest, ChatResponse
from app.services.llm import LLMService

router = APIRouter()
llm_service = LLMService()


@router.post("/completions", response_model=ChatResponse)
async def create_chat_completion(request: ChatRequest):
    """Create a chat completion; streams SSE when request.stream is set."""
    try:
        if request.stream:
            # FastAPI skips response_model validation for Response instances,
            # so returning a StreamingResponse here is valid.
            return StreamingResponse(
                llm_service.stream_chat(request),
                media_type="text/event-stream",
            )

        response = await llm_service.chat(request)
        return response

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/stream")
async def stream_chat(request: ChatRequest):
    """Stream a chat completion as server-sent events."""
    return StreamingResponse(
        llm_service.stream_chat(request),
        media_type="text/event-stream",
    )
services/llm.py
Python
1from openai import AsyncOpenAI2from app.models.schemas import ChatRequest, ChatResponse3import json45class LLMService:6 def __init__(self):7 self.client = AsyncOpenAI()8 9 async def chat(self, request: ChatRequest) -> ChatResponse:10 """Non-streaming chat"""11 messages = [{"role": m.role, "content": m.content} for m in request.messages]12 13 response = await self.client.chat.completions.create(14 model=request.model,15 messages=messages,16 temperature=request.temperature,17 max_tokens=request.max_tokens18 )19 20 return ChatResponse(21 message=response.choices[0].message.content,22 model=response.model,23 usage={24 "prompt_tokens": response.usage.prompt_tokens,25 "completion_tokens": response.usage.completion_tokens,26 "total_tokens": response.usage.total_tokens27 }28 )29 30 async def stream_chat(self, request: ChatRequest):31 """Streaming chat"""32 messages = [{"role": m.role, "content": m.content} for m in request.messages]33 34 stream = await self.client.chat.completions.create(35 model=request.model,36 messages=messages,37 temperature=request.temperature,38 stream=True39 )40 41 async for chunk in stream:42 if chunk.choices[0].delta.content:43 data = {"content": chunk.choices[0].delta.content}44 yield f"data: {json.dumps(data)}\n\n"45 46 yield "data: [DONE]\n\n"Image Endpoint
routers/images.py
Python
from fastapi import APIRouter, HTTPException
from openai import AsyncOpenAI

from app.models.schemas import ImageRequest, ImageResponse

router = APIRouter()
client = AsyncOpenAI()


@router.post("/generate", response_model=ImageResponse)
async def generate_image(request: ImageRequest):
    """Generate an image with DALL-E 3."""
    try:
        response = await client.images.generate(
            model="dall-e-3",
            prompt=request.prompt,
            size=request.size,
            quality=request.quality,
            n=1,  # DALL-E 3 only supports n=1 (request.n is intentionally ignored)
        )

        return ImageResponse(
            images=[response.data[0].url],
            revised_prompt=response.data[0].revised_prompt,
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
Authentication
API Key Auth
Python
1from fastapi import Security, HTTPException2from fastapi.security import APIKeyHeader3import os45api_key_header = APIKeyHeader(name="X-API-Key")67async def verify_api_key(api_key: str = Security(api_key_header)):8 valid_keys = os.getenv("API_KEYS", "").split(",")9 if api_key not in valid_keys:10 raise HTTPException(status_code=403, detail="Invalid API key")11 return api_key1213# Usage in router14@router.post("/chat")15async def chat(16 request: ChatRequest,17 api_key: str = Security(verify_api_key)18):19 # Authenticated endpoint20 passJWT Auth
Python
1from fastapi import Depends2from fastapi.security import OAuth2PasswordBearer3from jose import JWTError, jwt45oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")67async def get_current_user(token: str = Depends(oauth2_scheme)):8 try:9 payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])10 user_id = payload.get("sub")11 if user_id is None:12 raise HTTPException(status_code=401)13 return user_id14 except JWTError:15 raise HTTPException(status_code=401)Error Handling
Python
1from fastapi import Request2from fastapi.responses import JSONResponse34@app.exception_handler(Exception)5async def global_exception_handler(request: Request, exc: Exception):6 return JSONResponse(7 status_code=500,8 content={9 "error": "Internal server error",10 "detail": str(exc) if DEBUG else "Something went wrong"11 }12 )1314# Custom exceptions15class RateLimitExceeded(Exception):16 pass1718@app.exception_handler(RateLimitExceeded)19async def rate_limit_handler(request: Request, exc: RateLimitExceeded):20 return JSONResponse(21 status_code=429,22 content={"error": "Rate limit exceeded", "retry_after": 60}23 )Running the Server
Bash
# Development
uvicorn app.main:app --reload --port 8000

# Production
uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers 4

# With Gunicorn
gunicorn app.main:app -w 4 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000
Testing
Python
1# test_api.py2from fastapi.testclient import TestClient3from app.main import app45client = TestClient(app)67def test_health():8 response = client.get("/health")9 assert response.status_code == 2001011def test_chat():12 response = client.post("/api/chat/completions", json={13 "messages": [{"role": "user", "content": "Hello"}],14 "model": "gpt-4o-mini"15 })16 assert response.status_code == 20017 assert "message" in response.json()Best Practices
FastAPI Tips
- Use Pydantic cho validation
- Async everywhere cho performance
- Dependency injection cho reusability
- Proper error handling với custom exceptions
- API versioning (/v1/, /v2/)
- Rate limiting để protect endpoints
Bài tập thực hành
Hands-on Exercise
Build Complete AI API:
- Chat endpoint với streaming
- Image generation endpoint
- Authentication (API key)
- Error handling
- Basic tests
Target: Production-ready API với docs tại /docs
Tiếp theo
Bài tiếp theo: Docker for AI - Containerize AI applications.
