<?xml version="1.0" encoding="utf-8"?>
<!-- NOTE(review): the sitemap protocol requires fully-qualified <loc> URLs
     (scheme + host, e.g. https://example.com/theory/); every <loc> below is a
     site-relative path — prepend the site's base URL when generating this file. -->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <url><loc>/theory/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/getting-started/choosing-the-right-model/</loc><lastmod>2026-02-26T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/llm-inference-basics/what-is-llm-inference/</loc><lastmod>2026-02-22T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/getting-started/choosing-the-right-gpu/</loc><lastmod>2026-02-26T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/llm-inference-basics/training-inference-differences/</loc><lastmod>2026-02-22T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/getting-started/calculating-gpu-memory-for-llms/</loc><lastmod>2026-02-26T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/llm-inference-basics/how-does-llm-inference-work/</loc><lastmod>2026-02-22T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/getting-started/llm-fine-tuning/</loc><lastmod>2026-02-26T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/llm-inference-basics/cpu-vs-gpu-vs-tpu/</loc><lastmod>2026-02-22T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/russ-ai-models/indi-models/vikhr/</loc><lastmod>2026-02-27T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/getting-started/llm-quantization/</loc><lastmod>2026-02-26T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/llm-inference-basics/distributed-inference/</loc><lastmod>2026-02-22T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/getting-started/choosing-the-right-inference-framework/</loc><lastmod>2026-02-26T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/llm-inference-basics/serverless-vs-self-hosted-llm-inference/</loc><lastmod>2026-02-22T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/llm-inference-basics/openai-compatible-api/</loc><lastmod>2026-02-22T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/inference-optimization/llm-inference-metrics/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/infrastructure-and-operations/what-is-llm-inference-infrastructure/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/russ-ai-models/enterprise-models/gigachat/</loc><lastmod>2026-02-27T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/russ-ai-models/indi-models/gusev/</loc><lastmod>2026-02-27T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/llm-inference-basics/</loc><lastmod>2026-02-22T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/getting-started/tool-integration/structured-outputs/</loc><lastmod>2026-02-27T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/getting-started/tool-integration/function-calling/</loc><lastmod>2026-02-27T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/getting-started/tool-integration/model-context-protocol/</loc><lastmod>2026-02-27T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/russ-ai-models/enterprise-models/yandexgpt/</loc><lastmod>2026-02-27T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/inference-optimization/llm-performance-benchmarks/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/infrastructure-and-operations/challenges-in-building-infra-for-llm-inference/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/infrastructure-and-operations/challenges-in-building-infra-for-llm-inference/fast-scaling/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/russ-ai-models/enterprise-models/t-bank/</loc><lastmod>2026-02-27T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/getting-started/</loc><lastmod>2026-02-27T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/russ-ai-models/enterprise-models/mts/</loc><lastmod>2026-02-27T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/infrastructure-and-operations/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/infrastructure-and-operations/multi-cloud-and-cross-region-inference/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/inference-optimization/static-dynamic-continuous-batching/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/infrastructure-and-operations/challenges-in-building-infra-for-llm-inference/build-and-maintenance-cost/</loc><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/inference-optimization/flashattention/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/infrastructure-and-operations/on-prem-llms/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/infrastructure-and-operations/challenges-in-building-infra-for-llm-inference/comprehensive-observability/</loc><lastmod>2026-02-27T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/inference-optimization/pagedattention/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/inference-optimization/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/infrastructure-and-operations/bring-your-own-cloud/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/infrastructure-and-operations/inferenceops-and-management/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/inference-optimization/speculative-decoding/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/inference-optimization/prefill-decode-disaggregation/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/inference-optimization/prefix-caching/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/inference-optimization/prefix-aware-routing/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/getting-started/tool-integration/</loc><lastmod>2026-02-25T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/inference-optimization/kv-cache-utilization-aware-load-balancing/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/inference-optimization/kv-cache-offloading/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/inference-optimization/data-tensor-pipeline-expert-hybrid-parallelism/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/theory/inference-optimization/offline-batch-inference/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/</loc><lastmod>2026-02-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/russ-ai-models/enterprise-models/</loc><lastmod>2026-02-27T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/russ-ai-models/indi-models/</loc><lastmod>2026-02-27T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/russ-ai-models/</loc><lastmod>2026-02-27T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/architecture/</loc><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/news/</loc><changefreq>daily</changefreq><priority>0.5</priority></url>
  <url><loc>/solutions/</loc><changefreq>daily</changefreq><priority>0.5</priority></url>
</urlset>