29:["$","$L2a",null,{"slug":"post-training-rl","initialData":{"meta":{"title":"Post-Training: DPO, GRPO & RL for LLMs","subtitle":"Pick the right post-training algorithm -- preference optimization, reasoning RL, and agent RL -- without drowning in research papers.","accentColor":"#8338EC","function":"learning","version":"1.0","publishedAt":"2026-04-15","categories":["ai-foundations-ops","patterns-and-craft","ai-foundations-intro"],"audience":"engineering","level":"practitioner","translation":{"locale":"es","translatedAt":"2026-05-18","sourceVersion":"1.0","translator":"translate-path-skill"}},"modules":[{"id":"0","title":"Beyond SFT","subtitle":"Why Post-Training RL Exists and When to Reach for It","icon":"","color":"#8338EC","sections":[{"type":"concept"},{"type":"concept"},{"type":"analogy"},{"type":"example"},{"type":"practice"}]},{"id":"1","title":"Reward Signals","subtitle":"What 'Good' Means to a Training Loop","icon":"","color":"#2D7DD2","sections":[{"type":"concept"},{"type":"concept"},{"type":"example"},{"type":"practice"}]},{"id":"2","title":"DPO Deep Dive","subtitle":"Preference Optimization Without a Reward Model","icon":"","color":"#45B69C","sections":[{"type":"concept"},{"type":"concept"},{"type":"example"},{"type":"practice"}]},{"id":"3","title":"GRPO & Reasoning RL","subtitle":"Group Baselines, No Critic, R1-Style Training","icon":"","color":"#FF6B35","sections":[{"type":"concept"},{"type":"concept"},{"type":"analogy"},{"type":"practice"}]},{"id":"4","title":"Refinements That Matter","subtitle":"Dr-GRPO and DAPO -- What They Fix","icon":"","color":"#F7B32B","sections":[{"type":"concept"},{"type":"concept"},{"type":"example"},{"type":"practice"}]},{"id":"5","title":"Agent RL & The Decision","subtitle":"verl-agent, GiGPO, and When NOT to Reach for RL","icon":"","color":"#1B998B","sections":[{"type":"concept"},{"type":"concept"},{"type":"concept"},{"type":"example"},{"type":"practice"}]}],"references":[],"locked":true,"lockReason":"signup"},"initialDeepLink":null,"pathPodcasts":{},"pathCovers":{"agent-engineering":{"coverUrl":"/local-content/ai-engineering/agent-engineering/cover.webp","format":"webp","sizeBytes":47068,"width":1600,"height":900},"agent-infrastructure":{"coverUrl":"/local-content/ai-engineering/agent-infrastructure/cover.webp","format":"webp","sizeBytes":53698,"width":1600,"height":900},"agent-sdk":{"coverUrl":"/local-content/ai-engineering/agent-sdk/cover.webp","format":"webp","sizeBytes":69910,"width":1600,"height":900},"ai-foundations":{"coverUrl":"/local-content/ai-engineering/ai-foundations/cover.webp","format":"webp","sizeBytes":49876,"width":1600,"height":900},"deep-learning":{"coverUrl":"/local-content/ai-engineering/deep-learning/cover.webp","format":"webp","sizeBytes":44666,"width":1600,"height":1067},"local-llm":{"coverUrl":"/local-content/ai-engineering/local-llm/cover.webp","format":"webp","sizeBytes":111754,"width":1600,"height":1067},"machine-learning":{"coverUrl":"/local-content/ai-engineering/machine-learning/cover.webp","format":"webp","sizeBytes":38938,"width":1600,"height":1067},"on-device-edge-ai":{"coverUrl":"/local-content/ai-engineering/on-device-edge-ai/cover.webp","format":"webp","sizeBytes":69992,"width":1600,"height":900},"production-ai":{"coverUrl":"/local-content/ai-engineering/production-ai/cover.webp","format":"webp","sizeBytes":23072,"width":1600,"height":900},"specialized-agents":{"coverUrl":"/local-content/ai-engineering/specialized-agents/cover.webp","format":"webp","sizeBytes":67402,"width":1600,"height":900},"advanced-evals":{"coverUrl":"/local-content/ai-engineering-testing/advanced-evals/cover.webp","format":"webp","sizeBytes":51056,"width":1600,"height":900},"code-testing":{"coverUrl":"/local-content/ai-engineering-testing/code-testing/cover.webp","format":"webp","sizeBytes":57950,"width":1600,"height":900},"e2e-ai-testing":{"coverUrl":"/local-content/ai-engineering-testing/e2e-ai-testing/cover.webp","format":"webp","sizeBytes":28252,"width":1600,"height":900},"ai-adoption":{"coverUrl":"/local-content/ai-for-business/ai-adoption/cover.webp","format":"webp","sizeBytes":69862,"width":1600,"height":1067},"ai-agents-beginners":{"coverUrl":"/local-content/ai-for-business/ai-agents-beginners/cover.webp","format":"webp","sizeBytes":58624,"width":1600,"height":1067},"ai-coding-beginners":{"coverUrl":"/local-content/ai-for-business/ai-coding-beginners/cover.webp","format":"webp","sizeBytes":55354,"width":1600,"height":1067},"ai-for-cx":{"coverUrl":"/local-content/ai-for-business/ai-for-cx/cover.webp","format":"webp","sizeBytes":106208,"width":1600,"height":1067},"ai-for-data-analysts":{"coverUrl":"/local-content/ai-for-business/ai-for-data-analysts/cover.webp","format":"webp","sizeBytes":33832,"width":1600,"height":1067},"ai-for-finance":{"coverUrl":"/local-content/ai-for-business/ai-for-finance/cover.webp","format":"webp","sizeBytes":58650,"width":1600,"height":1067},"ai-for-hr":{"coverUrl":"/local-content/ai-for-business/ai-for-hr/cover.webp","format":"webp","sizeBytes":65386,"width":1600,"height":1067},"ai-for-leaders":{"coverUrl":"/local-content/ai-for-business/ai-for-leaders/cover.webp","format":"webp","sizeBytes":88312,"width":1600,"height":1067},"ai-for-legal":{"coverUrl":"/local-content/ai-for-business/ai-for-legal/cover.webp","format":"webp","sizeBytes":63254,"width":1600,"height":1067},"ai-for-marketing":{"coverUrl":"/local-content/ai-for-business/ai-for-marketing/cover.webp","format":"webp","sizeBytes":84570,"width":1600,"height":1067},"ai-for-operations":{"coverUrl":"/local-content/ai-for-business/ai-for-operations/cover.webp","format":"webp","sizeBytes":64260,"width":1600,"height":1067},"ai-for-pm":{"coverUrl":"/local-content/ai-for-business/ai-for-pm/cover.webp","format":"webp","sizeBytes":56848,"width":1600,"height":1067},"ai-for-sales":{"coverUrl":"/local-content/ai-for-business/ai-for-sales/cover.webp","format":"webp","sizeBytes":58000,"width":1600,"height":1067},"ai-literacy":{"coverUrl":"/local-content/ai-for-business/ai-literacy/cover.webp","format":"webp","sizeBytes":67998,"width":1600,"height":900},"copilot-business":{"coverUrl":"/local-content/ai-for-business/copilot-business/cover.webp","format":"webp","sizeBytes":64794,"width":1600,"height":900},"creative-foundations":{"coverUrl":"/local-content/ai-for-business/creative-foundations/cover.webp","format":"webp","sizeBytes":45952,"width":1600,"height":1067},"creative-pipelines":{"coverUrl":"/local-content/ai-for-business/creative-pipelines/cover.webp","format":"webp","sizeBytes":43980,"width":1600,"height":1067},"daily-ai-toolkit":{"coverUrl":"/local-content/ai-for-business/daily-ai-toolkit/cover.webp","format":"webp","sizeBytes":57384,"width":1600,"height":1067},"images-with-ai":{"coverUrl":"/local-content/ai-for-business/images-with-ai/cover.webp","format":"webp","sizeBytes":39894,"width":1600,"height":1067},"open-models-landscape":{"coverUrl":"/local-content/ai-for-business/open-models-landscape/cover.webp","format":"webp","sizeBytes":46310,"width":1600,"height":1067},"video-motion-with-ai":{"coverUrl":"/local-content/ai-for-business/video-motion-with-ai/cover.webp","format":"webp","sizeBytes":58060,"width":1600,"height":1067},"why-ai-why-now-why-you":{"coverUrl":"/local-content/ai-for-business/why-ai-why-now-why-you/cover.webp","format":"webp","sizeBytes":32622,"width":1600,"height":1067},"adversarial-ai":{"coverUrl":"/local-content/ai-safety-trust/adversarial-ai/cover.webp","format":"webp","sizeBytes":28138,"width":1600,"height":900},"ai-governance-compliance":{"coverUrl":"/local-content/ai-safety-trust/ai-governance-compliance/cover.webp","format":"webp","sizeBytes":74484,"width":1600,"height":1067},"build-with-claude-code":{"coverUrl":"/local-content/claude-code/build-with-claude-code/cover.webp","format":"webp","sizeBytes":76232,"width":1600,"height":1067},"claude-code":{"coverUrl":"/local-content/claude-code/claude-code/cover.webp","format":"webp","sizeBytes":30268,"width":1600,"height":900},"claude-code-orchestration":{"coverUrl":"/local-content/claude-code/claude-code-orchestration/cover.webp","format":"webp","sizeBytes":48798,"width":1600,"height":1067},"claude-design":{"coverUrl":"/local-content/claude-code/claude-design/cover.webp","format":"webp","sizeBytes":55494,"width":1600,"height":1067},"codex":{"coverUrl":"/local-content/claude-code/codex/cover.webp","format":"webp","sizeBytes":31182,"width":1600,"height":900},"coding-agent-craft":{"coverUrl":"/local-content/claude-code/coding-agent-craft/cover.webp","format":"webp","sizeBytes":49374,"width":1600,"height":900},"coding-agents-landscape":{"coverUrl":"/local-content/claude-code/coding-agents-landscape/cover.webp","format":"webp","sizeBytes":68732,"width":1600,"height":1067},"cursor":{"coverUrl":"/local-content/claude-code/cursor/cover.webp","format":"webp","sizeBytes":44974,"width":1600,"height":1067},"gemini-cli":{"coverUrl":"/local-content/claude-code/gemini-cli/cover.webp","format":"webp","sizeBytes":44488,"width":1600,"height":900},"github-copilot":{"coverUrl":"/local-content/claude-code/github-copilot/cover.webp","format":"webp","sizeBytes":56150,"width":1600,"height":1067},"cowork-foundations":{"coverUrl":"/local-content/claude-cowork/cowork-foundations/cover.webp","format":"webp","sizeBytes":91960,"width":1600,"height":1067},"finance-operations-cowork":{"coverUrl":"/local-content/claude-cowork/finance-operations-cowork/cover.webp","format":"webp","sizeBytes":74966,"width":1600,"height":1067},"legal-practice-cowork":{"coverUrl":"/local-content/claude-cowork/legal-practice-cowork/cover.webp","format":"webp","sizeBytes":76228,"width":1600,"height":1067},"marketing-agency-cowork":{"coverUrl":"/local-content/claude-cowork/marketing-agency-cowork/cover.webp","format":"webp","sizeBytes":35580,"width":1600,"height":1067},"sales-pipeline-cowork":{"coverUrl":"/local-content/claude-cowork/sales-pipeline-cowork/cover.webp","format":"webp","sizeBytes":63386,"width":1600,"height":1067},"copilot-admins-it":{"coverUrl":"/local-content/copilot-mastery/copilot-admins-it/cover.webp","format":"webp","sizeBytes":33156,"width":1600,"height":1067},"copilot-business-analysts":{"coverUrl":"/local-content/copilot-mastery/copilot-business-analysts/cover.webp","format":"webp","sizeBytes":76002,"width":1600,"height":1067},"copilot-foundations":{"coverUrl":"/local-content/copilot-mastery/copilot-foundations/cover.webp","format":"webp","sizeBytes":70924,"width":1600,"height":1067},"copilot-per-app":{"coverUrl":"/local-content/copilot-mastery/copilot-per-app/cover.webp","format":"webp","sizeBytes":57352,"width":1600,"height":1067},"copilot-studio":{"coverUrl":"/local-content/copilot-mastery/copilot-studio/cover.webp","format":"webp","sizeBytes":41870,"width":1600,"height":1067},"power-platform-ai":{"coverUrl":"/local-content/copilot-mastery/power-platform-ai/cover.webp","format":"webp","sizeBytes":54614,"width":1600,"height":1067},"build-like-a-product-engineer":{"coverUrl":"/local-content/engineering-reset/build-like-a-product-engineer/cover.webp","format":"webp","sizeBytes":68254,"width":1600,"height":1067},"code-was-the-easy-part":{"coverUrl":"/local-content/engineering-reset/code-was-the-easy-part/cover.webp","format":"webp","sizeBytes":61310,"width":1600,"height":1067},"developer-to-product-engineer":{"coverUrl":"/local-content/engineering-reset/developer-to-product-engineer/cover.webp","format":"webp","sizeBytes":118876,"width":1600,"height":1067},"engineers-changing-jobs":{"coverUrl":"/local-content/engineering-reset/engineers-changing-jobs/cover.webp","format":"webp","sizeBytes":41418,"width":1600,"height":1067},"rebuild-engineering-org":{"coverUrl":"/local-content/engineering-reset/rebuild-engineering-org/cover.webp","format":"webp","sizeBytes":102516,"width":1600,"height":1067},"ai-digital-product":{"coverUrl":"/local-content/micro-experiments/ai-digital-product/cover.webp","format":"webp","sizeBytes":43604,"width":1600,"height":900},"singularity-engineering":{"coverUrl":"/local-content/singularity-node/singularity-engineering/cover.webp","format":"webp","sizeBytes":81254,"width":1600,"height":1067},"singularity-foundations":{"coverUrl":"/local-content/singularity-node/singularity-foundations/cover.webp","format":"webp","sizeBytes":82884,"width":1600,"height":1067},"singularity-maintenance":{"coverUrl":"/local-content/singularity-node/singularity-maintenance/cover.webp","format":"webp","sizeBytes":57858,"width":1600,"height":1067},"singularity-operations":{"coverUrl":"/local-content/singularity-node/singularity-operations/cover.webp","format":"webp","sizeBytes":94688,"width":1600,"height":1067},"singularity-runtime":{"coverUrl":"/local-content/singularity-node/singularity-runtime/cover.webp","format":"webp","sizeBytes":99562,"width":1600,"height":1067}},"catalogOverlay":"$26:props:overlay","locked":true,"lockReason":"signup","draft":false,"anonymous":true}]