28:["$","$L29",null,{"slug":"eval-foundations","initialData":{"meta":{"title":"Eval-Grundlagen: Dein erstes LLM-Eval in 30 Minuten","subtitle":"Schluss mit Bauchgefühl-Checks. Baue ein lauffähiges Eval — Golden Dataset, deterministischer Scorer, LLM-Judge — und lies das Ergebnis wie ein Engineer.","accentColor":"#45B69C","function":"learning","version":"1.0","publishedAt":"2026-04-29","lastReviewedAt":"2026-04-29","tags":["fundamentals","devops","tooling"],"categories":["testing-and-evals","patterns-and-craft","production-and-reliability"],"audience":"engineering","level":"advanced","translation":{"locale":"de","sourceLocale":"en","method":"direct-prompt-opus","translatedAt":"2026-06-05"}},"modules":[{"id":"0","title":"Warum Bauchgefühl-Evals scheitern","subtitle":"Die Kluft zwischen „Ich hab ein paar Outputs geprüft“ und echten Belegen","icon":"","color":"#45B69C","sections":[{"type":"concept"},{"type":"analogy"},{"type":"practice"}]},{"id":"1","title":"Anatomie eines Evals","subtitle":"Die vier Bestandteile, die jedes Eval hat — unabhängig vom Framework","icon":"","color":"#2D7DD2","sections":[{"type":"concept"},{"type":"example"},{"type":"practice"}]},{"id":"2","title":"Die zwei Eval-Familien","subtitle":"Deterministisch versus Model-Graded — wann was passt","icon":"","color":"#0053B3","sections":[{"type":"concept"},{"type":"concept"},{"type":"example"},{"type":"practice"}]},{"id":"3","title":"Baue es (Teil 1): Dein Golden Dataset","subtitle":"Zehn Zeilen reichen zum Starten; 100 sind nützlich; 1000 zum Vertrauen","icon":"","color":"#F7B32B","sections":[{"type":"concept"},{"type":"example"},{"type":"concept"},{"type":"practice"}]},{"id":"4","title":"Baue es (Teil 2): Führe es auf zwei Arten aus","subtitle":"Dasselbe Dataset, deterministischer Scorer und LLM-Judge — sieh, wo sie übereinstimmen","icon":"","color":"#8338EC","sections":[{"type":"concept"},{"type":"example"},{"type":"concept"},{"type":"practice"}]},{"id":"5","title":"Das Ergebnis lesen","subtitle":"Die Pass-Rate ist eine Zahl; die interessanten Zahlen stehen daneben","icon":"","color":"#1B998B","sections":[{"type":"concept"},{"type":"example"},{"type":"concept"},{"type":"practice"}]},{"id":"6","title":"Wie es weitergeht","subtitle":"Die Brücke in den Rest des Skill Path","icon":"","color":"#FF6B35","sections":[{"type":"concept"},{"type":"practice"}]}],"references":[],"locked":true,"lockReason":"subscribe"},"initialDeepLink":null,"pathPodcasts":{},"pathCovers":{"agent-engineering":{"coverUrl":"/local-content/ai-engineering/agent-engineering/cover.webp","format":"webp","sizeBytes":47068,"width":1600,"height":900},"agent-infrastructure":{"coverUrl":"/local-content/ai-engineering/agent-infrastructure/cover.webp","format":"webp","sizeBytes":53698,"width":1600,"height":900},"agent-sdk":{"coverUrl":"/local-content/ai-engineering/agent-sdk/cover.webp","format":"webp","sizeBytes":69910,"width":1600,"height":900},"ai-foundations":{"coverUrl":"/local-content/ai-engineering/ai-foundations/cover.webp","format":"webp","sizeBytes":49876,"width":1600,"height":900},"deep-learning":{"coverUrl":"/local-content/ai-engineering/deep-learning/cover.webp","format":"webp","sizeBytes":44666,"width":1600,"height":1067},"local-llm":{"coverUrl":"/local-content/ai-engineering/local-llm/cover.webp","format":"webp","sizeBytes":111754,"width":1600,"height":1067},"machine-learning":{"coverUrl":"/local-content/ai-engineering/machine-learning/cover.webp","format":"webp","sizeBytes":38938,"width":1600,"height":1067},"on-device-edge-ai":{"coverUrl":"/local-content/ai-engineering/on-device-edge-ai/cover.webp","format":"webp","sizeBytes":69992,"width":1600,"height":900},"production-ai":{"coverUrl":"/local-content/ai-engineering/production-ai/cover.webp","format":"webp","sizeBytes":23072,"width":1600,"height":900},"specialized-agents":{"coverUrl":"/local-content/ai-engineering/specialized-agents/cover.webp","format":"webp","sizeBytes":67402,"width":1600,"height":900},"advanced-evals":{"coverUrl":"/local-content/ai-engineering-testing/advanced-evals/cover.webp","format":"webp","sizeBytes":51056,"width":1600,"height":900},"code-testing":{"coverUrl":"/local-content/ai-engineering-testing/code-testing/cover.webp","format":"webp","sizeBytes":57950,"width":1600,"height":900},"e2e-ai-testing":{"coverUrl":"/local-content/ai-engineering-testing/e2e-ai-testing/cover.webp","format":"webp","sizeBytes":28252,"width":1600,"height":900},"ai-adoption":{"coverUrl":"/local-content/ai-for-business/ai-adoption/cover.webp","format":"webp","sizeBytes":69862,"width":1600,"height":1067},"ai-agents-beginners":{"coverUrl":"/local-content/ai-for-business/ai-agents-beginners/cover.webp","format":"webp","sizeBytes":58624,"width":1600,"height":1067},"ai-coding-beginners":{"coverUrl":"/local-content/ai-for-business/ai-coding-beginners/cover.webp","format":"webp","sizeBytes":55354,"width":1600,"height":1067},"ai-for-cx":{"coverUrl":"/local-content/ai-for-business/ai-for-cx/cover.webp","format":"webp","sizeBytes":106208,"width":1600,"height":1067},"ai-for-data-analysts":{"coverUrl":"/local-content/ai-for-business/ai-for-data-analysts/cover.webp","format":"webp","sizeBytes":33832,"width":1600,"height":1067},"ai-for-finance":{"coverUrl":"/local-content/ai-for-business/ai-for-finance/cover.webp","format":"webp","sizeBytes":58650,"width":1600,"height":1067},"ai-for-hr":{"coverUrl":"/local-content/ai-for-business/ai-for-hr/cover.webp","format":"webp","sizeBytes":65386,"width":1600,"height":1067},"ai-for-leaders":{"coverUrl":"/local-content/ai-for-business/ai-for-leaders/cover.webp","format":"webp","sizeBytes":88312,"width":1600,"height":1067},"ai-for-legal":{"coverUrl":"/local-content/ai-for-business/ai-for-legal/cover.webp","format":"webp","sizeBytes":63254,"width":1600,"height":1067},"ai-for-marketing":{"coverUrl":"/local-content/ai-for-business/ai-for-marketing/cover.webp","format":"webp","sizeBytes":84570,"width":1600,"height":1067},"ai-for-operations":{"coverUrl":"/local-content/ai-for-business/ai-for-operations/cover.webp","format":"webp","sizeBytes":64260,"width":1600,"height":1067},"ai-for-pm":{"coverUrl":"/local-content/ai-for-business/ai-for-pm/cover.webp","format":"webp","sizeBytes":56848,"width":1600,"height":1067},"ai-for-sales":{"coverUrl":"/local-content/ai-for-business/ai-for-sales/cover.webp","format":"webp","sizeBytes":58000,"width":1600,"height":1067},"ai-literacy":{"coverUrl":"/local-content/ai-for-business/ai-literacy/cover.webp","format":"webp","sizeBytes":67998,"width":1600,"height":900},"copilot-business":{"coverUrl":"/local-content/ai-for-business/copilot-business/cover.webp","format":"webp","sizeBytes":64794,"width":1600,"height":900},"creative-foundations":{"coverUrl":"/local-content/ai-for-business/creative-foundations/cover.webp","format":"webp","sizeBytes":45952,"width":1600,"height":1067},"creative-pipelines":{"coverUrl":"/local-content/ai-for-business/creative-pipelines/cover.webp","format":"webp","sizeBytes":43980,"width":1600,"height":1067},"daily-ai-toolkit":{"coverUrl":"/local-content/ai-for-business/daily-ai-toolkit/cover.webp","format":"webp","sizeBytes":57384,"width":1600,"height":1067},"images-with-ai":{"coverUrl":"/local-content/ai-for-business/images-with-ai/cover.webp","format":"webp","sizeBytes":39894,"width":1600,"height":1067},"open-models-landscape":{"coverUrl":"/local-content/ai-for-business/open-models-landscape/cover.webp","format":"webp","sizeBytes":46310,"width":1600,"height":1067},"video-motion-with-ai":{"coverUrl":"/local-content/ai-for-business/video-motion-with-ai/cover.webp","format":"webp","sizeBytes":58060,"width":1600,"height":1067},"why-ai-why-now-why-you":{"coverUrl":"/local-content/ai-for-business/why-ai-why-now-why-you/cover.webp","format":"webp","sizeBytes":32622,"width":1600,"height":1067},"adversarial-ai":{"coverUrl":"/local-content/ai-safety-trust/adversarial-ai/cover.webp","format":"webp","sizeBytes":28138,"width":1600,"height":900},"ai-governance-compliance":{"coverUrl":"/local-content/ai-safety-trust/ai-governance-compliance/cover.webp","format":"webp","sizeBytes":74484,"width":1600,"height":1067},"build-with-claude-code":{"coverUrl":"/local-content/claude-code/build-with-claude-code/cover.webp","format":"webp","sizeBytes":76232,"width":1600,"height":1067},"claude-code":{"coverUrl":"/local-content/claude-code/claude-code/cover.webp","format":"webp","sizeBytes":30268,"width":1600,"height":900},"claude-code-orchestration":{"coverUrl":"/local-content/claude-code/claude-code-orchestration/cover.webp","format":"webp","sizeBytes":48798,"width":1600,"height":1067},"claude-design":{"coverUrl":"/local-content/claude-code/claude-design/cover.webp","format":"webp","sizeBytes":55494,"width":1600,"height":1067},"codex":{"coverUrl":"/local-content/claude-code/codex/cover.webp","format":"webp","sizeBytes":31182,"width":1600,"height":900},"coding-agent-craft":{"coverUrl":"/local-content/claude-code/coding-agent-craft/cover.webp","format":"webp","sizeBytes":49374,"width":1600,"height":900},"coding-agents-landscape":{"coverUrl":"/local-content/claude-code/coding-agents-landscape/cover.webp","format":"webp","sizeBytes":68732,"width":1600,"height":1067},"cursor":{"coverUrl":"/local-content/claude-code/cursor/cover.webp","format":"webp","sizeBytes":44974,"width":1600,"height":1067},"gemini-cli":{"coverUrl":"/local-content/claude-code/gemini-cli/cover.webp","format":"webp","sizeBytes":44488,"width":1600,"height":900},"github-copilot":{"coverUrl":"/local-content/claude-code/github-copilot/cover.webp","format":"webp","sizeBytes":56150,"width":1600,"height":1067},"cowork-foundations":{"coverUrl":"/local-content/claude-cowork/cowork-foundations/cover.webp","format":"webp","sizeBytes":91960,"width":1600,"height":1067},"finance-operations-cowork":{"coverUrl":"/local-content/claude-cowork/finance-operations-cowork/cover.webp","format":"webp","sizeBytes":74966,"width":1600,"height":1067},"legal-practice-cowork":{"coverUrl":"/local-content/claude-cowork/legal-practice-cowork/cover.webp","format":"webp","sizeBytes":76228,"width":1600,"height":1067},"marketing-agency-cowork":{"coverUrl":"/local-content/claude-cowork/marketing-agency-cowork/cover.webp","format":"webp","sizeBytes":35580,"width":1600,"height":1067},"sales-pipeline-cowork":{"coverUrl":"/local-content/claude-cowork/sales-pipeline-cowork/cover.webp","format":"webp","sizeBytes":63386,"width":1600,"height":1067},"copilot-admins-it":{"coverUrl":"/local-content/copilot-mastery/copilot-admins-it/cover.webp","format":"webp","sizeBytes":33156,"width":1600,"height":1067},"copilot-business-analysts":{"coverUrl":"/local-content/copilot-mastery/copilot-business-analysts/cover.webp","format":"webp","sizeBytes":76002,"width":1600,"height":1067},"copilot-foundations":{"coverUrl":"/local-content/copilot-mastery/copilot-foundations/cover.webp","format":"webp","sizeBytes":70924,"width":1600,"height":1067},"copilot-per-app":{"coverUrl":"/local-content/copilot-mastery/copilot-per-app/cover.webp","format":"webp","sizeBytes":57352,"width":1600,"height":1067},"copilot-studio":{"coverUrl":"/local-content/copilot-mastery/copilot-studio/cover.webp","format":"webp","sizeBytes":41870,"width":1600,"height":1067},"power-platform-ai":{"coverUrl":"/local-content/copilot-mastery/power-platform-ai/cover.webp","format":"webp","sizeBytes":54614,"width":1600,"height":1067},"build-like-a-product-engineer":{"coverUrl":"/local-content/engineering-reset/build-like-a-product-engineer/cover.webp","format":"webp","sizeBytes":68254,"width":1600,"height":1067},"code-was-the-easy-part":{"coverUrl":"/local-content/engineering-reset/code-was-the-easy-part/cover.webp","format":"webp","sizeBytes":61310,"width":1600,"height":1067},"developer-to-product-engineer":{"coverUrl":"/local-content/engineering-reset/developer-to-product-engineer/cover.webp","format":"webp","sizeBytes":118876,"width":1600,"height":1067},"engineers-changing-jobs":{"coverUrl":"/local-content/engineering-reset/engineers-changing-jobs/cover.webp","format":"webp","sizeBytes":41418,"width":1600,"height":1067},"rebuild-engineering-org":{"coverUrl":"/local-content/engineering-reset/rebuild-engineering-org/cover.webp","format":"webp","sizeBytes":102516,"width":1600,"height":1067},"ai-digital-product":{"coverUrl":"/local-content/micro-experiments/ai-digital-product/cover.webp","format":"webp","sizeBytes":43604,"width":1600,"height":900},"singularity-engineering":{"coverUrl":"/local-content/singularity-node/singularity-engineering/cover.webp","format":"webp","sizeBytes":81254,"width":1600,"height":1067},"singularity-foundations":{"coverUrl":"/local-content/singularity-node/singularity-foundations/cover.webp","format":"webp","sizeBytes":82884,"width":1600,"height":1067},"singularity-maintenance":{"coverUrl":"/local-content/singularity-node/singularity-maintenance/cover.webp","format":"webp","sizeBytes":57858,"width":1600,"height":1067},"singularity-operations":{"coverUrl":"/local-content/singularity-node/singularity-operations/cover.webp","format":"webp","sizeBytes":94688,"width":1600,"height":1067},"singularity-runtime":{"coverUrl":"/local-content/singularity-node/singularity-runtime/cover.webp","format":"webp","sizeBytes":99562,"width":1600,"height":1067}},"catalogOverlay":"$26:props:overlay","locked":true,"lockReason":"subscribe","draft":false,"anonymous":true}]