<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <!--
    Sitemap for rl-handbook.com: the site root, the /docs index, and one
    entry per documentation page. Each URL appears exactly once; a repeated
    <loc> with a different <priority> gives crawlers conflicting hints.
    NOTE(review): if this file is generated, de-duplicate the /docs entry in
    the generator as well, otherwise the duplicate returns on the next build.
  -->
  <url>
    <loc>https://rl-handbook.com</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>1</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.9</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/references</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/00-introduction/introduction</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/00-introduction/taxonomy</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/00-introduction/what-is-reinforcement-learning</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/01-value-based/dqn</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/01-value-based/dqn-improvements</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/01-value-based/dynamic-programming</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/01-value-based/mdp</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/01-value-based/monte-carlo-and-temporal-difference</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/01-value-based/multi-armed-bandits</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/01-value-based/sarsa-and-q-learning</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/02-on-policy-policy-based/actor-critic-a2c-a3c</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/02-on-policy-policy-based/policy-gradient-and-reinforce</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/02-on-policy-policy-based/ppo</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/02-on-policy-policy-based/trpo</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/03-off-policy-policy-based/ddpg</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/03-off-policy-policy-based/off-policy-policy-improvement-framework</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/03-off-policy-policy-based/td3-and-sac</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/04-model-based/alphazero-and-muzero</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/04-model-based/dyna-and-learned-models</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/04-model-based/model-predictive-control</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/05-advanced-topics/exploration</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/05-advanced-topics/goal-conditioned-rl</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/05-advanced-topics/imitation-learning</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/05-advanced-topics/multi-agent-rl</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/05-advanced-topics/offline-rl</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://rl-handbook.com/docs/05-advanced-topics/rl-sequence-generation-and-rlhf</loc>
    <lastmod>2026-06-03T14:53:49.608Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
</urlset>
