<?xml version="1.0" encoding="utf-8" standalone="yes"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml"><url><loc>https://richardli.xyz/post/mismatch-lr-schedule/</loc><lastmod>2025-12-20T01:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/categories/</loc><lastmod>2025-12-20T01:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/deep-learning/</loc><lastmod>2025-12-20T01:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/language-models/</loc><lastmod>2025-12-20T01:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/learning-rate-scheduling/</loc><lastmod>2025-12-20T01:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/optimization/</loc><lastmod>2025-12-20T01:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/post/</loc><lastmod>2025-12-20T01:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/reinforcement-learning/</loc><lastmod>2025-12-20T01:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/category/research/</loc><lastmod>2025-12-20T01:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tags/</loc><lastmod>2025-12-20T01:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/category/theory/</loc><lastmod>2025-12-20T01:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/post/optimal-token-baseline/</loc><lastmod>2025-12-20T00:30:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/variance-reduction/</loc><lastmod>2025-12-20T00:30:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/policy-optimization/</loc><lastmod>2025-12-20T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/trust-region/</loc><lastmod>2025-12-20T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/post/trust-region-masking/</loc><lastmod>2025-12-20T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/large-language-models/</loc><lastmod>2025-12-07T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/llm-rl/</loc><lastmod>2025-12-07T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/mixture-of-experts/</loc><lastmod>2025-12-07T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/post/topk-routing-stability-gap/</loc><lastmod>2025-12-07T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/training-dynamics/</loc><lastmod>2025-12-07T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/bandits/</loc><lastmod>2025-11-29T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/exploration/</loc><lastmod>2025-11-29T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/post/scalable-exploration/</loc><lastmod>2025-11-29T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/thompson-sampling/</loc><lastmod>2025-11-29T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/agent-architecture/</loc><lastmod>2025-11-07T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/category/engineering/</loc><lastmod>2025-11-07T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/language-agents/</loc><lastmod>2025-11-07T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/post/language-rl-agent/</loc><lastmod>2025-11-07T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/software-engineering/</loc><lastmod>2025-11-07T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/chain-of-thought/</loc><lastmod>2025-11-04T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/category/documentation/</loc><lastmod>2025-11-04T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/importance-sampling/</loc><lastmod>2025-11-04T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/post/verl-rollout-correction/</loc><lastmod>2025-11-04T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/off-policy/</loc><lastmod>2025-11-04T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/off-policy-learning/</loc><lastmod>2025-11-04T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/post/rl-collapse-part3/</loc><lastmod>2025-11-04T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/verl/</loc><lastmod>2025-11-04T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/post/rl-collapse-part2/</loc><lastmod>2025-10-31T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/ppo/</loc><lastmod>2025-10-31T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/post/rl-collapse-part1/</loc><lastmod>2025-10-30T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/trpo/</loc><lastmod>2025-10-30T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/post/information-bandwidth-rl/</loc><lastmod>2025-10-01T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/information-theory/</loc><lastmod>2025-10-01T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/post/rl-collapse-training-inference/</loc><lastmod>2025-09-17T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/algorithm/</loc><lastmod>2024-03-23T13:30:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/efficiency/</loc><lastmod>2024-03-23T13:30:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/talk/hyperagent-a-simple-efficient-scalable-and-provable-rl-framework/</loc><lastmod>2024-03-23T13:30:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/random-projection/</loc><lastmod>2024-03-23T13:30:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/event/</loc><lastmod>2024-03-23T13:30:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/regret/</loc><lastmod>2024-03-23T13:30:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/talk/hyperagent-a-simple-efficient-and-scalable-rl-framework-for-complex-environments/</loc><lastmod>2024-01-13T13:20:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/</loc><lastmod>2023-12-07T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/agi/</loc><lastmod>2023-10-21T14:30:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/talk/towards-agi-for-humanity-through-efficient-reinforcement-learning/</loc><lastmod>2023-10-21T14:30:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/application/</loc><lastmod>2022-08-23T14:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/bandit/</loc><lastmod>2022-08-23T14:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/game-theory/</loc><lastmod>2022-08-23T14:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/tag/multi-agent/</loc><lastmod>2022-08-23T14:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/talk/no-regret-learning-in-unknown-game-with-applications/</loc><lastmod>2022-08-23T14:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/talk/hyperdqn-randomized-exploration-for-deep-reinforcement-learning/</loc><lastmod>2021-12-14T00:00:00+00:00</lastmod><changefreq>weekly</changefreq></url><url><loc>https://richardli.xyz/publication_types/</loc><changefreq>weekly</changefreq></url></urlset>