<?xml version="1.0" encoding="UTF-8"?>
<!--
  RSS 2.0 feed document. The dc prefix is bound on the root element to the
  Dublin Core elements namespace; it is used by the dc:creator, dc:date and
  dc:identifier children of the item below.
-->
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
  <channel>
    <!-- Channel metadata: feed title, site link, and a short description. -->
    <title>InfoQ - Facebook - Presentations</title>
    <link>https://www.infoq.com</link>
    <description>InfoQ Facebook Presentations feed</description>
    <!-- A single feed entry describing one presentation. -->
    <item>
      <title>Presentation: Scaling Large Language Model Serving Infrastructure at Meta</title>
      <!--
        Entry URL. The query string carries utm_campaign, utm_source,
        utm_medium and utm_term parameters; each ampersand separating them is
        written as an entity reference, as XML text requires.
      -->
      <link>https://www.infoq.com/presentations/llm-meta/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Facebook-presentations</link>
      <!--
        The summary is HTML carried as escaped text (an img thumbnail, a p
        paragraph and an i byline). An XML parser therefore sees one text
        node here, not child elements; the consumer is expected to unescape
        and render it as HTML.
      -->
      <description>&lt;img src="https://res.infoq.com/presentations/llm-meta/en/mediumimage/ye-charlotte-qi-medium-1747727365712.jpg"/&gt;&lt;p&gt;Ye (Charlotte) Qi overviews LLM serving infrastructure challenges: fitting &amp; speed (Model Runners, KV cache, and distributed inference), production complexities (latency optimization and continuous evaluation), and effective scaling strategies (heterogeneous deployment and autoscaling). Learn key concepts for robust LLM deployment.&lt;/p&gt; &lt;i&gt;By Ye Qi&lt;/i&gt;</description>
      <!-- Eight free-text category labels; none carries a domain attribute. -->
      <category>Case Study</category>
      <category>Large language models</category>
      <category>Artificial Intelligence</category>
      <category>QCon San Francisco 2024</category>
      <category>Facebook</category>
      <category>Transcripts</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>presentation</category>
      <!-- Publication time in the RFC 822 style date format used by RSS, in GMT. -->
      <pubDate>Thu, 29 May 2025 13:11:00 GMT</pubDate>
      <!--
        Same text as the link element above, tracking query string included.
        No isPermaLink attribute is given; under RSS 2.0 that attribute
        defaults to true, so readers may treat this value as a URL.
        NOTE(review): whether consumers rely on this exact string for
        de-duplication cannot be told from this file; confirm before
        changing it.
      -->
      <guid>https://www.infoq.com/presentations/llm-meta/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Facebook-presentations</guid>
      <!-- Dublin Core author name; matches the "By Ye Qi" byline in the description. -->
      <dc:creator>Ye Qi</dc:creator>
      <!-- Same instant as pubDate, written in ISO 8601 form with a UTC (Z) designator. -->
      <dc:date>2025-05-29T13:11:00Z</dc:date>
      <!--
        Site-relative path rather than a full URL. It extends the path of the
        link above with a trailing "en" segment; presumably a language code,
        to be confirmed with the feed producer.
      -->
      <dc:identifier>/presentations/llm-meta/en</dc:identifier>
    </item>
  </channel>
</rss>
