<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
  <channel>
    <title>InfoQ - Scalability - Presentations</title>
    <link>https://www.infoq.com</link>
    <description>InfoQ Scalability Presentations feed</description>
    <item>
      <title>Presentation: Realtime and Batch Processing of GPU Workloads</title>
      <link>https://www.infoq.com/presentations/realtime-gpu-workloads/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Scalability-presentations</link>
      <description>&lt;img src="https://res.infoq.com/presentations/realtime-gpu-workloads/en/mediumimage/medium-1779194310932.jpg"/&gt;&lt;p&gt;Joseph Stein discusses engineering an enterprise AI-as-a-Service platform within a private cloud data center. He explains how to maximize underutilized GPU pools via multi-namespace scheduling, leverage Valkey and Lua for atomic priority queuing and backpressure management, mitigate OWASP Top 10 LLM risks via central proxy gateways, and scale batch pipelines using a custom S3-to-Kafka proxy.&lt;/p&gt; &lt;i&gt;By Joseph Stein&lt;/i&gt;</description>
      <category>Case Study</category>
      <category>GPU</category>
      <category>Scalability</category>
      <category>Cloud</category>
      <category>QCon San Francisco 2025</category>
      <category>Transcripts</category>
      <category>DevOps</category>
      <category>presentation</category>
      <pubDate>Tue, 26 May 2026 09:08:00 GMT</pubDate>
      <guid>https://www.infoq.com/presentations/realtime-gpu-workloads/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Scalability-presentations</guid>
      <dc:creator>Joseph Stein</dc:creator>
      <dc:date>2026-05-26T09:08:00Z</dc:date>
      <dc:identifier>/presentations/realtime-gpu-workloads/en</dc:identifier>
    </item>
    <item>
      <title>Presentation: The AI Gateway: Scaling Centralized Inference across Decentralized Teams</title>
      <link>https://www.infoq.com/presentations/ai-gateway-scalability/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Scalability-presentations</link>
      <description>&lt;img src="https://res.infoq.com/presentations/ai-gateway-scalability/en/mediumimage/medium-1778663382364.jpg"/&gt;&lt;p&gt;Meryem Arik discusses why modern engineering teams face "inference chaos" and how AI model gateways provide a critical control layer. She explains the balance between empowering decentralized teams to choose the best models and maintaining centralized oversight for security, RBAC, and cost control.  Explore open-source solutions like LiteLLM and Doubleword to streamline your AI infra.&lt;/p&gt; &lt;i&gt;By Meryem Arik&lt;/i&gt;</description>
      <category>QCon AI 2025</category>
      <category>Scalability</category>
      <category>Artificial Intelligence</category>
      <category>Transcripts</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>presentation</category>
      <pubDate>Wed, 20 May 2026 12:40:00 GMT</pubDate>
      <guid>https://www.infoq.com/presentations/ai-gateway-scalability/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Scalability-presentations</guid>
      <dc:creator>Meryem Arik</dc:creator>
      <dc:date>2026-05-20T12:40:00Z</dc:date>
      <dc:identifier>/presentations/ai-gateway-scalability/en</dc:identifier>
    </item>
  </channel>
</rss>
