<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
  <channel>
    <title>InfoQ - Performance - News</title>
    <link>https://www.infoq.com</link>
    <description>InfoQ Performance News feed</description>
    <item>
      <title>Google’s TurboQuant Compression May Support Faster Inference, Same Accuracy on Less Capable Hardware</title>
      <link>https://www.infoq.com/news/2026/04/turboquant-compression-kv-cache/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Performance-news</link>
      <description>&lt;img src="https://res.infoq.com/news/2026/04/turboquant-compression-kv-cache/en/headerimage/generatedHeaderImage-1776265077411.jpg"/&gt;&lt;p&gt;Google Research unveiled TurboQuant, a novel quantization algorithm that compresses large language models’ Key-Value caches by up to 6x. With 3.5-bit compression, near-zero accuracy loss, and no retraining needed, it allows developers to run massive context windows on significantly more modest hardware than previously required. Early community benchmarks confirm significant efficiency gains.&lt;/p&gt; &lt;i&gt;By Bruno Couriol&lt;/i&gt;</description>
      <category>Optimization</category>
      <category>Compression</category>
      <category>Performance</category>
      <category>Large language models</category>
      <category>Development</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>news</category>
      <pubDate>Wed, 15 Apr 2026 16:53:00 GMT</pubDate>
      <guid>https://www.infoq.com/news/2026/04/turboquant-compression-kv-cache/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Performance-news</guid>
      <dc:creator>Bruno Couriol</dc:creator>
      <dc:date>2026-04-15T16:53:00Z</dc:date>
      <dc:identifier>/news/2026/04/turboquant-compression-kv-cache/en</dc:identifier>
    </item>
    <item>
      <title>Cloudflare and ETH Zurich Outline Approaches for AI-Driven Cache Optimization</title>
      <link>https://www.infoq.com/news/2026/04/cloudflare-ai-caching-strategies/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Performance-news</link>
      <description>&lt;img src="https://res.infoq.com/news/2026/04/cloudflare-ai-caching-strategies/en/headerimage/aicrawler-1775341603564.jpeg"/&gt;&lt;p&gt;Cloudflare and ETH Zurich highlight how AI-driven crawler traffic challenges traditional caching in CDNs and databases. They propose AI-aware strategies including separate cache tiers, adaptive algorithms, and pay-per-crawl models to balance performance for human users and AI services while maintaining cache efficiency and system stability.&lt;/p&gt; &lt;i&gt;By Leela Kumili&lt;/i&gt;</description>
      <category>CDN</category>
      <category>Database</category>
      <category>Machine Learning</category>
      <category>Distributed Cache</category>
      <category>Artificial Intelligence</category>
      <category>AI Architecture</category>
      <category>Performance</category>
      <category>BOTS</category>
      <category>Caching</category>
      <category>Retrieval-Augmented Generation</category>
      <category>Development</category>
      <category>Architecture &amp; Design</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>news</category>
      <pubDate>Wed, 08 Apr 2026 14:20:00 GMT</pubDate>
      <guid>https://www.infoq.com/news/2026/04/cloudflare-ai-caching-strategies/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Performance-news</guid>
      <dc:creator>Leela Kumili</dc:creator>
      <dc:date>2026-04-08T14:20:00Z</dc:date>
      <dc:identifier>/news/2026/04/cloudflare-ai-caching-strategies/en</dc:identifier>
    </item>
  </channel>
</rss>
