<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
  <channel>
    <title>InfoQ - Large language models</title>
    <link>https://www.infoq.com</link>
    <description>InfoQ Large language models feed</description>
    <item>
      <title>Anthropic Launches Claude Platform on AWS</title>
      <link>https://www.infoq.com/news/2026/05/anthropic-claude-aws/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Large+language+models</link>
      <description>&lt;img src="https://res.infoq.com/news/2026/05/anthropic-claude-aws/en/headerimage/generatedHeaderImage-1778682420283.jpg"/&gt;&lt;p&gt;Anthropic has announced the general availability of Claude Platform on AWS, a new deployment option that gives AWS customers direct access to Anthropic’s native Claude platform using AWS authentication, billing, and monitoring services.&lt;/p&gt; &lt;i&gt;By Daniel Dominguez&lt;/i&gt;</description>
      <category>Claude</category>
      <category>AWS</category>
      <category>Large language models</category>
      <category>Software Development</category>
      <category>Cloud Computing</category>
      <category>Artificial Intelligence</category>
      <category>Anthropic</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>news</category>
      <pubDate>Wed, 13 May 2026 19:20:00 GMT</pubDate>
      <guid>https://www.infoq.com/news/2026/05/anthropic-claude-aws/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Large+language+models</guid>
      <dc:creator>Daniel Dominguez</dc:creator>
      <dc:date>2026-05-13T19:20:00Z</dc:date>
      <dc:identifier>/news/2026/05/anthropic-claude-aws/en</dc:identifier>
    </item>
    <item>
      <title>Coder Agents Enable Running AI Coding Workflows on Self-Hosted Infrastructure</title>
      <link>https://www.infoq.com/news/2026/05/coder-agents-self-hosted-ai/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Large+language+models</link>
      <description>&lt;img src="https://res.infoq.com/news/2026/05/coder-agents-self-hosted-ai/en/headerimage/coder-agents-self-hosted-ai-1778516884639.jpeg"/&gt;&lt;p&gt;Coder Agents is a model-agnostic platform designed to let organizations run AI coding agents on their own infrastructure, rather than relying on cloud-based services. This allows teams to maintain full control over code, data, and execution environments.&lt;/p&gt; &lt;i&gt;By Sergio De Simone&lt;/i&gt;</description>
      <category>Large language models</category>
      <category>Agents</category>
      <category>AI Coding</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>news</category>
      <pubDate>Mon, 11 May 2026 17:00:00 GMT</pubDate>
      <guid>https://www.infoq.com/news/2026/05/coder-agents-self-hosted-ai/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Large+language+models</guid>
      <dc:creator>Sergio De Simone</dc:creator>
      <dc:date>2026-05-11T17:00:00Z</dc:date>
      <dc:identifier>/news/2026/05/coder-agents-self-hosted-ai/en</dc:identifier>
    </item>
    <item>
      <title>Article: Local-First AI Inference: A Cloud Architecture Pattern for Cost-Effective Document Processing</title>
      <link>https://www.infoq.com/articles/local-first-ai-inference-cloud/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Large+language+models</link>
      <description>&lt;img src="https://res.infoq.com/articles/local-first-ai-inference-cloud/en/headerimage/Local-First-AI-Inference-A-Cloud-Architecture-Pattern-for-Cost-Effective-Document-Processing-header-1778141518292.jpg"/&gt;&lt;p&gt;The Local-First AI Inference pattern routes 70–80% of documents to deterministic local extraction at zero API cost, reserving Azure OpenAI calls for edge cases and flagging low-confidence results for human review. Deployed on 4,700 engineering drawing PDFs, it cut API costs by 75% and processing time by 55%, while bounding errors through a human review tier.&lt;/p&gt; &lt;i&gt;By Obinna Iheanachor&lt;/i&gt;</description>
      <category>GPT-4</category>
      <category>Microsoft Azure</category>
      <category>Model Inference</category>
      <category>Generative AI</category>
      <category>Observability</category>
      <category>Azure</category>
      <category>Artificial Intelligence</category>
      <category>Cloud</category>
      <category>Cost Optimization</category>
      <category>DevOps</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>Development</category>
      <category>article</category>
      <pubDate>Mon, 11 May 2026 11:00:00 GMT</pubDate>
      <guid>https://www.infoq.com/articles/local-first-ai-inference-cloud/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Large+language+models</guid>
      <dc:creator>Obinna Iheanachor</dc:creator>
      <dc:date>2026-05-11T11:00:00Z</dc:date>
      <dc:identifier>/articles/local-first-ai-inference-cloud/en</dc:identifier>
    </item>
    <item>
      <title>OpenAI Introduces Websocket-Based Execution Mode to Reduce Latency in Agentic Workflows</title>
      <link>https://www.infoq.com/news/2026/05/openai-websocket-responses-api/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Large+language+models</link>
      <description>&lt;img src="https://res.infoq.com/news/2026/05/openai-websocket-responses-api/en/headerimage/generatedHeaderImage-1777845282531.jpg"/&gt;&lt;p&gt;OpenAI introduces a WebSocket-based execution mode for its Responses API to improve agentic workflow performance in coding agents and real-time AI systems. The update reduces latency by up to 40 percent by replacing HTTP request-response cycles with persistent connections, improving streaming, tool execution, and multi-step orchestration in production-scale AI systems.&lt;/p&gt; &lt;i&gt;By Leela Kumili&lt;/i&gt;</description>
      <category>OpenAI</category>
      <category>Realtime API</category>
      <category>API</category>
      <category>Workflow Foundation</category>
      <category>AI Architecture</category>
      <category>Optimization</category>
      <category>Orchestration</category>
      <category>WebSocket</category>
      <category>Large language models</category>
      <category>Low Latency</category>
      <category>Distributed Systems</category>
      <category>Agents</category>
      <category>Artificial Intelligence</category>
      <category>AI Assisted Coding</category>
      <category>SDK</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>Development</category>
      <category>Architecture &amp; Design</category>
      <category>news</category>
      <pubDate>Thu, 07 May 2026 14:48:00 GMT</pubDate>
      <guid>https://www.infoq.com/news/2026/05/openai-websocket-responses-api/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Large+language+models</guid>
      <dc:creator>Leela Kumili</dc:creator>
      <dc:date>2026-05-07T14:48:00Z</dc:date>
      <dc:identifier>/news/2026/05/openai-websocket-responses-api/en</dc:identifier>
    </item>
    <item>
      <title>Google New TPU Generation is Specifically Designed for Agents and SOTA Model Training</title>
      <link>https://www.infoq.com/news/2026/05/google-8th-tpu-generation/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Large+language+models</link>
      <description>&lt;img src="https://res.infoq.com/news/2026/05/google-8th-tpu-generation/en/headerimage/google-8th-gen-tpus-1778060595193.jpeg"/&gt;&lt;p&gt;Google has unveiled a new generation of Tensor Processing Units (TPUs), featuring two specialized chips designed to accelerate model training and agent workflows, which require continuous, multi-step reasoning, and action loops distributed across multiple models. The new TPUs deliver better performance, memory, and energy efficiency, the company says.&lt;/p&gt; &lt;i&gt;By Sergio De Simone&lt;/i&gt;</description>
      <category>GPU</category>
      <category>Large language models</category>
      <category>Google</category>
      <category>Agents</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>Development</category>
      <category>news</category>
      <pubDate>Wed, 06 May 2026 10:00:00 GMT</pubDate>
      <guid>https://www.infoq.com/news/2026/05/google-8th-tpu-generation/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Large+language+models</guid>
      <dc:creator>Sergio De Simone</dc:creator>
      <dc:date>2026-05-06T10:00:00Z</dc:date>
      <dc:identifier>/news/2026/05/google-8th-tpu-generation/en</dc:identifier>
    </item>
    <item>
      <title>Mistral Adds Remote Agents and Work Mode to Le Chat</title>
      <link>https://www.infoq.com/news/2026/05/mistral-agents-lechat/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Large+language+models</link>
      <description>&lt;img src="https://res.infoq.com/news/2026/05/mistral-agents-lechat/en/headerimage/generatedHeaderImage-1777663082615.jpg"/&gt;&lt;p&gt;Mistral has released Mistral Medium 3.5, a 128-billion parameter model designed to handle instruction following, reasoning, and coding within a single system, and introduced new cloud-based agent capabilities in its Vibe and Le Chat products.&lt;/p&gt; &lt;i&gt;By Daniel Dominguez&lt;/i&gt;</description>
      <category>OpenAI</category>
      <category>Large language models</category>
      <category>Agents</category>
      <category>Artificial Intelligence</category>
      <category>Mistral AI</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>news</category>
      <pubDate>Tue, 05 May 2026 10:08:00 GMT</pubDate>
      <guid>https://www.infoq.com/news/2026/05/mistral-agents-lechat/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Large+language+models</guid>
      <dc:creator>Daniel Dominguez</dc:creator>
      <dc:date>2026-05-05T10:08:00Z</dc:date>
      <dc:identifier>/news/2026/05/mistral-agents-lechat/en</dc:identifier>
    </item>
    <item>
      <title>Cloudflare Builds High-Performance Infrastructure for Running LLMs</title>
      <link>https://www.infoq.com/news/2026/05/cloudflare-llm-infrastructure/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Large+language+models</link>
      <description>&lt;img src="https://res.infoq.com/news/2026/05/cloudflare-llm-infrastructure/en/headerimage/generatedHeaderImage-1776661318905.jpg"/&gt;&lt;p&gt;Cloudflare has recently announced new infrastructure designed to run large AI language models across its global network. As these models rely on costly hardware and must handle large volumes of incoming and outgoing text, Cloudflare separates the model's input processing and output generation onto different optimized systems.&lt;/p&gt; &lt;i&gt;By Renato Losio&lt;/i&gt;</description>
      <category>GPU</category>
      <category>Large language models</category>
      <category>Big Data Infrastructure</category>
      <category>AI Architecture</category>
      <category>Optimization</category>
      <category>Cloudflare</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>Development</category>
      <category>news</category>
      <pubDate>Sun, 03 May 2026 10:58:00 GMT</pubDate>
      <guid>https://www.infoq.com/news/2026/05/cloudflare-llm-infrastructure/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Large+language+models</guid>
      <dc:creator>Renato Losio</dc:creator>
      <dc:date>2026-05-03T10:58:00Z</dc:date>
      <dc:identifier>/news/2026/05/cloudflare-llm-infrastructure/en</dc:identifier>
    </item>
  </channel>
</rss>
