<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
  <channel>
    <title>InfoQ - Reliability</title>
    <link>https://www.infoq.com</link>
    <description>InfoQ Reliability feed</description>
    <item>
      <title>Netflix Scales "Human Infrastructure" to Manage Global Live Operations</title>
      <link>https://www.infoq.com/news/2026/04/netflix-live-human-ops-scale/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Reliability</link>
      <description>&lt;img src="https://res.infoq.com/news/2026/04/netflix-live-human-ops-scale/en/headerimage/header-1777364196733.jpeg"/&gt;&lt;p&gt;Netflix has introduced a "human infrastructure" layer to manage live broadcasts at scale. Using a low-latency "telemetry hot path" and a Live Operations Centre, the company now balances automated scaling with human oversight. This shift, which mirrors strategies at AWS and Disney+, focuses on maintaining reliability through expert intervention during high-concurrency global events.&lt;/p&gt; &lt;i&gt;By Mark Silvester&lt;/i&gt;</description>
      <category>Netflix</category>
      <category>Reliability</category>
      <category>Architecture</category>
      <category>Scaling</category>
      <category>Operations management</category>
      <category>Development</category>
      <category>Architecture &amp; Design</category>
      <category>DevOps</category>
      <category>news</category>
      <pubDate>Thu, 30 Apr 2026 08:10:00 GMT</pubDate>
      <guid>https://www.infoq.com/news/2026/04/netflix-live-human-ops-scale/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Reliability</guid>
      <dc:creator>Mark Silvester</dc:creator>
      <dc:date>2026-04-30T08:10:00Z</dc:date>
      <dc:identifier>/news/2026/04/netflix-live-human-ops-scale/en</dc:identifier>
    </item>
    <item>
      <title>Article: When a Cloud Region Fails: Rethinking High Availability in a Geopolitically Unstable World</title>
      <link>https://www.infoq.com/articles/sovereign-fault-domains-cloud-resilience/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Reliability</link>
      <description>&lt;img src="https://res.infoq.com/articles/sovereign-fault-domains-cloud-resilience/en/headerimage/sovereign-fault-domains-cloud-resilience-header-1776430533702.jpg"/&gt;&lt;p&gt;Sovereign fault domains are failure boundaries defined by legal, political, or physical jurisdiction rather than hardware topology. The article maps geopolitical events to known distributed-systems failure modes, argues multi-region should replace multi-AZ as the HA baseline for systems crossing jurisdictions, and outlines design patterns, chaos experiments, and an ALE model to justify the spend.&lt;/p&gt; &lt;i&gt;By Rohan Vardhan&lt;/i&gt;</description>
      <category>Site Reliability Engineering</category>
      <category>Chaos Engineering</category>
      <category>Fault Tolerance</category>
      <category>Reliability</category>
      <category>Architecture</category>
      <category>Availability</category>
      <category>Disaster Recovery</category>
      <category>Cloud</category>
      <category>Resilience</category>
      <category>Cloud Architecture</category>
      <category>Development</category>
      <category>Architecture &amp; Design</category>
      <category>DevOps</category>
      <category>article</category>
      <pubDate>Wed, 22 Apr 2026 09:00:00 GMT</pubDate>
      <guid>https://www.infoq.com/articles/sovereign-fault-domains-cloud-resilience/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Reliability</guid>
      <dc:creator>Rohan Vardhan</dc:creator>
      <dc:date>2026-04-22T09:00:00Z</dc:date>
      <dc:identifier>/articles/sovereign-fault-domains-cloud-resilience/en</dc:identifier>
    </item>
  </channel>
</rss>
