<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
  <channel>
    <title>InfoQ - Apache Spark - News</title>
    <link>https://www.infoq.com</link>
    <description>InfoQ Apache Spark News feed</description>
    <item>
      <title>Uber’s Hive Federation Decentralizes 16K Datasets and 10+ PB for Zero-Downtime Analytics at Scale</title>
      <link>https://www.infoq.com/news/2026/04/uber-hive-decentralized-data/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Apache+Spark-news</link>
      <description>&lt;img src="https://res.infoq.com/news/2026/04/uber-hive-decentralized-data/en/headerimage/generatedHeaderImage-1775343806833.jpg"/&gt;&lt;p&gt;Uber has decentralized its Hive data warehouse, migrating 16,000 datasets totaling over 10 petabytes using pointer-based federation. The migration ensures zero downtime, strict ACL enforcement, improved governance, and scalable, domain-specific datasets for analytics and machine learning workloads.&lt;/p&gt; &lt;i&gt;By Leela Kumili&lt;/i&gt;</description>
      <category>Hive</category>
      <category>Database</category>
      <category>Apache Hive</category>
      <category>Apache Hadoop</category>
      <category>Data Analytics</category>
      <category>Platform Engineering</category>
      <category>Data Analysis</category>
      <category>migration</category>
      <category>Data Governance</category>
      <category>Compliance</category>
      <category>Apache Spark</category>
      <category>Federation</category>
      <category>Development</category>
      <category>Architecture &amp; Design</category>
      <category>news</category>
      <pubDate>Thu, 09 Apr 2026 13:54:00 GMT</pubDate>
      <guid>https://www.infoq.com/news/2026/04/uber-hive-decentralized-data/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Apache+Spark-news</guid>
      <dc:creator>Leela Kumili</dc:creator>
      <dc:date>2026-04-09T13:54:00Z</dc:date>
      <dc:identifier>/news/2026/04/uber-hive-decentralized-data/en</dc:identifier>
    </item>
    <item>
      <title>Pinterest Reduces Spark OOM Failures by 96% through Auto Memory Retries</title>
      <link>https://www.infoq.com/news/2026/04/pinterest-spark-oom-reduction/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Apache+Spark-news</link>
      <description>&lt;img src="https://res.infoq.com/news/2026/04/pinterest-spark-oom-reduction/en/headerimage/workflow-1775338668860.jpeg"/&gt;&lt;p&gt;Pinterest Engineering cut Apache Spark out-of-memory failures by 96% using improved observability, configuration tuning, and automatic memory retries. Staged rollout, dashboards, and proactive memory adjustments stabilized data pipelines, reduced manual intervention, and lowered operational overhead across tens of thousands of daily jobs.&lt;/p&gt; &lt;i&gt;By Leela Kumili&lt;/i&gt;</description>
      <category>Big Data</category>
      <category>Optimization</category>
      <category>Cost Optimization</category>
      <category>Memory</category>
      <category>Architecture Analysis</category>
      <category>Batch Processing</category>
      <category>Observability</category>
      <category>Architecture</category>
      <category>Distributed Systems</category>
      <category>Apache Spark</category>
      <category>Development</category>
      <category>Architecture &amp; Design</category>
      <category>news</category>
      <pubDate>Mon, 06 Apr 2026 14:32:00 GMT</pubDate>
      <guid>https://www.infoq.com/news/2026/04/pinterest-spark-oom-reduction/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Apache+Spark-news</guid>
      <dc:creator>Leela Kumili</dc:creator>
      <dc:date>2026-04-06T14:32:00Z</dc:date>
      <dc:identifier>/news/2026/04/pinterest-spark-oom-reduction/en</dc:identifier>
    </item>
  </channel>
</rss>
