<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
  <channel>
    <title>InfoQ - Spark SQL - Articles</title>
    <link>https://www.infoq.com</link>
    <description>InfoQ Spark SQL Articles feed</description>
    <item>
      <title>Article: Building Reproducible ML Systems with Apache Iceberg and SparkSQL: Open Source Foundations</title>
      <link>https://www.infoq.com/articles/reproducible-ml-iceberg/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Spark+SQL-articles</link>
      <description>&lt;img src="https://res.infoq.com/articles/reproducible-ml-iceberg/en/headerimage/reproducible-ml-iceberg-header-1753341474504.jpg"/&gt;&lt;p&gt;Traditional data lakes are great for storing massive amounts of stuff, but they're terrible at the transactional guarantees and versioning that ML workloads desperately need. Apache Iceberg and SparkSQL bring database-like reliability to your data lake. Time travel, schema evolution, and ACID transactions help support reproducible machine learning experiments.&lt;/p&gt; &lt;i&gt;By Anant Kumar&lt;/i&gt;</description>
      <category>Apache Iceberg</category>
      <category>Spark SQL</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>article</category>
      <pubDate>Thu, 31 Jul 2025 09:00:00 GMT</pubDate>
      <guid>https://www.infoq.com/articles/reproducible-ml-iceberg/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Spark+SQL-articles</guid>
      <dc:creator>Anant Kumar</dc:creator>
      <dc:date>2025-07-31T09:00:00Z</dc:date>
      <dc:identifier>/articles/reproducible-ml-iceberg/en</dc:identifier>
    </item>
  </channel>
</rss>
