<?xml version="1.0" encoding="UTF-8"?>
<!--
  RSS 2.0 feed document. The dc prefix is bound to the Dublin Core elements
  namespace and is used below for the per-item creator, date, and identifier.
-->
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
  <channel>
    <!-- Channel metadata: feed title, site link, and a short description. -->
    <title>InfoQ - Apache Spark</title>
    <link>https://www.infoq.com</link>
    <description>InfoQ Apache Spark feed</description>
    <!-- The only item in this document. -->
    <item>
      <title>Article: From Batch to Micro-Batch Streaming: Lessons Learned the Hard Way in a Delta Index Pipeline</title>
      <!-- Article URL; it carries utm_* query parameters, with each "&" escaped as an entity. -->
      <link>https://www.infoq.com/articles/micro-batch-streaming-lessons-learned/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Apache+Spark</link>
      <!--
        The description is an HTML fragment stored as entity-escaped text, not
        as child elements: a header image, one summary paragraph, and an
        italic byline. Consumers must unescape it before rendering.
      -->
      <description>&lt;img src="https://res.infoq.com/articles/micro-batch-streaming-lessons-learned/en/headerimage/micro-batch-streaming-lessons-learned-header-1777381781538.jpg"/&gt;&lt;p&gt;This article describes how a production delta-index pipeline migrated from scheduled batch to micro-batch Spark Structured Streaming. It covers why record-level streaming was rejected, how partition-based watermarks replaced fragile S3 completion markers,  overlap-window correctness, and restart-as-design strategies for better predictability in object-store–based ingestion systems.&lt;/p&gt; &lt;i&gt;By Parveen Saini&lt;/i&gt;</description>
      <!--
        Five category labels, one element per label.
        NOTE(review): the last value, "article", looks like a content type
        rather than a topic; confirm with the feed producer.
      -->
      <category>Apache Spark</category>
      <category>Spark Streaming</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>Development</category>
      <category>article</category>
      <!-- Publication time in the RSS date format; dc:date below gives the same instant in ISO 8601 form. -->
      <pubDate>Mon, 04 May 2026 11:00:00 GMT</pubDate>
      <!--
        The guid text is identical to the link above, tracking parameters included.
        NOTE(review): there is no isPermaLink attribute, and the utm_* values
        are part of the identifier, so a change to them would presumably make
        readers treat this as a new item. Confirm whether that is intended.
      -->
      <guid>https://www.infoq.com/articles/micro-batch-streaming-lessons-learned/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Apache+Spark</guid>
      <!-- Dublin Core extension fields: author name, timestamp, and a site-relative identifier path. -->
      <dc:creator>Parveen Saini</dc:creator>
      <dc:date>2026-05-04T11:00:00Z</dc:date>
      <dc:identifier>/articles/micro-batch-streaming-lessons-learned/en</dc:identifier>
    </item>
  </channel>
</rss>
