<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
  <channel>
    <title>InfoQ - Big Data - Articles</title>
    <link>https://www.infoq.com</link>
    <description>InfoQ Big Data Articles feed</description>
    <item>
      <title>Article: Time-Series Storage: Design Choices That Shape Cost and Performance</title>
      <link>https://www.infoq.com/articles/time-series-storage-design/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Big+Data-articles</link>
      <description>&lt;img src="https://res.infoq.com/articles/time-series-storage-design/en/headerimage/Time-Series-Storage-Design-Choices-That-Shape-Cost-and-Performance-header-1778155792101.jpg"/&gt;&lt;p&gt;Every time-series database makes a set of storage design decisions: how to lay out rows, when to compress, what to partition on. These decisions determine cost and query performance more than the choice of database itself. This article works through those fundamentals from first principles, using widely available tools like PostgreSQL and Apache Parquet to make each trade-off measurable.&lt;/p&gt; &lt;i&gt;By Nirmesh Khandelwal&lt;/i&gt;</description>
      <category>Big Data</category>
      <category>Time Series Data</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>article</category>
      <pubDate>Tue, 12 May 2026 09:00:00 GMT</pubDate>
      <guid>https://www.infoq.com/articles/time-series-storage-design/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Big+Data-articles</guid>
      <dc:creator>Nirmesh Khandelwal</dc:creator>
      <dc:date>2026-05-12T09:00:00Z</dc:date>
      <dc:identifier>/articles/time-series-storage-design/en</dc:identifier>
    </item>
    <item>
      <title>Article: From Batch to Micro-Batch Streaming: Lessons Learned the Hard Way in a Delta Index Pipeline</title>
      <link>https://www.infoq.com/articles/micro-batch-streaming-lessons-learned/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Big+Data-articles</link>
      <description>&lt;img src="https://res.infoq.com/articles/micro-batch-streaming-lessons-learned/en/headerimage/micro-batch-streaming-lessons-learned-header-1777381781538.jpg"/&gt;&lt;p&gt;This article describes how a production delta-index pipeline migrated from scheduled batch to micro-batch Spark Structured Streaming. It covers why record-level streaming was rejected, how partition-based watermarks replaced fragile S3 completion markers,  overlap-window correctness, and restart-as-design strategies for better predictability in object-store–based ingestion systems.&lt;/p&gt; &lt;i&gt;By Parveen Saini&lt;/i&gt;</description>
      <category>Spark Streaming</category>
      <category>Apache Spark</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>Development</category>
      <category>article</category>
      <pubDate>Mon, 04 May 2026 11:00:00 GMT</pubDate>
      <guid>https://www.infoq.com/articles/micro-batch-streaming-lessons-learned/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Big+Data-articles</guid>
      <dc:creator>Parveen Saini</dc:creator>
      <dc:date>2026-05-04T11:00:00Z</dc:date>
      <dc:identifier>/articles/micro-batch-streaming-lessons-learned/en</dc:identifier>
    </item>
  </channel>
</rss>
