<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
  <channel>
    <title>InfoQ - Architecture &amp; Design - Articles</title>
    <link>https://www.infoq.com</link>
    <description>InfoQ Architecture &amp; Design Articles feed</description>
    <item>
      <title>Article: Redesigning Banking PDF Table Extraction: A Layered Approach with Java</title>
      <link>https://www.infoq.com/articles/redesign-pdf-table-extraction/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Architecture+%26+Design-articles</link>
      <description>&lt;img src="https://res.infoq.com/articles/redesign-pdf-table-extraction/en/headerimage/redesign-pdf-table-extraction-header-1776414059821.jpg"/&gt;&lt;p&gt;PDF table extraction often looks easy until it fails in production. Real bank statements can be messy, with scanned pages, shifting layouts, merged cells, and wrapped rows that break standard Java parsers. This article shares how we redesigned the approach using stream parsing, lattice/OCR, validation, scoring, and selective ML to make extraction more reliable in real banking systems.&lt;/p&gt; &lt;i&gt;By Mehuli Mukherjee&lt;/i&gt;</description>
      <category>Java</category>
      <category>Development</category>
      <category>Architecture &amp; Design</category>
      <category>article</category>
      <pubDate>Tue, 21 Apr 2026 09:00:00 GMT</pubDate>
      <guid>https://www.infoq.com/articles/redesign-pdf-table-extraction/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Architecture+%26+Design-articles</guid>
      <dc:creator>Mehuli Mukherjee</dc:creator>
      <dc:date>2026-04-21T09:00:00Z</dc:date>
      <dc:identifier>/articles/redesign-pdf-table-extraction/en</dc:identifier>
    </item>
    <item>
      <title>Article: Lakehouse Tower of Babel: Handling Identifier Resolution Rules across Database Engines</title>
      <link>https://www.infoq.com/articles/lakehouse-sql-identifier-rules/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Architecture+%26+Design-articles</link>
      <description>&lt;img src="https://res.infoq.com/articles/lakehouse-sql-identifier-rules/en/headerimage/lakehouse-sql-identifier-rules-header-1776241856705.jpg"/&gt;&lt;p&gt;Lakehouse architectures enable multiple engines to operate on shared data using open table formats such as Apache Iceberg. However, differences in SQL identifier resolution and catalog naming rules create interoperability failures. This article examines these behaviors and explains why enforcing consistent naming conventions and cross-engine validation is critical.&lt;/p&gt; &lt;i&gt;By Maninder Parmar&lt;/i&gt;</description>
      <category>Database</category>
      <category>Data Portability</category>
      <category>SQL</category>
      <category>Data Lake</category>
      <category>Data Catalog</category>
      <category>Apache Iceberg</category>
      <category>Architecture &amp; Design</category>
      <category>AI, ML &amp; Data Engineering</category>
      <category>article</category>
      <pubDate>Fri, 17 Apr 2026 09:00:00 GMT</pubDate>
      <guid>https://www.infoq.com/articles/lakehouse-sql-identifier-rules/?utm_campaign=infoq_content&amp;utm_source=infoq&amp;utm_medium=feed&amp;utm_term=Architecture+%26+Design-articles</guid>
      <dc:creator>Maninder Parmar</dc:creator>
      <dc:date>2026-04-17T09:00:00Z</dc:date>
      <dc:identifier>/articles/lakehouse-sql-identifier-rules/en</dc:identifier>
    </item>
  </channel>
</rss>
