From 45331b76a7f07535425c9398c657a0ece9c2421e Mon Sep 17 00:00:00 2001 From: vlou <919070296@qq.com> Date: Mon, 28 Apr 2025 15:00:40 +0800 Subject: [PATCH] fix: prevent duplicated cell content in TableData.grid when table_cells is shorter than grid Fixes an issue where, if the number of table_cells is less than the grid size, the last cell's content would be duplicated across extra grid cells. Now, extra cells remain empty, matching the original document layout and preventing misleading repeated content in the output tables. Signed-off-by: vlou <919070296@qq.com> --- docling_core/types/doc/document.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index 4c05bfba..aac40319 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -352,6 +352,7 @@ def grid( ] # Overwrite cells in table data for which there is actual cell content. + count = 1 for cell in self.table_cells: for i in range( min(cell.start_row_offset_idx, self.num_rows), @@ -361,7 +362,11 @@ def grid( min(cell.start_col_offset_idx, self.num_cols), min(cell.end_col_offset_idx, self.num_cols), ): - table_data[i][j] = cell + if count <= len(self.table_cells): + table_data[i][j] = cell + count += 1 + else: + break return table_data