2025-04-08 XLMergeCells: remove <mergeCells> element from worksheet XML when merge count is 0 - addresses #351

This commit is contained in:
Lars Uffmann 2025-04-08 08:28:49 +02:00
parent 48e1b416d0
commit 13b1461e5f
No known key found for this signature in database
4 changed files with 152 additions and 67 deletions

View File

@ -57,6 +57,8 @@ YM M9 MM MM MM MM MM d' `MM. MM MM d' `MM.
#include <memory> // std::unique_ptr
#include <ostream> // std::basic_ostream
#include <string>
#include <string_view> // std::string_view
#include <vector> // std::vector
// ===== OpenXLSX Includes ===== //
#include "OpenXLSX-Exports.hpp"
@ -65,11 +67,17 @@ YM M9 MM MM MM MM MM d' `MM. MM MM d' `MM.
namespace OpenXLSX
{
constexpr size_t XLMaxMergeCells = (std::numeric_limits< int32_t >::max)(); // pull request #261: wrapped max in parentheses to prevent expansion of windows.h "max" macro
typedef int32_t XLMergeIndex;
constexpr const XLMergeIndex XLMergeNotFound = -1;
// pull request #261: wrapped max in parentheses to prevent expansion of windows.h "max" macro
constexpr size_t XLMaxMergeCells = (std::numeric_limits< XLMergeIndex >::max)();
/**
* @brief This class encapsulates the Excel concept of <mergeCells>. Each worksheet that has merged cells has a list of
* (empty) <mergeCell> elements within that array, with a sole attribute ref="..." with ... being a range reference, e.g. A1:B5
* Unfortunately, since an empty <mergeCells> element is not allowed, the class must have access to the worksheet root node and
* delete the <mergeCells> element each time the merge count is zero
*/
class OPENXLSX_EXPORT XLMergeCells
{
@ -81,13 +89,14 @@ namespace OpenXLSX
/**
* @brief
*/
XLMergeCells() = default;
XLMergeCells();
/**
* @brief
* @param node The <mergeCells> node of the worksheet document - must not be an empty node
* @param node The root node of the worksheet document - must not be an empty node
* @param nodeOrder the worksheet node sequence to respect when inserting <mergeCells> node
*/
explicit XLMergeCells(const XMLNode& node);
explicit XLMergeCells(const XMLNode& rootNode, std::vector< std::string_view > const & nodeOrder);
/**
* @brief Destructor
@ -98,54 +107,40 @@ namespace OpenXLSX
* @brief
* @param other
*/
XLMergeCells(const XLMergeCells& other)
{
m_mergeCellsNode = other.m_mergeCellsNode ? std::make_unique<XMLNode>( *other.m_mergeCellsNode ) : std::unique_ptr<XMLNode> {};
m_referenceCache = other.m_referenceCache;
}
XLMergeCells(const XLMergeCells& other);
/**
* @brief
* @param other
*/
XLMergeCells(XLMergeCells&& other)
{
m_mergeCellsNode = std::move( other.m_mergeCellsNode );
m_referenceCache = std::move( other.m_referenceCache );
}
XLMergeCells(XLMergeCells&& other);
/**
* @brief
* @param other
* @return
*/
XLMergeCells& operator=(const XLMergeCells& other)
{
m_mergeCellsNode = other.m_mergeCellsNode ? std::make_unique<XMLNode>( *other.m_mergeCellsNode ) : std::unique_ptr<XMLNode> {};
m_referenceCache = other.m_referenceCache;
return *this;
}
XLMergeCells& operator=(const XLMergeCells& other);
/**
* @brief
* @param other
* @return
*/
XLMergeCells& operator=(XLMergeCells&& other)
{
m_mergeCellsNode = std::move( other.m_mergeCellsNode );
m_referenceCache = std::move( other.m_referenceCache );
return *this;
}
XLMergeCells& operator=(XLMergeCells&& other);
bool uninitialized() const { return ( !m_mergeCellsNode ); }
/**
* @brief test if XLMergeCells has been initialized with a valid XMLNode
* @return true if m_rootNode is neither nullptr nor an empty XMLNode
*/
bool valid() const;
/**
* @brief get the index of a <mergeCell> entry by its reference
* @param reference the reference to search for
* @return -1 if no such reference exists, 0-based index otherwise
* @return XLMergeNotFound (-1) if no such reference exists, 0-based index otherwise
*/
int32_t findMerge(const std::string& reference) const;
XLMergeIndex findMerge(const std::string& reference) const;
/**
* @brief test if a mergeCell with reference exists, equivalent to findMerge(reference) >= 0
@ -157,10 +152,10 @@ namespace OpenXLSX
/**
* @brief get the index of a <mergeCell> entry of which cellReference is a part
* @param cellRef the cell reference (string or XLCellReference) to search for in the merged ranges
* @return -1 if no such reference exists, 0-based index otherwise
* @return XLMergeNotFound (-1) if no such reference exists, 0-based index otherwise
*/
int32_t findMergeByCell(const std::string& cellRef) const;
int32_t findMergeByCell(XLCellReference cellRef) const;
XLMergeIndex findMergeByCell(const std::string& cellRef) const;
XLMergeIndex findMergeByCell(XLCellReference cellRef) const;
/**
* @brief get the amount of entries in <mergeCells>
@ -173,20 +168,20 @@ namespace OpenXLSX
* @param index
* @return
*/
const char* merge(int32_t index) const;
const char* merge(XLMergeIndex index) const;
/**
* @brief Operator overload: allow [] as shortcut access to merge
*/
const char* operator[](int32_t index) const { return merge(index); }
const char* operator[](XLMergeIndex index) const { return merge(index); }
/**
* @brief Append a new merge to the list of merges
* @param reference The reference to append.
* @return An int32_t with the index of the appended string
* @return An XLMergeIndex with the index of the appended merge
* @throws XLInputException if the reference would overlap with an existing reference
*/
int32_t appendMerge(const std::string& reference);
XLMergeIndex appendMerge(const std::string& reference);
/**
* @brief Delete the merge at the given index.
@ -194,7 +189,12 @@ namespace OpenXLSX
* @note Previously obtained merge indexes will be invalidated when calling deleteMerge
* @throws XLInputException if the index does not exist
*/
void deleteMerge(int32_t index);
void deleteMerge(XLMergeIndex index);
/**
* @brief Delete all merges of the worksheet
*/
void deleteAll();
/**
* @brief print the XML contents of the mergeCells array using the underlying XMLNode print function
@ -202,6 +202,8 @@ namespace OpenXLSX
void print(std::basic_ostream<char>& ostr) const;
private:
std::unique_ptr<XMLNode> m_rootNode; /**< An XMLNode object with the worksheet root node (document element) */
std::vector< std::string_view > m_nodeOrder; /**< worksheet XML root node required child sequence as passed into constructor */
std::unique_ptr<XMLNode> m_mergeCellsNode; /**< An XMLNode object with the mergeCells item */
std::deque<std::string> m_referenceCache;
};

View File

@ -51,21 +51,30 @@ YM M9 MM MM MM MM MM d' `MM. MM MM d' `MM.
// ===== OpenXLSX Includes ===== //
#include "XLMergeCells.hpp"
#include "XLCellReference.hpp"
#include <XLException.hpp>
#include "XLException.hpp"
#include "utilities/XLUtilities.hpp" // appendAndGetNode
using namespace OpenXLSX;
/**
* @details Constructs an uninitialized XLMergeCells object
*/
XLMergeCells::XLMergeCells() = default;
/**
* @details Constructs a new XLMergeCells object. Invoked by XLWorksheet::mergeCells / ::unmergeCells
* @note Unfortunately, there is no easy way to persist the reference cache, this could be optimized - however, references access shouldn't
* be much of a performance issue
*/
XLMergeCells::XLMergeCells(const XMLNode& node) : m_mergeCellsNode(std::make_unique<XMLNode>(node))
XLMergeCells::XLMergeCells(const XMLNode& rootNode, std::vector< std::string_view > const & nodeOrder)
: m_rootNode(std::make_unique<XMLNode>(rootNode)),
m_nodeOrder(nodeOrder),
m_mergeCellsNode() // std::unique_ptr initializes to nullptr
{
if (m_mergeCellsNode->empty())
throw XLInternalError("XLMergeCells constructor: can not construct with an empty XML node");
if (m_rootNode->empty())
throw XLInternalError("XLMergeCells constructor: can not construct with an empty XML root node");
m_mergeCellsNode = std::make_unique<XMLNode>(m_rootNode->child("mergeCells"));
XMLNode mergeNode = m_mergeCellsNode->first_child_of_type(pugi::node_element);
while (not mergeNode.empty()) {
bool invalidNode = true;
@ -97,10 +106,14 @@ XLMergeCells::XLMergeCells(const XMLNode& node) : m_mergeCellsNode(std::make_uni
mergeNode = nextNode;
}
// ===== Ensure initial array count attribute (if only 0) / issue #351
XMLAttribute attr = m_mergeCellsNode->attribute("count");
if (attr.empty()) attr = m_mergeCellsNode->append_attribute("count");
attr.set_value(m_referenceCache.size());
if (m_referenceCache.size() > 0) {
// ===== Ensure initial array count attribute / issue #351
XMLAttribute attr = m_mergeCellsNode->attribute("count");
if (attr.empty()) attr = m_mergeCellsNode->append_attribute("count");
attr.set_value(m_referenceCache.size());
}
else // no merges left
deleteAll(); // delete mergeCells element & re-initialize m_mergeCellsNode to a default-constructed XMLNode()
}
/**
@ -108,6 +121,58 @@ XLMergeCells::XLMergeCells(const XMLNode& node) : m_mergeCellsNode(std::make_uni
*/
XLMergeCells::~XLMergeCells() = default;
/**
* @details
*/
XLMergeCells::XLMergeCells(const XLMergeCells& other)
{
m_rootNode = other.m_rootNode ? std::make_unique<XMLNode>( *other.m_rootNode ) : std::unique_ptr<XMLNode> {};
m_nodeOrder = other.m_nodeOrder;
m_mergeCellsNode = other.m_mergeCellsNode ? std::make_unique<XMLNode>( *other.m_mergeCellsNode ) : std::unique_ptr<XMLNode> {};
m_referenceCache = other.m_referenceCache;
}
/**
 * @details Move constructor. Takes over both node pointers, the node order and the reference cache from other.
 */
XLMergeCells::XLMergeCells(XLMergeCells&& other)
    : m_rootNode(std::move(other.m_rootNode)),
      m_nodeOrder(std::move(other.m_nodeOrder)),
      m_mergeCellsNode(std::move(other.m_mergeCellsNode)),
      m_referenceCache(std::move(other.m_referenceCache))
{}
/**
 * @details Copy assignment. Each node pointer is replaced by a newly allocated copy of the XMLNode held by other,
 *          or reset to null when other holds none. The node order and the reference cache are copied.
 * @return a reference to this object
 */
XLMergeCells& XLMergeCells::operator=(const XLMergeCells& other)
{
    if (other.m_rootNode)
        m_rootNode = std::make_unique<XMLNode>(*other.m_rootNode);
    else
        m_rootNode = std::unique_ptr<XMLNode>{};

    m_nodeOrder = other.m_nodeOrder;

    if (other.m_mergeCellsNode)
        m_mergeCellsNode = std::make_unique<XMLNode>(*other.m_mergeCellsNode);
    else
        m_mergeCellsNode = std::unique_ptr<XMLNode>{};

    m_referenceCache = other.m_referenceCache;
    return *this;
}
/**
 * @details Move assignment. Takes over both node pointers, the node order and the reference cache from other.
 *          Self-assignment is a no-op: move-assigning a standard container to itself leaves it in a
 *          valid-but-unspecified state (possibly empty), which would let the reference cache diverge from the
 *          XML nodes that are still held.
 * @return a reference to this object
 */
XLMergeCells& XLMergeCells::operator=(XLMergeCells&& other)
{
    if (this != &other) {    // guard against x = std::move(x)
        m_rootNode       = std::move(other.m_rootNode);
        m_nodeOrder      = std::move(other.m_nodeOrder);
        m_mergeCellsNode = std::move(other.m_mergeCellsNode);
        m_referenceCache = std::move(other.m_referenceCache);
    }
    return *this;
}
/**
 * @details An XLMergeCells object is valid when it holds a root node pointer and that node is not empty.
 */
bool XLMergeCells::valid() const
{
    if (m_rootNode == nullptr)
        return false;    // never initialized with a root node
    return not m_rootNode->empty();
}
namespace { // anonymous namespace: do not export any symbols from here
/**
* @brief Test if (range) reference overlaps with the cell window defined by topRow, firstCol, bottomRow, lastCol
@ -144,13 +209,13 @@ namespace { // anonymous namespace: do not export any symbols from here
} // anonymous namespace
/**
* @details Look up a merge index by the reference. If the reference does not exist, the returned index is -1.
* @details Look up a merge index by the reference. If the reference does not exist, the returned index is XLMergeNotFound (-1).
*/
int32_t XLMergeCells::findMerge(const std::string& reference) const
XLMergeIndex XLMergeCells::findMerge(const std::string& reference) const
{
const auto iter = std::find_if(m_referenceCache.begin(), m_referenceCache.end(), [&](const std::string& ref) { return reference == ref; });
return iter == m_referenceCache.end() ? -1 : static_cast<int32_t>(std::distance(m_referenceCache.begin(), iter));
return iter == m_referenceCache.end() ? XLMergeNotFound : static_cast<XLMergeIndex>(std::distance(m_referenceCache.begin(), iter));
}
/**
@ -159,17 +224,17 @@ int32_t XLMergeCells::findMerge(const std::string& reference) const
bool XLMergeCells::mergeExists(const std::string& reference) const
{
    // findMerge yields a 0-based index on success and a negative value when the reference is unknown
    const auto foundIndex = findMerge(reference);
    return foundIndex >= 0;
}
/**
* @details Find the index of the merge of which cellRef is a part. If no such merge exists, the returned index is -1.
* @details Find the index of the merge of which cellRef is a part. If no such merge exists, the returned index is XLMergeNotFound (-1).
*/
int32_t XLMergeCells::findMergeByCell(const std::string& cellRef) const { return findMergeByCell(XLCellReference(cellRef)); }
int32_t XLMergeCells::findMergeByCell(XLCellReference cellRef) const
XLMergeIndex XLMergeCells::findMergeByCell(const std::string& cellRef) const { return findMergeByCell(XLCellReference(cellRef)); }
XLMergeIndex XLMergeCells::findMergeByCell(XLCellReference cellRef) const
{
const auto iter = std::find_if(m_referenceCache.begin(), m_referenceCache.end(),
/**/ [&](const std::string& ref) { // use XLReferenceOverlaps with a "range" that only contains cellRef
/**/ return XLReferenceOverlaps( ref, cellRef.row(), cellRef.column(), cellRef.row(), cellRef.column());
/**/ });
return iter == m_referenceCache.end() ? -1 : static_cast<int32_t>(std::distance(m_referenceCache.begin(), iter));
return iter == m_referenceCache.end() ? XLMergeNotFound : static_cast<XLMergeIndex>(std::distance(m_referenceCache.begin(), iter));
}
/**
@ -180,7 +245,7 @@ size_t XLMergeCells::count() const { return m_referenceCache.size(); }
/**
* @details
*/
const char* XLMergeCells::merge(int32_t index) const
const char* XLMergeCells::merge(XLMergeIndex index) const
{
if (index < 0 || static_cast<uint32_t>(index) >= m_referenceCache.size()) {
using namespace std::literals::string_literals;
@ -194,7 +259,7 @@ const char* XLMergeCells::merge(int32_t index) const
* appended merge is returned
* Before appending a mergeCell entry with reference, check that reference does not overlap with any existing references
*/
int32_t XLMergeCells::appendMerge(const std::string& reference)
XLMergeIndex XLMergeCells::appendMerge(const std::string& reference)
{
using namespace std::literals::string_literals;
@ -222,6 +287,9 @@ int32_t XLMergeCells::appendMerge(const std::string& reference)
}
// if execution gets here: no overlaps
if (m_mergeCellsNode->empty()) // create mergeCells element if needed
m_mergeCellsNode = std::make_unique<XMLNode>(appendAndGetNode(*m_rootNode, "mergeCells", m_nodeOrder));
// append new mergeCell element and set attribute ref
XMLNode insertAfter = m_mergeCellsNode->last_child_of_type(pugi::node_element);
XMLNode newMerge{};
@ -238,24 +306,24 @@ int32_t XLMergeCells::appendMerge(const std::string& reference)
if (attr.empty()) attr = m_mergeCellsNode->append_attribute("count");
attr.set_value(m_referenceCache.size());
return static_cast<int32_t>(referenceCacheSize);
return static_cast<XLMergeIndex>(referenceCacheSize);
}
/**
* @details Delete the merge at the given index
*/
void XLMergeCells::deleteMerge(int32_t index)
void XLMergeCells::deleteMerge(XLMergeIndex index)
{
using namespace std::literals::string_literals;
if (index < 0 || static_cast<uint32_t>(index) >= m_referenceCache.size())
throw XLInputError("XLMergeCells::"s + __func__ + ": index "s + std::to_string(index) + " is out of range"s);
int32_t curIndex = 0;
XLMergeIndex curIndex = 0;
XMLNode node = m_mergeCellsNode->first_child_of_type(pugi::node_element);
while(curIndex < index && not node.empty()) {
node = node.next_sibling_of_type(pugi::node_element);
++curIndex;
++curIndex;
}
if (node.empty())
throw XLInternalError("XLMergeCells::"s + __func__ + ": mismatch between size of mergeCells XML node and internal reference cache"s);
@ -266,10 +334,21 @@ void XLMergeCells::deleteMerge(int32_t index)
m_referenceCache.erase(m_referenceCache.begin() + curIndex);
// ===== Update the array count attribute
XMLAttribute attr = m_mergeCellsNode->attribute("count");
if (attr.empty()) attr = m_mergeCellsNode->append_attribute("count");
attr.set_value(m_referenceCache.size()); // update the array count attribute
if (m_referenceCache.size() > 0) {
// ===== Update the array count attribute
XMLAttribute attr = m_mergeCellsNode->attribute("count");
if (attr.empty()) attr = m_mergeCellsNode->append_attribute("count");
attr.set_value(m_referenceCache.size()); // update the array count attribute
}
else // no merges left
deleteAll(); // delete mergeCells element & re-initialize m_mergeCellsNode to a default-constructed XMLNode()
}
/**
 * @details Delete all merges: clear the reference cache, remove the mergeCells node from the root node and
 *          re-initialize m_mergeCellsNode to a default-constructed XMLNode.
 *          The node removal is skipped when either pointer is null (e.g. on a default-constructed object), so that
 *          calling this function on an uninitialized XLMergeCells does not dereference a null pointer.
 */
void XLMergeCells::deleteAll()
{
    m_referenceCache.clear();

    // only touch the XML when both handles exist - both are nullptr on a default-constructed object
    if (m_rootNode != nullptr && m_mergeCellsNode != nullptr)
        m_rootNode->remove_child(*m_mergeCellsNode);

    m_mergeCellsNode = std::make_unique<XMLNode>(XMLNode());
}
/**

View File

@ -1399,10 +1399,8 @@ void XLWorksheet::updateSheetName(const std::string& oldName, const std::string&
*/
XLMergeCells & XLWorksheet::merges()
{
if (m_merges.uninitialized()) {
XMLNode rootNode = xmlDocument().document_element(); // until I learn how to make appendAndGetNode take by reference but not fail on rvalue document_element
m_merges = XLMergeCells(appendAndGetNode(rootNode, "mergeCells", m_nodeOrder));
}
if (!m_merges.valid())
m_merges = XLMergeCells(xmlDocument().document_element(), m_nodeOrder);
return m_merges;
}

View File

@ -7,6 +7,12 @@ Microsoft Excel® files, with the .xlsx format.
As the heading says - the latest "Release" that is shown on https://github.com/troldal/OpenXLSX/releases is from 2021-11-06, and severely outdated - please pull / download the latest SW version directly from the repository in its current state. Link for those that do not want to use ```git```: https://github.com/troldal/OpenXLSX/archive/refs/heads/master.zip
## (aral-matrix) 08 April 2025 - XLMergeCells: remove ```<mergeCells>``` element from worksheet XML when merge count is 0 - addresses #351
* ```XLMergeCells``` is now constructed with the worksheet root XML node (unfortunately necessary) and uses this access to create / delete the ```<mergeCells>``` node as needed. This addresses https://github.com/troldal/OpenXLSX/issues/351
* added a function ```XLMergeCells::deleteAll``` to clear all merges in the worksheet
* added a typedef ```XLMergeIndex``` as the parameter / return type for all functions using a merge index - the underlying ```int32_t``` remains unchanged
* added a ```constexpr const XLMergeIndex XLMergeNotFound = -1``` for code readability
## (aral-matrix) 07 April 2025 - Demo10: added a disabled use of borders with merged cells
* enable Demo10 line 450 to experiment - but LibreOffice behaves weirdly (at least) with (diagonal) borders, so I have disabled this Demo functionality by default