Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions Framework/Core/include/Framework/ASoA.h
Original file line number Diff line number Diff line change
Expand Up @@ -1517,6 +1517,11 @@ struct PreslicePolicySorted : public PreslicePolicyBase {

SliceInfoPtr sliceInfo;
std::shared_ptr<arrow::Table> getSliceFor(int value, std::shared_ptr<arrow::Table> const& input, uint64_t& offset) const;
// One-slot cache for the empty (0-row) slice, so that empty groups do not
// slice every column only to produce 0 rows (the common case for sparse
// grouping, e.g. candidates per collision). Keyed by the input table, which
// changes with every dataframe.
mutable std::pair<arrow::Table const*, std::shared_ptr<arrow::Table>> emptySlice{nullptr, nullptr};
};

struct PreslicePolicyGeneral : public PreslicePolicyBase {
Expand Down Expand Up @@ -1731,7 +1736,10 @@ auto doSliceByCached(T const* table, framework::expressions::BindingNode const&
auto localCache = cache.ptr->getCacheFor({"", originReplacement(cache.ptr->newOrigin)(o2::soa::getMatcherFromTypeForKey<T>(node.name)),
node.name});
auto [offset, count] = localCache.getSliceFor(value);
auto t = typename T::self_t({table->asArrowTable()->Slice(static_cast<uint64_t>(offset), count)}, static_cast<uint64_t>(offset));
// Empty group: reuse a cached empty (0-row) table instead of slicing every column.
auto slice = count == 0 ? cache.ptr->getEmptySliceFor(table->asArrowTable())
: table->asArrowTable()->Slice(static_cast<uint64_t>(offset), count);
auto t = typename T::self_t({slice}, static_cast<uint64_t>(offset));
if (t.tableSize() != 0) {
table->copyIndexBindings(t);
}
Expand All @@ -1744,7 +1752,9 @@ auto doFilteredSliceByCached(T const* table, framework::expressions::BindingNode
auto localCache = cache.ptr->getCacheFor({"", originReplacement(cache.ptr->newOrigin)(o2::soa::getMatcherFromTypeForKey<T>(node.name)),
node.name});
auto [offset, count] = localCache.getSliceFor(value);
auto slice = table->asArrowTable()->Slice(static_cast<uint64_t>(offset), count);
// Empty group: reuse a cached empty (0-row) table instead of slicing every column.
auto slice = count == 0 ? cache.ptr->getEmptySliceFor(table->asArrowTable())
: table->asArrowTable()->Slice(static_cast<uint64_t>(offset), count);
return prepareFilteredSlice(table, slice, offset);
}

Expand Down
6 changes: 6 additions & 0 deletions Framework/Core/include/Framework/ArrowTableSlicingCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ struct ArrowTableSlicingCache {
SliceInfoPtr getCacheForPos(int pos) const;
SliceInfoUnsortedPtr getCacheUnsortedForPos(int pos) const;

// get a cached empty (0-row) slice of the given table, so that empty groups
// do not slice every column only to produce 0 rows (the common case for
// sparse grouping). One-slot cache keyed by the table pointer.
std::shared_ptr<arrow::Table> getEmptySliceFor(std::shared_ptr<arrow::Table> const& table);
std::pair<arrow::Table const*, std::shared_ptr<arrow::Table>> emptySlice{nullptr, nullptr};

static void validateOrder(Entry const& bindingKey, std::shared_ptr<arrow::Table> const& input);
};
} // namespace o2::framework
Expand Down
13 changes: 11 additions & 2 deletions Framework/Core/include/Framework/GroupSlicer.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,10 +218,16 @@ struct GroupSlicer {
auto oc = sliceInfos[index].getSliceFor(pos);
uint64_t offset = oc.first;
auto count = oc.second;
auto groupedElementsTable = originalTable.asArrowTable()->Slice(offset, count);
if (count == 0) {
return std::decay_t<A1>{{groupedElementsTable}, soa::SelectionVector{}};
// Empty group: avoid slicing every column only to discard it. Cache one
// empty (0-row) table per associated table and reuse it. This is the
// common case for sparse grouping (e.g. collisions with no candidates).
if (!emptyTables[index]) {
emptyTables[index] = originalTable.asArrowTable()->Slice(0, 0);
}
return std::decay_t<A1>{{emptyTables[index]}, soa::SelectionVector{}};
}
auto groupedElementsTable = originalTable.asArrowTable()->Slice(offset, count);

// for each grouping element we need to slice the selection vector
auto start_iterator = std::lower_bound(starts[index], selections[index]->end(), offset);
Expand Down Expand Up @@ -275,6 +281,9 @@ struct GroupSlicer {
std::span<int64_t const> groupSelection;
std::array<std::span<int64_t const> const*, sizeof...(A)> selections;
std::array<std::span<int64_t const>::iterator, sizeof...(A)> starts;
// Cached empty (0-row) table per associated table, lazily built and reused
// for empty groups so we do not slice every column on each empty group.
std::array<std::shared_ptr<arrow::Table>, sizeof...(A)> emptyTables{};

std::array<SliceInfoPtr, sizeof...(A)> sliceInfos;
std::array<SliceInfoUnsortedPtr, sizeof...(A)> sliceInfosUnsorted;
Expand Down
11 changes: 9 additions & 2 deletions Framework/Core/src/ASoA.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -317,9 +317,16 @@ void PreslicePolicyGeneral::updateSliceInfo(SliceInfoUnsortedPtr&& si)
/// Return the rows of `input` that belong to the group identified by `value`.
///
/// The (offset, count) pair for the group is looked up in `sliceInfo`; the
/// starting row is always reported back through `offset`, also for empty groups.
///
/// @param value   grouping key to look up in `sliceInfo`
/// @param input   table to take the slice from
/// @param offset  out-parameter receiving the first row of the group
/// @return a `count`-row slice of `input`, or a cached 0-row slice when the
///         group is empty
std::shared_ptr<arrow::Table> PreslicePolicySorted::getSliceFor(int value, std::shared_ptr<arrow::Table> const& input, uint64_t& offset) const
{
  auto [offset_, count] = this->sliceInfo.getSliceFor(value);
  // `offset` is a uint64_t&, so convert to that type explicitly.
  offset = static_cast<uint64_t>(offset_);
  if (count == 0) {
    // Empty group: do not slice every column only to produce 0 rows. Keep one
    // empty (0-row) table per input table and hand it out again.
    // NOTE(review): the cache key is the raw address of `input`; presumably a
    // new input is never allocated at the address of a destroyed one while the
    // cached slice is still in use - confirm against the callers.
    if (emptySlice.first != input.get()) {
      emptySlice = {input.get(), input->Slice(0, 0)};
    }
    return emptySlice.second;
  }
  // Non-empty group: the slice is only computed here, after the empty-group
  // check, so the cached path above is actually reachable.
  return input->Slice(offset_, count);
}

std::span<const int64_t> PreslicePolicyGeneral::getSliceFor(int value) const
Expand Down
8 changes: 8 additions & 0 deletions Framework/Core/src/ArrowTableSlicingCache.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,14 @@ SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedForPos(int pos) con
};
}

/// Return an empty (0-row) slice of `table`, reusing the previously built one
/// when the same table pointer is passed again.
///
/// Only a single entry is remembered: asking for a different table replaces it.
///
/// @param table  table whose 0-row slice is requested
/// @return the cached or freshly created `Slice(0, 0)` of `table`
std::shared_ptr<arrow::Table> ArrowTableSlicingCache::getEmptySliceFor(std::shared_ptr<arrow::Table> const& table)
{
  arrow::Table const* const key = table.get();
  // Hit: the remembered slice was built from this very table pointer.
  // NOTE(review): the key is a raw address; presumably a different table is
  // never allocated at the address of a destroyed one while this entry is
  // alive - confirm, otherwise a slice with a stale schema could be returned.
  if (emptySlice.first == key) {
    return emptySlice.second;
  }
  // Miss: build the 0-row slice first, then record which table it belongs to.
  emptySlice.second = table->Slice(0, 0);
  emptySlice.first = key;
  return emptySlice.second;
}

void ArrowTableSlicingCache::validateOrder(Entry const& bindingKey, const std::shared_ptr<arrow::Table>& input)
{
auto const& [target, matcher, key, enabled] = bindingKey;
Expand Down