Skip to content

store rows in small vectors #525

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions metagraph/src/annotation/annotation_converters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ void convert_to_row_diff<RowDiffRowSparseAnnotator>(

std::unique_ptr<MultiBRWTAnnotator>
convert_to_BRWT(
const std::vector<std::vector<uint64_t>> &linkage,
const std::vector<std::vector<BRWT::Column>> &linkage,
size_t num_parallel_nodes,
size_t num_threads,
const fs::path &tmp_path,
Expand Down Expand Up @@ -481,7 +481,7 @@ convert_to_BRWT(
template <>
std::unique_ptr<MultiBRWTAnnotator> convert_to_BRWT<MultiBRWTAnnotator>(
const std::vector<std::string> &annotation_files,
const std::vector<std::vector<uint64_t>> &linkage,
const std::vector<std::vector<BRWT::Column>> &linkage,
size_t num_parallel_nodes,
size_t num_threads,
const fs::path &tmp_path) {
Expand Down Expand Up @@ -512,7 +512,7 @@ std::unique_ptr<MultiBRWTAnnotator> convert_to_BRWT<MultiBRWTAnnotator>(
template<>
std::unique_ptr<RowDiffBRWTAnnotator>
convert_to_BRWT<RowDiffBRWTAnnotator>(const std::vector<std::string> &annotation_files,
const std::vector<std::vector<uint64_t>> &linkage,
const std::vector<std::vector<BRWT::Column>> &linkage,
size_t num_parallel_nodes,
size_t num_threads,
const fs::path &tmp_path) {
Expand Down
2 changes: 1 addition & 1 deletion metagraph/src/annotation/annotation_converters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ convert_to_greedy_BRWT(RowDiffColumnAnnotator &&annotation,
template <class StaticAnnotation>
std::unique_ptr<StaticAnnotation>
convert_to_BRWT(const std::vector<std::string> &annotation_files,
const std::vector<std::vector<uint64_t>> &linkage_matrix,
const std::vector<std::vector<matrix::BRWT::Column>> &linkage_matrix,
size_t num_parallel_nodes = 1,
size_t num_threads = 1,
const std::filesystem::path &tmp_dir = "");
Expand Down
4 changes: 2 additions & 2 deletions metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ namespace matrix {
class BinaryMatrix {
public:
typedef uint64_t Row;
typedef uint64_t Column;
typedef uint32_t Column;

typedef Vector<Column> SetBitPositions;
typedef SmallVector<Column> SetBitPositions;
typedef std::function<void(const SetBitPositions &)> RowCallback;
typedef std::function<void(Row, Column)> ValueCallback;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ ColumnMajor::sum_rows(const std::vector<std::pair<Row, size_t>> &index_counts,
if (total_sum_count < min_count)
return {};

std::vector<std::pair<uint64_t, size_t>> result;
std::vector<std::pair<Column, size_t>> result;
result.reserve(num_columns());

for (size_t j = 0; j < num_columns(); ++j) {
Expand Down
4 changes: 2 additions & 2 deletions metagraph/src/annotation/binary_matrix/multi_brwt/brwt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ void BRWT::slice_rows(Row begin, Row end, Vector<Column> *slice) const {
slice_rows(utils::arange<Row>(begin, end - begin), slice);
}

void BRWT::call_rows(const std::function<void(const Vector<Column> &)> &callback,
void BRWT::call_rows(const std::function<void(const SetBitPositions &)> &callback,
bool show_progress) const {
Vector<Column> slice;
ProgressBar progress_bar(num_rows(), "Queried BRWT rows", std::cerr, !show_progress);
Expand All @@ -81,7 +81,7 @@ void BRWT::call_rows(const std::function<void(const Vector<Column> &)> &callback

#pragma omp ordered
{
Vector<Column> row;
SetBitPositions row;
for (auto row_begin = slice.begin(); row_begin < slice.end(); ) {
// every row in `slice` ends with `-1`
auto row_end = std::find(row_begin, slice.end(),
Expand Down
2 changes: 1 addition & 1 deletion metagraph/src/annotation/binary_matrix/multi_brwt/brwt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class BRWT : public BinaryMatrix, public GetEntrySupport {
// query row and get ranks of each set bit in its column
std::vector<Vector<std::pair<Column, uint64_t>>>
get_column_ranks(const std::vector<Row> &rows) const;
void call_rows(const std::function<void(const Vector<Column> &)> &callback,
void call_rows(const std::function<void(const SetBitPositions &)> &callback,
bool show_progress = common::get_verbose()) const;

bool load(std::istream &in) override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ BRWT BRWTBottomUpBuilder::concatenate(std::vector<BRWT>&& submatrices,
uint64_t num_columns = 0;
Partition partition;
for (const BRWT &submatrix : submatrices) {
partition.push_back(utils::arange(num_columns, submatrix.num_columns()));
partition.push_back(utils::arange<RangePartition::T>(num_columns, submatrix.num_columns()));
num_columns += submatrix.num_columns();
}
parent.assignments_ = RangePartition(std::move(partition));
Expand Down Expand Up @@ -130,7 +130,7 @@ BRWT BRWTBottomUpBuilder::concatenate_sparse(std::vector<BRWT>&& submatrices,
uint64_t num_columns = 0;
Partition partition;
for (const BRWT &submatrix : submatrices) {
partition.push_back(utils::arange(num_columns, submatrix.num_columns()));
partition.push_back(utils::arange<RangePartition::T>(num_columns, submatrix.num_columns()));
num_columns += submatrix.num_columns();
}
parent.assignments_ = RangePartition(std::move(partition));
Expand Down Expand Up @@ -162,7 +162,7 @@ BRWT BRWTBottomUpBuilder::concatenate_sparse(std::vector<BRWT>&& submatrices,

template <typename T>
std::vector<T> subset(std::vector<T> *vector,
const std::vector<uint64_t> indexes) {
const std::vector<RangePartition::T> &indexes) {
assert(vector);

std::vector<T> result;
Expand Down Expand Up @@ -195,7 +195,7 @@ BRWT BRWTBottomUpBuilder::build(std::vector<std::unique_ptr<bit_vector>>&& colum
// linkage[c] = {} for each c < num_columns
BRWT BRWTBottomUpBuilder::build(
const std::function<void(const CallColumn &)> &get_columns,
const std::vector<std::vector<uint64_t>> &linkage,
const std::vector<std::vector<BRWT::Column>> &linkage,
const std::filesystem::path &tmp_path,
size_t num_nodes_parallel,
size_t num_threads) {
Expand Down Expand Up @@ -322,7 +322,7 @@ BRWT BRWTBottomUpBuilder::build(

ThreadPool thread_pool(num_threads, 100'000 * num_threads);

std::vector<std::vector<uint64_t>> stored_columns(linkage.size());
std::vector<std::vector<RangePartition::T>> stored_columns(linkage.size());

#pragma omp parallel for num_threads(num_nodes_parallel) schedule(dynamic)
for (size_t i = num_leaves; i < linkage.size(); ++i) {
Expand Down Expand Up @@ -454,7 +454,7 @@ BRWT BRWTBottomUpBuilder::merge(std::vector<BRWT>&& nodes,
uint64_t num_columns_total = 0;
Partition current_partition;
for (const BRWT &node : nodes) {
current_partition.push_back(utils::arange(num_columns_total, node.num_columns()));
current_partition.push_back(utils::arange<RangePartition::T>(num_columns_total, node.num_columns()));
num_columns_total += node.num_columns();
}

Expand Down Expand Up @@ -572,7 +572,7 @@ void BRWTOptimizer::reassign(size_t node_rank, BRWT *parent, size_t num_threads)

BRWT &node = dynamic_cast<BRWT&>(*parent->child_nodes_.at(node_rank));

std::vector<uint64_t> column_arrangement;
std::vector<RangePartition::T> column_arrangement;
std::vector<size_t> group_sizes;
for (size_t g = 0; g < parent->assignments_.num_groups(); ++g) {
if (g == node_rank)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace matrix {
class BRWTBottomUpBuilder {
public:
typedef std::vector<const bit_vector *> VectorPtrs;
typedef std::vector<std::vector<BRWT::Column>> Partition;
typedef std::vector<std::vector<RangePartition::T>> Partition;
typedef std::function<Partition(const VectorPtrs &)> Partitioner;

static Partitioner get_basic_partitioner(size_t arity = 2);
Expand All @@ -33,7 +33,7 @@ class BRWTBottomUpBuilder {
= std::function<void(uint64_t, std::unique_ptr<bit_vector>&&)>;

static BRWT build(const std::function<void(const CallColumn &)> &get_columns,
const std::vector<std::vector<uint64_t>> &linkage,
const std::vector<std::vector<BRWT::Column>> &linkage,
const std::filesystem::path &tmp_dir,
size_t num_nodes_parallel = 1,
size_t num_threads = 1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ namespace matrix {

using mtg::common::logger;

typedef std::vector<std::vector<uint64_t>> Partition;
typedef std::vector<std::vector<RangePartition::T>> Partition;
typedef std::vector<const bit_vector *> VectorPtrs;


Expand Down Expand Up @@ -258,7 +258,7 @@ Partition greedy_matching(const std::vector<T> &columns, size_t num_threads) {
++progress_bar;
}

for (size_t i = 0; i < columns.size(); ++i) {
for (RangePartition::T i = 0; i < columns.size(); ++i) {
if (!matched[i])
partition.push_back({ i });
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <Eigen/Dense>

#include "common/vectors/bit_vector.hpp"
#include "common/range_partition.hpp"


namespace mtg {
Expand All @@ -25,7 +26,7 @@ struct SparseColumn {
// `SparseColumn` storing the column size and the positions of its set bits.
// Output: a set of greedily matched column pairs.
template <class T>
std::vector<std::vector<uint64_t>>
std::vector<std::vector<RangePartition::T>>
greedy_matching(const std::vector<T> &columns, size_t num_threads = 1);

// Format resembling the Z matrix from scipy.cluster.hierarchy.linkage
Expand Down
6 changes: 3 additions & 3 deletions metagraph/src/annotation/binary_matrix/row_diff/row_diff.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class RowDiff : public IRowDiff, public BinaryMatrix {
BaseMatrix& diffs() { return diffs_; }

private:
static void add_diff(const Vector<uint64_t> &diff, Vector<uint64_t> *row);
static void add_diff(const SetBitPositions &diff, SetBitPositions *row);

BaseMatrix diffs_;
};
Expand Down Expand Up @@ -210,14 +210,14 @@ void RowDiff<BaseMatrix>::serialize(std::ostream &f) const {
}

template <class BaseMatrix>
void RowDiff<BaseMatrix>::add_diff(const Vector<uint64_t> &diff, Vector<uint64_t> *row) {
void RowDiff<BaseMatrix>::add_diff(const SetBitPositions &diff, SetBitPositions *row) {
assert(std::is_sorted(row->begin(), row->end()));
assert(std::is_sorted(diff.begin(), diff.end()));

if (diff.empty())
return;

Vector<uint64_t> result;
SetBitPositions result;
result.reserve(row->size() + diff.size());
std::set_symmetric_difference(row->begin(), row->end(),
diff.begin(), diff.end(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ RowCompressed<Label>::RowCompressed(Vector<RowType>&& annotation_rows,
}
}

template RowCompressed<std::string>::RowCompressed(Vector<SmallVector<uint32_t>>&&, const std::vector<std::string> &);
template RowCompressed<std::string>::RowCompressed(Vector<Vector<uint64_t>>&&, const std::vector<std::string> &);
template RowCompressed<std::string>::RowCompressed(Vector<BinaryMatrix::SetBitPositions>&&, const std::vector<std::string> &);

template <typename Label>
void RowCompressed<Label>::reinitialize(uint64_t num_rows) {
Expand Down Expand Up @@ -286,7 +285,7 @@ RowType* StreamRows<RowType>::next_row() {

while (i_ < inbuf_.size()) {
auto value = inbuf_[i_++];
if (value - 1 > std::numeric_limits<typename RowType::value_type>::max())
if (value > std::numeric_limits<typename RowType::value_type>::max())
throw std::ifstream::failure("Integer overflow: trying to read too"
" large column index: " + std::to_string(value - 1));
if (value) {
Expand Down
8 changes: 3 additions & 5 deletions metagraph/src/cli/transform_annotation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,10 @@ matrix::LinkageMatrix compute_linkage(const std::vector<std::string> &files,
}
}

std::vector<std::vector<uint64_t>>
parse_linkage_matrix(const std::string &filename) {
auto parse_linkage_matrix(const std::string &filename){
std::ifstream in(filename);

std::vector<std::vector<uint64_t>> linkage;
std::vector<std::vector<matrix::BinaryMatrix::Column>> linkage;
std::string line;
while (std::getline(in, line)) {
std::vector<std::string> parts = utils::split_string(line, " ");
Expand Down Expand Up @@ -858,8 +857,7 @@ int transform_annotation(Config *config) {
logger->trace("Generated new linkage and saved to {}",
config->linkage_file);
}
std::vector<std::vector<uint64_t>> linkage
= parse_linkage_matrix(config->linkage_file);
auto linkage = parse_linkage_matrix(config->linkage_file);
logger->trace("Linkage loaded from {}", config->linkage_file);

auto brwt_annotator = convert_to_BRWT<RowDiffBRWTAnnotator>(
Expand Down
25 changes: 11 additions & 14 deletions metagraph/src/common/range_partition.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
#include "range_partition.hpp"

#include <algorithm>
#include <cassert>

#include "common/serialization.hpp"


RangePartition::RangePartition(const std::vector<uint64_t> &arrangement,
RangePartition::RangePartition(const std::vector<T> &arrangement,
const std::vector<size_t> &group_sizes) {
size_t offset = 0;
for (size_t group_size : group_sizes) {
Expand All @@ -15,21 +16,17 @@ RangePartition::RangePartition(const std::vector<uint64_t> &arrangement,
offset += group_size;
}

assert(initialize_groups_and_ranks());
initialize_groups_and_ranks();
if (!initialize_groups_and_ranks())
throw std::runtime_error("Invalid partition");
}

RangePartition::RangePartition(std::vector<std::vector<uint64_t>>&& partition) {
partition_.reserve(partition.size());
for (auto &group : partition) {
assert(group.size() && "partition blocks must not be empty");
partition_.emplace_back(group.begin(), group.end());
group.clear();
}
partition.clear();

assert(initialize_groups_and_ranks());
initialize_groups_and_ranks();
RangePartition::RangePartition(std::vector<std::vector<T>>&& partition)
: partition_(std::move(partition)) {
assert(std::all_of(partition_.begin(), partition_.end(),
[](const auto &group) { return !group.empty(); })
&& "partition blocks must not be empty");
if (!initialize_groups_and_ranks())
throw std::runtime_error("Invalid partition");
}

bool RangePartition::initialize_groups_and_ranks() {
Expand Down
4 changes: 2 additions & 2 deletions metagraph/src/common/range_partition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ class RangePartition {
typedef uint32_t R;

RangePartition() {}
RangePartition(const std::vector<uint64_t> &arrangement,
RangePartition(const std::vector<T> &arrangement,
const std::vector<size_t> &group_sizes);
explicit RangePartition(std::vector<std::vector<uint64_t>>&& partition);
explicit RangePartition(std::vector<std::vector<T>>&& partition);

// get group that contains value
inline G group(T value) const;
Expand Down
2 changes: 1 addition & 1 deletion metagraph/src/graph/alignment/aligner_labeled.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ typedef AnnotationBuffer::Columns Columns;
typedef DeBruijnGraph::node_index node_index;

// dummy index for an unfetched annotations
static constexpr size_t nannot = std::numeric_limits<size_t>::max();
static constexpr Column nannot = std::numeric_limits<Column>::max();

template <class T1, class T2>
bool overlap_with_diff(const T1 &tuple1, const T2 &tuple2, int64_t diff) {
Expand Down
2 changes: 1 addition & 1 deletion metagraph/src/graph/alignment/annotation_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ void AnnotationBuffer::fetch_queued_annotations() {
} else {
for (auto&& labels : annotator_.get_matrix().get_rows(queued_rows)) {
std::sort(labels.begin(), labels.end());
push_node_labels(node_it++, row_it++, std::move(labels));
push_node_labels(node_it++, row_it++, Columns(labels.begin(), labels.end()));
}
}

Expand Down
2 changes: 1 addition & 1 deletion metagraph/src/graph/annotated_graph_algorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ construct_diff_label_count_vector(const AnnotatedDBG &anno_graph,
code_to_indicator[label_encoder.encode(label_out)] |= 2;
}

std::vector<uint64_t> label_codes;
std::vector<annot::matrix::BinaryMatrix::Column> label_codes;
label_codes.reserve(code_to_indicator.size());
for (const auto &[code, indicator] : code_to_indicator) {
label_codes.push_back(code);
Expand Down
8 changes: 5 additions & 3 deletions metagraph/tests/annotation/test_annotation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@ TYPED_TEST(AnnotatorPresetTest, GetLabels) {
}

TYPED_TEST(AnnotatorPresetTest, CountLabels) {
using Column = mtg::annot::matrix::BinaryMatrix::Column;

EXPECT_EQ(
convert_to_set(std::vector<std::pair<uint64_t, size_t>>({
convert_to_set(std::vector<std::pair<Column, size_t>>({
{0, 1}, {3, 2}, {1, 4}, {2, 2}
})),
convert_to_set(this->annotation->get_matrix().sum_rows(
Expand All @@ -57,7 +59,7 @@ TYPED_TEST(AnnotatorPresetTest, CountLabels) {
);

EXPECT_EQ(
convert_to_set(std::vector<std::pair<uint64_t, size_t>>({
convert_to_set(std::vector<std::pair<Column, size_t>>({
{0, 1}, {3, 2}, {1, 4}, {2, 2}
})),
convert_to_set(this->annotation->get_matrix().sum_rows(
Expand All @@ -68,7 +70,7 @@ TYPED_TEST(AnnotatorPresetTest, CountLabels) {
);

EXPECT_EQ(
convert_to_set(std::vector<std::pair<uint64_t, size_t>>({
convert_to_set(std::vector<std::pair<Column, size_t>>({
{3, 2}, {1, 4}, {2, 2}
})),
convert_to_set(this->annotation->get_matrix().sum_rows(
Expand Down
2 changes: 1 addition & 1 deletion metagraph/tests/annotation/test_matrix_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ void test_matrix(const TypeParam &matrix, const BitVectorPtrArray &columns) {
for (size_t m : { size_t(0),
size_t(matrix.num_columns() / 2),
size_t(matrix.num_columns()) }) {
std::vector<uint64_t> indices(m);
std::vector<BinaryMatrix::Column> indices(m);
std::iota(indices.begin(), indices.end(), 0);

std::vector<std::vector<BinaryMatrix::Row>> column_map(m);
Expand Down
Loading