Skip to content

Commit

Permalink
Merge branch 'branch-25.04' into fea/plc/io/streams
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt711 authored Feb 5, 2025
2 parents c4a722e + a5dddb0 commit 46c8ec5
Show file tree
Hide file tree
Showing 31 changed files with 603 additions and 158 deletions.
1 change: 1 addition & 0 deletions .github/workflows/auto-assign.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ on:
jobs:
add_assignees:
runs-on: ubuntu-latest
if: ${{ ! github.event.pull_request.merged }}
steps:
- uses: actions-ecosystem/action-add-assignees@v1
with:
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ jobs:
uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@nvks-runners
with:
enable_check_generated_files: false
ignored_pr_jobs: "telemetry-summarize"
ignored_pr_jobs: "telemetry-summarize spark-rapids-jni"
conda-cpp-build:
needs: checks
secrets: inherit
Expand Down Expand Up @@ -342,6 +342,10 @@ jobs:
node_type: "cpu4"
build_type: pull-request
run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh"
spark-rapids-jni:
needs: changed-files
uses: ./.github/workflows/spark-rapids-jni.yaml
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_java

telemetry-summarize:
# This job must use a self-hosted runner to record telemetry traces.
Expand Down
22 changes: 22 additions & 0 deletions .github/workflows/spark-rapids-jni.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: spark-rapids-jni

on:
workflow_call:

jobs:
spark-rapids-jni-build:
runs-on: linux-amd64-cpu8
container:
image: rapidsai/ci-spark-rapids-jni:rockylinux8-cuda12.2.0
steps:
- uses: actions/checkout@v4
with:
repository: NVIDIA/spark-rapids-jni
submodules: recursive
- uses: actions/checkout@v4
with:
path: thirdparty/cudf
- name: "Build spark-rapids-jni"
run: |
mkdir target
GPU_ARCHS=90 LIBCUDF_DEPENDENCY_MODE=latest USE_GDS=on scl enable gcc-toolset-11 build/buildcpp.sh
3 changes: 0 additions & 3 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ build:
- SCCACHE_S3_KEY_PREFIX=cudf-linux64 # [linux64]
- SCCACHE_S3_USE_SSL
- SCCACHE_S3_NO_CREDENTIALS
ignore_run_exports:
# libcudf's run_exports pinning is looser than we would like
- libcudf
ignore_run_exports_from:
- {{ compiler('cuda') }}
{% if cuda_major != "11" %}
Expand Down
4 changes: 1 addition & 3 deletions conda/recipes/pylibcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ build:
- SCCACHE_S3_KEY_PREFIX=pylibcudf-linux64 # [linux64]
- SCCACHE_S3_USE_SSL
- SCCACHE_S3_NO_CREDENTIALS
ignore_run_exports:
# libcudf's run_exports pinning is looser than we would like
- libcudf
ignore_run_exports_from:
- {{ compiler('cuda') }}
{% if cuda_major != "11" %}
Expand Down Expand Up @@ -78,6 +75,7 @@ requirements:
- pandas >=2.0,<2.2.4dev0
- numpy >=1.23,<3.0a0
- pyarrow>=14.0.0,<20.0.0a0
- libcudf ={{ version }}
- {{ pin_compatible('rmm', max_pin='x.x') }}
- fsspec >=0.6.0
{% if cuda_major == "11" %}
Expand Down
10 changes: 5 additions & 5 deletions cpp/benchmarks/json/json.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -190,10 +190,10 @@ static void bench_query(nvbench::state& state)
{
srand(5236);

auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const desired_bytes = static_cast<cudf::size_type>(state.get_int64("bytes"));
auto const query = state.get_int64("query");
auto const json_path = queries[query];
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const desired_bytes = static_cast<cudf::size_type>(state.get_int64("bytes"));
auto const query = state.get_int64("query");
std::string_view const json_path = queries[query];

auto const stream = cudf::get_default_stream();
auto input = build_json_string_column(desired_bytes, num_rows);
Expand Down
6 changes: 3 additions & 3 deletions cpp/benchmarks/string/join_strings.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -41,8 +41,8 @@ static void bench_join(nvbench::state& state)
state.add_global_memory_reads<nvbench::int8_t>(chars_size); // all bytes are read;
state.add_global_memory_writes<nvbench::int8_t>(chars_size); // all bytes are written

std::string separator(":");
std::string narep("null");
std::string_view separator(":");
std::string_view narep("null");
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto result = cudf::strings::join_strings(input, separator, narep);
});
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/string/like.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,7 +34,7 @@ static void bench_like(nvbench::state& state)
auto input = cudf::strings_column_view(col->view());

// This pattern forces reading the entire target string (when matched expected)
auto pattern = std::string("% 5W4_"); // regex equivalent: ".* 5W4.$"
auto pattern = std::string_view("% 5W4_"); // regex equivalent: ".* 5W4.$"

state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
// gather some throughput statistics as well
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/string/replace_re.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -49,7 +49,7 @@ static void bench_replace(nvbench::state& state)
cudf::strings::replace_with_backrefs(input, *program, replacement);
});
} else {
auto replacement = std::string("77");
auto replacement = std::string_view("77");
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::strings::replace_re(input, *program, replacement);
});
Expand Down
4 changes: 2 additions & 2 deletions cpp/examples/strings/libcudf_apis.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -53,7 +53,7 @@ std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,

auto const last_initial_first = cudf::table_view({last_initial->view(), first});

auto result = cudf::strings::concatenate(last_initial_first, std::string(" "));
auto result = cudf::strings::concatenate(last_initial_first, std::string_view(" "));

cudaStreamSynchronize(0);

Expand Down
20 changes: 10 additions & 10 deletions cpp/include/cudf/io/csv.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -916,7 +916,7 @@ class csv_reader_options_builder {
*/
csv_reader_options_builder& prefix(std::string pfx)
{
options._prefix = pfx;
options._prefix = std::move(pfx);
return *this;
}

Expand Down Expand Up @@ -1450,7 +1450,7 @@ class csv_writer_options {
*
* @return string to used for null entries
*/
[[nodiscard]] std::string get_na_rep() const { return _na_rep; }
[[nodiscard]] std::string const& get_na_rep() const { return _na_rep; }

/**
* @brief Whether to write headers to csv.
Expand All @@ -1471,7 +1471,7 @@ class csv_writer_options {
*
* @return Character used for separating lines
*/
[[nodiscard]] std::string get_line_terminator() const { return _line_terminator; }
[[nodiscard]] std::string const& get_line_terminator() const { return _line_terminator; }

/**
* @brief Returns character used for separating column values.
Expand All @@ -1485,14 +1485,14 @@ class csv_writer_options {
*
* @return string used for values != 0 in INT8 types
*/
[[nodiscard]] std::string get_true_value() const { return _true_value; }
[[nodiscard]] std::string const& get_true_value() const { return _true_value; }

/**
* @brief Returns string used for values == 0 in INT8 types.
*
* @return string used for values == 0 in INT8 types
*/
[[nodiscard]] std::string get_false_value() const { return _false_value; }
[[nodiscard]] std::string const& get_false_value() const { return _false_value; }

/**
* @brief Returns the quote style for the writer.
Expand All @@ -1519,7 +1519,7 @@ class csv_writer_options {
*
* @param val String to represent null value
*/
void set_na_rep(std::string val) { _na_rep = val; }
void set_na_rep(std::string val) { _na_rep = std::move(val); }

/**
* @brief Enables/Disables headers being written to csv.
Expand All @@ -1540,7 +1540,7 @@ class csv_writer_options {
*
* @param term Character to represent line termination
*/
void set_line_terminator(std::string term) { _line_terminator = term; }
void set_line_terminator(std::string term) { _line_terminator = std::move(term); }

/**
* @brief Sets character used for separating column values.
Expand All @@ -1554,14 +1554,14 @@ class csv_writer_options {
*
* @param val String to represent values != 0 in INT8 types
*/
void set_true_value(std::string val) { _true_value = val; }
void set_true_value(std::string val) { _true_value = std::move(val); }

/**
* @brief Sets string used for values == 0 in INT8 types.
*
* @param val String to represent values == 0 in INT8 types
*/
void set_false_value(std::string val) { _false_value = val; }
void set_false_value(std::string val) { _false_value = std::move(val); }

/**
* @brief (Re)sets the table being written.
Expand Down
9 changes: 5 additions & 4 deletions cpp/include/cudf/io/text/detail/trie.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,6 +27,7 @@
#include <algorithm>
#include <queue>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>

Expand Down Expand Up @@ -128,7 +129,7 @@ struct trie {
/**
* @brief Insert the string in to the trie tree, growing the trie as necessary
*/
void insert(std::string s) { insert(s.c_str(), s.size(), 0); }
void insert(std::string_view s) { insert(s.data(), s.size(), 0); }

private:
trie_builder_node& insert(char const* s, uint16_t size, uint8_t depth)
Expand Down Expand Up @@ -164,12 +165,12 @@ struct trie {
* @param mr Memory resource to use for the device memory allocation
* @return The trie.
*/
static trie create(std::string const& pattern,
static trie create(std::string pattern,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)

{
return create(std::vector<std::string>{pattern}, stream, mr);
return create(std::vector<std::string>{std::move(pattern)}, stream, mr);
}

/**
Expand Down
5 changes: 3 additions & 2 deletions cpp/include/cudf/io/text/multibyte_split.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,6 +25,7 @@

#include <memory>
#include <optional>
#include <string_view>

namespace CUDF_EXPORT cudf {
namespace io {
Expand Down Expand Up @@ -90,7 +91,7 @@ struct parse_options {
*/
std::unique_ptr<cudf::column> multibyte_split(
data_chunk_source const& source,
std::string const& delimiter,
std::string_view delimiter,
parse_options options = {},
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
Expand Down
26 changes: 14 additions & 12 deletions cpp/include/cudf/io/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -353,8 +353,8 @@ struct source_info {
*
* @param file_paths Input files paths
*/
explicit source_info(std::vector<std::string> const& file_paths)
: _type(io_type::FILEPATH), _filepaths(file_paths)
explicit source_info(std::vector<std::string> file_paths)
: _type(io_type::FILEPATH), _filepaths(std::move(file_paths))
{
}

Expand All @@ -363,8 +363,8 @@ struct source_info {
*
* @param file_path Single input file
*/
explicit source_info(std::string const& file_path)
: _type(io_type::FILEPATH), _filepaths({file_path})
explicit source_info(std::string file_path)
: _type(io_type::FILEPATH), _filepaths({std::move(file_path)})
{
}

Expand Down Expand Up @@ -534,8 +534,8 @@ struct sink_info {
*
* @param file_paths Output files paths
*/
explicit sink_info(std::vector<std::string> const& file_paths)
: _type(io_type::FILEPATH), _num_sinks(file_paths.size()), _filepaths(file_paths)
explicit sink_info(std::vector<std::string> file_paths)
: _type(io_type::FILEPATH), _num_sinks(file_paths.size()), _filepaths(std::move(file_paths))
{
}

Expand All @@ -544,8 +544,8 @@ struct sink_info {
*
* @param file_path Single output file path
*/
explicit sink_info(std::string const& file_path)
: _type(io_type::FILEPATH), _filepaths({file_path})
explicit sink_info(std::string file_path)
: _type(io_type::FILEPATH), _filepaths({std::move(file_path)})
{
}

Expand All @@ -554,8 +554,8 @@ struct sink_info {
*
* @param buffers Output host buffers
*/
explicit sink_info(std::vector<std::vector<char>*> const& buffers)
: _type(io_type::HOST_BUFFER), _num_sinks(buffers.size()), _buffers(buffers)
explicit sink_info(std::vector<std::vector<char>*> buffers)
: _type(io_type::HOST_BUFFER), _num_sinks(buffers.size()), _buffers(std::move(buffers))
{
}
/**
Expand All @@ -571,7 +571,9 @@ struct sink_info {
* @param user_sinks Output user-implemented sinks
*/
explicit sink_info(std::vector<cudf::io::data_sink*> const& user_sinks)
: _type(io_type::USER_IMPLEMENTED), _num_sinks(user_sinks.size()), _user_sinks(user_sinks)
: _type(io_type::USER_IMPLEMENTED),
_num_sinks(user_sinks.size()),
_user_sinks(std::move(user_sinks))
{
}

Expand Down Expand Up @@ -821,7 +823,7 @@ class column_in_metadata {
*
* @return The name of this column
*/
[[nodiscard]] std::string get_name() const noexcept { return _name; }
[[nodiscard]] std::string const& get_name() const noexcept { return _name; }

/**
* @brief Get whether nullability has been explicitly set for this column.
Expand Down
Loading

0 comments on commit 46c8ec5

Please sign in to comment.