Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/lib/AST/ClangHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
#include "lib/AST/ClangHelpers.hpp"
#include <mrdocs/Support/Assert.hpp>
#include <mrdocs/Support/Report.hpp>
#include <clang/Driver/Driver.h>
#include <clang/Sema/Template.h>
#include <clang/Index/USRGeneration.h>
#include <llvm/Option/ArgList.h>
#include <ranges>

namespace clang::mrdocs {
Expand Down Expand Up @@ -454,4 +456,32 @@ isDocumented(Decl const* D)
return getDocumentation(D) != nullptr;
}

/*  Determine whether a compile command uses the clang-cl driver mode.

    The driver mode distinguishes between the clang/gcc and msvc
    command line option formats. It is deduced from the
    `--driver-mode` option when the command line has one, or from
    the program name (the first argument) otherwise.

    @param cc The compile command to inspect.
    @return `true` if the deduced driver mode is the clang-cl one;
    `false` otherwise, including when the command line is empty.
*/
bool
isClangCL(tooling::CompileCommand const& cc)
{
    auto const& cmdline = cc.CommandLine;

    // An empty command line has no program name to inspect, and
    // calling `front()` on it would be undefined behavior.
    if (cmdline.empty())
    {
        return false;
    }

    // ------------------------------------------------------
    // Convert to a list of C strings
    // ------------------------------------------------------
    // `getDriverMode` takes the arguments as C strings. The
    // pointers remain valid for the whole call because they
    // refer to the strings owned by `cc`.
    std::vector<char const*> cmdLineCStrs;
    cmdLineCStrs.reserve(cmdline.size());
    for (std::string const& arg : cmdline)
    {
        cmdLineCStrs.push_back(arg.c_str());
    }

    // ------------------------------------------------------
    // Get driver mode
    // ------------------------------------------------------
    // The value is deduced from the `--driver-mode` option or
    // from `progName`. Common values are "gcc", "g++", "cpp",
    // "cl" and "flang".
    std::string const& progName = cmdline.front();
    StringRef const driver_mode = driver::getDriverMode(progName, cmdLineCStrs);

    return driver::IsClangCL(driver_mode);
}

} // clang::mrdocs
8 changes: 8 additions & 0 deletions src/lib/AST/ClangHelpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <lib/AST/InstantiatedFromVisitor.hpp>
#include <clang/AST/Expr.h>
#include <clang/Sema/Sema.h>
#include <clang/Tooling/CompilationDatabase.h>
#include <mrdocs/Platform.hpp>
#include <mrdocs/Metadata.hpp>
#include <mrdocs/Support/TypeTraits.hpp>
Expand Down Expand Up @@ -1081,6 +1082,13 @@ namespace detail {
report::trace("{}", MRDOCS_SYMBOL_TRACE_UNIQUE_NAME)
#endif

/** Determine whether the driver mode is ClangCL.

    The driver mode tells apart the "msvc/clang-cl" and the
    "clang/gcc" formats for command line options.

    @param cc The compilation command to inspect.
    @return `true` if the driver mode deduced for `cc` is ClangCL.
*/
bool
isClangCL(tooling::CompileCommand const& cc);

} // clang::mrdocs

#endif
8 changes: 6 additions & 2 deletions src/lib/CorpusImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,7 @@ mrdocs::Expected<std::unique_ptr<Corpus>>
CorpusImpl::
build(
std::shared_ptr<ConfigImpl const> const& config,
tooling::CompilationDatabase const& compilations)
MrDocsCompilationDatabase const& compilations)
{
using clock_type = std::chrono::steady_clock;
auto start_time = clock_type::now();
Expand All @@ -735,6 +735,10 @@ build(
// InfoSet in the execution context.
InfoExecutionContext context(*config);

// Identify if we should use "msvc/clang-cl" or "clang/gcc" format
// for options.
bool const is_clang_cl = compilations.isClangCL();

// ------------------------------------------
// "Process file" task
// ------------------------------------------
Expand Down Expand Up @@ -795,7 +799,7 @@ build(
FSConcrete->addVirtualFile(shimPath, shimContent);
Tool.appendArgumentsAdjuster(
tooling::combineAdjusters(
tooling::getInsertArgumentAdjuster("-include"),
tooling::getInsertArgumentAdjuster(is_clang_cl ? "/FI" : "-include"),
tooling::getInsertArgumentAdjuster(shimPath.data())));
}

Expand Down
9 changes: 5 additions & 4 deletions src/lib/CorpusImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,22 @@
#ifndef MRDOCS_LIB_CORPUSIMPL_HPP
#define MRDOCS_LIB_CORPUSIMPL_HPP

#include "lib/AST/ParseRef.hpp"
#include "ConfigImpl.hpp"
#include "lib/AST/ParseRef.hpp"
#include "lib/Metadata/InfoSet.hpp"
#include "lib/MrDocsCompilationDatabase.hpp"
#include "lib/Support/Debug.hpp"
#include <clang/Tooling/CompilationDatabase.h>
#include <mrdocs/ADT/UnorderedStringMap.hpp>
#include <mrdocs/Corpus.hpp>
#include <mrdocs/Metadata.hpp>
#include <mrdocs/Platform.hpp>
#include <mrdocs/Support/Error.hpp>
#include <functional>
#include <map>
#include <mutex>
#include <string>
#include <set>
#include <functional>

namespace clang::mrdocs {

Expand Down Expand Up @@ -161,15 +162,15 @@ class CorpusImpl final : public Corpus
not need to call this function directly.

@param config A shared pointer to the configuration.
@param compilations A compilations database for the input files.
@param compilations A MrDocs compilations database for the input files.
*/
// MRDOCS_DECL
[[nodiscard]]
static
mrdocs::Expected<std::unique_ptr<Corpus>>
build(
std::shared_ptr<ConfigImpl const> const& config,
tooling::CompilationDatabase const& compilations);
MrDocsCompilationDatabase const& compilations);

void
qualifiedName(
Expand Down
46 changes: 23 additions & 23 deletions src/lib/MrDocsCompilationDatabase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "MrDocsCompilationDatabase.hpp"
#include "lib/ConfigImpl.hpp"
#include "lib/AST/ClangHelpers.hpp"
#include "lib/Support/Debug.hpp"
#include "lib/Support/ExecuteAndWaitWithLogging.hpp"
#include "lib/Support/Path.hpp"
Expand Down Expand Up @@ -275,6 +276,7 @@ std::vector<std::string>
adjustCommandLine(
StringRef const workingDir,
std::vector<std::string> const& cmdline,
bool is_clang_cl,
std::shared_ptr<Config const> const& config,
std::unordered_map<std::string, std::vector<std::string>> const& implicitIncludeDirectories,
std::string_view filename)
Expand All @@ -300,24 +302,14 @@ adjustCommandLine(
cmdLineCStrs.data(),
cmdLineCStrs.data() + cmdLineCStrs.size());

// ------------------------------------------------------
// Get driver mode
// ------------------------------------------------------
// The driver mode distinguishes between clang/gcc and msvc
// command line option formats. The value is deduced from
// the `-drive-mode` option or from `progName`.
// Common values are "gcc", "g++", "cpp", "cl" and "flang".
StringRef const driver_mode = driver::getDriverMode(progName, cmdLineCStrs);
// Identify if we should use "msvc/clang-cl" or "clang/gcc" format
// for options.
bool const is_clang_cl = driver::IsClangCL(driver_mode);
auto const systemIncludeFlag = is_clang_cl ? "-external:I" : "-isystem";

// ------------------------------------------------------
// Supress all warnings
// ------------------------------------------------------
// Add flags to ignore all warnings. Any options that
// affect warnings will be discarded later.
new_cmdline.emplace_back(is_clang_cl ? "/w" : "-w");
new_cmdline.emplace_back("-w");
new_cmdline.emplace_back("-fsyntax-only");

// ------------------------------------------------------
Expand Down Expand Up @@ -411,17 +403,18 @@ adjustCommandLine(
isExplicitCCompileCommand || (!isExplicitCppCompileCommand && isImplicitCSourceFile);

constexpr auto is_std_option = [](std::string_view const opt) {
return opt.starts_with("-std=") || opt.starts_with("--std=") || opt.starts_with("/std:");
return opt.starts_with("-std=") || opt.starts_with("--std=") || // clang options
opt.starts_with("-std:") || opt.starts_with("/std:"); // clang-cl options
};
if (std::ranges::find_if(cmdline, is_std_option) == cmdline.end())
if (std::ranges::none_of(cmdline, is_std_option))
{
if (!isCCompileCommand)
{
new_cmdline.emplace_back("-std=c++23");
new_cmdline.emplace_back(is_clang_cl ? "-std:c++latest" : "-std=c++23");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we have a stable equivalent to latest in the non-CL version?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator

@alandefreitas alandefreitas Oct 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah... OK. I see. You can calculate it with something like:

#include <clang/Basic/LangStandard.h>
#include <llvm/ADT/StringRef.h>
#include <string>

/// ...

static
std::string 
latest_cxx_std_supported() 
{
  auto supports = [](llvm::StringRef name) {
    const clang::LangStandard* LS = clang::LangStandard::getLangStandardForName(name);
    return LS && LS->isCPlusPlus();
  };

  std::string cur = "c++23";
  std::string last_ok = "c++23";

  auto bump_by_3 = [](std::string& s) {
    // s is "c++NN" with two decimal digits.
    int n = (s[3] - '0') * 10 + (s[4] - '0');
    n += 3;
    s[3] = char('0' + (n / 10) % 10);
    s[4] = char('0' + (n % 10));
  };

  for (;;) {
    if (!supports(cur)) break;
    last_ok = cur;
    bump_by_3(cur);
  }
  return last_ok;
}

It also has the benefit of giving us the same version for CL and non-CL arguments in case the latest happens to have any special meaning.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have complete control over clang, we can adjust this value manually when bumping llvm. If the target commit hash is exposed (-DLLVM_COMMIT_HASH=...) we can even static assert on this, so it doesn't go undetected.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can adjust this value manually when bumping llvm

Yes. We could. As you noticed, the problem is remembering. MrDocs has dozens, if not hundreds, of moving parts reflecting information back and forth. We always forget to update all these parts because no one remembers to do so. That's why things like the XML generator are broken.

we can even static assert on this

Exactly. For each type of information, we need a strategy that either forces us to review it because it's broken or that does it automatically.

Your solution is in the first category, and I do like it. There's no LLVM_COMMIT_HASH in their headers, but I'm guessing you would end up with something like:

#include <llvm/Config/llvm-config.h>

// ...

/// Compare two character arrays at compile time.
///
/// The array extents take part in the comparison, so for string
/// literals the terminating null character is counted and compared too.
///
/// @param a First character array.
/// @param b Second character array.
/// @return `true` if both arrays have the same extent and hold the
/// same characters, `false` otherwise.
template <size_t N, size_t M>
consteval
bool
str_equal(const char (&a)[N], const char (&b)[M]) {
    // Arrays of different extents can never be equal.
    if (N != M) return false;
    for (size_t i = 0; i < N; ++i)
        if (a[i] != b[i]) return false;
    return true;
}

// Adjacent string literals are concatenated verbatim, so every piece
// of the message carries its own trailing separator.
static_assert(str_equal(LLVM_VERSION_STRING, "22.0.0git"),
              "This code was designed with 22.0.0git in mind. "
              "Since you're moving to " LLVM_VERSION_STRING ", please: "
              "1) review the code below and fix the c++ version if necessary; "
              "2) update the string in this assertion");

or

#include <llvm/Config/llvm-config.h>

// ...

// Fails the build when the LLVM version changes, forcing a review of
// the code that follows. Adjacent string literals are concatenated
// verbatim, so every piece of the message carries its own trailing
// separator.
static_assert(std::string_view(LLVM_VERSION_STRING) == "22.0.0git",
              "This code was designed with 22.0.0git in mind. "
              "Since you're moving to " LLVM_VERSION_STRING ", please: "
              "1) review the code below and fix the c++ version if necessary; "
              "2) update the string in this assertion");

I'm OK with some variant of that. All we have to be sure is it's always working. Not something the user can bypass by mistake.

By default, I usually prefer things in the second category whenever possible, but this hasn't been the case for every problem. For instance, if you look at the code I wrote, you'll see that I use "c++23" as the baseline. The idea is that there's no cost if we forget to update it, but we wouldn't lose anything in terms of correctness if we did. There's probably some smart solution based on some enum or something somewhere.

Anyway, your macro based solution is also fine to me. I'm just explaining the rationale here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#1047 says other options are "simpler". Do you mean more robust, instead? Because if they're even simpler than copy/pasting that thing, why not just do it now? Does @mizvekov know of a good solution to this?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean simpler. I do agree they're more robust too.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If they're even simpler than copy/pasting that thing, why not just do it now?

Copy link
Collaborator Author

@K-ballo K-ballo Oct 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR fixes a fundamental bug in the tool, the suggested change introduces an unrelated change of behavior (in a different part of the tool), conflating them under a single PR is a poor engineering practice.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. No problem.

My intuition was that

            new_cmdline.emplace_back(is_clang_cl ? "-std:c++latest" : "-std=c++23");

makes the std option vary between "latest" and "c++23" for an arbitrary reason. So this looked like an "unrelated change of behavior" introduced by the PR, even though I don't question your "engineering practices".

To which you would probably respond that the previous behavior (rather than the "expected" behavior) wasn't working anyway, so it can't be a change of behavior.

I'll resolve this conversation then and we can avoid this loop. Could you open an issue for that?

}
else
{
new_cmdline.emplace_back("-std=c23");
new_cmdline.emplace_back(is_clang_cl ? "-std:clatest" : "-std=c23");
}
}

Expand All @@ -448,8 +441,9 @@ adjustCommandLine(
it != implicitIncludeDirectories.end()) {
for (auto const& inc : it->second)
{
new_cmdline.emplace_back(std::format("-isystem{}", inc));
}
new_cmdline.emplace_back(systemIncludeFlag);
new_cmdline.emplace_back(inc);
}
}
}

Expand All @@ -463,11 +457,11 @@ adjustCommandLine(
// implicit include paths and add the standard library
// and system includes manually. That gives MrDocs
// access to libc++ in a portable way.
new_cmdline.emplace_back("-nostdinc++");
new_cmdline.emplace_back("-nostdlib++");
new_cmdline.emplace_back(is_clang_cl ? "-X" : "-nostdinc++");
for (auto const& inc : (*config)->stdlibIncludes)
{
new_cmdline.emplace_back(std::format("-isystem{}", inc));
new_cmdline.emplace_back(systemIncludeFlag);
new_cmdline.emplace_back(inc);
}
}

Expand All @@ -476,7 +470,8 @@ adjustCommandLine(
new_cmdline.emplace_back("-nostdinc");
for (auto const& inc : (*config)->libcIncludes)
{
new_cmdline.emplace_back(std::format("-isystem{}", inc));
new_cmdline.emplace_back(systemIncludeFlag);
new_cmdline.emplace_back(inc);
}
}

Expand All @@ -485,7 +480,8 @@ adjustCommandLine(
// ------------------------------------------------------
for (auto const& inc : (*config)->systemIncludes)
{
new_cmdline.emplace_back(std::format("-isystem{}", inc));
new_cmdline.emplace_back(systemIncludeFlag);
new_cmdline.emplace_back(inc);
}
for (auto const& inc : (*config)->includes)
{
Expand Down Expand Up @@ -556,6 +552,9 @@ MrDocsCompilationDatabase(
using tooling::CompileCommand;

std::vector<CompileCommand> allCommands = inner.getAllCompileCommands();
if (allCommands.empty()) return;

isClangCL_ = mrdocs::isClangCL(allCommands.front());
AllCommands_.reserve(allCommands.size());
SmallPathString temp;
for (CompileCommand const& cmd0 : allCommands)
Expand All @@ -567,6 +566,7 @@ MrDocsCompilationDatabase(
cmd.CommandLine = adjustCommandLine(
workingDir,
cmd0.CommandLine,
isClangCL_,
config,
implicitIncludeDirectories,
cmd0.Filename);
Expand Down
8 changes: 8 additions & 0 deletions src/lib/MrDocsCompilationDatabase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class MrDocsCompilationDatabase
{
std::vector<tooling::CompileCommand> AllCommands_;
llvm::StringMap<std::size_t> IndexByFile_;
bool isClangCL_{};

public:
/**
Expand Down Expand Up @@ -80,6 +81,13 @@ class MrDocsCompilationDatabase
*/
std::vector<tooling::CompileCommand>
getAllCompileCommands() const override;

/** Whether the driver mode for the compilation database is ClangCL.

    The flag is computed when this database is constructed, from
    the first compile command of the wrapped database. It stays
    `false` when the wrapped database has no compile commands.

    @return `true` if the driver mode is ClangCL, `false` otherwise.
*/
bool
isClangCL() const noexcept
{
return isClangCL_;
}
};

} // mrdocs
Expand Down
37 changes: 9 additions & 28 deletions src/lib/SingleFileDB.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#ifndef MRDOCS_LIB_SINGLEFILEDB_HPP
#define MRDOCS_LIB_SINGLEFILEDB_HPP

#include <mrdocs/Support/Path.hpp>
#include <clang/Tooling/CompilationDatabase.h>
#include <string>
#include <utility>
Expand All @@ -20,55 +19,37 @@
namespace clang {
namespace mrdocs {

/** Compilation database for a single .cpp file.
/** Compilation database for a single file.
*/
class SingleFileDB
: public tooling::CompilationDatabase
{
std::vector<tooling::CompileCommand> cc_;
tooling::CompileCommand cc_;

public:
explicit
SingleFileDB(
llvm::StringRef pathName)
{
auto fileName = files::getFileName(pathName);
auto parentDir = files::getParentDir(pathName);

std::vector<std::string> cmds;
cmds.emplace_back("clang");
cmds.emplace_back("-fsyntax-only");
cmds.emplace_back("-std=c++23");
cmds.emplace_back("-pedantic-errors");
cmds.emplace_back("-Werror");
cmds.emplace_back(fileName);
cc_.emplace_back(
parentDir,
fileName,
std::move(cmds),
parentDir);
cc_.back().Heuristic = "unit test";
}
explicit SingleFileDB(tooling::CompileCommand cc)
: cc_(std::move(cc))
{}

std::vector<tooling::CompileCommand>
getCompileCommands(
llvm::StringRef FilePath) const override
{
if (FilePath != cc_.front().Filename)
if (FilePath != cc_.Filename)
return {};
return { cc_.front() };
return { cc_ };
}

std::vector<std::string>
getAllFiles() const override
{
return { cc_.front().Filename };
return { cc_.Filename };
}

std::vector<tooling::CompileCommand>
getAllCompileCommands() const override
{
return { cc_.front() };
return { cc_ };
}
};

Expand Down
Loading
Loading