Skip to content

Implement RFC 3503: frontmatters #140035

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler/rustc_ast_passes/src/feature_gate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session, features: &Features) {
gate_all!(contracts_internals, "contract internal machinery is for internal use only");
gate_all!(where_clause_attrs, "attributes in `where` clause are unstable");
gate_all!(super_let, "`super let` is experimental");
gate_all!(frontmatter, "frontmatters are experimental");

if !visitor.features.never_patterns() {
if let Some(spans) = spans.get(&sym::never_patterns) {
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_feature/src/unstable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,8 @@ declare_features! (
(incomplete, fn_delegation, "1.76.0", Some(118212)),
/// Allows impls for the Freeze trait.
(internal, freeze_impls, "1.78.0", Some(121675)),
/// Frontmatter `---` blocks for use by external tools.
(unstable, frontmatter, "CURRENT_RUSTC_VERSION", Some(136889)),
/// Allows defining gen blocks and `gen fn`.
(unstable, gen_blocks, "1.75.0", Some(117078)),
/// Infer generic args for both consts and types.
Expand Down
14 changes: 13 additions & 1 deletion compiler/rustc_lexer/src/cursor.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
use std::str::Chars;

/// Whether a [`Cursor`] should try to recognize a frontmatter block while lexing.
///
/// The value is supplied to `Cursor::new` and stored on the cursor. When lexing with `Yes`,
/// the cursor switches itself to `No` after producing the first non-whitespace token, so a
/// frontmatter can only be recognized ahead of any other token.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FrontmatterAllowed {
    /// Leading whitespace and a leading `---` fence are checked for a frontmatter.
    Yes,
    /// No frontmatter detection is performed; `-` and whitespace are lexed as usual.
    No,
}

/// Peekable iterator over a char sequence.
///
/// Next characters can be peeked via `first` method,
Expand All @@ -8,17 +13,19 @@ pub struct Cursor<'a> {
len_remaining: usize,
/// Iterator over chars. Slightly faster than a &str.
chars: Chars<'a>,
pub(crate) frontmatter_allowed: FrontmatterAllowed,
#[cfg(debug_assertions)]
prev: char,
}

pub(crate) const EOF_CHAR: char = '\0';

impl<'a> Cursor<'a> {
pub fn new(input: &'a str) -> Cursor<'a> {
pub fn new(input: &'a str, frontmatter_allowed: FrontmatterAllowed) -> Cursor<'a> {
Cursor {
len_remaining: input.len(),
chars: input.chars(),
frontmatter_allowed,
#[cfg(debug_assertions)]
prev: EOF_CHAR,
}
Expand Down Expand Up @@ -95,6 +102,11 @@ impl<'a> Cursor<'a> {
Some(c)
}

/// Advances the cursor by `n` bytes, dropping that prefix of the remaining input.
///
/// # Panics
///
/// Panics if `n` is larger than the remaining input or does not fall on a UTF-8
/// character boundary, because the remaining input is sliced at byte offset `n`.
pub(crate) fn bump_bytes(&mut self, n: usize) {
    let remaining = self.as_str();
    self.chars = remaining[n..].chars();
}

/// Eats symbols while predicate returns true or until the end of file is reached.
pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
// It was tried making optimized version of this for eg. line comments, but
Expand Down
155 changes: 148 additions & 7 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ pub use unicode_xid::UNICODE_VERSION as UNICODE_XID_VERSION;

use self::LiteralKind::*;
use self::TokenKind::*;
pub use crate::cursor::Cursor;
use crate::cursor::EOF_CHAR;
pub use crate::cursor::{Cursor, FrontmatterAllowed};

/// Parsed token.
/// It doesn't contain information about data that has been parsed,
Expand All @@ -57,17 +57,27 @@ impl Token {
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TokenKind {
/// A line comment, e.g. `// comment`.
LineComment { doc_style: Option<DocStyle> },
LineComment {
doc_style: Option<DocStyle>,
},

/// A block comment, e.g. `/* block comment */`.
///
/// Block comments can be recursive, so a sequence like `/* /* */`
/// will not be considered terminated and will result in a parsing error.
BlockComment { doc_style: Option<DocStyle>, terminated: bool },
BlockComment {
doc_style: Option<DocStyle>,
terminated: bool,
},

/// Any whitespace character sequence.
Whitespace,

Frontmatter {
has_invalid_preceding_whitespace: bool,
invalid_infostring: bool,
},

/// An identifier or keyword, e.g. `ident` or `continue`.
Ident,

Expand Down Expand Up @@ -109,10 +119,15 @@ pub enum TokenKind {
/// this type will need to check for and reject that case.
///
/// See [LiteralKind] for more details.
Literal { kind: LiteralKind, suffix_start: u32 },
Literal {
kind: LiteralKind,
suffix_start: u32,
},

/// A lifetime, e.g. `'a`.
Lifetime { starts_with_number: bool },
Lifetime {
starts_with_number: bool,
},

/// `;`
Semi,
Expand Down Expand Up @@ -280,7 +295,7 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
#[inline]
pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> {
debug_assert!(!input.is_empty());
let mut cursor = Cursor::new(input);
let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
// Move past the leading `r` or `br`.
for _ in 0..prefix_len {
cursor.bump().unwrap();
Expand All @@ -290,7 +305,7 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>

/// Creates an iterator that produces tokens from the input string.
pub fn tokenize(input: &str) -> impl Iterator<Item = Token> {
let mut cursor = Cursor::new(input);
let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
std::iter::from_fn(move || {
let token = cursor.advance_token();
if token.kind != TokenKind::Eof { Some(token) } else { None }
Expand Down Expand Up @@ -361,7 +376,34 @@ impl Cursor<'_> {
Some(c) => c,
None => return Token::new(TokenKind::Eof, 0),
};

let token_kind = match first_char {
c if matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
&& is_whitespace(c) =>
{
let mut last = first_char;
while is_whitespace(self.first()) {
let Some(c) = self.bump() else {
break;
};
last = c;
}
// invalid frontmatter opening as whitespace preceding it isn't newline.
// combine the whitespace and the frontmatter to a single token as we shall
// error later.
if last != '\n' && self.as_str().starts_with("---") {
self.bump();
self.frontmatter(true)
} else {
Whitespace
}
}
'-' if matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
&& self.as_str().starts_with("--") =>
{
// happy path
self.frontmatter(false)
}
// Slash, comment or block comment.
'/' => match self.first() {
'/' => self.line_comment(),
Expand Down Expand Up @@ -464,11 +506,110 @@ impl Cursor<'_> {
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
_ => Unknown,
};
if matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
&& !matches!(token_kind, Whitespace)
{
// stop allowing frontmatters after first non-whitespace token
self.frontmatter_allowed = FrontmatterAllowed::No;
}
let res = Token::new(token_kind, self.pos_within_token());
self.reset_pos_within_token();
res
}

/// Given that one `-` was eaten, eat the rest of the frontmatter.
fn frontmatter(&mut self, has_invalid_preceding_whitespace: bool) -> TokenKind {
debug_assert_eq!('-', self.prev());

let pos = self.pos_within_token();
self.eat_while(|c| c == '-');

// one `-` is eaten by the caller.
let length_opening = self.pos_within_token() - pos + 1;

// must be ensured by the caller
debug_assert!(length_opening >= 3);

// whitespace between the opening and the infostring.
self.eat_while(|ch| ch != '\n' && is_whitespace(ch));

// copied from `eat_identifier`, but allows `.` in infostring to allow something like
// `---Cargo.toml` as a valid opener
if is_id_start(self.first()) {
self.bump();
self.eat_while(|c| is_id_continue(c) || c == '.');
}

self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
let invalid_infostring = self.first() != '\n';

let mut s = self.as_str();
let mut found = false;
while let Some(closing) = s.find(&"-".repeat(length_opening as usize)) {
let preceding_chars_start = s[..closing].rfind("\n").map_or(0, |i| i + 1);
if s[preceding_chars_start..closing].chars().all(is_whitespace) {
// candidate found
self.bump_bytes(closing);
// in case like
// ---cargo
// --- blahblah
// or
// ---cargo
// ----
// combine those stuff into this frontmatter token such that it gets detected later.
self.eat_until(b'\n');
found = true;
break;
} else {
s = &s[closing + length_opening as usize..];
}
}

if !found {
// recovery strategy: a closing statement might have precending whitespace/newline
// but not have enough dashes to properly close. In this case, we eat until there,
// and report a mismatch in the parser.
let mut rest = self.as_str();
// We can look for a shorter closing (starting with four dashes but closing with three)
// and other indications that Rust has started and the infostring has ended.
let mut potential_closing = rest
.find("\n---")
// n.b. only in the case where there are dashes, we move the index to the line where
// the dashes start as we eat to include that line. For other cases those are Rust code
// and not included in the frontmatter.
.map(|x| x + 1)
.or_else(|| rest.find("\nuse"))
.or_else(|| rest.find("\n//!"))
.or_else(|| rest.find("\n#!["));

if potential_closing.is_none() {
// a less fortunate recovery if all else fails which finds any dashes preceded by whitespace
// on a standalone line. Might be wrong.
while let Some(closing) = rest.find("---") {
let preceding_chars_start = rest[..closing].rfind("\n").map_or(0, |i| i + 1);
if rest[preceding_chars_start..closing].chars().all(is_whitespace) {
// candidate found
potential_closing = Some(closing);
break;
} else {
rest = &rest[closing + 3..];
}
}
}

if let Some(potential_closing) = potential_closing {
// bump to the potential closing, and eat everything on that line.
self.bump_bytes(potential_closing);
self.eat_until(b'\n');
} else {
// eat everything. this will get reported as an unclosed frontmatter.
self.eat_while(|_| true);
}
}

Frontmatter { has_invalid_preceding_whitespace, invalid_infostring }
}

fn line_comment(&mut self) -> TokenKind {
debug_assert!(self.prev() == '/' && self.first() == '/');
self.bump();
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_lexer/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use super::*;

fn check_raw_str(s: &str, expected: Result<u8, RawStrError>) {
let s = &format!("r{}", s);
let mut cursor = Cursor::new(s);
let mut cursor = Cursor::new(s, FrontmatterAllowed::No);
cursor.bump();
let res = cursor.raw_double_quoted_string(0);
assert_eq!(res, expected);
Expand Down
13 changes: 13 additions & 0 deletions compiler/rustc_parse/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,19 @@ parse_forgot_paren = perhaps you forgot parentheses?
parse_found_expr_would_be_stmt = expected expression, found `{$token}`
.label = expected expression

parse_frontmatter_extra_characters_after_close = extra characters after frontmatter close are not allowed
parse_frontmatter_invalid_close_preceding_whitespace = invalid preceding whitespace for frontmatter close
.note = frontmatter close should not be preceded by whitespace
parse_frontmatter_invalid_infostring = invalid infostring for frontmatter
.note = frontmatter infostrings must be a single identifier immediately following the opening
parse_frontmatter_invalid_opening_preceding_whitespace = invalid preceding whitespace for frontmatter opening
.note = frontmatter opening should not be preceded by whitespace
parse_frontmatter_length_mismatch = frontmatter close does not match the opening
.label_opening = the opening here has {$len_opening} dashes...
.label_close = ...while the close has {$len_close} dashes
parse_frontmatter_unclosed = unclosed frontmatter
.note = frontmatter opening here was not closed

parse_function_body_equals_expr = function body cannot be `= expression;`
.suggestion = surround the expression with `{"{"}` and `{"}"}` instead of `=` and `;`

Expand Down
55 changes: 55 additions & 0 deletions compiler/rustc_parse/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,61 @@ pub(crate) struct FoundExprWouldBeStmt {
pub suggestion: ExprParenthesesNeeded,
}

/// Diagnostic for the `parse_frontmatter_extra_characters_after_close` message:
/// "extra characters after frontmatter close are not allowed".
#[derive(Diagnostic)]
#[diag(parse_frontmatter_extra_characters_after_close)]
pub(crate) struct FrontmatterExtraCharactersAfterClose {
// Primary span the diagnostic points at.
#[primary_span]
pub span: Span,
}

/// Diagnostic for the `parse_frontmatter_invalid_infostring` message:
/// "invalid infostring for frontmatter".
///
/// The struct-level `#[note]` attaches the message's `.note` text, which explains that the
/// infostring must be a single identifier immediately following the opening.
#[derive(Diagnostic)]
#[diag(parse_frontmatter_invalid_infostring)]
#[note]
pub(crate) struct FrontmatterInvalidInfostring {
// Primary span the diagnostic points at.
#[primary_span]
pub span: Span,
}

/// Diagnostic for the `parse_frontmatter_invalid_opening_preceding_whitespace` message:
/// "invalid preceding whitespace for frontmatter opening".
#[derive(Diagnostic)]
#[diag(parse_frontmatter_invalid_opening_preceding_whitespace)]
pub(crate) struct FrontmatterInvalidOpeningPrecedingWhitespace {
// Primary span the diagnostic points at.
#[primary_span]
pub span: Span,
// Span that carries the message's `.note`
// ("frontmatter opening should not be preceded by whitespace").
#[note]
pub note_span: Span,
}

/// Diagnostic for the `parse_frontmatter_unclosed` message: "unclosed frontmatter".
#[derive(Diagnostic)]
#[diag(parse_frontmatter_unclosed)]
pub(crate) struct FrontmatterUnclosed {
// Primary span the diagnostic points at.
#[primary_span]
pub span: Span,
// Span that carries the message's `.note`
// ("frontmatter opening here was not closed").
#[note]
pub note_span: Span,
}

/// Diagnostic for the `parse_frontmatter_invalid_close_preceding_whitespace` message:
/// "invalid preceding whitespace for frontmatter close".
#[derive(Diagnostic)]
#[diag(parse_frontmatter_invalid_close_preceding_whitespace)]
pub(crate) struct FrontmatterInvalidClosingPrecedingWhitespace {
// Primary span the diagnostic points at.
#[primary_span]
pub span: Span,
// Span that carries the message's `.note`
// ("frontmatter close should not be preceded by whitespace").
#[note]
pub note_span: Span,
}

/// Diagnostic for the `parse_frontmatter_length_mismatch` message:
/// "frontmatter close does not match the opening".
///
/// Two labels are attached, one on the opening and one on the close; their texts
/// interpolate `len_opening` and `len_close` respectively.
#[derive(Diagnostic)]
#[diag(parse_frontmatter_length_mismatch)]
pub(crate) struct FrontmatterLengthMismatch {
// Primary span the diagnostic points at.
#[primary_span]
pub span: Span,
// Labelled "the opening here has {$len_opening} dashes...".
#[label(parse_label_opening)]
pub opening: Span,
// Labelled "...while the close has {$len_close} dashes".
#[label(parse_label_close)]
pub close: Span,
// Values substituted for `{$len_opening}` / `{$len_close}` in the label texts.
pub len_opening: usize,
pub len_close: usize,
}

#[derive(Diagnostic)]
#[diag(parse_leading_plus_not_supported)]
pub(crate) struct LeadingPlusNotSupported {
Expand Down
Loading