From 19d60bf40230f209a60c57f1c2f9c2421468a439 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 30 May 2022 19:29:18 +0200 Subject: [PATCH] define well-formed types --- .gitignore | 1 + README.md | 12 ++++---- lang/step.md | 4 +-- lang/syntax.md | 6 ---- lang/values.md | 6 ---- lang/well-formed.md | 74 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 83 insertions(+), 20 deletions(-) create mode 100644 .gitignore create mode 100644 lang/well-formed.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..78b7e382 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +\#*# diff --git a/README.md b/README.md index 123cc501..eba09b0f 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,8 @@ Even without deciding what exactly the final memory model will look like, we can On the MiniRust language side, the most important concept to understand is that of a *value* and how it relates to *types*. Values form a high-level, structural view of data (e.g. mathematical integers); types serve to relate values with their low-level byte-oriented representation. -Types are just parameters attached to certain operations to define the (de)serialization format. -There is no MiniRust type system (as in, typing rules that would define when a MiniRust program is "well-typed"). -We might have a type system in the future as a basic sanity check, but MiniRust is by design *not* type-safe. +Types are essentially just parameters attached to certain operations to define the (de)serialization format. +Well-formedness of a MiniRust program ensures that expressions and statements satisfy some basic typing discipline, but MiniRust is by design *not* type-safe. ## How to read MiniRust @@ -29,7 +28,7 @@ We use generic type names like `List`, `Map`, `Set` rather than concrete impleme Also, all types except for mutable references are `Copy` (let's just imagine we implicitly `Clone` where needed), and we use `fn(T) -> U` notation even for closures that can capture arbitrarily. We also assume some "obvious" language extensions -- basically, it should always be clear what is meant to anyone with some Rust experience, even if this is not actually legal Rust. -We use `Result` to make operations fallible (where failure indicates UB or machine termination), and omit trailing `Ok(())`. +We use `Result` to make operations fallible (where failure indicates UB or machine termination), and omit trailing `Ok(())` and `Some(())`. We use a `throw_ub!` macro to make the current function return a UB error, and `throw_machine_step!` to indicate that and how the machine has stopped. We use `panic!` (and `unwrap` and similar standard Rust operations) to indicate conditions that should always hold; if execution ever panics, that is a bug in the specification. @@ -85,6 +84,7 @@ There are some [differences between Miri and MiniRust](https://github.com/rust-l * [Prelude](lang/prelude.md): common definitions and parameters of the language * [Values and Types](lang/values.md): the domain of high-level MiniRust values and how types can be used to (de)serialize them to memory * [Syntax](lang/syntax.md): the abstract syntax of MiniRust programs - * [Abstract Machine](lang/machine.md): the state that makes up a MiniRust Abstract Machine (AM) - * [Semantics](lang/step.md): the operational semantics ("`step` function") of the MiniRust Abstract Machine + * [Well-formedness](lang/well-formed.md): the requirements for well-formed types and programs + * [Abstract Machine](lang/machine.md): the state that makes up a MiniRust Abstract Machine + * [Semantics](lang/step.md): the operational semantics ("`step` function") of the Abstract Machine * [Operator semantics](lang/operator.md): the operational semantics of unary and binary operators diff --git a/lang/step.md b/lang/step.md index 5ceb423e..eb928eed 100644 --- a/lang/step.md +++ b/lang/step.md @@ -79,14 +79,14 @@ impl Machine { ### Creating a reference/pointer -The `&` operator simply convert a place to the pointer it denotes. +The `&` operator simply converts a place to the pointer it denotes. ```rust impl Machine { fn eval_value(&mut self, Ref { target, type }: ValueExpr) -> Result { let p = self.eval_place(target)?; // We need a check here, to ensure that encoding this value at the given type is valid. - // (If the type is a reference, and this is a packed struct, it might be insufficiently aligned.) + // (For example, if the type is a reference, and this is a packed struct, it might be insufficiently aligned.) if !check_safe_ptr(p, type) { throw_ub!("creating reference to invalid (null/unaligned/uninhabited) place"); } diff --git a/lang/syntax.md b/lang/syntax.md index de1659c3..a269ce3a 100644 --- a/lang/syntax.md +++ b/lang/syntax.md @@ -151,9 +151,3 @@ enum PlaceExpr { ``` Obviously, these are all quite incomplete still. - -## Well-formed programs - -MiniRust programs need to satisfy some conditions to be well-formed and avoid panics during evaluation, e.g. the operand on an `If` needs to always evaluate to a `bool`. - -- TODO: define this. Or should we just make some of these panics into UB? diff --git a/lang/values.md b/lang/values.md index 3edf0c37..11eb6a6d 100644 --- a/lang/values.md +++ b/lang/values.md @@ -124,12 +124,6 @@ enum TagEncoding { /* ... */ } Note that references have no lifetime, since the lifetime is irrelevant for their representation in memory! They *do* have a mutability since that is (or will be) relevant for the memory model. -### Well-formed types - -Not all types are well-formed; for example, the fields of a `Tuple` must not overlap. - -- TODO: define this - ## Type properties Each type has a layout. diff --git a/lang/well-formed.md b/lang/well-formed.md new file mode 100644 index 00000000..68b347ff --- /dev/null +++ b/lang/well-formed.md @@ -0,0 +1,74 @@ +# MiniRust well-formedness requirements + +The various syntactic constructs of MiniRust (types, functions, ...) come with well-formedness requirements: certain invariants need to be satisfied for this to be considered a well-formed program. +The idea is that for well-formed programs, the `step` function will never panic. +Those requirements are defined in this file. + +Note that `check` functions for testing well-formedness return `Option<()>` rather than `bool` so that we can use `?`. + +## Well-formed layouts and types + +```rust +impl IntType { + fn check(self) -> Option<()> { + if !self.size.bytes().is_power_of_two() { return None; } + } +} + +impl Layout(self) { + fn check(self) -> Option<()> { + // Size must be a multiple of alignment. + if self.size.bytes() % self.align.bytes() != 0 { return None; } + } +} + +impl Type { + fn check_fields(fields: Fields, total_size: Size) -> Option<()> { + // The fields must not overlap. + fields.sort_by_key(|(offset, type)| offset); + let mut last_end = Size::ZERO; + for (offset, type) in fields { + // Recursively check the field type. + type.check()?; + // And ensure it fits after the one we previously checked. + if offset < last_end { return None; } + last_end = offset.checked_add(type.size())?; + } + // And they must all fit into the size. + if size < last_end { return None; } + } + + fn check(self) -> Option<()> { + use Type::*; + match self { + Int(int_type) => { + int_type.check()?; + } + Bool => { + } + Ref { pointee, .. } | Box { pointee } | RawPtr { pointee, .. } => { + pointee.check()?; + } + Tuple { fields, size, align } => { + Type::check_fields(fields, size)?; + } + Enum { variants, size, .. } => { + for variant in variants { + Type::check_fields(variant, size)?; + } + } + Union { fields, size } => { + // These may overlap, but they must all fit the size. + for (offset, type) in fields { + type.check()?; + if size < offset.checked_add(type.size())? { return None; } + } + } + } + } +} +``` + +## Well-formed expressions, functions, and programs + +- TODO: define this