diff --git a/crates/red_knot_python_semantic/resources/mdtest/attributes.md b/crates/red_knot_python_semantic/resources/mdtest/attributes.md index 0a5602e2e57ab..f726328a915e4 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/attributes.md +++ b/crates/red_knot_python_semantic/resources/mdtest/attributes.md @@ -25,25 +25,21 @@ class C: c_instance = C(1) -# TODO: Mypy/pyright infer `int | str` here. We want this to be `Unknown | Literal[1, "a"]` -reveal_type(c_instance.inferred_from_value) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.inferred_from_value) # revealed: Unknown | Literal[1, "a"] # TODO: Same here. This should be `Unknown | Literal[1, "a"]` -reveal_type(c_instance.inferred_from_other_attribute) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.inferred_from_other_attribute) # revealed: Unknown # TODO: should be `int | None` -reveal_type(c_instance.inferred_from_param) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.inferred_from_param) # revealed: Unknown | int | None -# TODO: should be `bytes` -reveal_type(c_instance.declared_only) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.declared_only) # revealed: bytes -# TODO: should be `bool` -reveal_type(c_instance.declared_and_bound) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.declared_and_bound) # revealed: bool -# TODO: should be `str` # We probably don't want to emit a diagnostic for this being possibly undeclared/unbound. # mypy and pyright do not show an error here. -reveal_type(c_instance.possibly_undeclared_unbound) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.possibly_undeclared_unbound) # revealed: str # This assignment is fine, as we infer `Unknown | Literal[1, "a"]` for `inferred_from_value`. c_instance.inferred_from_value = "value set on instance" @@ -71,7 +67,7 @@ c_instance.declared_and_bound = False # in general (we don't know what else happened to `c_instance` between the assignment and the use # here), but mypy and pyright support this. In conclusion, this could be `bool` but should probably # be `Literal[False]`. -reveal_type(c_instance.declared_and_bound) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.declared_and_bound) # revealed: bool ``` #### Variable declared in class body and possibly bound in `__init__` @@ -124,7 +120,7 @@ reveal_type(C.only_declared) # revealed: str C.only_declared = "overwritten on class" ``` -#### Variable only defined in unrelated method +#### Variable defined in non-`__init__` method We also recognize pure instance variables if they are defined in a method that is not `__init__`. @@ -143,20 +139,17 @@ class C: c_instance = C(1) -# TODO: Should be `Unknown | Literal[1, "a"]` -reveal_type(c_instance.inferred_from_value) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.inferred_from_value) # revealed: Unknown | Literal[1, "a"] # TODO: Should be `Unknown | Literal[1, "a"]` -reveal_type(c_instance.inferred_from_other_attribute) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.inferred_from_other_attribute) # revealed: Unknown # TODO: Should be `int | None` -reveal_type(c_instance.inferred_from_param) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.inferred_from_param) # revealed: Unknown | int | None -# TODO: Should be `bytes` -reveal_type(c_instance.declared_only) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.declared_only) # revealed: bytes -# TODO: Should be `bool` -reveal_type(c_instance.declared_and_bound) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.declared_and_bound) # revealed: bool # TODO: We already show an error here, but the message might be improved? # error: [unresolved-attribute] @@ -166,6 +159,83 @@ reveal_type(C.inferred_from_value) # revealed: Unknown C.inferred_from_value = "overwritten on class" ``` +#### Variable defined in multiple methods + +If we see multiple un-annotated assignments to a single attribute (`self.x` below), we build the +union of all inferred types (and `Unknown`). If we see multiple conflicting declarations of the same +attribute, that should be an error. + +```py +def get_int() -> int: + return 0 + +def get_str() -> str: + return "a" + +class C: + def __init__(self) -> None: + self.x = get_int() + self.y: int = 1 + + def other_method(self): + self.x = get_str() + + # TODO: this redeclaration should be an error + self.y: str = "a" + +c_instance = C() + +reveal_type(c_instance.x) # revealed: Unknown | int | str + +# TODO: We should probably infer `int | str` here. +reveal_type(c_instance.y) # revealed: int +``` + +#### Attributes defined in tuple unpackings + +```py +class C: + def __init__(self) -> None: + self.x, self.y = (1, "a") + +c_instance = C() + +# TODO: This should be supported (no error; type should be: `Unknown | Literal[1]`) +# error: [unresolved-attribute] +reveal_type(c_instance.x) # revealed: Unknown + +# TODO: This should be supported (no error; type should be: `Unknown | Literal["a"]`) +# error: [unresolved-attribute] +reveal_type(c_instance.y) # revealed: Unknown +``` + +#### Conditionally declared / bound attributes + +We currently do not raise a diagnostic or change behavior if an attribute is only conditionally +defined. This is consistent with what mypy and pyright do. + +```py +def flag() -> bool: + return True + +class C: + def f(self) -> None: + if flag(): + self.a1: str | None = "a" + self.b1 = 1 + if flag(): + def f(self) -> None: + self.a2: str | None = "a" + self.b2 = 1 + +c_instance = C() + +reveal_type(c_instance.a1) # revealed: str | None +reveal_type(c_instance.a2) # revealed: str | None +reveal_type(c_instance.b1) # revealed: Unknown | Literal[1] +reveal_type(c_instance.b2) # revealed: Unknown | Literal[1] +``` + #### Methods that does not use `self` as a first parameter ```py @@ -175,8 +245,7 @@ class C: def __init__(this) -> None: this.declared_and_bound: str | None = "a" -# TODO: should be `str | None` -reveal_type(C().declared_and_bound) # revealed: @Todo(implicit instance attribute) +reveal_type(C().declared_and_bound) # revealed: str | None ``` #### Aliased `self` parameter @@ -187,9 +256,10 @@ class C: this = self this.declared_and_bound: str | None = "a" -# TODO: This would ideally be `str | None`, but mypy/pyright don't support this either, +# This would ideally be `str | None`, but mypy/pyright don't support this either, # so `Unknown` + a diagnostic is also fine. -reveal_type(C().declared_and_bound) # revealed: @Todo(implicit instance attribute) +# error: [unresolved-attribute] +reveal_type(C().declared_and_bound) # revealed: Unknown ``` ### Pure class variables (`ClassVar`) @@ -266,7 +336,7 @@ reveal_type(C.pure_class_variable) # revealed: Unknown c_instance = C() # TODO: should be `Literal["overwritten on class"]` -reveal_type(c_instance.pure_class_variable) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.pure_class_variable) # revealed: Unknown | Literal["value set in class method"] # TODO: should raise an error. c_instance.pure_class_variable = "value set on instance" @@ -360,8 +430,7 @@ reveal_type(Derived.declared_in_body) # revealed: int | None reveal_type(Derived().declared_in_body) # revealed: int | None -# TODO: Should be `str | None` -reveal_type(Derived().defined_in_init) # revealed: @Todo(implicit instance attribute) +reveal_type(Derived().defined_in_init) # revealed: str | None ``` ## Union of attributes @@ -646,6 +715,65 @@ reveal_type(b"foo".join) # revealed: @Todo(bound method) reveal_type(b"foo".endswith) # revealed: @Todo(bound method) ``` +## Instance attribute edge cases + +### Assignment to attribute that does not correspond to the instance + +```py +class C: + def __init__(self, other: "C") -> None: + other.x: str = 1 + +def f(c: C): + # error: [unresolved-attribute] + reveal_type(c.x) # revealed: Unknown +``` + +### Nested classes + +```py +class Outer: + def __init__(self): + self.x: int = 1 + + class Inner: + def __init__(self): + self.x: str = "a" + +reveal_type(Outer().x) # revealed: int +reveal_type(Outer.Inner().x) # revealed: str +``` + +### Shadowing of `self` + +```py +class Other: + x: int = 1 + +class C: + def __init__(self) -> None: + # Weird redeclaration of self + self: Other = Other() + self.x: int = 1 + +# TODO: this should be an error +C().x +``` + +### Assignment to `self` from nested function + +```py +class C: + def __init__(self) -> None: + def set_attribute(value: str): + self.x: str = value + set_attribute("a") + +# TODO: ideally, this would be `str`. Mypy supports this, pyright does not. +# error: [unresolved-attribute] +reveal_type(C().x) # revealed: Unknown +``` + ## References Some of the tests in the *Class and instance variables* section draw inspiration from diff --git a/crates/red_knot_python_semantic/src/semantic_index.rs b/crates/red_knot_python_semantic/src/semantic_index.rs index b975966c2db33..208d35056a927 100644 --- a/crates/red_knot_python_semantic/src/semantic_index.rs +++ b/crates/red_knot_python_semantic/src/semantic_index.rs @@ -11,6 +11,7 @@ use ruff_index::{IndexSlice, IndexVec}; use crate::module_name::ModuleName; use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey; use crate::semantic_index::ast_ids::AstIds; +use crate::semantic_index::attribute_assignment::AttributeAssignment; use crate::semantic_index::builder::SemanticIndexBuilder; use crate::semantic_index::definition::{Definition, DefinitionNodeKey}; use crate::semantic_index::expression::Expression; @@ -21,6 +22,7 @@ use crate::semantic_index::use_def::UseDefMap; use crate::Db; pub mod ast_ids; +pub mod attribute_assignment; mod builder; pub(crate) mod constraint; pub mod definition; @@ -139,6 +141,8 @@ pub(crate) struct SemanticIndex<'db> { /// Flags about the global scope (code usage impacting inference) has_future_annotations: bool, + + attribute_assignments: FxHashMap>>>, } impl<'db> SemanticIndex<'db> { @@ -264,6 +268,17 @@ impl<'db> SemanticIndex<'db> { pub(super) fn has_future_annotations(&self) -> bool { self.has_future_annotations } + + pub(super) fn attribute_assignments( + &self, + db: &dyn Db, + class_body_scope_id: ScopeId, + attribute: &str, + ) -> Option<&[AttributeAssignment<'db>]> { + self.attribute_assignments + .get(&class_body_scope_id.file_scope_id(db)) + .and_then(|assignments| assignments.get(attribute).map(Vec::as_slice)) + } } pub struct AncestorsIter<'a> { diff --git a/crates/red_knot_python_semantic/src/semantic_index/attribute_assignment.rs b/crates/red_knot_python_semantic/src/semantic_index/attribute_assignment.rs new file mode 100644 index 0000000000000..2214132dcdefa --- /dev/null +++ b/crates/red_knot_python_semantic/src/semantic_index/attribute_assignment.rs @@ -0,0 +1,18 @@ +use crate::semantic_index::expression::Expression; +use ruff_db::files::File; + +#[salsa::tracked] +pub(crate) struct AttributeAssignment<'db> { + /// The file in which the attribute assignment occurs. + #[id] + pub(crate) file: File, + + /// The type annotation of the assignment, if available + pub(crate) annotation: Option>, + + /// The expression on the right-hand side of the assignment, if available and relevant + pub(crate) value: Option>, + + #[no_eq] + count: countme::Count>, +} diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder.rs b/crates/red_knot_python_semantic/src/semantic_index/builder.rs index 8b8a6b56a3e50..aefa2219e3b0a 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/builder.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/builder.rs @@ -14,6 +14,7 @@ use crate::ast_node_ref::AstNodeRef; use crate::module_name::ModuleName; use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey; use crate::semantic_index::ast_ids::AstIdsBuilder; +use crate::semantic_index::attribute_assignment::AttributeAssignment; use crate::semantic_index::constraint::PatternConstraintKind; use crate::semantic_index::definition::{ AssignmentDefinitionNodeRef, ComprehensionDefinitionNodeRef, Definition, DefinitionNodeKey, @@ -53,17 +54,26 @@ impl LoopState { } } +#[derive(Clone, Copy, Debug, PartialEq)] +enum BuilderScopeKind { + ClassBody, + FunctionBody, + Other, +} + pub(super) struct SemanticIndexBuilder<'db> { // Builder state db: &'db dyn Db, file: File, module: &'db ParsedModule, - scope_stack: Vec<(FileScopeId, LoopState)>, + scope_stack: Vec<(FileScopeId, LoopState, BuilderScopeKind)>, /// The assignments we're currently visiting, with /// the most recent visit at the end of the Vec current_assignments: Vec>, /// The match case we're currently visiting. current_match_case: Option>, + /// The name of the first function parameter of the innermost function that we're currently visiting. + current_first_parameter_name: Option, /// Flow states at each `break` in the current loop. loop_break_states: Vec, @@ -84,6 +94,7 @@ pub(super) struct SemanticIndexBuilder<'db> { definitions_by_node: FxHashMap>, expressions_by_node: FxHashMap>, imported_modules: FxHashSet, + attribute_assignments: FxHashMap>>>, } impl<'db> SemanticIndexBuilder<'db> { @@ -95,6 +106,7 @@ impl<'db> SemanticIndexBuilder<'db> { scope_stack: Vec::new(), current_assignments: vec![], current_match_case: None, + current_first_parameter_name: None, loop_break_states: vec![], try_node_context_stack_manager: TryNodeContextStackManager::default(), @@ -112,6 +124,8 @@ impl<'db> SemanticIndexBuilder<'db> { expressions_by_node: FxHashMap::default(), imported_modules: FxHashSet::default(), + + attribute_assignments: FxHashMap::default(), }; builder.push_scope_with_parent(NodeWithScopeRef::Module, None); @@ -123,7 +137,7 @@ impl<'db> SemanticIndexBuilder<'db> { *self .scope_stack .last() - .map(|(scope, _)| scope) + .map(|(scope, _, _)| scope) .expect("Always to have a root scope") } @@ -134,6 +148,26 @@ impl<'db> SemanticIndexBuilder<'db> { .1 } + fn current_scope_is_function_body(&self) -> bool { + matches!( + self.scope_stack + .last() + .expect("Always to have a root scope") + .2, + BuilderScopeKind::FunctionBody + ) + } + + fn parent_class_body_scope(&self) -> Option { + if let Some((class_body_id, _, BuilderScopeKind::ClassBody)) = + self.scope_stack.iter().nth_back(1) + { + Some(*class_body_id) + } else { + None + } + } + fn set_inside_loop(&mut self, state: LoopState) { self.scope_stack .last_mut() @@ -171,11 +205,18 @@ impl<'db> SemanticIndexBuilder<'db> { debug_assert_eq!(ast_id_scope, file_scope_id); - self.scope_stack.push((file_scope_id, LoopState::NotInLoop)); + let scope_kind = match node { + NodeWithScopeRef::Class(_) => BuilderScopeKind::ClassBody, + NodeWithScopeRef::Function(_) => BuilderScopeKind::FunctionBody, + _ => BuilderScopeKind::Other, + }; + + self.scope_stack + .push((file_scope_id, LoopState::NotInLoop, scope_kind)); } fn pop_scope(&mut self) -> FileScopeId { - let (id, _) = self.scope_stack.pop().expect("Root scope to be present"); + let (id, _, _) = self.scope_stack.pop().expect("Root scope to be present"); let children_end = self.scopes.next_index(); let scope = &mut self.scopes[id]; scope.descendents = scope.descendents.start..children_end; @@ -457,6 +498,18 @@ impl<'db> SemanticIndexBuilder<'db> { /// Record an expression that needs to be a Salsa ingredient, because we need to infer its type /// standalone (type narrowing tests, RHS of an assignment.) fn add_standalone_expression(&mut self, expression_node: &ast::Expr) -> Expression<'db> { + self.add_standalone_expression_impl(expression_node, false) + } + + fn add_standalone_type_expression(&mut self, expression_node: &ast::Expr) -> Expression<'db> { + self.add_standalone_expression_impl(expression_node, true) + } + + fn add_standalone_expression_impl( + &mut self, + expression_node: &ast::Expr, + infer_as_type_expression: bool, + ) -> Expression<'db> { let expression = Expression::new( self.db, self.file, @@ -465,6 +518,7 @@ impl<'db> SemanticIndexBuilder<'db> { unsafe { AstNodeRef::new(self.module.clone(), expression_node) }, + infer_as_type_expression, countme::Count::default(), ); self.expressions_by_node @@ -668,6 +722,7 @@ impl<'db> SemanticIndexBuilder<'db> { use_def_maps, imported_modules: Arc::new(self.imported_modules), has_future_annotations: self.has_future_annotations, + attribute_assignments: self.attribute_assignments, } } } @@ -706,7 +761,17 @@ where builder.declare_parameters(parameters); + let mut first_parameter_name = parameters + .iter_non_variadic_params() + .next() + .map(|first_param| first_param.parameter.name.id().clone()); + std::mem::swap( + &mut builder.current_first_parameter_name, + &mut first_parameter_name, + ); builder.visit_body(body); + builder.current_first_parameter_name = first_parameter_name; + builder.pop_scope() }, ); @@ -840,6 +905,33 @@ where unpack: None, first: false, }), + ast::Expr::Attribute(ast::ExprAttribute { + value: object, + attr, + .. + }) => { + if let Some(class_body_scope) = self.parent_class_body_scope() { + if object.as_name_expr().as_ref().is_some_and(|name| { + Some(&name.id) == self.current_first_parameter_name.as_ref() + }) && self.current_scope_is_function_body() + { + self.attribute_assignments + .entry(class_body_scope) + .or_default() + .entry(attr.id().as_str().to_owned()) + .or_default() + .push(AttributeAssignment::new( + self.db, + self.file, + None, + Some(value), + countme::Count::default(), + )); + } + } + + None + } _ => None, }; @@ -858,6 +950,7 @@ where ast::Stmt::AnnAssign(node) => { debug_assert_eq!(&self.current_assignments, &[]); self.visit_expr(&node.annotation); + let annotation_expr = self.add_standalone_type_expression(&node.annotation); if let Some(value) = &node.value { self.visit_expr(value); } @@ -869,6 +962,34 @@ where ) { self.push_assignment(node.into()); self.visit_expr(&node.target); + + if let Some(class_body_scope) = self.parent_class_body_scope() { + if let ast::Expr::Attribute(ast::ExprAttribute { + value: object, + attr, + .. + }) = &*node.target + { + if object.as_name_expr().as_ref().is_some_and(|name| { + Some(&name.id) == self.current_first_parameter_name.as_ref() + }) && self.current_scope_is_function_body() + { + self.attribute_assignments + .entry(class_body_scope) + .or_default() + .entry(attr.id().as_str().to_owned()) + .or_default() + .push(AttributeAssignment::new( + self.db, + self.file, + Some(annotation_expr), + None, + countme::Count::default(), + )); + } + } + } + self.pop_assignment(); } else { self.visit_expr(&node.target); diff --git a/crates/red_knot_python_semantic/src/semantic_index/expression.rs b/crates/red_knot_python_semantic/src/semantic_index/expression.rs index 327d40a4b13ef..008bfaf32899e 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/expression.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/expression.rs @@ -35,6 +35,9 @@ pub(crate) struct Expression<'db> { #[return_ref] pub(crate) node_ref: AstNodeRef, + #[id] + pub(crate) infer_as_type_expression: bool, + #[no_eq] count: countme::Count>, } diff --git a/crates/red_knot_python_semantic/src/types.rs b/crates/red_knot_python_semantic/src/types.rs index a056c176c7cd0..7e97bef195c6c 100644 --- a/crates/red_knot_python_semantic/src/types.rs +++ b/crates/red_knot_python_semantic/src/types.rs @@ -4135,7 +4135,52 @@ impl<'db> Class<'db> { // TODO: The symbol is not present in any class body, but it could be implicitly // defined in `__init__` or other methods anywhere in the MRO. - todo_type!("implicit instance attribute").into() + SymbolAndQualifiers(Symbol::Unbound, TypeQualifiers::empty()) + } + + /// Tries to find declarations/bindings of an instance attribute named `name` that are + /// only "implicitly" defined in a method of the class that corresponds to `body_scope`. + fn implicit_instance_attribute( + db: &'db dyn Db, + class_body_scope: ScopeId<'db>, + name: &str, + ) -> Option> { + let index = semantic_index(db, class_body_scope.file(db)); + let mut union_of_inferred_types = UnionBuilder::new(db).add(Type::unknown()); + + for attribute_assignment in index.attribute_assignments(db, class_body_scope, name)? { + if let Some(annotation_expr) = attribute_assignment.annotation(db) { + // We found an annotated assignment of one of the following forms (using 'self' in these + // examples, but we support arbitrary names for the first parameters of methods): + // + // self.name: type + // self.name: type = + + let inference = infer_expression_types(db, annotation_expr); + let expr_scope = annotation_expr.scope(db); + let annotation_ty = inference.expression_type( + annotation_expr + .node_ref(db) + .scoped_expression_id(db, expr_scope), + ); + + // TODO: check if there are conflicting declarations + return Some(annotation_ty); + } else if let Some(value_expr) = attribute_assignment.value(db) { + // We found an assignment of the form: + // + // self.name = + + let inference = infer_expression_types(db, value_expr); + let expr_scope = value_expr.scope(db); + let inferred_ty = inference + .expression_type(value_expr.node_ref(db).scoped_expression_id(db, expr_scope)); + + union_of_inferred_types = union_of_inferred_types.add(inferred_ty); + } + } + + Some(union_of_inferred_types.build()) } /// A helper function for `instance_member` that looks up the `name` attribute only on @@ -4148,6 +4193,7 @@ impl<'db> Class<'db> { // - The descriptor protocol let body_scope = self.body_scope(db); + let table = symbol_table(db, body_scope); if let Some(symbol_id) = table.symbol_id_by_name(name) { @@ -4171,6 +4217,10 @@ impl<'db> Class<'db> { } } Ok(symbol @ SymbolAndQualifiers(Symbol::Unbound, qualifiers)) => { + if let Some(ty) = Self::implicit_instance_attribute(db, body_scope, name) { + return ty.into(); + } + let bindings = use_def.public_bindings(symbol_id); let inferred = symbol_from_bindings(db, bindings); @@ -4185,6 +4235,10 @@ impl<'db> Class<'db> { } } } else { + if let Some(ty) = Self::implicit_instance_attribute(db, body_scope, name) { + return ty.into(); + } + Symbol::Unbound.into() } } diff --git a/crates/red_knot_python_semantic/src/types/infer.rs b/crates/red_knot_python_semantic/src/types/infer.rs index 7dd3b257474f3..905c88cf6ed71 100644 --- a/crates/red_knot_python_semantic/src/types/infer.rs +++ b/crates/red_knot_python_semantic/src/types/infer.rs @@ -823,7 +823,11 @@ impl<'db> TypeInferenceBuilder<'db> { } fn infer_region_expression(&mut self, expression: Expression<'db>) { - self.infer_expression_impl(expression.node_ref(self.db())); + if expression.infer_as_type_expression(self.db()) { + self.infer_type_expression(expression.node_ref(self.db())); + } else { + self.infer_expression_impl(expression.node_ref(self.db())); + } } /// Raise a diagnostic if the given type cannot be divided by zero.