/**
 * Computes the CSS path of `element` and reports how long the computation took.
 *
 * The elapsed time (difference of two `performance.now()` readings) is recorded
 * under the `autocapturePlugin.getElementPath` histogram when a diagnostics
 * client is configured; without one, only the path is computed.
 *
 * @param element - The element whose path should be generated.
 * @returns The selector string produced by `cssPath` for `element`.
 */
getElementPath = (element: Element): string => {
  const startedAt = performance.now();
  const path = cssPath(element);
  const elapsedMs = performance.now() - startedAt;

  this.diagnosticsClient?.recordHistogram('autocapturePlugin.getElementPath', elapsedMs);
  return path;
};
event properties for the given element. getEventProperties = (actionType: ActionType, element: Element, dataAttributePrefix: string) => { /* istanbul ignore next */ @@ -147,6 +159,7 @@ export class DataExtractor { [constants.AMPLITUDE_EVENT_PROP_ELEMENT_POSITION_LEFT]: rect.left == null ? null : Math.round(rect.left), [constants.AMPLITUDE_EVENT_PROP_ELEMENT_POSITION_TOP]: rect.top == null ? null : Math.round(rect.top), [constants.AMPLITUDE_EVENT_PROP_ELEMENT_ATTRIBUTES]: attributes, + [constants.AMPLITUDE_EVENT_PROP_ELEMENT_PATH]: this.getElementPath(element), [constants.AMPLITUDE_EVENT_PROP_ELEMENT_PARENT_LABEL]: nearestLabel, [constants.AMPLITUDE_EVENT_PROP_PAGE_URL]: getDecodeURI(window.location.href.split('?')[0]), [constants.AMPLITUDE_EVENT_PROP_PAGE_TITLE]: ( diff --git a/packages/plugin-autocapture-browser/src/libs/element-path.ts b/packages/plugin-autocapture-browser/src/libs/element-path.ts new file mode 100644 index 000000000..25294113f --- /dev/null +++ b/packages/plugin-autocapture-browser/src/libs/element-path.ts @@ -0,0 +1,178 @@ +// Code is adapted from The Chromium Authors. +// Source: https://github.com/ChromeDevTools/devtools-frontend/blob/main/front_end/panels/elements/DOMPath.ts#L14 +// License: BSD-style license +// +// Copyright 2014 The Chromium Authors +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
// nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

/**
 * One segment of a CSS path (the selector for a single element).
 *
 * `optimized` marks a segment that ends the upward walk: once such a step is
 * produced, `cssPath` does not visit any further ancestors.
 */
class Step {
  constructor(public readonly value: string, public readonly optimized: boolean) {}

  /** Returns the raw selector text so a step can be used directly in string contexts. */
  toString(): string {
    return this.value;
  }
}

/** Builds an escaped `#id` selector. */
function idSelector(id: string): string {
  return '#' + CSS.escape(id);
}

/**
 * Returns the whitespace-separated tokens of the element's `class` attribute,
 * in document order and with duplicates preserved. Reads the attribute rather
 * than `classList`/`className` so the result is always a list of strings.
 */
function elementClassNames(el: Element): string[] {
  const classAttribute = el.getAttribute('class');
  return classAttribute ? classAttribute.split(/\s+/).filter(Boolean) : [];
}

/**
 * Builds a CSS path for `node` by walking up its ancestors and joining one
 * selector per element with `' > '`.
 *
 * The walk stops at the first ancestor that yields a terminating step: an
 * element with an `id`, an element whose parent is missing or is the document,
 * or (when `optimized` is truthy) `html`/`head`/`body`.
 *
 * @param node - The element to describe.
 * @param optimized - When truthy, an element with an id contributes just `#id`
 *   (no tag name) and `html`/`head`/`body` end the path immediately.
 * @returns The path, or `''` when `node` is null/undefined or not an element node.
 */
export const cssPath = function (node: Element, optimized?: boolean): string {
  // The signature promises an Element, but callers may forward loosely typed
  // event targets; return an empty path instead of throwing.
  if (!node || node.nodeType !== Node.ELEMENT_NODE) {
    return '';
  }

  const wantOptimized = Boolean(optimized);
  const steps: Step[] = [];

  for (let current: Element | null = node; current; current = current.parentElement) {
    const step = cssPathStep(current, wantOptimized, current === node);
    if (!step) {
      break; // not an element: nothing more can be described
    }
    steps.push(step);
    if (step.optimized) {
      break; // terminating step: ancestors above it are not included
    }
  }

  // Steps were collected target-first; the path reads root-first.
  return steps
    .reverse()
    .map((step) => step.value)
    .join(' > ');
};

/**
 * Produces the selector segment for a single element.
 *
 * @param node - The element to describe.
 * @param optimized - See `cssPath`.
 * @param isTargetNode - True only for the element the path was requested for;
 *   enables the `input[type=...]` refinement.
 * @returns The step, or `null` when `node` is not an element node.
 */
const cssPathStep = function (node: Element, optimized: boolean, isTargetNode: boolean): Step | null {
  if (node.nodeType !== Node.ELEMENT_NODE) {
    return null;
  }

  const nodeName = node.tagName.toLowerCase();
  const id = node.getAttribute('id');

  // An id always terminates the path; optimized mode additionally drops the tag name.
  if (id) {
    return new Step(optimized ? idSelector(id) : nodeName + idSelector(id), true);
  }
  if (optimized && (nodeName === 'body' || nodeName === 'head' || nodeName === 'html')) {
    return new Step(nodeName, true);
  }

  const parent = node.parentNode;
  if (!parent || parent.nodeType === Node.DOCUMENT_NODE) {
    return new Step(nodeName, true);
  }

  const ownClassNames = elementClassNames(node);
  let ownIndex = -1;
  let hasSameTagSibling = false;
  let needsNthChild = false;

  // `children` holds element nodes only, so a position in it is the element index.
  const siblings: HTMLCollection = parent.children;
  // Defensive: tolerate a parent that does not expose `children`.
  const siblingCount = siblings ? siblings.length : 0;

  // Scan until the node's own index is known AND it is settled that nth-child
  // is required; otherwise scan every sibling.
  for (let i = 0; i < siblingCount && (ownIndex === -1 || !needsNthChild); ++i) {
    const sibling = siblings[i];
    if (sibling === node) {
      ownIndex = i;
      continue;
    }
    if (needsNthChild || sibling.tagName.toLowerCase() !== nodeName) {
      continue;
    }

    hasSameTagSibling = true;
    // Class names cannot tell the node apart from a same-tag sibling that
    // carries every one of the node's classes (trivially so when the node has
    // none); in that case the position has to be used instead.
    const siblingClassNames = new Set(elementClassNames(sibling));
    needsNthChild = ownClassNames.every((name) => siblingClassNames.has(name));
  }

  let result = nodeName;

  // `id` is known to be empty here, so only the class attribute needs checking.
  const type = node.getAttribute('type');
  if (isTargetNode && nodeName === 'input' && type && !node.getAttribute('class')) {
    result += '[type=' + CSS.escape(type) + ']';
  }

  if (needsNthChild) {
    result += ':nth-child(' + String(ownIndex + 1) + ')';
  } else if (hasSameTagSibling) {
    for (const name of ownClassNames) {
      result += '.' + CSS.escape(name);
    }
  }

  return new Step(result, false);
};
-function findRootDocument(rootNode: Element | Document, defaults: Options) { - if (rootNode.nodeType === Node.DOCUMENT_NODE) { - return rootNode; - } - if (rootNode === defaults.root) { - return rootNode.ownerDocument; - } - return rootNode; -} - -function bottomUpSearch( - input: Element, - limit: 'all' | 'two' | 'one' | 'none', - fallback?: () => Path | null, -): Path | null { - let path: Path | null = null; - const stack: Knot[][] = []; - let current: Element | null = input; - let i = 0; - while (current) { - let level: Knot[] = maybe(id(current)) || - maybe(...attr(current)) || - maybe(...classNames(current)) || - maybe(tagName(current)) || [any()]; - const nth = index(current); - if (limit == 'all') { - if (nth) { - level = level.concat(level.filter(dispensableNth).map((node) => nthChild(node, nth))); - } - } else if (limit == 'two') { - level = level.slice(0, 1); - if (nth) { - level = level.concat(level.filter(dispensableNth).map((node) => nthChild(node, nth))); - } - } else if (limit == 'one') { - const [node] = (level = level.slice(0, 1)); - if (nth && dispensableNth(node)) { - level = [nthChild(node, nth)]; - } - } else if (limit == 'none') { - level = [any()]; - if (nth) { - level = [nthChild(level[0], nth)]; - } - } - for (const node of level) { - node.level = i; - } - stack.push(level); - if (stack.length >= config.seedMinLength) { - path = findUniquePath(stack, fallback); - if (path) { - break; - } - } - current = current.parentElement; - i++; - } - if (!path) { - path = findUniquePath(stack, fallback); - } - if (!path && fallback) { - return fallback(); - } - return path; -} - -function findUniquePath(stack: Knot[][], fallback?: () => Path | null): Path | null { - // Check first the total number of combinations first since generating the combinations can cause memory exhaustion - const numCombinations = stack.reduce((acc, i) => acc * i.length, 1); - if (numCombinations > config.threshold) { - return fallback ? 
fallback() : null; - } - - const paths = sort(combinations(stack)); - for (const candidate of paths) { - if (unique(candidate)) { - return candidate; - } - } - return null; -} - -function selector(path: Path): string { - let node = path[0]; - let query = node.name; - for (let i = 1; i < path.length; i++) { - const level = path[i].level || 0; - if (node.level === level - 1) { - query = `${path[i].name} > ${query}`; - } else { - query = `${path[i].name} ${query}`; - } - node = path[i]; - } - return query; -} - -function penalty(path: Path): number { - return path.map((node) => node.penalty).reduce((acc, i) => acc + i, 0); -} - -function unique(path: Path) { - const css = selector(path); - switch (rootDocument.querySelectorAll(css).length) { - case 0: - throw new Error(`Can't select any node with this selector: ${css}`); - case 1: - return true; - default: - return false; - } -} - -function id(input: Element): Knot | null { - const elementId = input.getAttribute('id'); - if (elementId && config.idName(elementId)) { - return { - name: '#' + CSS.escape(elementId), - penalty: 0, - }; - } - return null; -} - -function attr(input: Element): Knot[] { - const attrs = Array.from(input.attributes).filter((attr) => config.attr(attr.name, attr.value)); - return attrs.map( - (attr): Knot => ({ - name: `[${CSS.escape(attr.name)}="${CSS.escape(attr.value)}"]`, - penalty: 0.5, - }), - ); -} - -function classNames(input: Element): Knot[] { - const names = Array.from(input.classList).filter(config.className); - return names.map( - (name): Knot => ({ - name: '.' 
+ CSS.escape(name), - penalty: 1, - }), - ); -} - -function tagName(input: Element): Knot | null { - const name = input.tagName.toLowerCase(); - if (config.tagName(name)) { - return { - name, - penalty: 2, - }; - } - return null; -} - -function any(): Knot { - return { - name: '*', - penalty: 3, - }; -} - -function index(input: Element): number | null { - const parent = input.parentNode; - if (!parent) { - return null; - } - let child = parent.firstChild; - if (!child) { - return null; - } - let i = 0; - while (child) { - if (child.nodeType === Node.ELEMENT_NODE) { - i++; - } - if (child === input) { - break; - } - child = child.nextSibling; - } - return i; -} - -function nthChild(node: Knot, i: number): Knot { - return { - name: node.name + `:nth-child(${i})`, - penalty: node.penalty + 1, - }; -} - -function dispensableNth(node: Knot) { - return node.name !== 'html' && !node.name.startsWith('#'); -} - -function maybe(...level: (Knot | null)[]): Knot[] | null { - const list = level.filter(notEmpty); - if (list.length > 0) { - return list; - } - return null; -} - -function notEmpty(value: T | null | undefined): value is T { - return value !== null && value !== undefined; -} - -function* combinations(stack: Knot[][], path: Knot[] = []): Generator { - if (stack.length > 0) { - for (const node of stack[0]) { - yield* combinations(stack.slice(1, stack.length), path.concat(node)); - } - } else { - yield path; - } -} - -function sort(paths: Iterable): Path[] { - return [...paths].sort((a, b) => penalty(a) - penalty(b)); -} - -type Scope = { - counter: number; - visited: Map; -}; - -function* optimize( - path: Path, - input: Element, - scope: Scope = { - counter: 0, - visited: new Map(), - }, -): Generator { - if (path.length > 2 && path.length > config.optimizedMinLength) { - for (let i = 1; i < path.length - 1; i++) { - if (scope.counter > config.maxNumberOfTries) { - return; // Okay At least I tried! 
- } - scope.counter += 1; - const newPath = [...path]; - newPath.splice(i, 1); - const newPathKey = selector(newPath); - if (scope.visited.has(newPathKey)) { - return; - } - if (unique(newPath) && same(newPath, input)) { - yield newPath; - scope.visited.set(newPathKey, true); - yield* optimize(newPath, input, scope); - } - } - } -} - -function same(path: Path, input: Element) { - return rootDocument.querySelector(selector(path)) === input; -}