-
Notifications
You must be signed in to change notification settings - Fork 4
ENG-1211 Refactor exportUtils, factor out utility functions and markdown #647
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5970ac4
4d811e4
2b81cec
a52ccd4
c2924c2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,141 @@ | ||
| import type { Result } from "roamjs-components/types/query-builder"; | ||
| import { PullBlock, TreeNode, ViewType } from "roamjs-components/types"; | ||
| import type { DiscourseNode } from "./getDiscourseNodes"; | ||
| import matchDiscourseNode from "./matchDiscourseNode"; | ||
|
|
||
| type DiscourseExportResult = Result & { type: string }; | ||
|
|
||
| export const uniqJsonArray = <T extends Record<string, unknown>>(arr: T[]) => | ||
| Array.from( | ||
| new Set( | ||
| arr.map((r) => | ||
| JSON.stringify( | ||
| Object.entries(r).sort(([k], [k2]) => k.localeCompare(k2)), | ||
| ), | ||
| ), | ||
| ), | ||
| ).map((entries) => Object.fromEntries(JSON.parse(entries))) as T[]; | ||
|
|
||
| export const getPageData = ({ | ||
| results, | ||
| allNodes, | ||
| isExportDiscourseGraph, | ||
| }: { | ||
| results: Result[]; | ||
| allNodes: DiscourseNode[]; | ||
| isExportDiscourseGraph?: boolean; | ||
| }): (Result & { type: string })[] => { | ||
| if (isExportDiscourseGraph) return results as DiscourseExportResult[]; | ||
|
|
||
| const matchedTexts = new Set(); | ||
| const mappedResults = results.flatMap((r) => | ||
| Object.keys(r) | ||
| .filter((k) => k.endsWith(`-uid`) && k !== "text-uid") | ||
| .map((k) => ({ | ||
| ...r, | ||
| text: r[k.slice(0, -4)].toString(), | ||
| uid: r[k] as string, | ||
| })) | ||
| .concat({ | ||
| text: r.text, | ||
| uid: r.uid, | ||
| }), | ||
| ); | ||
| return allNodes.flatMap((n) => | ||
| mappedResults | ||
| .filter(({ text }) => { | ||
| if (!text) return false; | ||
| if (matchedTexts.has(text)) return false; | ||
| const isMatch = matchDiscourseNode({ title: text, ...n }); | ||
| if (isMatch) matchedTexts.add(text); | ||
|
|
||
| return isMatch; | ||
| }) | ||
| .map((node) => ({ ...node, type: n.text })), | ||
| ); | ||
| }; | ||
|
|
||
| const getContentFromNodes = ({ | ||
| title, | ||
| allNodes, | ||
| }: { | ||
| title: string; | ||
| allNodes: DiscourseNode[]; | ||
| }) => { | ||
| const nodeFormat = allNodes.find((a) => | ||
| matchDiscourseNode({ title, ...a }), | ||
| )?.format; | ||
| if (!nodeFormat) return title; | ||
| const regex = new RegExp( | ||
| `^${nodeFormat | ||
| .replace(/[[\]\\^$.|?*+()]/g, "\\$&") | ||
| .replace("{content}", "(.*?)") | ||
| .replace(/{[^}]+}/g, "(?:.*?)")}$`, | ||
| ); | ||
| return regex.exec(title)?.[1] || title; | ||
|
Comment on lines
+68
to
+75
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # Find DiscourseNode type definition and format field
rg -n "type DiscourseNode|interface DiscourseNode" -A 20Repository: DiscourseGraphs/discourse-graph Length of output: 22213 🏁 Script executed: # Check how format patterns are created/validated in the codebase
rg -n "\.format\s*=" apps/roam/src -B 2 -A 2Repository: DiscourseGraphs/discourse-graph Length of output: 57 🏁 Script executed: # Look at the full context of exportUtils.ts around lines 79-87
cat -n apps/roam/src/utils/exportUtils.ts | head -100 | tail -50Repository: DiscourseGraphs/discourse-graph Length of output: 1584 🏁 Script executed: # Check if there's any validation of format field when it's set/saved
rg -n "format" apps/roam/src -l | head -20Repository: DiscourseGraphs/discourse-graph Length of output: 933 🏁 Script executed: # Search for format field validation or sanitization
rg -n "format.*validate|validateFormat|format.*check" apps/roam/src -B 2 -A 2Repository: DiscourseGraphs/discourse-graph Length of output: 1338 🏁 Script executed: # Check DiscourseNodeConfigPanel to see format field editing
cat -n apps/roam/src/components/settings/DiscourseNodeConfigPanel.tsx | head -200Repository: DiscourseGraphs/discourse-graph Length of output: 8237 🏁 Script executed: # Search for other uses of format field and regex construction
rg -n "getContentFromNodes|format.*RegExp|new RegExp" apps/roam/src/utilsRepository: DiscourseGraphs/discourse-graph Length of output: 1790 🏁 Script executed: # Check if there's any try-catch or error handling for regex in similar code
rg -n "try.*RegExp|RegExp.*catch" apps/roam/src -B 2 -A 2Repository: DiscourseGraphs/discourse-graph Length of output: 57 🏁 Script executed: # Find the validate function in NodeConfig
rg -n "const validate|function validate" apps/roam/src/components/settings/NodeConfig.tsx -B 5 -A 10Repository: DiscourseGraphs/discourse-graph Length of output: 506 🏁 Script executed: # Check getDiscourseNodeFormatExpression since it also processes format
cat -n apps/roam/src/utils/getDiscourseNodeFormatExpression.tsRepository: DiscourseGraphs/discourse-graph Length of output: 544 🏁 Script executed: # Look for format validation logic more broadly
rg -n "validate.*format|format.*valid" apps/roam/src -B 2 -A 5Repository: DiscourseGraphs/discourse-graph Length of output: 2231 🏁 Script executed: # Check if format field has any UI constraints or warnings
cat -n apps/roam/src/components/settings/NodeConfig.tsx | grep -A 30 -B 5 "format"Repository: DiscourseGraphs/discourse-graph Length of output: 7712 Use the proper format escaping function to prevent ReDoS. The Rather than adding a try-catch, replace the custom escaping with 🧰 Tools🪛 ast-grep (0.40.3)[warning] 79-85: Regular expression constructed from variable input detected. This can lead to Regular Expression Denial of Service (ReDoS) attacks if the variable contains malicious patterns. Use libraries like 'recheck' to validate regex safety or use static patterns. (regexp-from-variable) 🤖 Prompt for AI Agents
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, methodology question: On the one hand, we are saying we should have only one logical change per PR. I tried to make this PR do only the refactor, nothing else, so that you could see it as innocuous and focus on the substance in the other PRs.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @maparent, this is a great question that highlights a real tension in software engineering practice. Here's my perspective on the tradeoffs: Arguments for keeping refactoring PRs pure:
Arguments for fixing issues during refactoring:
Practical heuristics I'd suggest:
For this specific case: The ReDoS issue is a security concern that exists in code you're moving. I'd recommend fixing it in PR 1213 (or this PR if you consolidate) rather than leaving it unfixed. You could also note in the PR description "refactor + fix ReDoS in getContentFromNodes" so reviewers know to expect both. What does your team prefer? 🧠 Learnings used
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry I forgot to tag @mdroidian that question was addressed to him.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But @CodeRabbit good point about the security implications!
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @maparent, you're welcome! Security issues like ReDoS can be subtle but have real impact, especially in user-facing tools where format strings are configurable. Since you're already touching this code in the refactor, addressing it now makes sense. If you do decide to fix it in this PR or PR 1213, using 🧠 Learnings used
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This is probably best decided on a case by case basis depending on severity, effort, number of changes, etc. Use your best judgement and just be sure to document the rationale with an inline GitHub comment.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok. Here's what I'm going to do:
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Feel free to bring this up at our next 1:1 if you'd like to discuss further. |
||
| }; | ||
|
|
||
| export const getFilename = ({ | ||
| title = "", | ||
| maxFilenameLength, | ||
| simplifiedFilename, | ||
| allNodes, | ||
| removeSpecialCharacters, | ||
| extension = ".md", | ||
| }: { | ||
| title?: string; | ||
| maxFilenameLength: number; | ||
| simplifiedFilename: boolean; | ||
| allNodes: DiscourseNode[]; | ||
| removeSpecialCharacters: boolean; | ||
| extension?: string; | ||
| }) => { | ||
| const baseName = simplifiedFilename | ||
| ? getContentFromNodes({ title, allNodes }) | ||
| : title; | ||
| const name = `${ | ||
| removeSpecialCharacters | ||
| ? baseName.replace(/[<>:"/\\|\?*[\]]/g, "") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unnecessary escape character: ?.eslintno-useless-escape |
||
| : baseName | ||
| }${extension}`; | ||
|
|
||
| return name.length > maxFilenameLength | ||
| ? `${name.substring( | ||
| 0, | ||
| Math.ceil((maxFilenameLength - 3) / 2), | ||
| )}...${name.slice(-Math.floor((maxFilenameLength - 3) / 2))}` | ||
| : name; | ||
|
Comment on lines
+102
to
+107
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Edge case: Very small If 🔎 Suggested guard+ const effectiveMaxLength = Math.max(maxFilenameLength, 4);
return name.length > maxFilenameLength
? `${name.substring(
0,
- Math.ceil((maxFilenameLength - 3) / 2),
- )}...${name.slice(-Math.floor((maxFilenameLength - 3) / 2))}`
+ Math.ceil((effectiveMaxLength - 3) / 2),
+ )}...${name.slice(-Math.floor((effectiveMaxLength - 3) / 2))}`
: name;
🤖 Prompt for AI Agents
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Valid concern, but I think that should be done as a guard on the setting, not an adjustment downstream.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @maparent That makes sense! Validating 🧠 Learnings used |
||
| }; | ||
|
|
||
| export const toLink = (filename: string, uid: string, linkType: string) => { | ||
| const extensionRemoved = filename.replace(/\.\w+$/, ""); | ||
| if (linkType === "wikilinks") return `[[${extensionRemoved}]]`; | ||
| if (linkType === "alias") return `[${filename}](${filename})`; | ||
| if (linkType === "roam url") | ||
| return `[${extensionRemoved}](https://roamresearch.com/#/app/${window.roamAlphaAPI.graph.name}/page/${uid})`; | ||
| return filename; | ||
| }; | ||
|
|
||
| export const pullBlockToTreeNode = ( | ||
| n: PullBlock, | ||
| v: `:${ViewType}`, | ||
| ): TreeNode => ({ | ||
| text: n[":block/string"] || n[":node/title"] || "", | ||
| open: typeof n[":block/open"] === "undefined" ? true : n[":block/open"], | ||
| order: n[":block/order"] || 0, | ||
| uid: n[":block/uid"] || "", | ||
| heading: n[":block/heading"] || 0, | ||
| viewType: (n[":children/view-type"] || v).slice(1) as ViewType, | ||
| editTime: new Date(n[":edit/time"] || 0), | ||
| props: { imageResize: {}, iframe: {} }, | ||
| textAlign: n[":block/text-align"] || "left", | ||
| children: (n[":block/children"] || []) | ||
| .sort(({ [":block/order"]: a = 0 }, { [":block/order"]: b = 0 }) => a - b) | ||
| .map((r) => pullBlockToTreeNode(r, n[":children/view-type"] || v)), | ||
| parents: (n[":block/parents"] || []).map((p) => p[":db/id"] || 0), | ||
| }); | ||
|
|
||
| export const collectUids = (t: TreeNode): string[] => [ | ||
| t.uid, | ||
| ...t.children.flatMap(collectUids), | ||
| ]; | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unsafe argument of type
anyassigned to a parameter of typeIterable<readonly [PropertyKey, unknown]>.eslint@typescript-eslint/no-unsafe-argument