Skip to content

Commit b5a5627

Browse files
committed
Address PR feedback
1 parent fd4368a commit b5a5627

File tree

5 files changed

+117
-30
lines changed

5 files changed

+117
-30
lines changed

codegen/core/build.gradle.kts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,4 @@ dependencies {
1515
implementation(libs.smithy.protocol.test.traits)
1616
// We have this because we're using RestJson1 as a 'generic' protocol.
1717
implementation(libs.smithy.aws.traits)
18-
implementation(libs.jsoup)
19-
implementation(libs.commonmark)
2018
}

codegen/core/src/main/java/software/amazon/smithy/python/codegen/generators/SetupGenerator.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -161,11 +161,6 @@ private static void writePyproject(
161161
requires = ["hatchling"]
162162
build-backend = "hatchling.build"
163163
164-
[tool.hatch.build]
165-
exclude = [
166-
"tests",
167-
]
168-
169164
[tool.pyright]
170165
typeCheckingMode = "strict"
171166
reportPrivateUsage = false

codegen/core/src/main/java/software/amazon/smithy/python/codegen/writer/MarkdownConverter.java

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import java.io.IOException;
99
import java.io.InputStreamReader;
1010
import java.nio.charset.StandardCharsets;
11+
import java.util.List;
1112
import java.util.concurrent.TimeUnit;
1213
import java.util.regex.Matcher;
1314
import java.util.regex.Pattern;
@@ -27,8 +28,12 @@
2728
@SmithyInternalApi
2829
public final class MarkdownConverter {
2930

31+
private static final int PANDOC_WRAP_COLUMNS = 72;
3032
private static final int TIMEOUT_SECONDS = 10;
3133

34+
// List of HTML tags to exclude from documentation (including their content).
35+
private static final List<String> EXCLUDED_TAGS = List.of("fullname");
36+
3237
// Private constructor to prevent instantiation
3338
private MarkdownConverter() {}
3439

@@ -49,6 +54,8 @@ public static String convert(String input, GenerationContext context) {
4954
}
5055

5156
try {
57+
input = preProcessPandocInput(input);
58+
5259
if (!CodegenUtils.isAwsService(context)) {
5360
// Commonmark may include embedded HTML so we first normalize the input to HTML format
5461
input = convertWithPandoc(input, "commonmark", "html");
@@ -64,6 +71,39 @@ public static String convert(String input, GenerationContext context) {
6471
}
6572
}
6673

74+
/**
75+
* Pre-processes input before passing to pandoc.
76+
*
77+
* @param input The raw input text
78+
* @return Pre-processed input ready for pandoc conversion
79+
*/
80+
private static String preProcessPandocInput(String input) {
81+
// Trim leading and trailing spaces in hrefs i.e href=" https://example.com "
82+
Pattern p = Pattern.compile("href\\s*=\\s*\"([^\"]*)\"");
83+
input = p.matcher(input).replaceAll(match -> "href=\"" + match.group(1).trim() + "\"");
84+
85+
// Remove excluded HTML tags and their content
86+
for (String tagName : EXCLUDED_TAGS) {
87+
input = removeHtmlTag(input, tagName);
88+
}
89+
90+
return input;
91+
}
92+
93+
/**
94+
* Removes HTML tags and their content from the input string
95+
*
96+
* @param text The text to process
97+
* @param tagName The tag name to remove (e.g., "fullname")
98+
* @return Text with tags and their content removed
99+
*/
100+
private static String removeHtmlTag(String text, String tagName) {
101+
// Remove <tag>content</tag> completely
102+
Pattern p = Pattern.compile(
103+
"<" + Pattern.quote(tagName) + ">[\\s\\S]*?</" + Pattern.quote(tagName) + ">");
104+
return p.matcher(text).replaceAll("");
105+
}
106+
67107
/**
68108
* Calls pandoc CLI to convert documentation.
69109
*
@@ -81,7 +121,7 @@ private static String convertWithPandoc(String input, String fromFormat, String
81121
"--from=" + fromFormat,
82122
"--to=" + toFormat,
83123
"--wrap=auto",
84-
"--columns=72");
124+
"--columns=" + PANDOC_WRAP_COLUMNS);
85125
processBuilder.redirectErrorStream(true);
86126

87127
Process process = processBuilder.start();
@@ -118,6 +158,12 @@ private static String convertWithPandoc(String input, String fromFormat, String
118158
return output.toString();
119159
}
120160

161+
/**
162+
* Post-processes pandoc output for Python docstrings.
163+
*
164+
* @param output The raw output from pandoc
165+
* @return Post-processed Markdown suitable for Python docstrings
166+
*/
121167
private static String postProcessPandocOutput(String output) {
122168
// Remove empty lines at the start and end
123169
output = output.trim();

codegen/core/src/test/java/software/amazon/smithy/python/codegen/writer/MarkdownConverterTest.java

Lines changed: 70 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public class MarkdownConverterTest {
1919

2020
@Test
2121
public void testConvertHtmlToMarkdownWithTitleAndParagraph() {
22-
String html = "<html><body><h1>Title</h1><p>Paragraph</p></body></html>";
22+
String html = "<h1>Title</h1><p>Paragraph</p>";
2323
String result = MarkdownConverter.convert(html, createMockContext(true));
2424
// Should contain markdown heading and paragraph
2525
String expected = """
@@ -31,7 +31,7 @@ public void testConvertHtmlToMarkdownWithTitleAndParagraph() {
3131

3232
@Test
3333
public void testConvertHtmlToMarkdownWithList() {
34-
String html = "<html><body><ul><li>Item 1</li><li>Item 2</li></ul></body></html>";
34+
String html = "<ul><li>Item 1</li><li>Item 2</li></ul>";
3535
String result = MarkdownConverter.convert(html, createMockContext(true));
3636
// Should contain markdown list
3737
String expected = """
@@ -42,35 +42,35 @@ public void testConvertHtmlToMarkdownWithList() {
4242

4343
@Test
4444
public void testConvertHtmlToMarkdownWithBoldTag() {
45-
String html = "<html><body><b>Bold text</b></body></html>";
45+
String html = "<b>Bold text</b>";
4646
String result = MarkdownConverter.convert(html, createMockContext(true));
4747
assertEquals("**Bold text**", result);
4848
}
4949

5050
@Test
5151
public void testConvertHtmlToMarkdownWithItalicTag() {
52-
String html = "<html><body><i>Italic text</i></body></html>";
52+
String html = "<i>Italic text</i>";
5353
String result = MarkdownConverter.convert(html, createMockContext(true));
5454
assertEquals("*Italic text*", result);
5555
}
5656

5757
@Test
5858
public void testConvertHtmlToMarkdownWithCodeTag() {
59-
String html = "<html><body><code>code snippet</code></body></html>";
59+
String html = "<code>code snippet</code>";
6060
String result = MarkdownConverter.convert(html, createMockContext(true));
6161
assertEquals("`code snippet`", result);
6262
}
6363

6464
@Test
6565
public void testConvertHtmlToMarkdownWithAnchorTag() {
66-
String html = "<html><body><a href='https://example.com'>Link</a></body></html>";
66+
String html = "<a href=\"https://example.com\">Link</a>";
6767
String result = MarkdownConverter.convert(html, createMockContext(true));
6868
assertEquals("[Link](https://example.com)", result);
6969
}
7070

7171
@Test
7272
public void testConvertHtmlToMarkdownWithNestedList() {
73-
String html = "<html><body><ul><li>Item 1<ul><li>Subitem 1</li></ul></li><li>Item 2</li></ul></body></html>";
73+
String html = "<ul><li>Item 1<ul><li>Subitem 1</li></ul></li><li>Item 2</li></ul>";
7474
String result = MarkdownConverter.convert(html, createMockContext(true));
7575
// Should contain nested markdown list with proper indentation
7676
String expected = """
@@ -83,7 +83,7 @@ public void testConvertHtmlToMarkdownWithNestedList() {
8383
@Test
8484
public void testConvertHtmlToMarkdownWithFormatSpecifierCharacters() {
8585
// Test that Smithy format specifier characters ($) are properly escaped
86-
String html = "<html><body><p>Testing $placeholderOne and $placeholderTwo</p></body></html>";
86+
String html = "<p>Testing $placeholderOne and $placeholderTwo</p>";
8787
String result = MarkdownConverter.convert(html, createMockContext(true));
8888
// $ should be escaped to $$
8989
assertEquals("Testing $$placeholderOne and $$placeholderTwo", result);
@@ -122,23 +122,23 @@ public void testConvertPureCommonmarkForNonAwsService() {
122122
@Test
123123
public void testConvertRemovesUnnecessaryBackslashEscapes() {
124124
// Pandoc adds escapes for these characters but they're not needed in Python docstrings
125-
String html = "<html><body><p>Text with [brackets] and {braces} and (parens)</p></body></html>";
125+
String html = "<p>Text with [brackets] and {braces} and (parens)</p>";
126126
String result = MarkdownConverter.convert(html, createMockContext(true));
127127
// Should not have backslash escapes for these characters
128128
assertEquals("Text with [brackets] and {braces} and (parens)", result.trim());
129129
}
130130

131131
@Test
132132
public void testConvertMixedElements() {
133-
String html = "<html><body><h1>Title</h1><p>Paragraph</p><ul><li>Item 1</li><li>Item 2</li></ul></body></html>";
133+
String html = "<h1>Title</h1><p>Paragraph</p><ul><li>Item 1</li><li>Item 2</li></ul>";
134134
String result = MarkdownConverter.convert(html, createMockContext(true));
135135
String expected = "# Title\n\nParagraph\n\n- Item 1\n- Item 2";
136136
assertEquals(expected, result.trim());
137137
}
138138

139139
@Test
140140
public void testConvertNestedElements() {
141-
String html = "<html><body><h1>Title</h1><p>Paragraph with <strong>bold</strong> text</p></body></html>";
141+
String html = "<h1>Title</h1><p>Paragraph with <strong>bold</strong> text</p>";
142142
String result = MarkdownConverter.convert(html, createMockContext(true));
143143
String expected = """
144144
# Title
@@ -149,10 +149,9 @@ public void testConvertNestedElements() {
149149

150150
@Test
151151
public void testConvertMultilineText() {
152-
// Create a note with content > 72 chars to trigger wrapping
153-
String longLine =
152+
// Create a string with content > 72 chars to trigger wrapping
153+
String html =
154154
"This is a very long line that exceeds seventy-two characters and should wrap into two lines.";
155-
String html = "<html><body>" + longLine + "</body></html>";
156155
String result = MarkdownConverter.convert(html, createMockContext(true));
157156
String expected = """
158157
This is a very long line that exceeds seventy-two characters and should
@@ -162,7 +161,7 @@ public void testConvertMultilineText() {
162161

163162
@Test
164163
public void testConvertHtmlToMarkdownWithNoteTag() {
165-
String html = "<html><body><note>Note text</note></body></html>";
164+
String html = "<note>Note text</note>";
166165
String result = MarkdownConverter.convert(html, createMockContext(true));
167166
// Should convert to admonition format
168167
String expected = """
@@ -173,7 +172,7 @@ public void testConvertHtmlToMarkdownWithNoteTag() {
173172

174173
@Test
175174
public void testConvertHtmlToMarkdownWithImportantTag() {
176-
String html = "<html><body><important>Important text</important></body></html>";
175+
String html = "<important>Important text</important>";
177176
String result = MarkdownConverter.convert(html, createMockContext(true));
178177
// Should convert to warning admonition
179178
String expected = """
@@ -187,7 +186,7 @@ public void testConvertMultilineAdmonitionTag() {
187186
// Create a note with content > 72 chars to trigger wrapping
188187
String longLine =
189188
"This is a very long line that exceeds seventy-two characters and should wrap into two lines.";
190-
String html = "<html><body><note>" + longLine + "</note></body></html>";
189+
String html = "<note>" + longLine + "</note>";
191190
String result = MarkdownConverter.convert(html, createMockContext(true));
192191

193192
// Expected: first line up to 72 chars, rest on second line, both indented
@@ -198,6 +197,60 @@ public void testConvertMultilineAdmonitionTag() {
198197
assertEquals(expected, result.trim());
199198
}
200199

200+
@Test
201+
public void testConvertExcludesFullnameTag() {
202+
String html = "<p>Some text</p><fullname>AWS Service Name</fullname><p>More text</p>";
203+
String result = MarkdownConverter.convert(html, createMockContext(true));
204+
String expected = """
205+
Some text
206+
207+
More text""";
208+
assertEquals(expected, result);
209+
}
210+
211+
@Test
212+
public void testConvertBoldWithNestedFormatting() {
213+
String html = "<b><code>Test</code> test <a href=\"https://testing.com\">Link</a></b>";
214+
String result = MarkdownConverter.convert(html, createMockContext(true));
215+
assertEquals("**`Test` test [Link](https://testing.com)**", result);
216+
}
217+
218+
@Test
219+
public void testConvertHrefWithSpaces() {
220+
String html = "<p><a href=\" https://testing.com \">Link</a></p>";
221+
String result = MarkdownConverter.convert(html, createMockContext(true));
222+
// Leading and trailing spaces must be trimmed
223+
assertEquals("[Link](https://testing.com)", result);
224+
}
225+
226+
@Test
227+
public void testConvertLinkWithNestedCode() {
228+
String html = "<a href=\"https://testing.com\"><code>Link</code></a>";
229+
String result = MarkdownConverter.convert(html, createMockContext(true));
230+
assertEquals("[`Link`](https://testing.com)", result);
231+
}
232+
233+
@Test
234+
public void testConvertCodeWithNestedLinkSpaces() {
235+
String html = "<code> <a href=\"https://testing.com\">Link</a> </code>";
236+
String result = MarkdownConverter.convert(html, createMockContext(true));
237+
assertEquals("` `[`Link`](https://testing.com)` `", result);
238+
}
239+
240+
@Test
241+
public void testConvertCodeWithNestedLinkNoSpaces() {
242+
String html = "<code><a href=\"https://testing.com\">Link</a></code>";
243+
String result = MarkdownConverter.convert(html, createMockContext(true));
244+
assertEquals("[`Link`](https://testing.com)", result);
245+
}
246+
247+
@Test
248+
public void testConvertCodeWithNestedEmptyLink() {
249+
String html = "<code><a>Link</a></code>";
250+
String result = MarkdownConverter.convert(html, createMockContext(true));
251+
assertEquals("`Link`", result);
252+
}
253+
201254
private GenerationContext createMockContext(boolean isAwsService) {
202255
GenerationContext context = mock(GenerationContext.class);
203256
Model model = mock(Model.class);

codegen/gradle/libs.versions.toml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@ spotbugs = "6.0.22"
77
spotless = "8.0.0"
88
smithy-gradle-plugins = "1.3.0"
99
dep-analysis = "3.4.1"
10-
jsoup = "1.21.2"
11-
commonmark = "0.17.0"
1210

1311
[libraries]
1412
smithy-model = { module = "software.amazon.smithy:smithy-model", version.ref = "smithy" }
@@ -32,9 +30,6 @@ spotbugs = { module = "com.github.spotbugs.snom:spotbugs-gradle-plugin", version
3230
spotless = { module = "com.diffplug.spotless:spotless-plugin-gradle", version.ref = "spotless" }
3331
dependency-analysis = { module = "com.autonomousapps:dependency-analysis-gradle-plugin", version.ref = "dep-analysis" }
3432

35-
jsoup = { module = "org.jsoup:jsoup", version.ref = "jsoup" }
36-
commonmark = { module = "com.atlassian.commonmark:commonmark", version.ref ="commonmark" }
37-
3833
[plugins]
3934
smithy-gradle-base = { id = "software.amazon.smithy.gradle.smithy-base", version.ref = "smithy-gradle-plugins" }
4035
smithy-gradle-jar = { id = "software.amazon.smithy.gradle.smithy-jar", version.ref = "smithy-gradle-plugins" }

0 commit comments

Comments
 (0)