Skip to content

Commit

Permalink
[CI] More title normalization tweaks (open-telemetry#6089)
Browse files Browse the repository at this point in the history
  • Loading branch information
chalin authored Jan 28, 2025
1 parent d12f1e4 commit 60a361a
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 59 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"all": "bash -c 'x=0; for c in \"$@\"; do npm run $c || x=$((x+1)); done; ((!x)) || (echo \"ERROR: some scripts failed!\" && exit 1)' -",
"build:preview": "set -x && npm run _build -- --minify",
"build:production": "npm run _hugo -- --minify",
"build": "npm run _build",
"build": "npm run _build --",
"cd:public": "cd public &&",
"check:expired": "find content -name '*.md' | xargs ./scripts/list-expired.pl",
"check:filenames": "test -z \"$(npm run -s _ls-bad-filenames)\" || npm run -s _filename-error",
Expand Down
118 changes: 60 additions & 58 deletions scripts/content-modules/normalize-titles.pl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/perl -w -i
#
# DRAFT script used to normalize semconv doc-page tiles and add Hugo front matter
#

$^W = 1;

Expand Down Expand Up @@ -30,43 +29,31 @@
my $beforeTitle = '';

sub toTitleCase($) {
my $str = shift;
my ($str) = @_;

my @mixedCaseWords; # mixed-case or ALLCAPS
while ($str =~ /\b([a-z]?[A-Z][A-Z0-9]+|[A-Z]\w*[A-Z]\w*)\b/g) {
push @mixedCaseWords, $1;
}
# Capitalize non-mixedcase words
$str =~ s/(^|\s)([a-zA-Z])([a-z][a-z0-9]*)\b/$1\u$2$3/g;

$str =~ s/(\w+)/\u\L$1/g;
# Revert / lowercase articles etc.
$str =~ s/\b(A|And|As|By|For|In|On)\b/\L$1/g;

foreach my $word (@mixedCaseWords) {
my $lc_word = lc($word);
$str =~ s/\b$lc_word\b/$word/ig;
}
$str =~ s/\b(A|And|As|By|For|In|On|\.Js)\b/\L$1/g;
return $str;
}

my @specialWords = qw(Core); # for .NET

sub toSentenceCase($) {
my $str = shift;
my ($str) = @_;

my @mixedCaseWords = @specialWords; # mixed-case or ALLCAPS
while ($str =~ /\b([a-z]?[A-Z][A-Z0-9]+|[A-Z]\w*[A-Z]\w*)\b/g) {
push @mixedCaseWords, $1;
}
# Lowercase non-mixedcase words
$str =~ s/\b([A-Z]?[a-z][a-z0-9]*)\b/\l$1/g;

$str = lc $str;
# Capitalize the first word unless it is mixed case
$str =~ s/^([a-z][a-z0-9]*)\b/\u$1/;

# Replace words with their mixed-case or ALL CAPS versions
foreach my $word (@mixedCaseWords) {
my $lc_word = lc($word);
$str =~ s/\b\Q$lc_word\E\b/$word/g;
}

# Capitalize the first letter of the string
$str =~ s/^(\s*\w)/\u$1/;
# Handle exceptions
$str =~ s/(.NET) (core)/$1 \u$2/;
$str =~ s/(AI) (inference)/$1 \u$2/;
$str =~ s|google cloud|Google Cloud|i;
$str =~ s|pub/sub|Pub/Sub|;

return $str;
}
Expand All @@ -75,7 +62,7 @@ ()
my $frontMatter = '';
if ($frontMatterFromFile) {
# printf STDOUT "> $file has front matter:\n$frontMatterFromFile\n"; # if $gD;
$frontMatterFromFile = '' unless $frontMatterFromFile =~ /aliases|cSpell|cascade/i;
$frontMatterFromFile = '' unless $frontMatterFromFile =~ /auto_gen|aliases/i;
# printf STDOUT "> $file\n" if $ARGV =~ /\/system\b/;
}
$linkTitle = $title;
Expand Down Expand Up @@ -104,19 +91,20 @@ ()
$linkTitle = $1;
}

$linkTitle = 'Attributes' if $title eq 'General Attributes';
$linkTitle = 'Events' if $linkTitle =~ /Mobile Events/;
$linkTitle = 'Connect' if $title =~ /Connect RPC$/i;
$linkTitle = 'HTTP' if $linkTitle =~ /^HTTP Client and Server/i;
$linkTitle = 'SQL' if $title =~ /SQL Databases$/i;
$linkTitle = 'System use cases' if $title =~ /System .*?General Use Cases/i;
$linkTitle = $1 if $title =~ /Gen(?:erative) ?AI (\w+)$/i && $title !~ /Systems$/i;
$linkTitle = $1 if $title =~ /(OpenAI) \w+$/i;

# Missing an `s` in "Semantic Convention"?
if ($title =~ /^Semantic Convention\b/i and $title !~ /Groups$/i) {
$title =~ s/Semantic Convention\b/$&s/ig;
printf STDOUT "> $title -> $linkTitle - added 's' to 'Conventions'\n";
}
$linkTitle =~ s/^Database Client //;
$linkTitle =~ s/^(Database|Messaging) Client //;
if ($ARGV =~ /docs\/azure/) {
$linkTitle =~ s/ Resource Logs?//i;
$linkTitle =~ s/Azure //i;
Expand All @@ -129,6 +117,8 @@ ()
unless $ARGV =~ /gen-ai-metrics/;
$linkTitle =~ s/ (components|guide|queries|supplementary information|systems|platform)$//i;
$linkTitle =~ s/ \(command line interface\)//i;
$linkTitle =~ s/ resources$//i;
$linkTitle =~ s/(Process) and process runtime$/$1/i;

$linkTitle = '.NET' if $linkTitle =~ /.net common language runtime/i;
$linkTitle = 'CLI' if $linkTitle =~ /\(command line interface\) programs/i;
Expand All @@ -137,53 +127,68 @@ ()
$linkTitle =~ s/Semantic Conventions? Stability //i;
}

if ($linkTitle and $linkTitle ne $title) {
$linkTitle = toSentenceCase($linkTitle) unless $linkTitle =~ /^gRPC/;
if ($frontMatterFromFile =~ /linkTitle: /) {
$frontMatterFromFile =~ s/^(linkTitle: ).*$/$1$linkTitle/m;
} else {
$frontMatter .= "linkTitle: $linkTitle\n"
$frontMatter .= $frontMatterFromFile if $frontMatterFromFile;

if ($linkTitle && $linkTitle ne $title) {
$linkTitle = toSentenceCase($linkTitle); # unless $linkTitle =~ /^gRPC/;
if ($frontMatter !~ /linkTitle: /) {
$frontMatter .= "linkTitle: $linkTitle\n";
} elsif ($frontMatter !~ /^auto_gen:/m) {
$frontMatter =~ s/^(linkTitle: ).*$/$1$linkTitle/m;
}
}

$frontMatter .= $frontMatterFromFile if $frontMatterFromFile;

if ($ARGV =~ /docs\/(.*?)(README|_index).md$/) {
$frontMatter .= "path_base_for_github_subdir:\n";
$frontMatter .= " from: tmp/semconv/docs/$1_index.md\n";
$frontMatter .= " to: $1README.md\n";
}
$frontMatter .= "weight: -1\n" if $title eq 'General Semantic Conventions';

return $frontMatter;
}

sub printTitleAndFrontMatter() {
my $frontMatter;


# if ($ARGV =~ /docs\/(README|_index)/) {
# print STDOUT "> $ARGV\n > frontMatterFromFile: $frontMatterFromFile\n";
# print STDOUT " > title: $title\n";
# print STDOUT " > linkTitle: $linkTitle\n";
# }

if ($frontMatterFromFile && $frontMatterFromFile =~ /auto_gen:\s*false/) {
$frontMatter = $frontMatterFromFile;
} else {
$frontMatter = computeTitleAndFrontMatter();
}
my $frontMatter = computeTitleAndFrontMatter();

if ($frontMatter) {
$frontMatter = "<!--- Hugo front matter used to generate the website version of this page:\n" . $frontMatter;
$frontMatter .= "--->\n";
print "$frontMatter\n";
}

print $beforeTitle if $beforeTitle;
$title = toTitleCase($title);
print "# $title\n"
}

sub gatherFrontMatter() {
my $autoGenValues = 'false|below';
my $autoGenDirective = '';
my $autoGenSkip = 0;

while(<>) {
last if /^--->/;
next if $autoGenSkip;

my ($keyWord, $autoGenDirective) = /^(auto.?gen): ([^\#]+)/;
if ($keyWord) {
# print STDOUT ">> $ARGV:\n$frontMatterFromFile";
if ($keyWord ne 'auto_gen') {
warn "$ARGV: WARN: misspelled keyword, should be 'auto_gen' not '$keyWord'\n";
} elsif (!$autoGenDirective or $autoGenDirective !~ /^($autoGenValues)/) {
warn "$ARGV: WARN: missing or unrecognized 'auto_gen' value, should match '$autoGenValues', not $autoGenDirective\n";
} elsif ($autoGenDirective =~ /^below/) {
$autoGenSkip = 1;
# print STDOUT ">>>> skipping\n";
} else {
# print STDOUT ">> wa?\n";
}
}

$frontMatterFromFile .= $_;
}
}

# main

my $titleRegexStr = '^#\s+(.*)';
Expand All @@ -200,10 +205,7 @@ ()
$beforeTitle = '';
$linkTitle = '';
if (/^<!--- Hugo/) {
while(<>) {
last if /^--->/;
$frontMatterFromFile .= $_;
}
gatherFrontMatter();
next;
}
}
Expand Down

0 comments on commit 60a361a

Please sign in to comment.