11#!/usr/bin/env python3
22from argparse import ArgumentParser
3+ from typing import Generator
34import tempfile
45import re
56import urllib .request
2324
2425CERT_WIKI = "https://wiki.sei.cmu.edu"
2526RULES_LIST_C = "/confluence/display/c/2+Rules"
27+ RECOMMENDED_LIST_C = "/confluence/display/c/3+Recommendations"
2628RULES_LIST_CPP = "/confluence/display/cplusplus/2+Rules"
2729
2830cache_path = script_path .parent / '.cache'
@@ -47,16 +49,22 @@ def soupify(url: str) -> BeautifulSoup:
4749
4850 return BeautifulSoup (content , 'html.parser' )
4951
50-
51- def get_rules ():
52- rules = []
53- for soup in [soupify (f"{ CERT_WIKI } { RULES_LIST_C } " ), soupify (f"{ CERT_WIKI } { RULES_LIST_CPP } " )]:
def get_rule_listings() -> Generator[Tag, None, None]:
    """Yield the heading element that opens each CERT listing page.

    The C rules, C++ rules and C recommendations index pages are loaded, in
    that order, through ``soupify``. For each page the ``<h1>`` whose text
    matches the expected listing title is yielded.

    A page is skipped when ``soupify`` returns ``None`` for it, or when the
    expected heading is not present, so consumers never receive ``None``.

    Yields:
        The ``<h1>`` element that starts the listing on each page.
    """
    # Each index page is paired with the heading text that opens its listing.
    listing_pages = [
        (RULES_LIST_C, "Rule Listing"),
        (RULES_LIST_CPP, "Rule Listing"),
        (RECOMMENDED_LIST_C, "Recommendation Listing"),
    ]

    for page_path, heading_text in listing_pages:
        soup = soupify(f"{CERT_WIKI}{page_path}")
        if soup is None:
            continue

        listing_start = soup.find("h1", string=heading_text)
        # find() returns None when nothing matches; yielding that would make
        # the consumer fail on `.next_element`, so skip the page instead.
        if listing_start is not None:
            yield listing_start
64+
65+ def get_rules ():
66+ rules = []
67+ for rule_listing_start in get_rule_listings ():
6068 for link in rule_listing_start .next_element .next_element .find_all ('a' ):
6169 if '-C' in link .string :
6270 rule , title = map (str .strip , link .string .split ('.' , 1 ))
@@ -214,6 +222,8 @@ def helper(node):
214222 # Fix a broken url present in many CERT-C pages
215223 if node .name == 'a' and 'href' in node .attrs and node ['href' ] == "http://BB. Definitions#vulnerability" :
216224 node ['href' ] = "https://wiki.sei.cmu.edu/confluence/display/c/BB.+Definitions#BB.Definitions-vulnerability"
225+ elif node .name == 'a' and 'href' in node .attrs and node ['href' ] == "http://BB. Definitions#unexpected behavior" :
226+ node ['href' ] = "https://wiki.sei.cmu.edu/confluence/display/c/BB.+Definitions#BB.Definitions-unexpectedbehavior"
217227 # Turn relative URLs into absolute URLs
218228 elif node .name == 'a' and 'href' in node .attrs and node ['href' ].startswith ("/confluence" ):
219229 node ['href' ] = f"{ CERT_WIKI } { node ['href' ]} "
0 commit comments