Skip to content

Commit ecab553

Browse files
Authored: Merge pull request #152 from advanced-security/v2_9_1 ("V2.9.1").
Commit ecab553 — 2 parents: da84215 + aad03d0.

File tree

15 files changed

+314
-148
lines changed

15 files changed

+314
-148
lines changed

.release.yml

+8-4
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
name: "policy-as-code"
2-
version: "2.9.0"
2+
repository: "advanced-security/policy-as-code"
3+
version: "2.9.1"
4+
5+
ecosystems:
6+
- Python
37

48
locations:
59
- name: "Update Docs"
610
paths:
711
- "*.md"
12+
- "docs/*.md"
813
patterns:
9-
- 'advanced-security/policy-as-code@v([0-9]\.[0-9]\.[0-9])'
10-
- '--branch "v([0-9]\.[0-9]\.[0-9])"'
11-
14+
- "{repository}@v{version}"
15+
- '--branch "v{version}"'

Pipfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ verify_ssl = true
66
[packages]
77
pyyaml = "*"
88
semantic-version = "*"
9-
ghastoolkit = "==0.14.2"
9+
ghastoolkit = "==0.15.1"
1010

1111
[dev-packages]
1212
sphinx = "*"

Pipfile.lock

+110-95
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ Here is how you can quickly setup policy-as-code.
4545
```yaml
4646
# Policy as Code
4747
- name: Advance Security Policy as Code
48-
uses: advanced-security/[email protected].0
48+
uses: advanced-security/[email protected].1
4949
```
5050
5151
> [!WARNING]
@@ -61,15 +61,15 @@ The Policy as Code project is a self-contained Python based CLI tool.
6161
**Bash / Zsh:**
6262

6363
```bash
64-
git clone --branch "v2.9.0" https://github.com/advanced-security/policy-as-code.git && cd ./policy-as-code
64+
git clone --branch "v2.9.1" https://github.com/advanced-security/policy-as-code.git && cd ./policy-as-code
6565
6666
./policy-as-code --help
6767
```
6868

6969
**Powershell:**
7070

7171
```Powershell
72-
git clone --branch "v2.9.0" https://github.com/advanced-security/policy-as-code.git
72+
git clone --branch "v2.9.1" https://github.com/advanced-security/policy-as-code.git
7373
cd policy-as-code
7474
7575
.\policy-as-code.ps1 --help
@@ -128,7 +128,7 @@ Here is an example of using a simple yet cross-organization using Policy as Code
128128
```yaml
129129
# Compliance
130130
- name: Advance Security Policy as Code
131-
uses: advanced-security/[email protected].0
131+
uses: advanced-security/[email protected].1
132132
with:
133133
# The owner/repo of where the policy is stored
134134
policy: GeekMasher/security-queries

ghascompliance/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/usr/bin/env python
2-
__version__ = "2.9.0"
2+
__version__ = "2.9.1"
33

44
__title__ = "GitHub Advanced Security Policy as Code"
55
__name__ = "ghascompliance"

vendor/charset_normalizer/api.py

+60-18
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ def from_bytes(
159159

160160
results: CharsetMatches = CharsetMatches()
161161

162+
early_stop_results: CharsetMatches = CharsetMatches()
163+
162164
sig_encoding, sig_payload = identify_sig_or_bom(sequences)
163165

164166
if sig_encoding is not None:
@@ -221,16 +223,20 @@ def from_bytes(
221223
try:
222224
if is_too_large_sequence and is_multi_byte_decoder is False:
223225
str(
224-
sequences[: int(50e4)]
225-
if strip_sig_or_bom is False
226-
else sequences[len(sig_payload) : int(50e4)],
226+
(
227+
sequences[: int(50e4)]
228+
if strip_sig_or_bom is False
229+
else sequences[len(sig_payload) : int(50e4)]
230+
),
227231
encoding=encoding_iana,
228232
)
229233
else:
230234
decoded_payload = str(
231-
sequences
232-
if strip_sig_or_bom is False
233-
else sequences[len(sig_payload) :],
235+
(
236+
sequences
237+
if strip_sig_or_bom is False
238+
else sequences[len(sig_payload) :]
239+
),
234240
encoding=encoding_iana,
235241
)
236242
except (UnicodeDecodeError, LookupError) as e:
@@ -367,7 +373,13 @@ def from_bytes(
367373
and not lazy_str_hard_failure
368374
):
369375
fallback_entry = CharsetMatch(
370-
sequences, encoding_iana, threshold, False, [], decoded_payload
376+
sequences,
377+
encoding_iana,
378+
threshold,
379+
False,
380+
[],
381+
decoded_payload,
382+
preemptive_declaration=specified_encoding,
371383
)
372384
if encoding_iana == specified_encoding:
373385
fallback_specified = fallback_entry
@@ -421,28 +433,58 @@ def from_bytes(
421433
),
422434
)
423435

424-
results.append(
425-
CharsetMatch(
426-
sequences,
427-
encoding_iana,
428-
mean_mess_ratio,
429-
bom_or_sig_available,
430-
cd_ratios_merged,
431-
decoded_payload,
432-
)
436+
current_match = CharsetMatch(
437+
sequences,
438+
encoding_iana,
439+
mean_mess_ratio,
440+
bom_or_sig_available,
441+
cd_ratios_merged,
442+
(
443+
decoded_payload
444+
if (
445+
is_too_large_sequence is False
446+
or encoding_iana in [specified_encoding, "ascii", "utf_8"]
447+
)
448+
else None
449+
),
450+
preemptive_declaration=specified_encoding,
433451
)
434452

453+
results.append(current_match)
454+
435455
if (
436456
encoding_iana in [specified_encoding, "ascii", "utf_8"]
437457
and mean_mess_ratio < 0.1
438458
):
459+
# If md says nothing to worry about, then... stop immediately!
460+
if mean_mess_ratio == 0.0:
461+
logger.debug(
462+
"Encoding detection: %s is most likely the one.",
463+
current_match.encoding,
464+
)
465+
if explain:
466+
logger.removeHandler(explain_handler)
467+
logger.setLevel(previous_logger_level)
468+
return CharsetMatches([current_match])
469+
470+
early_stop_results.append(current_match)
471+
472+
if (
473+
len(early_stop_results)
474+
and (specified_encoding is None or specified_encoding in tested)
475+
and "ascii" in tested
476+
and "utf_8" in tested
477+
):
478+
probable_result: CharsetMatch = early_stop_results.best() # type: ignore[assignment]
439479
logger.debug(
440-
"Encoding detection: %s is most likely the one.", encoding_iana
480+
"Encoding detection: %s is most likely the one.",
481+
probable_result.encoding,
441482
)
442483
if explain:
443484
logger.removeHandler(explain_handler)
444485
logger.setLevel(previous_logger_level)
445-
return CharsetMatches([results[encoding_iana]])
486+
487+
return CharsetMatches([probable_result])
446488

447489
if encoding_iana == sig_encoding:
448490
logger.debug(

vendor/charset_normalizer/cli/__main__.py

+30-6
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,14 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
109109
dest="force",
110110
help="Replace file without asking if you are sure, use this flag with caution.",
111111
)
112+
parser.add_argument(
113+
"-i",
114+
"--no-preemptive",
115+
action="store_true",
116+
default=False,
117+
dest="no_preemptive",
118+
help="Disable looking at a charset declaration to hint the detector.",
119+
)
112120
parser.add_argument(
113121
"-t",
114122
"--threshold",
@@ -133,31 +141,47 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
133141
args = parser.parse_args(argv)
134142

135143
if args.replace is True and args.normalize is False:
144+
if args.files:
145+
for my_file in args.files:
146+
my_file.close()
136147
print("Use --replace in addition of --normalize only.", file=sys.stderr)
137148
return 1
138149

139150
if args.force is True and args.replace is False:
151+
if args.files:
152+
for my_file in args.files:
153+
my_file.close()
140154
print("Use --force in addition of --replace only.", file=sys.stderr)
141155
return 1
142156

143157
if args.threshold < 0.0 or args.threshold > 1.0:
158+
if args.files:
159+
for my_file in args.files:
160+
my_file.close()
144161
print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
145162
return 1
146163

147164
x_ = []
148165

149166
for my_file in args.files:
150-
matches = from_fp(my_file, threshold=args.threshold, explain=args.verbose)
167+
matches = from_fp(
168+
my_file,
169+
threshold=args.threshold,
170+
explain=args.verbose,
171+
preemptive_behaviour=args.no_preemptive is False,
172+
)
151173

152174
best_guess = matches.best()
153175

154176
if best_guess is None:
155177
print(
156178
'Unable to identify originating encoding for "{}". {}'.format(
157179
my_file.name,
158-
"Maybe try increasing maximum amount of chaos."
159-
if args.threshold < 1.0
160-
else "",
180+
(
181+
"Maybe try increasing maximum amount of chaos."
182+
if args.threshold < 1.0
183+
else ""
184+
),
161185
),
162186
file=sys.stderr,
163187
)
@@ -258,8 +282,8 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
258282
try:
259283
x_[0].unicode_path = join(dir_path, ".".join(o_))
260284

261-
with open(x_[0].unicode_path, "w", encoding="utf-8") as fp:
262-
fp.write(str(best_guess))
285+
with open(x_[0].unicode_path, "wb") as fp:
286+
fp.write(best_guess.output())
263287
except IOError as e:
264288
print(str(e), file=sys.stderr)
265289
if my_file.closed is False:

vendor/charset_normalizer/constant.py

+2
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,8 @@
544544
"|",
545545
'"',
546546
"-",
547+
"(",
548+
")",
547549
}
548550

549551

vendor/charset_normalizer/legacy.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,24 @@
1-
from typing import Any, Dict, Optional, Union
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING, Any, Optional
24
from warnings import warn
35

46
from .api import from_bytes
57
from .constant import CHARDET_CORRESPONDENCE
68

9+
# TODO: remove this check when dropping Python 3.7 support
10+
if TYPE_CHECKING:
11+
from typing_extensions import TypedDict
12+
13+
class ResultDict(TypedDict):
14+
encoding: Optional[str]
15+
language: str
16+
confidence: Optional[float]
17+
718

819
def detect(
920
byte_str: bytes, should_rename_legacy: bool = False, **kwargs: Any
10-
) -> Dict[str, Optional[Union[str, float]]]:
21+
) -> ResultDict:
1122
"""
1223
chardet legacy method
1324
Detect the encoding of the given byte string. It should be mostly backward-compatible.

vendor/charset_normalizer/md.py

+16-3
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def reset(self) -> None: # pragma: no cover
236236

237237
@property
238238
def ratio(self) -> float:
239-
if self._character_count <= 24:
239+
if self._character_count <= 13:
240240
return 0.0
241241

242242
ratio_of_suspicious_range_usage: float = (
@@ -260,6 +260,7 @@ def __init__(self) -> None:
260260

261261
self._buffer: str = ""
262262
self._buffer_accent_count: int = 0
263+
self._buffer_glyph_count: int = 0
263264

264265
def eligible(self, character: str) -> bool:
265266
return True
@@ -279,6 +280,14 @@ def feed(self, character: str) -> None:
279280
and is_thai(character) is False
280281
):
281282
self._foreign_long_watch = True
283+
if (
284+
is_cjk(character)
285+
or is_hangul(character)
286+
or is_katakana(character)
287+
or is_hiragana(character)
288+
or is_thai(character)
289+
):
290+
self._buffer_glyph_count += 1
282291
return
283292
if not self._buffer:
284293
return
@@ -291,17 +300,20 @@ def feed(self, character: str) -> None:
291300
self._character_count += buffer_length
292301

293302
if buffer_length >= 4:
294-
if self._buffer_accent_count / buffer_length > 0.34:
303+
if self._buffer_accent_count / buffer_length >= 0.5:
295304
self._is_current_word_bad = True
296305
# Word/Buffer ending with an upper case accentuated letter are so rare,
297306
# that we will consider them all as suspicious. Same weight as foreign_long suspicious.
298-
if (
307+
elif (
299308
is_accentuated(self._buffer[-1])
300309
and self._buffer[-1].isupper()
301310
and all(_.isupper() for _ in self._buffer) is False
302311
):
303312
self._foreign_long_count += 1
304313
self._is_current_word_bad = True
314+
elif self._buffer_glyph_count == 1:
315+
self._is_current_word_bad = True
316+
self._foreign_long_count += 1
305317
if buffer_length >= 24 and self._foreign_long_watch:
306318
camel_case_dst = [
307319
i
@@ -325,6 +337,7 @@ def feed(self, character: str) -> None:
325337
self._foreign_long_watch = False
326338
self._buffer = ""
327339
self._buffer_accent_count = 0
340+
self._buffer_glyph_count = 0
328341
elif (
329342
character not in {"<", ">", "-", "=", "~", "|", "_"}
330343
and character.isdigit() is False

0 commit comments

Comments (0)