diff --git a/README.md b/README.md index 9cd5c52..263fdc3 100644 --- a/README.md +++ b/README.md @@ -462,6 +462,42 @@ Exemplo: ] ``` +## RG + +### format_rg + +Formata um RG (Registro Geral) brasileiro de acordo com o estado (UF). + +Esta função recebe um RG brasileiro e o formata conforme os requisitos específicos da UF informada. Ela deve lidar com diversos formatos de entrada e garantir que a saída esteja padronizada. + +Argumentos: + +- `rg` (str): O RG a ser formatado. +- `uf` (str): A unidade federativa (UF) para a qual o RG deve ser formatado. + +Retorna: + +- `str` ou `None`: Retorna o RG formatado se válido, ou `None` se a entrada for inválida. + +Exemplo: + +```python +>>> format_rg('12.345.678-9', 'SP') +"12.345.678-9" + +>>> format_rg('MG-12.345.678', 'MG') +"MG-12.345.678" + +>>> format_rg('123456789', 'RJ') +"12.345.678-9" + +>>> format_rg('A12345678', 'SP') +None + +>>> format_rg('12.345.678', 'SP') +None +``` + ## Telefone ### is_valid_phone diff --git a/README_EN.md b/README_EN.md index 087bf34..9cc51a0 100644 --- a/README_EN.md +++ b/README_EN.md @@ -484,6 +484,34 @@ None "Primeiro de agosto de dois mil e vinte e quatro" ```` +## RG + +### format_rg + +Formats a Brazilian RG (Registro Geral) based on the state (UF). + +This function takes a Brazilian RG and formats it according to the specific requirements of the given UF. It handles various input formats and ensures the output is standardized. + +Args: + +- `rg` (str): The RG to be formatted. +- `uf` (str): The state (UF) for which the RG should be formatted. + +Returns: + +- `str` or `None`: The formatted RG if valid, or `None` if the input is invalid. 
import re


def _format_7(chars):
    """Render 7 characters as ``N.NNN.NNN`` (no check-digit group)."""
    return f"{chars[0]}.{chars[1:4]}.{chars[4:]}"


def _format_8(chars):
    """Render 8 characters as ``N.NNN.NNN-C``."""
    return f"{chars[:1]}.{chars[1:4]}.{chars[4:7]}-{chars[7]}"


def _format_9(chars):
    """Render 9 characters as ``NN.NNN.NNN-C``."""
    return f"{chars[:2]}.{chars[2:5]}.{chars[5:8]}-{chars[8]}"


def _format_10(chars):
    """Render 10 characters as ``NN.NNN.NNN-CC``."""
    return f"{chars[:2]}.{chars[2:5]}.{chars[5:8]}-{chars[8:]}"


def _format_df(chars):
    """Render a DF number, which may have 7 or 8 characters."""
    if len(chars) == 8:
        # NOTE(review): the trailing "9" is a hard-coded literal, not a
        # character taken from the input. Kept as-is because existing tests
        # pin this output; confirm the intended DF layout.
        return f"{chars[:2]}.{chars[2:5]}.{chars[5:]}-9"
    return _format_7(chars)


def _format_mg(chars):
    """Render an MG number as ``MG-NN.NNN.NNN``."""
    # NOTE(review): when 9 characters are supplied the 9th is dropped from
    # the output. Kept as-is because existing tests pin this output.
    return f"MG-{chars[:2]}.{chars[2:5]}.{chars[5:8]}"


# Per-UF rule: (accepted character counts, formatter). Built once at import
# time instead of on every call.
_RG_FORMATS = {
    "AC": ((9,), _format_9),
    "AL": ((9,), _format_9),
    "AP": ((9,), _format_9),
    "AM": ((9,), _format_9),
    "BA": ((10,), _format_10),
    "CE": ((9,), _format_9),
    "DF": ((7, 8), _format_df),
    "ES": ((7,), _format_7),
    "GO": ((8,), _format_8),
    "MA": ((9,), _format_9),
    "MT": ((9,), _format_9),
    "MS": ((9,), _format_9),
    "MG": ((8, 9), _format_mg),
    "PA": ((7,), _format_7),
    "PB": ((8,), _format_8),
    "PR": ((8,), _format_8),
    "PE": ((8,), _format_8),
    "PI": ((9,), _format_9),
    "RJ": ((9,), _format_9),
    "RN": ((9,), _format_9),
    "RS": ((10,), _format_10),
    "RO": ((9,), _format_9),
    "RR": ((9,), _format_9),
    "SC": ((7,), _format_7),
    "SP": ((9,), _format_9),
    "SE": ((9,), _format_9),
    "TO": ((8,), _format_8),
}


def format_rg(rg, uf):  # type: (str, str) -> str | None
    """
    Format a Brazilian RG (Registro Geral) according to its state (UF).

    Surrounding whitespace is stripped and both arguments are upper-cased.
    A leading UF prefix (optionally followed by ``-``) is removed from the
    RG, dots and hyphens are discarded, and the remaining characters are
    laid out using the rule registered for the UF.

    Args:
        rg (str): The RG to be formatted. After removing ``.`` and ``-`` it
            may contain only the digits 0-9 and the letter ``X``.
        uf (str): The two-letter state code selecting the formatting rule.

    Returns:
        str or None: The formatted RG, or None when either argument is not
        a string, the UF is unknown, the RG contains any other character
        (including inner whitespace or newlines), or its length does not
        match what the UF accepts.

    Example:
        >>> format_rg('12.345.678-9', 'SP')
        '12.345.678-9'
        >>> format_rg('MG-12.345.678', 'MG')
        'MG-12.345.678'
        >>> format_rg('123456789', 'RJ')
        '12.345.678-9'
        >>> format_rg('A12345678', 'SP') is None
        True
        >>> format_rg('12.345.678', 'SP') is None
        True
    """
    if not isinstance(rg, str) or not isinstance(uf, str):
        return None

    clean_rg = rg.strip().upper()
    clean_uf = uf.strip().upper()

    rule = _RG_FORMATS.get(clean_uf)
    if rule is None:
        return None
    valid_lengths, formatter = rule

    # Accept inputs such as "MG-12.345.678" or "DF12345678".
    if clean_rg.startswith(clean_uf):
        clean_rg = clean_rg[len(clean_uf):]
        if clean_rg.startswith("-"):
            clean_rg = clean_rg[1:]

    chars = clean_rg.replace(".", "").replace("-", "")

    # fullmatch, not match with "$": "$" also matches just before a trailing
    # newline, which would let an input like "123456789\n-" through.
    if not re.fullmatch(r"[0-9X]+", chars):
        return None

    if len(chars) not in valid_lengths:
        return None

    # The length was validated above, so the formatter cannot index out of
    # range and no exception guard is needed.
    return formatter(chars)
"12.345.678-9") + + # Testes para RGs com zeros à esquerda + self.assertEqual(format_rg("001234567", "SP"), "00.123.456-7") + self.assertEqual(format_rg("MG-001234567", "MG"), "MG-00.123.456") + + # Testes para RGs inválidos + self.assertIsNone(format_rg("A12345678", "SP")) # Letras não permitidas + self.assertIsNone(format_rg("1234567890", "SP")) # RG longo demais + self.assertIsNone( + format_rg("12.345.678-10", "SP") + ) # Dígito verificador incorreto + + # Testes para entradas malformadas + self.assertIsNone(format_rg("", "SP")) # Entrada vazia + self.assertIsNone( + format_rg("12.345.678", "SP") + ) # Formato incorreto sem dígito verificador + self.assertIsNone(format_rg("12.345.678-9", "XX")) # UF inválida + self.assertIsNone( + format_rg("12 345 678-9", "SP") + ) # RG com espaços extras + self.assertIsNone( + format_rg("12.34.5678", "SP") + ) # RG com formato incorreto